Merge "Return correct JDWP type tag for array class"
diff --git a/Android.mk b/Android.mk
index b87f0d3..6c388e5 100644
--- a/Android.mk
+++ b/Android.mk
@@ -232,7 +232,7 @@
 define declare-test-art-target-run-test
 .PHONY: test-art-target-run-test-$(1)
 test-art-target-run-test-$(1): test-art-target-sync $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) $(1)
 	@echo test-art-target-run-test-$(1) PASSED
 
 TEST_ART_TARGET_RUN_TEST_TARGETS += test-art-target-run-test-$(1)
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index ba54e04..551b03c 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -84,6 +84,7 @@
     else
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
     endif
+    LOCAL_LDLIBS += -lpthread
   endif
 
   ifeq ($$(art_ndebug_or_debug),ndebug)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 62916e3..da0b500 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -176,6 +176,7 @@
         # GCC host compiled tests fail with this linked, presumably due to destructors that run.
         LOCAL_STATIC_LIBRARIES += libgtest_host
     endif
+    LOCAL_LDLIBS += -lpthread -ldl
     include $(BUILD_HOST_EXECUTABLE)
     art_gtest_exe := $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
     ART_HOST_GTEST_EXECUTABLES += $$(art_gtest_exe)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4eb9ff5..b17cd52 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -69,6 +69,7 @@
 	jni/quick/arm64/calling_convention_arm64.cc \
 	jni/quick/mips/calling_convention_mips.cc \
 	jni/quick/x86/calling_convention_x86.cc \
+	jni/quick/x86_64/calling_convention_x86_64.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
 	optimizing/builder.cc \
@@ -89,6 +90,8 @@
 	utils/mips/managed_register_mips.cc \
 	utils/x86/assembler_x86.cc \
 	utils/x86/managed_register_x86.cc \
+	utils/x86_64/assembler_x86_64.cc \
+	utils/x86_64/managed_register_x86_64.cc \
 	utils/scoped_arena_allocator.cc \
 	buffered_output_stream.cc \
 	compilers.cc \
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 344f3ef..8e013c1 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -105,6 +105,7 @@
     case kArm64:
     case kMips:
     case kX86:
+    case kX86_64:
       return 0;
     case kThumb2: {
       // +1 to set the low-order bit so a BLX will switch to Thumb mode
@@ -123,6 +124,7 @@
     case kArm64:
     case kMips:
     case kX86:
+    case kX86_64:
       return code_pointer;
     case kThumb2: {
       uintptr_t address = reinterpret_cast<uintptr_t>(code_pointer);
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb4a336..964a222 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -417,7 +417,7 @@
 enum FixupKind {
   kFixupNone,
   kFixupLabel,       // For labels we just adjust the offset.
-  kFixupLoad,        // Mostly for imediates.
+  kFixupLoad,        // Mostly for immediates.
   kFixupVLoad,       // FP load which *may* be pc-relative.
   kFixupCBxZ,        // Cbz, Cbnz.
   kFixupPushPop,     // Not really pc relative, but changes size based on args.
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 64fa685..3f122de 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -287,9 +287,7 @@
   }
 
   cu.EndTiming();
-  driver.GetTimingsLogger()->Start();
   driver.GetTimingsLogger()->AddLogger(cu.timings);
-  driver.GetTimingsLogger()->End();
   return result;
 }
 
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index caf84d9..7955d6c 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1062,134 +1062,142 @@
  */
 #define PADDING_MOV_R5_R5               0x1C2D
 
-void ArmMir2Lir::EncodeLIR(LIR* lir) {
-  int opcode = lir->opcode;
-  if (IsPseudoLirOp(opcode)) {
-    if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
-      // Note: size for this opcode will be either 0 or 2 depending on final alignment.
-      lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff);
-      lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
-      lir->flags.size = (lir->offset & 0x2);
-    }
-  } else if (LIKELY(!lir->flags.is_nop)) {
-    const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
-    uint32_t bits = encoder->skeleton;
-    for (int i = 0; i < 4; i++) {
-      uint32_t operand;
-      uint32_t value;
-      operand = lir->operands[i];
-      ArmEncodingKind kind = encoder->field_loc[i].kind;
-      if (LIKELY(kind == kFmtBitBlt)) {
-        value = (operand << encoder->field_loc[i].start) &
-            ((1 << (encoder->field_loc[i].end + 1)) - 1);
-        bits |= value;
-      } else {
-        switch (encoder->field_loc[i].kind) {
-          case kFmtSkip:
-            break;  // Nothing to do, but continue to next.
-          case kFmtUnused:
-            i = 4;  // Done, break out of the enclosing loop.
-            break;
-          case kFmtFPImm:
-            value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
-            value |= (operand & 0x0F) << encoder->field_loc[i].start;
-            bits |= value;
-            break;
-          case kFmtBrOffset:
-            value = ((operand  & 0x80000) >> 19) << 26;
-            value |= ((operand & 0x40000) >> 18) << 11;
-            value |= ((operand & 0x20000) >> 17) << 13;
-            value |= ((operand & 0x1f800) >> 11) << 16;
-            value |= (operand  & 0x007ff);
-            bits |= value;
-            break;
-          case kFmtShift5:
-            value = ((operand & 0x1c) >> 2) << 12;
-            value |= (operand & 0x03) << 6;
-            bits |= value;
-            break;
-          case kFmtShift:
-            value = ((operand & 0x70) >> 4) << 12;
-            value |= (operand & 0x0f) << 4;
-            bits |= value;
-            break;
-          case kFmtBWidth:
-            value = operand - 1;
-            bits |= value;
-            break;
-          case kFmtLsb:
-            value = ((operand & 0x1c) >> 2) << 12;
-            value |= (operand & 0x03) << 6;
-            bits |= value;
-            break;
-          case kFmtImm6:
-            value = ((operand & 0x20) >> 5) << 9;
-            value |= (operand & 0x1f) << 3;
-            bits |= value;
-            break;
-          case kFmtDfp: {
-            DCHECK(ARM_DOUBLEREG(operand));
-            DCHECK_EQ((operand & 0x1), 0U);
-            uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1;
-            /* Snag the 1-bit slice and position it */
-            value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
-            /* Extract and position the 4-bit slice */
-            value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
-            bits |= value;
-            break;
+uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+  for (; lir != NULL; lir = NEXT_LIR(lir)) {
+    if (!lir->flags.is_nop) {
+      int opcode = lir->opcode;
+      if (IsPseudoLirOp(opcode)) {
+        if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
+          // Note: size for this opcode will be either 0 or 2 depending on final alignment.
+          if (lir->offset & 0x2) {
+            write_pos[0] = (PADDING_MOV_R5_R5 & 0xff);
+            write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
+            write_pos += 2;
           }
-          case kFmtSfp:
-            DCHECK(ARM_SINGLEREG(operand));
-            /* Snag the 1-bit slice and position it */
-            value = (operand & 0x1) << encoder->field_loc[i].end;
-            /* Extract and position the 4-bit slice */
-            value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
+        }
+      } else if (LIKELY(!lir->flags.is_nop)) {
+        const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
+        uint32_t bits = encoder->skeleton;
+        for (int i = 0; i < 4; i++) {
+          uint32_t operand;
+          uint32_t value;
+          operand = lir->operands[i];
+          ArmEncodingKind kind = encoder->field_loc[i].kind;
+          if (LIKELY(kind == kFmtBitBlt)) {
+            value = (operand << encoder->field_loc[i].start) &
+                ((1 << (encoder->field_loc[i].end + 1)) - 1);
             bits |= value;
-            break;
-          case kFmtImm12:
-          case kFmtModImm:
-            value = ((operand & 0x800) >> 11) << 26;
-            value |= ((operand & 0x700) >> 8) << 12;
-            value |= operand & 0x0ff;
-            bits |= value;
-            break;
-          case kFmtImm16:
-            value = ((operand & 0x0800) >> 11) << 26;
-            value |= ((operand & 0xf000) >> 12) << 16;
-            value |= ((operand & 0x0700) >> 8) << 12;
-            value |= operand & 0x0ff;
-            bits |= value;
-            break;
-          case kFmtOff24: {
-            uint32_t signbit = (operand >> 31) & 0x1;
-            uint32_t i1 = (operand >> 22) & 0x1;
-            uint32_t i2 = (operand >> 21) & 0x1;
-            uint32_t imm10 = (operand >> 11) & 0x03ff;
-            uint32_t imm11 = operand & 0x07ff;
-            uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
-            uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
-            value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
-                imm11;
-            bits |= value;
+          } else {
+            switch (encoder->field_loc[i].kind) {
+              case kFmtSkip:
+                break;  // Nothing to do, but continue to next.
+              case kFmtUnused:
+                i = 4;  // Done, break out of the enclosing loop.
+                break;
+              case kFmtFPImm:
+                value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
+                value |= (operand & 0x0F) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              case kFmtBrOffset:
+                value = ((operand  & 0x80000) >> 19) << 26;
+                value |= ((operand & 0x40000) >> 18) << 11;
+                value |= ((operand & 0x20000) >> 17) << 13;
+                value |= ((operand & 0x1f800) >> 11) << 16;
+                value |= (operand  & 0x007ff);
+                bits |= value;
+                break;
+              case kFmtShift5:
+                value = ((operand & 0x1c) >> 2) << 12;
+                value |= (operand & 0x03) << 6;
+                bits |= value;
+                break;
+              case kFmtShift:
+                value = ((operand & 0x70) >> 4) << 12;
+                value |= (operand & 0x0f) << 4;
+                bits |= value;
+                break;
+              case kFmtBWidth:
+                value = operand - 1;
+                bits |= value;
+                break;
+              case kFmtLsb:
+                value = ((operand & 0x1c) >> 2) << 12;
+                value |= (operand & 0x03) << 6;
+                bits |= value;
+                break;
+              case kFmtImm6:
+                value = ((operand & 0x20) >> 5) << 9;
+                value |= (operand & 0x1f) << 3;
+                bits |= value;
+                break;
+              case kFmtDfp: {
+                DCHECK(ARM_DOUBLEREG(operand));
+                DCHECK_EQ((operand & 0x1), 0U);
+                uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1;
+                /* Snag the 1-bit slice and position it */
+                value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end;
+                /* Extract and position the 4-bit slice */
+                value |= (reg_name & 0x0f) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              }
+              case kFmtSfp:
+                DCHECK(ARM_SINGLEREG(operand));
+                /* Snag the 1-bit slice and position it */
+                value = (operand & 0x1) << encoder->field_loc[i].end;
+                /* Extract and position the 4-bit slice */
+                value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start;
+                bits |= value;
+                break;
+              case kFmtImm12:
+              case kFmtModImm:
+                value = ((operand & 0x800) >> 11) << 26;
+                value |= ((operand & 0x700) >> 8) << 12;
+                value |= operand & 0x0ff;
+                bits |= value;
+                break;
+              case kFmtImm16:
+                value = ((operand & 0x0800) >> 11) << 26;
+                value |= ((operand & 0xf000) >> 12) << 16;
+                value |= ((operand & 0x0700) >> 8) << 12;
+                value |= operand & 0x0ff;
+                bits |= value;
+                break;
+              case kFmtOff24: {
+                uint32_t signbit = (operand >> 31) & 0x1;
+                uint32_t i1 = (operand >> 22) & 0x1;
+                uint32_t i2 = (operand >> 21) & 0x1;
+                uint32_t imm10 = (operand >> 11) & 0x03ff;
+                uint32_t imm11 = operand & 0x07ff;
+                uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
+                uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
+                value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+                    imm11;
+                bits |= value;
+                }
+                break;
+              default:
+                LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
             }
-            break;
-          default:
-            LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+          }
+        }
+        if (encoder->size == 4) {
+          write_pos[0] = ((bits >> 16) & 0xff);
+          write_pos[1] = ((bits >> 24) & 0xff);
+          write_pos[2] = (bits & 0xff);
+          write_pos[3] = ((bits >> 8) & 0xff);
+          write_pos += 4;
+        } else {
+          DCHECK_EQ(encoder->size, 2);
+          write_pos[0] = (bits & 0xff);
+          write_pos[1] = ((bits >> 8) & 0xff);
+          write_pos += 2;
         }
       }
     }
-    if (encoder->size == 4) {
-      lir->u.a.bytes[0] = ((bits >> 16) & 0xff);
-      lir->u.a.bytes[1] = ((bits >> 24) & 0xff);
-      lir->u.a.bytes[2] = (bits & 0xff);
-      lir->u.a.bytes[3] = ((bits >> 8) & 0xff);
-    } else {
-      DCHECK_EQ(encoder->size, 2);
-      lir->u.a.bytes[0] = (bits & 0xff);
-      lir->u.a.bytes[1] = ((bits >> 8) & 0xff);
-    }
-    lir->flags.size = encoder->size;
   }
+  return write_pos;
 }
 
 // Assemble the LIR into binary instruction format.
@@ -1198,7 +1206,7 @@
   LIR* prev_lir;
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
-  CodeOffset starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0);
+  CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
   data_offset_ = (starting_offset + 0x3) & ~0x3;
   int32_t offset_adjustment;
   AssignDataOffsets();
@@ -1304,8 +1312,6 @@
               lir->operands[2] = 0;
               lir->operands[1] = base_reg;
             }
-            // Must redo encoding here - won't ever revisit this node.
-            EncodeLIR(lir);
             prev_lir = new_adr;  // Continue scan with new_adr;
             lir = new_adr->u.a.pcrel_next;
             res = kRetryAll;
@@ -1346,9 +1352,8 @@
             /* operand[0] is src1 in both cb[n]z & CmpRI8 */
             lir->operands[1] = 0;
             lir->target = 0;
-            EncodeLIR(lir);   // NOTE: sets flags.size.
+            lir->flags.size = EncodingMap[lir->opcode].size;
             // Add back the new size.
-            DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size));
             offset_adjustment += lir->flags.size;
             // Set up the new following inst.
             new_inst->offset = lir->offset + lir->flags.size;
@@ -1570,20 +1575,6 @@
         default:
           LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
       }
-      /*
-       * If one of the pc-relative instructions expanded we'll have
-       * to make another pass.  Don't bother to fully assemble the
-       * instruction.
-       */
-      if (res == kSuccess) {
-        EncodeLIR(lir);
-        if (assembler_retries == 0) {
-          // Go ahead and fix up the code buffer image.
-          for (int i = 0; i < lir->flags.size; i++) {
-            code_buffer_[lir->offset + i] = lir->u.a.bytes[i];
-          }
-        }
-      }
       prev_lir = lir;
       lir = lir->u.a.pcrel_next;
     }
@@ -1602,21 +1593,15 @@
     }
   }
 
-  // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is.
-  if (assembler_retries != 0) {
-    code_buffer_.clear();
-    for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
-      if (lir->flags.is_nop) {
-        continue;
-      } else  {
-        for (int i = 0; i < lir->flags.size; i++) {
-          code_buffer_.push_back(lir->u.a.bytes[i]);
-        }
-      }
-    }
-  }
+  // Build the CodeBuffer.
+  DCHECK_LE(data_offset_, total_size_);
+  code_buffer_.reserve(total_size_);
+  code_buffer_.resize(starting_offset);
+  uint8_t* write_pos = &code_buffer_[0];
+  write_pos = EncodeLIRs(write_pos, first_lir_insn_);
+  DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  data_offset_ = (code_buffer_.size() + 0x3) & ~0x3;
+  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
 
   // Install literals
   InstallLiteralPools();
@@ -1641,19 +1626,11 @@
 }
 
 // Encode instruction bit pattern and assign offsets.
-uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
+uint32_t ArmMir2Lir::LinkFixupInsns(LIR* head_lir, LIR* tail_lir, uint32_t offset) {
   LIR* end_lir = tail_lir->next;
 
-  /*
-   * A significant percentage of methods can be assembled in a single pass.  We'll
-   * go ahead and build the code image here, leaving holes for pc-relative fixup
-   * codes.  If the code size changes during that pass, we'll have to throw away
-   * this work - but if not, we're ready to go.
-   */
-  code_buffer_.reserve(estimated_native_code_size_ + 256);  // Add a little slop.
   LIR* last_fixup = NULL;
   for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
-    lir->offset = offset;
     if (!lir->flags.is_nop) {
       if (lir->flags.fixup != kFixupNone) {
         if (!IsPseudoLirOp(lir->opcode)) {
@@ -1675,11 +1652,7 @@
           last_fixup->u.a.pcrel_next = lir;
         }
         last_fixup = lir;
-      } else {
-        EncodeLIR(lir);
-      }
-      for (int i = 0; i < lir->flags.size; i++) {
-        code_buffer_.push_back(lir->u.a.bytes[i]);
+        lir->offset = offset;
       }
       offset += lir->flags.size;
     }
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 175fc06..d6724f1 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -183,15 +183,18 @@
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
-    LIR* null_check_branch;
+    LIR* null_check_branch = nullptr;
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
     NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
@@ -216,8 +219,8 @@
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
     LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
-    MarkPossibleNullPointerException(opt_flags);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+    MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
     OpIT(kCondEq, "");
     NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
@@ -241,7 +244,7 @@
   FlushAllRegs();
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
-  LIR* null_check_branch;
+  LIR* null_check_branch = nullptr;
   LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), rs_r2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
@@ -249,9 +252,12 @@
       null_check_branch = nullptr;  // No null check.
     } else {
       // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
-      null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      if (Runtime::Current()->ExplicitNullChecks()) {
+        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+      }
     }
     LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
     StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
@@ -404,11 +410,17 @@
       }
     } else {
       // Implicit stack overflow check.
-      // Generate a load from [sp, #-framesize].  If this is in the stack
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
-      LoadWordDisp(rs_rARM_SP, 0, rs_rARM_LR);
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value,
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      LoadWordDisp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
+      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
   } else {
     OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
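
The implicit stack-overflow check above works by probing memory below the stack pointer before the frame is actually committed: the load through r12 (sp - kStackOverflowReservedBytes) either succeeds harmlessly or lands in the protected region below the stack, producing a segmentation fault that the runtime's fault handling can report as a stack overflow. A rough host-side sketch of the probe-into-a-guard-page idea, assuming a Linux environment; the mmap/mprotect layout, constants, and recovery via siglongjmp are illustrative and not how the runtime itself is structured:

#include <csetjmp>
#include <csignal>
#include <cstddef>
#include <cstdio>
#include <sys/mman.h>

static const size_t kStackBytes = 64 * 1024;   // simulated stack size
static const size_t kRedZoneBytes = 4096;      // stand-in for the reserved overflow region

static sigjmp_buf g_recover;

// Stand-in for the runtime's fault handling: the probe below faults into here.
static void HandleFault(int) {
  siglongjmp(g_recover, 1);
}

int main() {
  // Reserve a "stack" whose low end is protected, like a stack red zone.
  void* mem = mmap(nullptr, kStackBytes, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED) {
    return 1;
  }
  char* stack = static_cast<char*>(mem);
  mprotect(stack, kRedZoneBytes, PROT_NONE);

  struct sigaction sa = {};
  sa.sa_handler = HandleFault;
  sigaction(SIGSEGV, &sa, nullptr);

  char* sp = stack + kStackBytes;  // stack grows down from the high end
  char* probe = sp - kStackBytes;  // a frame large enough to reach the red zone

  if (sigsetjmp(g_recover, 1) == 0) {
    volatile char c = *probe;      // the "load below sp" probe
    (void)c;
    printf("frame fits, no overflow\n");
  } else {
    printf("probe hit the red zone: report a stack overflow\n");
  }
  munmap(stack, kStackBytes);
  return 0;
}
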
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 0f1e171..8bfdb6a 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -77,10 +77,10 @@
 
     // Required for target - miscellaneous.
     void AssembleLIR();
-    uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset);
+    uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
     int AssignInsnOffsets();
     void AssignOffsets();
-    void EncodeLIR(LIR* lir);
+    static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
     void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
     void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
     const char* GetTargetInstFmt(int opcode);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 1ec0a2c..8df5b25 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -828,6 +828,7 @@
   int encoded_disp = displacement;
   bool already_generated = false;
   int dest_low_reg = r_dest.IsPair() ? r_dest.GetLowReg() : r_dest.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
     case kLong:
@@ -848,6 +849,7 @@
                          displacement >> 2);
         } else {
           load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+          null_pointer_safepoint = true;
           LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
         }
         already_generated = true;
@@ -939,6 +941,11 @@
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two load instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(load);
+    }
   }
   return load;
 }
@@ -965,6 +972,7 @@
   int encoded_disp = displacement;
   bool already_generated = false;
   int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
+  bool null_pointer_safepoint = false;
   switch (size) {
     case kLong:
     case kDouble:
@@ -974,6 +982,7 @@
                           displacement >> 2);
         } else {
           store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+          null_pointer_safepoint = true;
           StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
         }
         already_generated = true;
@@ -1061,6 +1070,11 @@
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (r_base == rs_rARM_SP) {
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
+  } else {
+    // We might need to generate a safepoint if we have two store instructions (wide or double).
+    if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
+      MarkSafepointPC(store);
+    }
   }
   return store;
 }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 717ad86..4c6c7a4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -702,7 +702,8 @@
     uint32_t native_offset = it.NativePcOffset();
     uint32_t dex_pc = it.DexPc();
     const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
-    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
+    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc <<
+        ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
     native_gc_map_builder.AddEntry(native_offset, references);
   }
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2afa5ca..866ce5f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -74,14 +74,19 @@
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
   if (Runtime::Current()->ExplicitNullChecks()) {
-    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
-      return NULL;
-    }
-    return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+    return GenExplicitNullCheck(m_reg, opt_flags);
   }
   return nullptr;
 }
 
+/* Perform an explicit null-check on a register.  */
+LIR* Mir2Lir::GenExplicitNullCheck(RegStorage m_reg, int opt_flags) {
+  if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+    return NULL;
+  }
+  return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer);
+}
+
 void Mir2Lir::MarkPossibleNullPointerException(int opt_flags) {
   if (!Runtime::Current()->ExplicitNullChecks()) {
     if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
@@ -732,6 +737,7 @@
         OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
         rl_result = EvalLoc(rl_dest, reg_class, true);
         LoadBaseDispWide(reg_ptr, 0, rl_result.reg, INVALID_SREG);
+        MarkPossibleNullPointerException(opt_flags);
         if (field_info.IsVolatile()) {
           // Without context sensitive analysis, we must issue the most conservative barriers.
           // In this case, either a load or store may follow so we issue both barriers.
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a0242d5..7689b51 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -781,7 +781,17 @@
                            type, skip_this);
 
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -987,7 +997,17 @@
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
   if (pcrLabel) {
-    *pcrLabel = GenNullCheck(TargetReg(kArg1), info->opt_flags);
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      LoadWordDisp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
@@ -1299,7 +1319,7 @@
     LoadValueDirectFixed(rl_start, reg_start);
   }
   RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf));
-  GenNullCheck(reg_ptr, info->opt_flags);
+  GenExplicitNullCheck(reg_ptr, info->opt_flags);
   LIR* high_code_point_branch =
       rl_char.is_const ? nullptr : OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, nullptr);
   // NOTE: not a safepoint
@@ -1337,7 +1357,7 @@
   LoadValueDirectFixed(rl_cmp, reg_cmp);
   RegStorage r_tgt = (cu_->instruction_set != kX86) ?
       LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : RegStorage::InvalidReg();
-  GenNullCheck(reg_this, info->opt_flags);
+  GenExplicitNullCheck(reg_this, info->opt_flags);
   info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.
   // TUNING: check if rl_cmp.s_reg_low is already null checked
   LIR* cmp_null_check_branch = OpCmpImmBranch(kCondEq, reg_cmp, 0, nullptr);
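
The change above is the core of the implicit null-check scheme: instead of comparing kArg1 against null and branching to a throw path, the compiler simply loads a word through the receiver and records a safepoint, so a null receiver faults and the runtime can report it as a NullPointerException. A small sketch contrasting the two code shapes (the Generate*/Emit helpers below are illustrative names, not ART APIs):

#include <cstdio>

// Pseudo-assembly emitter used only for illustration.
static void Emit(const char* insn) { printf("  %s\n", insn); }

// Explicit strategy: compare the receiver against null and branch to a
// slow path that throws (what GenExplicitNullCheck produces).
static void GenerateExplicitCheck() {
  Emit("cmp   r1, #0");
  Emit("beq   throw_null_pointer_exception");
}

// Implicit strategy: just load through the receiver. A null receiver makes
// the load fault, and the recorded safepoint lets the runtime rethrow it
// as a NullPointerException (what the LoadWordDisp probe above does).
static void GenerateImplicitCheck() {
  Emit("ldr   r12, [r1, #0]   @ faults if r1 == null");
}

int main() {
  printf("explicit null check:\n");
  GenerateExplicitCheck();
  printf("implicit null check:\n");
  GenerateImplicitCheck();
  return 0;
}

The implicit form drops the compare-and-branch from the common path at the cost of relying on fault handling for the rare null case.
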
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e81a037..cd3dadb 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -446,6 +446,7 @@
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       LoadWordDisp(rl_src[0].reg, len_offset, rl_result.reg);
+      MarkPossibleNullPointerException(opt_flags);
       StoreValue(rl_dest, rl_result);
       break;
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index bac35aa..10f431f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -131,7 +131,6 @@
 
 struct AssemblyInfo {
   LIR* pcrel_next;           // Chain of LIR nodes needing pc relative fixups.
-  uint8_t bytes[16];         // Encoded instruction bytes.
 };
 
 struct LIR {
@@ -151,7 +150,7 @@
   } flags;
   union {
     UseDefMasks m;               // Use & Def masks used during optimization.
-    AssemblyInfo a;              // Instruction encoding used during assembly phase.
+    AssemblyInfo a;              // Instruction info used during assembly phase.
   } u;
   int32_t operands[5];           // [0..4] = [dest, src1, src2, extra, extra2].
 };
@@ -340,7 +339,7 @@
       return code_buffer_.size() / sizeof(code_buffer_[0]);
     }
 
-    bool IsPseudoLirOp(int opcode) {
+    static bool IsPseudoLirOp(int opcode) {
       return (opcode < 0);
     }
 
@@ -565,6 +564,7 @@
     void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
     LIR* GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind);
     LIR* GenNullCheck(RegStorage m_reg, int opt_flags);
+    LIR* GenExplicitNullCheck(RegStorage m_reg, int opt_flags);
     LIR* GenRegRegCheck(ConditionCode c_code, RegStorage reg1, RegStorage reg2, ThrowKind kind);
     void GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 78f403c..28b438e 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -85,7 +85,7 @@
   return result;
 }
 
-const std::vector<ManagedRegister>& ArmManagedRuntimeCallingConvention::EntrySpills() {
+const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on ARM to free them up for scratch use, we then assume
   // all arguments are on the stack.
   if (entry_spills_.size() == 0) {
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index fc2d857..96bbb7e 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -36,10 +36,10 @@
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
-  const std::vector<ManagedRegister>& EntrySpills() OVERRIDE;
+  const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
 
  private:
-  std::vector<ManagedRegister> entry_spills_;
+  ManagedRegisterEntrySpills entry_spills_;
 
   DISALLOW_COPY_AND_ASSIGN(ArmManagedRuntimeCallingConvention);
 };
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index c4d0d45..ff899b7 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -85,7 +85,7 @@
   return result;
 }
 
-const std::vector<ManagedRegister>& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
+const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on ARM64 to free them up for scratch use, we then assume
   // all arguments are on the stack.
   if (entry_spills_.size() == 0) {
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 2dcf1af..7e33830 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -36,10 +36,10 @@
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
-  const std::vector<ManagedRegister>& EntrySpills() OVERRIDE;
+  const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
 
  private:
-  std::vector<ManagedRegister> entry_spills_;
+  ManagedRegisterEntrySpills entry_spills_;
 
   DISALLOW_COPY_AND_ASSIGN(Arm64ManagedRuntimeCallingConvention);
 };
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 5856df4..043bcea 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -21,6 +21,7 @@
 #include "jni/quick/arm64/calling_convention_arm64.h"
 #include "jni/quick/mips/calling_convention_mips.h"
 #include "jni/quick/x86/calling_convention_x86.h"
+#include "jni/quick/x86_64/calling_convention_x86_64.h"
 #include "utils.h"
 
 namespace art {
@@ -44,6 +45,8 @@
       return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
     case kX86:
       return new x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+    case kX86_64:
+      return new x86_64::X86_64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return NULL;
@@ -61,6 +64,9 @@
     itr_longs_and_doubles_++;
     itr_slots_++;
   }
+  if (IsParamAFloatOrDouble(itr_args_)) {
+    itr_float_and_doubles_++;
+  }
   if (IsCurrentParamAReference()) {
     itr_refs_++;
   }
@@ -85,6 +91,10 @@
   return IsParamAReference(itr_args_);
 }
 
+bool ManagedRuntimeCallingConvention::IsCurrentParamAFloatOrDouble() {
+  return IsParamAFloatOrDouble(itr_args_);
+}
+
 // JNI calling convention
 
 JniCallingConvention* JniCallingConvention::Create(bool is_static, bool is_synchronized,
@@ -100,6 +110,8 @@
       return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
     case kX86:
       return new x86::X86JniCallingConvention(is_static, is_synchronized, shorty);
+    case kX86_64:
+      return new x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty);
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return NULL;
@@ -111,9 +123,8 @@
 }
 
 FrameOffset JniCallingConvention::SavedLocalReferenceCookieOffset() const {
-  size_t start_of_sirt = SirtNumRefsOffset().Int32Value() +  kPointerSize;
-  size_t references_size = kPointerSize * ReferenceCount();  // size excluding header
-  return FrameOffset(start_of_sirt + references_size);
+  size_t references_size = kSirtPointerSize * ReferenceCount();  // size excluding header
+  return FrameOffset(SirtReferencesOffset().Int32Value() + references_size);
 }
 
 FrameOffset JniCallingConvention::ReturnValueSaveLocation() const {
@@ -139,6 +150,9 @@
       itr_slots_++;
     }
   }
+  if (IsCurrentParamAFloatOrDouble()) {
+    itr_float_and_doubles_++;
+  }
   if (IsCurrentParamAReference()) {
     itr_refs_++;
   }
@@ -159,14 +173,25 @@
   }
 }
 
+bool JniCallingConvention::IsCurrentParamAFloatOrDouble() {
+  switch (itr_args_) {
+    case kJniEnv:
+      return false;  // JNIEnv*
+    case kObjectOrClass:
+      return false;   // jobject or jclass
+    default: {
+      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+      return IsParamAFloatOrDouble(arg_pos);
+    }
+  }
+}
+
 // Return position of SIRT entry holding reference at the current iterator
 // position
 FrameOffset JniCallingConvention::CurrentParamSirtEntryOffset() {
   CHECK(IsCurrentParamAReference());
   CHECK_LT(SirtLinkOffset(), SirtNumRefsOffset());
-  // Address of 1st SIRT entry
-  int result = SirtNumRefsOffset().Int32Value() + kPointerSize;
-  result += itr_refs_ * kPointerSize;
+  int result = SirtReferencesOffset().Int32Value() + itr_refs_ * kSirtPointerSize;
   CHECK_GT(result, SirtNumRefsOffset().Int32Value());
   return FrameOffset(result);
 }
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index f2b7fd9..fe3d1cd 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -60,23 +60,35 @@
     itr_args_ = 0;
     itr_refs_ = 0;
     itr_longs_and_doubles_ = 0;
+    itr_float_and_doubles_ = 0;
   }
 
   virtual ~CallingConvention() {}
 
  protected:
   CallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : displacement_(0), is_static_(is_static), is_synchronized_(is_synchronized),
+      : displacement_(0), kSirtPointerSize(sizeof(StackReference<mirror::Object>)), is_static_(is_static), is_synchronized_(is_synchronized),
         shorty_(shorty) {
     num_args_ = (is_static ? 0 : 1) + strlen(shorty) - 1;
     num_ref_args_ = is_static ? 0 : 1;  // The implicit this pointer.
+    num_float_or_double_args_ = 0;
     num_long_or_double_args_ = 0;
     for (size_t i = 1; i < strlen(shorty); i++) {
       char ch = shorty_[i];
-      if (ch == 'L') {
+      switch (ch) {
+      case 'L':
         num_ref_args_++;
-      } else if ((ch == 'D') || (ch == 'J')) {
+        break;
+      case 'J':
         num_long_or_double_args_++;
+        break;
+      case 'D':
+        num_long_or_double_args_++;
+        num_float_or_double_args_++;
+        break;
+      case 'F':
+        num_float_or_double_args_++;
+        break;
       }
     }
   }
@@ -97,6 +109,16 @@
     char ch = shorty_[param];
     return (ch == 'J' || ch == 'D');
   }
+  bool IsParamAFloatOrDouble(unsigned int param) const {
+    DCHECK_LT(param, NumArgs());
+    if (IsStatic()) {
+      param++;  // 0th argument must skip return value at start of the shorty
+    } else if (param == 0) {
+      return false;  // this argument
+    }
+    char ch = shorty_[param];
+    return (ch == 'F' || ch == 'D');
+  }
   bool IsParamAReference(unsigned int param) const {
     DCHECK_LT(param, NumArgs());
     if (IsStatic()) {
@@ -112,6 +134,9 @@
   size_t NumLongOrDoubleArgs() const {
     return num_long_or_double_args_;
   }
+  size_t NumFloatOrDoubleArgs() const {
+    return num_float_or_double_args_;
+  }
   size_t NumReferenceArgs() const {
     return num_ref_args_;
   }
@@ -141,8 +166,11 @@
   unsigned int itr_args_;
   // Number of longs and doubles seen along argument list
   unsigned int itr_longs_and_doubles_;
+  // Number of floats and doubles seen along argument list
+  unsigned int itr_float_and_doubles_;
   // Space for frames below this on the stack
   FrameOffset displacement_;
+  size_t kSirtPointerSize;
 
  private:
   const bool is_static_;
@@ -150,6 +178,7 @@
   std::string shorty_;
   size_t num_args_;
   size_t num_ref_args_;
+  size_t num_float_or_double_args_;
   size_t num_long_or_double_args_;
 };
 
@@ -174,6 +203,7 @@
   bool HasNext();
   void Next();
   bool IsCurrentParamAReference();
+  bool IsCurrentParamAFloatOrDouble();
   bool IsCurrentArgExplicit();  // i.e. a non-implicit argument such as this
   bool IsCurrentArgPossiblyNull();
   size_t CurrentParamSize();
@@ -185,7 +215,7 @@
   virtual ~ManagedRuntimeCallingConvention() {}
 
   // Registers to spill to caller's out registers on entry.
-  virtual const std::vector<ManagedRegister>& EntrySpills() = 0;
+  virtual const ManagedRegisterEntrySpills& EntrySpills() = 0;
 
  protected:
   ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
@@ -241,6 +271,7 @@
   bool HasNext();
   virtual void Next();
   bool IsCurrentParamAReference();
+  bool IsCurrentParamAFloatOrDouble();
   size_t CurrentParamSize();
   virtual bool IsCurrentParamInRegister() = 0;
   virtual bool IsCurrentParamOnStack() = 0;
@@ -255,13 +286,21 @@
     return FrameOffset(displacement_.Int32Value() +
                        kPointerSize);  // above Method*
   }
+
+  FrameOffset SirtLinkOffset() const {
+    return FrameOffset(SirtOffset().Int32Value() +
+                       StackIndirectReferenceTable::LinkOffset());
+  }
+
   FrameOffset SirtNumRefsOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
                        StackIndirectReferenceTable::NumberOfReferencesOffset());
   }
-  FrameOffset SirtLinkOffset() const {
-    return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::LinkOffset());
+
+  FrameOffset SirtReferencesOffset() const {
+    // The StackIndirectReferenceTable::number_of_references_ type is uint32_t
+    return FrameOffset(SirtNumRefsOffset().Int32Value() +
+                       sizeof(uint32_t));
   }
 
   virtual ~JniCallingConvention() {}
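
The constructor above classifies arguments by walking the method shorty, where index 0 is the return type and each following character describes one argument ('L' reference, 'J' long, 'D' double, 'F' float, other characters for the remaining primitives). A standalone sketch of that counting, assuming the same shorty format (CountArgs and ArgCounts are illustrative names, not ART types):

#include <cstdio>
#include <cstring>

struct ArgCounts {
  size_t refs;              // 'L' arguments, plus the implicit this for non-static methods
  size_t long_or_double;    // 'J' or 'D'
  size_t float_or_double;   // 'F' or 'D'
};

// Mirrors the shorty walk in CallingConvention's constructor: shorty[0] is the
// return type, the remaining characters describe the arguments in order.
static ArgCounts CountArgs(const char* shorty, bool is_static) {
  ArgCounts counts = {is_static ? 0u : 1u, 0u, 0u};
  for (size_t i = 1; i < strlen(shorty); i++) {
    switch (shorty[i]) {
      case 'L': counts.refs++; break;
      case 'J': counts.long_or_double++; break;
      case 'D': counts.long_or_double++; counts.float_or_double++; break;
      case 'F': counts.float_or_double++; break;
      default: break;  // 'I', 'Z', 'B', 'S', 'C': single-slot core arguments
    }
  }
  return counts;
}

int main() {
  // e.g. an instance method returning double that takes (Object, float, long): shorty "DLFJ".
  ArgCounts c = CountArgs("DLFJ", /*is_static=*/false);
  printf("refs=%zu long_or_double=%zu float_or_double=%zu\n",
         c.refs, c.long_or_double, c.float_or_double);
  return 0;
}
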
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 1c9aed8..c89bc40 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -271,7 +271,7 @@
           mr_conv->InterproceduralScratchRegister());
 
   // 10. Fix differences in result widths.
-  if (instruction_set == kX86) {
+  if (instruction_set == kX86 || instruction_set == kX86_64) {
     if (main_jni_conv->GetReturnType() == Primitive::kPrimByte ||
         main_jni_conv->GetReturnType() == Primitive::kPrimShort) {
       __ SignExtend(main_jni_conv->ReturnRegister(),
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 0a48500..ea39d60 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -85,7 +85,7 @@
   return result;
 }
 
-const std::vector<ManagedRegister>& MipsManagedRuntimeCallingConvention::EntrySpills() {
+const ManagedRegisterEntrySpills& MipsManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on MIPS to free them up for scratch use, we then assume
   // all arguments are on the stack.
   if (entry_spills_.size() == 0) {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index 445f453..1a9053a 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -35,10 +35,10 @@
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
-  const std::vector<ManagedRegister>& EntrySpills() OVERRIDE;
+  const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
 
  private:
-  std::vector<ManagedRegister> entry_spills_;
+  ManagedRegisterEntrySpills entry_spills_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsManagedRuntimeCallingConvention);
 };
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 8b5c86d..8d22fe6 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -90,7 +90,7 @@
                      (itr_slots_ * kPointerSize));  // offset into in args
 }
 
-const std::vector<ManagedRegister>& X86ManagedRuntimeCallingConvention::EntrySpills() {
+const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on X86 to free them up for scratch use, we then assume
   // all arguments are on the stack.
   if (entry_spills_.size() == 0) {
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index e814c7e..2dab059 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -37,9 +37,9 @@
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
   FrameOffset CurrentParamStackOffset() OVERRIDE;
-  const std::vector<ManagedRegister>& EntrySpills() OVERRIDE;
+  const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
  private:
-  std::vector<ManagedRegister> entry_spills_;
+  ManagedRegisterEntrySpills entry_spills_;
   DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention);
 };
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
new file mode 100644
index 0000000..8ebea46
--- /dev/null
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "calling_convention_x86_64.h"
+
+#include "base/logging.h"
+#include "utils/x86_64/managed_register_x86_64.h"
+#include "utils.h"
+
+namespace art {
+namespace x86_64 {
+
+// Calling convention
+
+ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+  return X86_64ManagedRegister::FromCpuRegister(RAX);
+}
+
+ManagedRegister X86_64JniCallingConvention::InterproceduralScratchRegister() {
+  return X86_64ManagedRegister::FromCpuRegister(RAX);
+}
+
+ManagedRegister X86_64JniCallingConvention::ReturnScratchRegister() const {
+  return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
+}
+
+static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni) {
+  if (shorty[0] == 'F' || shorty[0] == 'D') {
+    return X86_64ManagedRegister::FromXmmRegister(_XMM0);
+  } else if (shorty[0] == 'J') {
+    return X86_64ManagedRegister::FromCpuRegister(RAX);
+  } else if (shorty[0] == 'V') {
+    return ManagedRegister::NoRegister();
+  } else {
+    return X86_64ManagedRegister::FromCpuRegister(RAX);
+  }
+}
+
+ManagedRegister X86_64ManagedRuntimeCallingConvention::ReturnRegister() {
+  return ReturnRegisterForShorty(GetShorty(), false);
+}
+
+ManagedRegister X86_64JniCallingConvention::ReturnRegister() {
+  return ReturnRegisterForShorty(GetShorty(), true);
+}
+
+ManagedRegister X86_64JniCallingConvention::IntReturnRegister() {
+  return X86_64ManagedRegister::FromCpuRegister(RAX);
+}
+
+// Managed runtime calling convention
+
+ManagedRegister X86_64ManagedRuntimeCallingConvention::MethodRegister() {
+  return X86_64ManagedRegister::FromCpuRegister(RDI);
+}
+
+bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
+  return !IsCurrentParamOnStack();
+}
+
+bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
+  // We assume all parameters are on the stack; args coming via registers are spilled as entry_spills.
+  return true;
+}
+
+ManagedRegister X86_64ManagedRuntimeCallingConvention::CurrentParamRegister() {
+  ManagedRegister res = ManagedRegister::NoRegister();
+  if (!IsCurrentParamAFloatOrDouble()) {
+    switch (itr_args_ - itr_float_and_doubles_) {
+    case 0: res = X86_64ManagedRegister::FromCpuRegister(RSI); break;
+    case 1: res = X86_64ManagedRegister::FromCpuRegister(RDX); break;
+    case 2: res = X86_64ManagedRegister::FromCpuRegister(RCX); break;
+    case 3: res = X86_64ManagedRegister::FromCpuRegister(R8); break;
+    case 4: res = X86_64ManagedRegister::FromCpuRegister(R9); break;
+    }
+  } else if (itr_float_and_doubles_ < 8) {
+    // First eight float parameters are passed via XMM0..XMM7
+    res = X86_64ManagedRegister::FromXmmRegister(
+                                 static_cast<XmmRegister>(_XMM0 + itr_float_and_doubles_));
+  }
+  return res;
+}
+
+FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
+  return FrameOffset(displacement_.Int32Value() +   // displacement
+                     kPointerSize +                 // Method*
+                     (itr_slots_ * sizeof(uint32_t)));  // offset into in args
+}
+
+const ManagedRegisterEntrySpills& X86_64ManagedRuntimeCallingConvention::EntrySpills() {
+  // We spill the argument registers on X86-64 to free them up for scratch use; we then assume
+  // all arguments are on the stack.
+  if (entry_spills_.size() == 0) {
+    ResetIterator(FrameOffset(0));
+    while (HasNext()) {
+      ManagedRegister in_reg = CurrentParamRegister();
+      if (!in_reg.IsNoRegister()) {
+        int32_t size = IsParamALongOrDouble(itr_args_) ? 8 : 4;
+        int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
+        ManagedRegisterSpill spill(in_reg, size, spill_offset);
+        entry_spills_.push_back(spill);
+      }
+      Next();
+    }
+  }
+  return entry_spills_;
+}
+
+// JNI calling convention
+
+X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
+                                                 const char* shorty)
+    : JniCallingConvention(is_static, is_synchronized, shorty) {
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBX));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(RBP));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R12));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R13));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R14));
+  callee_save_regs_.push_back(X86_64ManagedRegister::FromCpuRegister(R15));
+}
+
+uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
+  return 1 << RBX | 1 << RBP | 1 << R12 | 1 << R13 | 1 << R14 | 1 << R15 | 1 << kNumberOfCpuRegisters;
+}
+
+size_t X86_64JniCallingConvention::FrameSize() {
+  // Method*, return address and callee save area size, local reference segment state
+  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kPointerSize;
+  // References plus link_ (pointer) and number_of_references_ (uint32_t) for SIRT header
+  size_t sirt_size = kPointerSize + sizeof(uint32_t) + ReferenceCount() * kSirtPointerSize;
+  // Plus return value spill area size
+  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+}
+
+size_t X86_64JniCallingConvention::OutArgSize() {
+  return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize, kStackAlignment);
+}
+
+bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
+  return !IsCurrentParamOnStack();
+}
+
+bool X86_64JniCallingConvention::IsCurrentParamOnStack() {
+  return CurrentParamRegister().IsNoRegister();
+}
+
+ManagedRegister X86_64JniCallingConvention::CurrentParamRegister() {
+  ManagedRegister res = ManagedRegister::NoRegister();
+  if (!IsCurrentParamAFloatOrDouble()) {
+    switch (itr_args_ - itr_float_and_doubles_) {
+    case 0: res = X86_64ManagedRegister::FromCpuRegister(RDI); break;
+    case 1: res = X86_64ManagedRegister::FromCpuRegister(RSI); break;
+    case 2: res = X86_64ManagedRegister::FromCpuRegister(RDX); break;
+    case 3: res = X86_64ManagedRegister::FromCpuRegister(RCX); break;
+    case 4: res = X86_64ManagedRegister::FromCpuRegister(R8); break;
+    case 5: res = X86_64ManagedRegister::FromCpuRegister(R9); break;
+    }
+  } else if (itr_float_and_doubles_ < 8) {
+    // First eight float parameters are passed via XMM0..XMM7
+    res = X86_64ManagedRegister::FromXmmRegister(
+                                 static_cast<XmmRegister>(_XMM0 + itr_float_and_doubles_));
+  }
+  return res;
+}
+
+FrameOffset X86_64JniCallingConvention::CurrentParamStackOffset() {
+  size_t offset = itr_args_
+                  - std::min(8U, itr_float_and_doubles_)               // Float arguments passed through Xmm0..Xmm7
+                  - std::min(6U, itr_args_ - itr_float_and_doubles_);  // Integer arguments passed through GPR
+  return FrameOffset(displacement_.Int32Value() - OutArgSize() + (offset * kPointerSize));
+}
+
+size_t X86_64JniCallingConvention::NumberOfOutgoingStackArgs() {
+  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
+  // regular argument parameters and this
+  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
+  // count JNIEnv* and return pc (pushed after Method*)
+  size_t total_args = static_args + param_args + 2;
+
+  // Float arguments passed through Xmm0..Xmm7
+  // Other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9)
+  size_t total_stack_args = total_args
+                            - std::min(8U, static_cast<unsigned int>(NumFloatOrDoubleArgs()))
+                            - std::min(6U, static_cast<unsigned int>(NumArgs() - NumFloatOrDoubleArgs()));
+
+  return total_stack_args;
+}
+
+}  // namespace x86_64
+}  // namespace art
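
CurrentParamRegister and CurrentParamStackOffset above implement the familiar x86-64 split: the n-th integer-class argument (n = itr_args_ - itr_float_and_doubles_) takes the next register out of RDI, RSI, RDX, RCX, R8, R9, floating-point arguments take XMM0 through XMM7, and anything beyond those limits goes to the stack. A standalone sketch of that assignment driven by the same two counters (AssignArgs and its types are illustrative, not ART code):

#include <cstdio>
#include <string>
#include <vector>

// Assign each argument either a register name or a stack slot index,
// following the GPR/XMM split used by X86_64JniCallingConvention.
static std::vector<std::string> AssignArgs(const std::vector<bool>& is_fp) {
  static const char* kGprs[] = {"RDI", "RSI", "RDX", "RCX", "R8", "R9"};
  std::vector<std::string> where;
  unsigned int fp_seen = 0;      // plays the role of itr_float_and_doubles_
  unsigned int stack_slot = 0;
  for (unsigned int i = 0; i < is_fp.size(); i++) {  // i plays the role of itr_args_
    if (is_fp[i]) {
      if (fp_seen < 8) {
        where.push_back("XMM" + std::to_string(fp_seen));
      } else {
        where.push_back("stack[" + std::to_string(stack_slot++) + "]");
      }
      fp_seen++;
    } else {
      unsigned int gpr_index = i - fp_seen;  // itr_args_ - itr_float_and_doubles_
      if (gpr_index < 6) {
        where.push_back(kGprs[gpr_index]);
      } else {
        where.push_back("stack[" + std::to_string(stack_slot++) + "]");
      }
    }
  }
  return where;
}

int main() {
  // JNIEnv*, jclass, then (int, double, int, int, int, int, int): the last ints overflow to the stack.
  std::vector<bool> is_fp = {false, false, false, true, false, false, false, false, false};
  std::vector<std::string> where = AssignArgs(is_fp);
  for (size_t i = 0; i < where.size(); i++) {
    printf("arg %zu -> %s\n", i, where[i].c_str());
  }
  return 0;
}
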
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
new file mode 100644
index 0000000..d7f7762
--- /dev/null
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
+#define ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
+
+#include "jni/quick/calling_convention.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention {
+ public:
+  explicit X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized,
+                                              const char* shorty)
+      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty) {}
+  ~X86_64ManagedRuntimeCallingConvention() OVERRIDE {}
+  // Calling convention
+  ManagedRegister ReturnRegister() OVERRIDE;
+  ManagedRegister InterproceduralScratchRegister() OVERRIDE;
+  // Managed runtime calling convention
+  ManagedRegister MethodRegister() OVERRIDE;
+  bool IsCurrentParamInRegister() OVERRIDE;
+  bool IsCurrentParamOnStack() OVERRIDE;
+  ManagedRegister CurrentParamRegister() OVERRIDE;
+  FrameOffset CurrentParamStackOffset() OVERRIDE;
+  const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE;
+ private:
+  ManagedRegisterEntrySpills entry_spills_;
+  DISALLOW_COPY_AND_ASSIGN(X86_64ManagedRuntimeCallingConvention);
+};
+
+class X86_64JniCallingConvention FINAL : public JniCallingConvention {
+ public:
+  explicit X86_64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  ~X86_64JniCallingConvention() OVERRIDE {}
+  // Calling convention
+  ManagedRegister ReturnRegister() OVERRIDE;
+  ManagedRegister IntReturnRegister() OVERRIDE;
+  ManagedRegister InterproceduralScratchRegister() OVERRIDE;
+  // JNI calling convention
+  size_t FrameSize() OVERRIDE;
+  size_t OutArgSize() OVERRIDE;
+  const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
+    return callee_save_regs_;
+  }
+  ManagedRegister ReturnScratchRegister() const OVERRIDE;
+  uint32_t CoreSpillMask() const OVERRIDE;
+  uint32_t FpSpillMask() const OVERRIDE {
+    return 0;
+  }
+  bool IsCurrentParamInRegister() OVERRIDE;
+  bool IsCurrentParamOnStack() OVERRIDE;
+  ManagedRegister CurrentParamRegister() OVERRIDE;
+  FrameOffset CurrentParamStackOffset() OVERRIDE;
+
+ protected:
+  size_t NumberOfOutgoingStackArgs() OVERRIDE;
+
+ private:
+  // TODO: these values aren't unique and can be shared amongst instances
+  std::vector<ManagedRegister> callee_save_regs_;
+
+  DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_JNI_QUICK_X86_64_CALLING_CONVENTION_X86_64_H_
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 39535e9..64ecdb5 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -16,10 +16,12 @@
  */
 
 #include "dex_file.h"
+#include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
 #include "builder.h"
 #include "nodes.h"
+#include "primitive.h"
 
 namespace art {
 
@@ -192,6 +194,55 @@
       break;
     }
 
+    case Instruction::INVOKE_STATIC: {
+      uint32_t method_idx = instruction.VRegB_35c();
+      const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
+      uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+      const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
+      const size_t number_of_arguments = instruction.VRegA_35c();
+      if (number_of_arguments != 0) {
+        return false;
+      }
+      if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) {
+        return false;
+      }
+      current_block_->AddInstruction(new (arena_) HInvokeStatic(
+          arena_, number_of_arguments, dex_offset, method_idx));
+      break;
+    }
+
+    case Instruction::ADD_INT: {
+      HInstruction* first = LoadLocal(instruction.VRegB());
+      HInstruction* second = LoadLocal(instruction.VRegC());
+      current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second));
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::ADD_INT_2ADDR: {
+      HInstruction* first = LoadLocal(instruction.VRegA());
+      HInstruction* second = LoadLocal(instruction.VRegB());
+      current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second));
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT16: {
+      HInstruction* first = LoadLocal(instruction.VRegB());
+      HInstruction* second = GetConstant(instruction.VRegC_22s());
+      current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second));
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT8: {
+      HInstruction* first = LoadLocal(instruction.VRegB());
+      HInstruction* second = GetConstant(instruction.VRegC_22b());
+      current_block_->AddInstruction(new (arena_) HAdd(Primitive::kPrimInt, first, second));
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
     case Instruction::NOP:
       break;
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index fff83a1..46ca9aa 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
 #include "dex_file.h"
+#include "driver/dex_compilation_unit.h"
 #include "utils/allocation.h"
 #include "utils/growable_array.h"
 
@@ -33,7 +34,9 @@
 
 class HGraphBuilder : public ValueObject {
  public:
-  explicit HGraphBuilder(ArenaAllocator* arena)
+  HGraphBuilder(ArenaAllocator* arena,
+                const DexCompilationUnit* dex_compilation_unit = nullptr,
+                const DexFile* dex_file = nullptr)
       : arena_(arena),
         branch_targets_(arena, 0),
         locals_(arena, 0),
@@ -42,7 +45,9 @@
         current_block_(nullptr),
         graph_(nullptr),
         constant0_(nullptr),
-        constant1_(nullptr) { }
+        constant1_(nullptr),
+        dex_file_(dex_file),
+        dex_compilation_unit_(dex_compilation_unit) { }
 
   HGraph* BuildGraph(const DexFile::CodeItem& code);
 
@@ -83,6 +88,9 @@
   HIntConstant* constant0_;
   HIntConstant* constant1_;
 
+  const DexFile* const dex_file_;
+  const DexCompilationUnit* const dex_compilation_unit_;
+
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bb6ac84..b86665b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -21,8 +21,11 @@
 #include "dex/verified_method.h"
 #include "driver/dex_compilation_unit.h"
 #include "gc_map_builder.h"
+#include "leb128.h"
+#include "mapping_table.h"
 #include "utils/assembler.h"
 #include "verifier/dex_gc_map.h"
+#include "vmap_table.h"
 
 namespace art {
 
@@ -120,8 +123,95 @@
       dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
 
-  GcMapBuilder builder(data, 0, 0, dex_gc_map.RegWidth());
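+  // Find the largest native pc offset; GcMapBuilder uses it to size its entries.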
+  uint32_t max_native_offset = 0;
+  for (size_t i = 0; i < pc_infos_.Size(); i++) {
+    uint32_t native_offset = pc_infos_.Get(i).native_pc;
+    if (native_offset > max_native_offset) {
+      max_native_offset = native_offset;
+    }
+  }
+
+  GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth());
+  for (size_t i = 0; i < pc_infos_.Size(); i++) {
+    struct PcInfo pc_info = pc_infos_.Get(i);
+    uint32_t native_offset = pc_info.native_pc;
+    uint32_t dex_pc = pc_info.dex_pc;
+    const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
+    CHECK(references != NULL) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
+    builder.AddEntry(native_offset, references);
+  }
 }
 
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
+  uint32_t pc2dex_data_size = 0u;
+  uint32_t pc2dex_entries = pc_infos_.Size();
+  uint32_t pc2dex_offset = 0u;
+  int32_t pc2dex_dalvik_offset = 0;
+  uint32_t dex2pc_data_size = 0u;
+  uint32_t dex2pc_entries = 0u;
+
+  // We currently only have pc2dex entries.
+  for (size_t i = 0; i < pc2dex_entries; i++) {
+    struct PcInfo pc_info = pc_infos_.Get(i);
+    pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
+    pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = pc_info.native_pc;
+    pc2dex_dalvik_offset = pc_info.dex_pc;
+  }
+
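+  // Header: the total number of entries followed by the number of pc2dex entries, both ULEB128.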
+  uint32_t total_entries = pc2dex_entries + dex2pc_entries;
+  uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
+  uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
+  data->resize(data_size);
+
+  uint8_t* data_ptr = &(*data)[0];
+  uint8_t* write_pos = data_ptr;
+  write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
+  write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
+  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
+  uint8_t* write_pos2 = write_pos + pc2dex_data_size;
+
+  pc2dex_offset = 0u;
+  pc2dex_dalvik_offset = 0u;
+  for (size_t i = 0; i < pc2dex_entries; i++) {
+    struct PcInfo pc_info = pc_infos_.Get(i);
+    DCHECK(pc2dex_offset <= pc_info.native_pc);
+    write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset);
+    write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = pc_info.native_pc;
+    pc2dex_dalvik_offset = pc_info.dex_pc;
+  }
+  DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
+  DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
+
+  if (kIsDebugBuild) {
+    // Verify the encoded table holds the expected data.
+    MappingTable table(data_ptr);
+    CHECK_EQ(table.TotalSize(), total_entries);
+    CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
+    auto it = table.PcToDexBegin();
+    auto it2 = table.DexToPcBegin();
+    for (size_t i = 0; i < pc2dex_entries; i++) {
+      struct PcInfo pc_info = pc_infos_.Get(i);
+      CHECK_EQ(pc_info.native_pc, it.NativePcOffset());
+      CHECK_EQ(pc_info.dex_pc, it.DexPc());
+      ++it;
+    }
+    CHECK(it == table.PcToDexEnd());
+    CHECK(it2 == table.DexToPcEnd());
+  }
+}
+
+void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const {
+  Leb128EncodingVector vmap_encoder;
+  size_t size = 1 /* FP temp */ + 1 /* marker */ + 0;
+  vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
+  vmap_encoder.PushBackUnsigned(size);
+  // We're currently always saving the frame pointer, so set it in the table as a temporary.
+  vmap_encoder.PushBackUnsigned(kVRegTempBaseReg + VmapTable::kEntryAdjustment);
+  vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
+
+  *data = vmap_encoder.GetData();
+}
 
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 63f8cbf..24dcab6 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -38,6 +38,11 @@
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };
 
+struct PcInfo {
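+  // A dex pc and the native pc of the code generated for it.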
+  uint32_t dex_pc;
+  uintptr_t native_pc;
+};
+
 /**
  * A Location is an abstraction over the potential location
  * of an instruction. It could be in register or stack.
@@ -81,7 +86,8 @@
 class LocationSummary : public ArenaObject {
  public:
   explicit LocationSummary(HInstruction* instruction)
-      : inputs(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()) {
+      : inputs(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
+        temps(instruction->GetBlock()->GetGraph()->GetArena(), 0) {
     inputs.SetSize(instruction->InputCount());
     for (int i = 0; i < instruction->InputCount(); i++) {
       inputs.Put(i, Location());
@@ -100,10 +106,19 @@
     output = Location(location);
   }
 
+  void AddTemp(Location location) {
+    temps.Add(location);
+  }
+
+  Location GetTemp(uint32_t at) const {
+    return temps.Get(at);
+  }
+
   Location Out() const { return output; }
 
  private:
   GrowableArray<Location> inputs;
+  GrowableArray<Location> temps;
   Location output;
 
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
@@ -134,9 +149,17 @@
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
+  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
 
-  void BuildMappingTable(std::vector<uint8_t>* vector) const { }
-  void BuildVMapTable(std::vector<uint8_t>* vector) const { }
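+  // Record that the code at the current assembler offset was generated for dex_pc.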
+  void RecordPcInfo(uint32_t dex_pc) {
+    struct PcInfo pc_info;
+    pc_info.dex_pc = dex_pc;
+    pc_info.native_pc = GetAssembler()->CodeSize();
+    pc_infos_.Add(pc_info);
+  }
+
+  void BuildMappingTable(std::vector<uint8_t>* vector) const;
+  void BuildVMapTable(std::vector<uint8_t>* vector) const;
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
@@ -144,23 +167,26 @@
   explicit CodeGenerator(HGraph* graph)
       : frame_size_(0),
         graph_(graph),
-        block_labels_(graph->GetArena(), 0) {
+        block_labels_(graph->GetArena(), 0),
+        pc_infos_(graph->GetArena(), 32) {
     block_labels_.SetSize(graph->GetBlocks()->Size());
   }
   ~CodeGenerator() { }
 
+  // Frame size required for this method.
+  uint32_t frame_size_;
+  uint32_t core_spill_mask_;
+
  private:
   void InitLocations(HInstruction* instruction);
   void CompileBlock(HBasicBlock* block);
   void CompileEntryBlock();
 
-  // Frame size required for this method.
-  uint32_t frame_size_;
-
   HGraph* const graph_;
 
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
+  GrowableArray<PcInfo> pc_infos_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 04bdc34..68c997b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -18,17 +18,27 @@
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
 
+#include "mirror/array.h"
+#include "mirror/art_method.h"
+
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
 namespace art {
 namespace arm {
 
 void CodeGeneratorARM::GenerateFrameEntry() {
+  core_spill_mask_ |= (1 << LR);
+  // We're currently always using FP, which is callee-saved in Quick.
+  core_spill_mask_ |= (1 << FP);
+
   __ PushList((1 << FP) | (1 << LR));
   __ mov(FP, ShifterOperand(SP));
-  if (GetFrameSize() != 0) {
-    __ AddConstant(SP, -GetFrameSize());
-  }
+
+  // Reserve space in the frame for the current ART method, the return pc, and FP.
+  SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment));
+  // PC and FP have already been pushed on the stack.
+  __ AddConstant(SP, -(GetFrameSize() - 2 * kWordSize));
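+  // R0 holds the ArtMethod*; keep it at SP + 0 so LoadCurrentMethod can reload it.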
+  __ str(R0, Address(SP, 0));
 }
 
 void CodeGeneratorARM::GenerateFrameExit() {
@@ -173,5 +183,71 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
+  CHECK_EQ(invoke->InputCount(), 0);
+  locations->AddTemp(Location(R0));
+  invoke->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) {
+  __ ldr(reg, Address(SP, 0));
+}
+
+void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
+  Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>();
+  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+      invoke->GetIndexInDexCache() * kWordSize;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ ldr(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+  // temp = temp[index_in_cache]
+  __ ldr(temp, Address(temp, index_in_cache));
+  // LR = temp[offset_of_quick_compiled_code]
+  __ ldr(LR, Address(temp,
+                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+  // LR()
+  __ blx(LR);
+
+  codegen_->RecordPcInfo(invoke->GetDexPc());
+}
+
+void LocationsBuilderARM::VisitAdd(HAdd* add) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location(R0));
+      locations->SetInAt(1, Location(R1));
+      locations->SetOut(Location(R0));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+  add->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitAdd(HAdd* add) {
+  LocationSummary* locations = add->GetLocations();
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt:
+      __ add(locations->Out().reg<Register>(),
+             locations->InAt(0).reg<Register>(),
+             ShifterOperand(locations->InAt(1).reg<Register>()));
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 52a7bf4..7a2835d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -58,6 +58,7 @@
 #undef DECLARE_VISIT_INSTRUCTION
 
   Assembler* GetAssembler() const { return assembler_; }
+  void LoadCurrentMethod(Register reg);
 
  private:
   Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c4bda56..1764486 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -18,18 +18,28 @@
 #include "utils/assembler.h"
 #include "utils/x86/assembler_x86.h"
 
+#include "mirror/array.h"
+#include "mirror/art_method.h"
+
 #define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
 
 namespace art {
 namespace x86 {
 
 void CodeGeneratorX86::GenerateFrameEntry() {
+  // Create a fake register to mimic Quick.
+  static const int kFakeReturnRegister = 8;
+  core_spill_mask_ |= (1 << kFakeReturnRegister);
+  // We're currently always using EBP, which is callee-saved in Quick.
+  core_spill_mask_ |= (1 << EBP);
+
   __ pushl(EBP);
   __ movl(EBP, ESP);
-
-  if (GetFrameSize() != 0) {
-    __ subl(ESP, Immediate(GetFrameSize()));
-  }
+  // Reserve space in the frame for the current ART method, the return pc, and EBP.
+  SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment));
+  // The PC and EBP have already been pushed on the stack.
+  __ subl(ESP, Immediate(GetFrameSize() - 2 * kWordSize));
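+  // EAX holds the ArtMethod*; keep it at ESP + 0 so LoadCurrentMethod can reload it.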
+  __ movl(Address(ESP, 0), EAX);
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
@@ -45,6 +55,10 @@
   __ pushl(location.reg<Register>());
 }
 
+void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) {
+  __ movl(reg, Address(ESP, 0));
+}
+
 void CodeGeneratorX86::Move(HInstruction* instruction, Location location) {
   HIntConstant* constant = instruction->AsIntConstant();
   if (constant != nullptr) {
@@ -110,7 +124,8 @@
 
 static int32_t GetStackSlot(HLocal* local) {
   // We are currently using EBP to access locals, so the offset must be negative.
-  return (local->GetRegNumber() + 1) * -kWordSize;
+  // +1 for going backwards, +1 for the method pointer.
+  return (local->GetRegNumber() + 2) * -kWordSize;
 }
 
 void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load) {
@@ -172,5 +187,63 @@
   __ ret();
 }
 
+void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
+  CHECK_EQ(invoke->InputCount(), 0);
+  locations->AddTemp(Location(EAX));
+  invoke->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
+  Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>();
+  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+      invoke->GetIndexInDexCache() * kWordSize;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, index_in_cache));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+
+  codegen_->RecordPcInfo(invoke->GetDexPc());
+}
+
+void LocationsBuilderX86::VisitAdd(HAdd* add) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location(EAX));
+      locations->SetInAt(1, Location(ECX));
+      locations->SetOut(Location(EAX));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+  add->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
+  LocationSummary* locations = add->GetLocations();
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt:
+      DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>());
+      __ addl(locations->InAt(0).reg<Register>(), locations->InAt(1).reg<Register>());
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+}
+
 }  // namespace x86
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ad2a061..505237b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -57,6 +57,8 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
+  void LoadCurrentMethod(Register reg);
+
   Assembler* GetAssembler() const { return assembler_; }
 
  private:
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index ff743d8..d40990e 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -196,4 +196,42 @@
   TestCode(data, true, 0);
 }
 
+TEST(CodegenTest, ReturnAdd1) {
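+  // Loads 3 and 4 into two registers, adds them and returns the result, which must be 7.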
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 3 << 12 | 0,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::ADD_INT, 1 << 8 | 0,
+    Instruction::RETURN);
+
+  TestCode(data, true, 7);
+}
+
+TEST(CodegenTest, ReturnAdd2) {
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 3 << 12 | 0,
+    Instruction::CONST_4 | 4 << 12 | 1 << 8,
+    Instruction::ADD_INT_2ADDR | 1 << 12,
+    Instruction::RETURN);
+
+  TestCode(data, true, 7);
+}
+
+TEST(CodegenTest, ReturnAdd3) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 4 << 12 | 0 << 8,
+    Instruction::ADD_INT_LIT8, 3 << 8 | 0,
+    Instruction::RETURN);
+
+  TestCode(data, true, 7);
+}
+
+TEST(CodegenTest, ReturnAdd4) {
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 4 << 12 | 0 << 8,
+    Instruction::ADD_INT_LIT16, 3,
+    Instruction::RETURN);
+
+  TestCode(data, true, 7);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e74ed82..fc67486 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -180,11 +180,13 @@
 };
 
 #define FOR_EACH_INSTRUCTION(M)                            \
+  M(Add)                                                   \
   M(Equal)                                                 \
   M(Exit)                                                  \
   M(Goto)                                                  \
   M(If)                                                    \
   M(IntConstant)                                           \
+  M(InvokeStatic)                                          \
   M(LoadLocal)                                             \
   M(Local)                                                 \
   M(Return)                                                \
@@ -476,14 +478,36 @@
   DISALLOW_COPY_AND_ASSIGN(HIf);
 };
 
-// Instruction to check if two inputs are equal to each other.
-class HEqual : public HTemplateInstruction<2> {
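+// Base class for instructions with two inputs and a typed result.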
+class HBinaryOperation : public HTemplateInstruction<2> {
  public:
-  HEqual(HInstruction* first, HInstruction* second) {
-    SetRawInputAt(0, first);
-    SetRawInputAt(1, second);
+  HBinaryOperation(Primitive::Type result_type,
+                   HInstruction* left,
+                   HInstruction* right) : result_type_(result_type) {
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
   }
 
+  HInstruction* GetLeft() const { return InputAt(0); }
+  HInstruction* GetRight() const { return InputAt(1); }
+  Primitive::Type GetResultType() const { return result_type_; }
+
+  virtual bool IsCommutative() { return false; }
+
+ private:
+  const Primitive::Type result_type_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
+};
+
+// Instruction to check if two inputs are equal to each other.
+class HEqual : public HBinaryOperation {
+ public:
+  HEqual(HInstruction* first, HInstruction* second)
+      : HBinaryOperation(Primitive::kPrimBoolean, first, second) {}
+
+  virtual bool IsCommutative() { return true; }
+
   DECLARE_INSTRUCTION(Equal)
 
  private:
@@ -554,6 +578,55 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
+class HInvoke : public HInstruction {
+ public:
+  HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc)
+    : inputs_(arena, number_of_arguments),
+      dex_pc_(dex_pc) {
+    inputs_.SetSize(number_of_arguments);
+  }
+
+  virtual intptr_t InputCount() const { return inputs_.Size(); }
+  virtual HInstruction* InputAt(intptr_t i) const { return inputs_.Get(i); }
+
+  int32_t GetDexPc() const { return dex_pc_; }
+
+ protected:
+  GrowableArray<HInstruction*> inputs_;
+  const int32_t dex_pc_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HInvoke);
+};
+
+class HInvokeStatic : public HInvoke {
+ public:
+  HInvokeStatic(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc,
+                int32_t index_in_dex_cache)
+      : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) { }
+
+  uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
+
+  DECLARE_INSTRUCTION(InvokeStatic)
+
+ private:
+  uint32_t index_in_dex_cache_;
+
+  DISALLOW_COPY_AND_ASSIGN(HInvokeStatic);
+};
+
+class HAdd : public HBinaryOperation {
+ public:
+  HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
+      : HBinaryOperation(result_type, left, right) {}
+
+  virtual bool IsCommutative() { return true; }
+
+  DECLARE_INSTRUCTION(Add)
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HAdd);
+};
+
 class HGraphVisitor : public ValueObject {
  public:
   explicit HGraphVisitor(HGraph* graph) : graph_(graph) { }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 334b185..d19c40c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -62,17 +62,31 @@
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
 
+  // For testing purposes, we put a special marker on method names that should be compiled
+  // with this compiler. This makes sure we're not regressing.
+  bool shouldCompile = dex_compilation_unit.GetSymbol().find("00024opt_00024") != std::string::npos;
+
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraphBuilder builder(&arena);
+  HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file);
   HGraph* graph = builder.BuildGraph(*code_item);
   if (graph == nullptr) {
+    if (shouldCompile) {
+      LOG(FATAL) << "Could not build graph in optimizing compiler";
+    }
     return nullptr;
   }
 
   InstructionSet instruction_set = driver.GetInstructionSet();
+  // The optimizing compiler currently does not have a Thumb2 assembler.
+  if (instruction_set == kThumb2) {
+    instruction_set = kArm;
+  }
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
   if (codegen == nullptr) {
+    if (shouldCompile) {
+      LOG(FATAL) << "Could not find code generator for optimizing compiler";
+    }
     return nullptr;
   }
 
@@ -90,7 +104,7 @@
                             instruction_set,
                             allocator.GetMemory(),
                             codegen->GetFrameSize(),
-                            0, /* GPR spill mask, unused */
+                            codegen->GetCoreSpillMask(),
                             0, /* FPR spill mask, unused */
                             mapping_table,
                             vmap_table,
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index dbd078a..872a557 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1438,7 +1438,7 @@
 
 void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& callee_save_regs,
-                              const std::vector<ManagedRegister>& entry_spills) {
+                              const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 757a8a2..bb9207c 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -440,7 +440,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                           const std::vector<ManagedRegister>& callee_save_regs,
-                          const std::vector<ManagedRegister>& entry_spills);
+                          const ManagedRegisterEntrySpills& entry_spills);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 00ce923..f8b91d7 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -577,7 +577,7 @@
 
 void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                         const std::vector<ManagedRegister>& callee_save_regs,
-                        const std::vector<ManagedRegister>& entry_spills) {
+                        const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   CHECK(X0 == method_reg.AsArm64().AsCoreRegister());
 
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 1c47e77..44eb6ff 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -111,7 +111,7 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
-                  const std::vector<ManagedRegister>& entry_spills);
+                  const ManagedRegisterEntrySpills& entry_spills);
 
   // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index a7cb278..1921b28 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -23,6 +23,7 @@
 #include "arm64/assembler_arm64.h"
 #include "mips/assembler_mips.h"
 #include "x86/assembler_x86.h"
+#include "x86_64/assembler_x86_64.h"
 #include "globals.h"
 #include "memory_region.h"
 
@@ -111,9 +112,10 @@
       return new arm64::Arm64Assembler();
     case kMips:
       return new mips::MipsAssembler();
-    case kX86:  // Fall-through.
-    case kX86_64:
+    case kX86:
       return new x86::X86Assembler();
+    case kX86_64:
+      return new x86_64::X86_64Assembler();
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
       return NULL;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index cd4fc12..c23fd44 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -24,6 +24,7 @@
 #include "arm/constants_arm.h"
 #include "mips/constants_mips.h"
 #include "x86/constants_x86.h"
+#include "x86_64/constants_x86_64.h"
 #include "instruction_set.h"
 #include "managed_register.h"
 #include "memory_region.h"
@@ -47,6 +48,26 @@
 namespace x86 {
   class X86Assembler;
 }
+namespace x86_64 {
+  class X86_64Assembler;
+}
+
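+// A label that refers to a fixed address outside of the code being assembled.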
+class ExternalLabel {
+ public:
+  ExternalLabel(const char* name, uword address)
+      : name_(name), address_(address) {
+    DCHECK(name != nullptr);
+  }
+
+  const char* name() const { return name_; }
+  uword address() const {
+    return address_;
+  }
+
+ private:
+  const char* name_;
+  const uword address_;
+};
 
 class Label {
  public:
@@ -95,6 +116,7 @@
   friend class arm::ArmAssembler;
   friend class mips::MipsAssembler;
   friend class x86::X86Assembler;
+  friend class x86_64::X86_64Assembler;
 
   DISALLOW_COPY_AND_ASSIGN(Label);
 };
@@ -335,7 +357,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                           const std::vector<ManagedRegister>& callee_save_regs,
-                          const std::vector<ManagedRegister>& entry_spills) = 0;
+                          const ManagedRegisterEntrySpills& entry_spills) = 0;
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 04c9723..f007d28 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 #define ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 
+#include <vector>
+
 namespace art {
 
 namespace arm {
@@ -28,10 +30,15 @@
 namespace mips {
 class MipsManagedRegister;
 }
+
 namespace x86 {
 class X86ManagedRegister;
 }
 
+namespace x86_64 {
+class X86_64ManagedRegister;
+}
+
 class ManagedRegister {
  public:
   // ManagedRegister is a value class. There exists no method to change the
@@ -48,6 +55,7 @@
   arm64::Arm64ManagedRegister AsArm64() const;
   mips::MipsManagedRegister AsMips() const;
   x86::X86ManagedRegister AsX86() const;
+  x86_64::X86_64ManagedRegister AsX86_64() const;
 
   // It is valid to invoke Equals on and with a NoRegister.
   bool Equals(const ManagedRegister& other) const {
@@ -71,6 +79,44 @@
   int id_;
 };
 
+class ManagedRegisterSpill : public ManagedRegister {
+ public:
+  // A ManagedRegisterSpill carries the data type size and the location in the caller's frame.
+  // These additional attributes may be supplied by the calling convention (EntrySpills).
+  ManagedRegisterSpill(const ManagedRegister& other, uint32_t size, uint32_t spill_offset)
+      : ManagedRegister(other), size_(size), spill_offset_(spill_offset)  { }
+
+  explicit ManagedRegisterSpill(const ManagedRegister& other)
+      : ManagedRegister(other), size_(-1), spill_offset_(-1) { }
+
+  int32_t getSpillOffset() {
+    return spill_offset_;
+  }
+
+  int32_t getSize() {
+    return size_;
+  }
+
+ private:
+  int32_t size_;
+  int32_t spill_offset_;
+};
+
+class ManagedRegisterEntrySpills : public std::vector<ManagedRegisterSpill> {
+ public:
+  // A plain ManagedRegister carries no size or offset information.
+  // In that case its size and offset are determined by BuildFrame (the assembler).
+  void push_back(ManagedRegister __x) {
+    ManagedRegisterSpill spill(__x);
+    std::vector<ManagedRegisterSpill>::push_back(spill);
+  }
+
+  void push_back(ManagedRegisterSpill __x) {
+    std::vector<ManagedRegisterSpill>::push_back(__x);
+  }
+ private:
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_UTILS_MANAGED_REGISTER_H_
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ce21b84..dfd3306 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -538,7 +538,7 @@
 
 void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                const std::vector<ManagedRegister>& callee_save_regs,
-                               const std::vector<ManagedRegister>& entry_spills) {
+                               const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
 
   // Increase frame to required size.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 0f5f2fe..0d1a94c 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -357,7 +357,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                           const std::vector<ManagedRegister>& callee_save_regs,
-                          const std::vector<ManagedRegister>& entry_spills);
+                          const ManagedRegisterEntrySpills& entry_spills);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index a78d287..bd78eae 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -99,6 +99,7 @@
   }
   CurrentStats()->RecordAlloc(bytes, kind);
   top_ptr_ = ptr + rounded_bytes;
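+  // Tell Valgrind the returned bytes are addressable but uninitialized.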
+  VALGRIND_MAKE_MEM_UNDEFINED(ptr, bytes);
   VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes);
   return ptr;
 }
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index db8956d..ebbb43a 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -54,6 +54,16 @@
 }
 
 
+void X86Assembler::call(const ExternalLabel& label) {
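+  // 0xE8 is call rel32; the label's address is emitted as the 4-byte operand.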
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  intptr_t call_start = buffer_.GetPosition();
+  EmitUint8(0xE8);
+  EmitInt32(label.address());
+  static const intptr_t kCallExternalLabelSize = 5;
+  DCHECK_EQ((buffer_.GetPosition() - call_start), kCallExternalLabelSize);
+}
+
+
 void X86Assembler::pushl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x50 + reg);
@@ -1388,7 +1398,7 @@
 
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& spill_regs,
-                              const std::vector<ManagedRegister>& entry_spills) {
+                              const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
     pushl(spill_regs.at(i).AsX86().AsCpuRegister());
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e284d8c..f906a6f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -227,6 +227,7 @@
   void call(Register reg);
   void call(const Address& address);
   void call(Label* label);
+  void call(const ExternalLabel& label);
 
   void pushl(Register reg);
   void pushl(const Address& address);
@@ -466,7 +467,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                           const std::vector<ManagedRegister>& callee_save_regs,
-                          const std::vector<ManagedRegister>& entry_spills);
+                          const ManagedRegisterEntrySpills& entry_spills);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
new file mode 100644
index 0000000..fa302c9
--- /dev/null
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -0,0 +1,1951 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_x86_64.h"
+
+#include "base/casts.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "memory_region.h"
+#include "thread.h"
+
+namespace art {
+namespace x86_64 {
+
+std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
+  return os << "XMM" << static_cast<int>(reg);
+}
+
+std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
+  return os << "ST" << static_cast<int>(reg);
+}
+
+void X86_64Assembler::call(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitRegisterOperand(2, reg);
+}
+
+
+void X86_64Assembler::call(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitOperand(2, address);
+}
+
+
+void X86_64Assembler::call(Label* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xE8);
+  static const int kSize = 5;
+  EmitLabel(label, kSize);
+}
+
+
+void X86_64Assembler::pushq(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_rm(reg);
+  EmitUint8(0x50 + reg);
+}
+
+
+void X86_64Assembler::pushq(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitOperand(6, address);
+}
+
+
+void X86_64Assembler::pushq(const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (imm.is_int8()) {
+    EmitUint8(0x6A);
+    EmitUint8(imm.value() & 0xFF);
+  } else {
+    EmitUint8(0x68);
+    EmitImmediate(imm);
+  }
+}
+
+
+void X86_64Assembler::popq(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_rm(reg);
+  EmitUint8(0x58 + reg);
+}
+
+
+void X86_64Assembler::popq(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x8F);
+  EmitOperand(0, address);
+}
+
+
+void X86_64Assembler::movq(Register dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x48);  // REX.W
+  EmitUint8(0xB8 + dst);
+  EmitImmediate(imm);
+}
+
+
+void X86_64Assembler::movl(Register dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xB8 + dst);
+  EmitImmediate(imm);
+}
+
+
+void X86_64Assembler::movq(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x48);  // REX.W
+  EmitUint8(0x89);
+  EmitRegisterOperand(src, dst);
+}
+
+
+void X86_64Assembler::movl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x89);
+  EmitRegisterOperand(src, dst);
+}
+
+
+void X86_64Assembler::movq(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_reg(dst, 8);
+  EmitUint8(0x8B);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_reg(dst, 4);
+  EmitUint8(0x8B);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movq(const Address& dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_reg(src, 8);
+  EmitUint8(0x89);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::movl(const Address& dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_reg(src, 4);
+  EmitUint8(0x89);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC7);
+  EmitOperand(0, dst);
+  EmitImmediate(imm);
+}
+
+void X86_64Assembler::movl(const Address& dst, Label* lbl) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC7);
+  EmitOperand(0, dst);
+  EmitLabel(lbl, dst.length_ + 5);
+}
+
+void X86_64Assembler::movzxb(Register dst, ByteRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xB6);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::movzxb(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xB6);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movsxb(Register dst, ByteRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBE);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::movsxb(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBE);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movb(Register /*dst*/, const Address& /*src*/) {
+  LOG(FATAL) << "Use movzxb or movsxb instead.";
+}
+
+
+void X86_64Assembler::movb(const Address& dst, ByteRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x88);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC6);
+  EmitOperand(RAX, dst);
+  CHECK(imm.is_int8());
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
+void X86_64Assembler::movzxw(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xB7);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::movzxw(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xB7);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movsxw(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBF);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::movsxw(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBF);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movw(Register /*dst*/, const Address& /*src*/) {
+  LOG(FATAL) << "Use movzxw or movsxw instead.";
+}
+
+
+void X86_64Assembler::movw(const Address& dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOperandSizeOverride();
+  EmitUint8(0x89);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::leaq(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex_reg(dst, 8);
+  EmitUint8(0x8D);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::cmovl(Condition condition, Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + condition);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::setb(Condition condition, Register dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x90 + condition);
+  EmitOperand(0, Operand(dst));
+}
+
+
+void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x10);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x11);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x11);
+  EmitXmmRegisterOperand(src, dst);
+}
+
+
+void X86_64Assembler::movd(XmmRegister dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x6E);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::movd(Register dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x7E);
+  EmitOperand(src, Operand(dst));
+}
+
+
+void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x58);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x58);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x5C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x5C);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x59);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x59);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x5E);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x5E);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::flds(const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitOperand(0, src);
+}
+
+
+void X86_64Assembler::fstps(const Address& dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitOperand(3, dst);
+}
+
+
+void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x10);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x11);
+  EmitOperand(src, dst);
+}
+
+
+void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x11);
+  EmitXmmRegisterOperand(src, dst);
+}
+
+
+void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x58);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x58);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x5C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x5C);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x59);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x59);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x5E);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x5E);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvtsi2ss(XmmRegister dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x2A);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::cvtsi2sd(XmmRegister dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x2A);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::cvtss2si(Register dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x2D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x5A);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvtsd2si(Register dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x2D);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvttss2si(Register dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x2C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvttsd2si(Register dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x2C);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x5A);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0xE6);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitXmmRegisterOperand(a, b);
+}
+
+
+void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2F);
+  EmitXmmRegisterOperand(a, b);
+}
+
+
+void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF2);
+  EmitUint8(0x0F);
+  EmitUint8(0x51);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);
+  EmitUint8(0x0F);
+  EmitUint8(0x51);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x57);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x57);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x57);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x57);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x54);
+  EmitOperand(dst, src);
+}
+
+
+void X86_64Assembler::fldl(const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDD);
+  EmitOperand(0, src);
+}
+
+
+void X86_64Assembler::fstpl(const Address& dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDD);
+  EmitOperand(3, dst);
+}
+
+
+void X86_64Assembler::fnstcw(const Address& dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitOperand(7, dst);
+}
+
+
+void X86_64Assembler::fldcw(const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitOperand(5, src);
+}
+
+
+void X86_64Assembler::fistpl(const Address& dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDF);
+  EmitOperand(7, dst);
+}
+
+
+void X86_64Assembler::fistps(const Address& dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDB);
+  EmitOperand(3, dst);
+}
+
+
+void X86_64Assembler::fildl(const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDF);
+  EmitOperand(5, src);
+}
+
+
+void X86_64Assembler::fincstp() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitUint8(0xF7);
+}
+
+
+void X86_64Assembler::ffree(const Immediate& index) {
+  CHECK_LT(index.value(), 7);
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xDD);
+  EmitUint8(0xC0 + index.value());
+}
+
+
+void X86_64Assembler::fsin() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitUint8(0xFE);
+}
+
+
+void X86_64Assembler::fcos() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitUint8(0xFF);
+}
+
+
+void X86_64Assembler::fptan() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xD9);
+  EmitUint8(0xF2);
+}
+
+
+void X86_64Assembler::xchgl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x87);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86_64Assembler::xchgl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x87);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::cmpl(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(7, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::cmpl(Register reg0, Register reg1) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x3B);
+  EmitOperand(reg0, Operand(reg1));
+}
+
+
+void X86_64Assembler::cmpl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x3B);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::addl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x03);
+  EmitRegisterOperand(dst, src);
+}
+
+
+void X86_64Assembler::addl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x03);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::cmpl(const Address& address, Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x39);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(7, address, imm);
+}
+
+
+void X86_64Assembler::testl(Register reg1, Register reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex(reg1, reg2, 4);
+  EmitUint8(0x85);
+  EmitRegisterOperand(reg1, reg2);
+}
+
+
+void X86_64Assembler::testl(Register reg, const Immediate& immediate) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
+  // we only test the byte register to keep the encoding short.
+  if (immediate.is_uint8() && reg < 4) {
+    // Use zero-extended 8-bit immediate.
+    if (reg == RAX) {
+      EmitUint8(0xA8);
+    } else {
+      EmitUint8(0xF6);
+      EmitUint8(0xC0 + reg);
+    }
+    EmitUint8(immediate.value() & 0xFF);
+  } else if (reg == RAX) {
+    // Use short form if the destination is RAX.
+    EmitUint8(0xA9);
+    EmitImmediate(immediate);
+  } else {
+    EmitUint8(0xF7);
+    EmitOperand(0, Operand(reg));
+    EmitImmediate(immediate);
+  }
+}
+
+
+void X86_64Assembler::andl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x23);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::andl(Register dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(4, Operand(dst), imm);
+}
+
+
+void X86_64Assembler::orl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0B);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::orl(Register dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(1, Operand(dst), imm);
+}
+
+
+void X86_64Assembler::xorl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  rex(dst, src, 4);
+  EmitUint8(0x33);
+  EmitOperand(dst, Operand(src));
+}
+
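+// rex_reg/rex_rm emit a REX prefix for a single operand: registers R8-R15 are
+// renumbered into the low range (0-7) and the matching extension bit
+// (REX.R or REX.B) is set.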
+void X86_64Assembler::rex_reg(Register &dst, size_t size) {
+  Register src = kNoRegister;
+  rex(dst, src, size);
+}
+
+void X86_64Assembler::rex_rm(Register &src, size_t size) {
+  Register dst = kNoRegister;
+  rex(dst, src, size);
+}
+
+void X86_64Assembler::rex(Register &dst, Register &src, size_t size) {
+  uint8_t rex = 0;
+  // REX.WRXB
+  // W - 64-bit operand
+  // R - MODRM.reg
+  // X - SIB.index
+  // B - MODRM.rm/SIB.base
+  if (size == 8) {
+    rex |= 0x48;  // REX.W000
+  }
+  if (dst >= Register::R8 && dst < Register::kNumberOfCpuRegisters) {
+    rex |= 0x44;  // REX.0R00
+    dst = static_cast<Register>(dst - 8);
+  }
+  if (src >= Register::R8 && src < Register::kNumberOfCpuRegisters) {
+    rex |= 0x41;  // REX.000B
+    src = static_cast<Register>(src - 8);
+  }
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
+}
+
+void X86_64Assembler::addl(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(0, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::addq(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x48);  // REX.W
+  EmitComplex(0, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::addl(const Address& address, Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x01);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(0, address, imm);
+}
+
+
+void X86_64Assembler::adcl(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(2, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::adcl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x13);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::adcl(Register dst, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x13);
+  EmitOperand(dst, address);
+}
+
+
+void X86_64Assembler::subl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x2B);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::subl(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x48);  // REX.W: note this makes the subtract 64-bit despite the "l" suffix.
+  EmitComplex(5, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::subl(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x2B);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::cdq() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x99);
+}
+
+
+void X86_64Assembler::idivl(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitUint8(0xF8 | reg);
+}
+
+
+void X86_64Assembler::imull(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAF);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::imull(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x69);
+  EmitOperand(reg, Operand(reg));
+  EmitImmediate(imm);
+}
+
+
+void X86_64Assembler::imull(Register reg, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAF);
+  EmitOperand(reg, address);
+}
+
+
+void X86_64Assembler::imull(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(5, Operand(reg));
+}
+
+
+void X86_64Assembler::imull(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(5, address);
+}
+
+
+void X86_64Assembler::mull(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(4, Operand(reg));
+}
+
+
+void X86_64Assembler::mull(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(4, address);
+}
+
+
+void X86_64Assembler::sbbl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x1B);
+  EmitOperand(dst, Operand(src));
+}
+
+
+void X86_64Assembler::sbbl(Register reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(3, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::sbbl(Register dst, const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x1B);
+  EmitOperand(dst, address);
+}
+
+
+void X86_64Assembler::incl(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // 0x40 + reg is a REX prefix in 64-bit mode, so use the FF /0 encoding.
+  EmitUint8(0xFF);
+  EmitOperand(0, Operand(reg));
+}
+
+
+void X86_64Assembler::incl(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitOperand(0, address);
+}
+
+
+void X86_64Assembler::decl(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // 0x48 + reg is a REX prefix in 64-bit mode, so use the FF /1 encoding.
+  EmitUint8(0xFF);
+  EmitOperand(1, Operand(reg));
+}
+
+
+void X86_64Assembler::decl(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitOperand(1, address);
+}
+
+
+void X86_64Assembler::shll(Register reg, const Immediate& imm) {
+  EmitGenericShift(4, reg, imm);
+}
+
+
+void X86_64Assembler::shll(Register operand, Register shifter) {
+  EmitGenericShift(4, operand, shifter);
+}
+
+
+void X86_64Assembler::shrl(Register reg, const Immediate& imm) {
+  EmitGenericShift(5, reg, imm);
+}
+
+
+void X86_64Assembler::shrl(Register operand, Register shifter) {
+  EmitGenericShift(5, operand, shifter);
+}
+
+
+void X86_64Assembler::sarl(Register reg, const Immediate& imm) {
+  EmitGenericShift(7, reg, imm);
+}
+
+
+void X86_64Assembler::sarl(Register operand, Register shifter) {
+  EmitGenericShift(7, operand, shifter);
+}
+
+
+void X86_64Assembler::shld(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xA5);
+  EmitRegisterOperand(src, dst);
+}
+
+
+void X86_64Assembler::negl(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitOperand(3, Operand(reg));
+}
+
+
+void X86_64Assembler::notl(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF7);
+  EmitUint8(0xD0 | reg);
+}
+
+
+void X86_64Assembler::enter(const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC8);
+  CHECK(imm.is_uint16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8((imm.value() >> 8) & 0xFF);
+  EmitUint8(0x00);
+}
+
+
+void X86_64Assembler::leave() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC9);
+}
+
+
+void X86_64Assembler::ret() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC3);
+}
+
+
+void X86_64Assembler::ret(const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC2);
+  CHECK(imm.is_uint16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8((imm.value() >> 8) & 0xFF);
+}
+
+
+
+void X86_64Assembler::nop() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x90);
+}
+
+
+void X86_64Assembler::int3() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xCC);
+}
+
+
+void X86_64Assembler::hlt() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF4);
+}
+
+
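+// Conditional jump: for bound labels pick the short (2-byte) or long (6-byte)
+// Jcc form based on the displacement; unbound labels always use the long form
+// and are linked for later patching in Bind().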
+void X86_64Assembler::j(Condition condition, Label* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    static const int kLongSize = 6;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    if (IsInt(8, offset - kShortSize)) {
+      EmitUint8(0x70 + condition);
+      EmitUint8((offset - kShortSize) & 0xFF);
+    } else {
+      EmitUint8(0x0F);
+      EmitUint8(0x80 + condition);
+      EmitInt32(offset - kLongSize);
+    }
+  } else {
+    EmitUint8(0x0F);
+    EmitUint8(0x80 + condition);
+    EmitLabelLink(label);
+  }
+}
+
+
+void X86_64Assembler::jmp(Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitRegisterOperand(4, reg);
+}
+
+void X86_64Assembler::jmp(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xFF);
+  EmitOperand(4, address);
+}
+
+void X86_64Assembler::jmp(Label* label) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  if (label->IsBound()) {
+    static const int kShortSize = 2;
+    static const int kLongSize = 5;
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    if (IsInt(8, offset - kShortSize)) {
+      EmitUint8(0xEB);
+      EmitUint8((offset - kShortSize) & 0xFF);
+    } else {
+      EmitUint8(0xE9);
+      EmitInt32(offset - kLongSize);
+    }
+  } else {
+    EmitUint8(0xE9);
+    EmitLabelLink(label);
+  }
+}
+
+
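+// lock() and gs() emit only a prefix byte and return the assembler so the
+// prefixed instruction can be chained, e.g. lock()->cmpxchgl(...).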
+X86_64Assembler* X86_64Assembler::lock() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF0);
+  return this;
+}
+
+
+void X86_64Assembler::cmpxchgl(const Address& address, Register reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xB1);
+  EmitOperand(reg, address);
+}
+
+void X86_64Assembler::mfence() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAE);
+  EmitUint8(0xF0);
+}
+
+X86_64Assembler* X86_64Assembler::gs() {
+  // TODO: gs is a prefix and not an instruction
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x65);
+  return this;
+}
+
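+// Add a small immediate to a register, using incl/decl when the value is +/-1.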
+void X86_64Assembler::AddImmediate(Register reg, const Immediate& imm) {
+  int value = imm.value();
+  if (value > 0) {
+    if (value == 1) {
+      incl(reg);
+    } else if (value != 0) {
+      addl(reg, imm);
+    }
+  } else if (value < 0) {
+    value = -value;
+    if (value == 1) {
+      decl(reg);
+    } else if (value != 0) {
+      subl(reg, Immediate(value));
+    }
+  }
+}
+
+
+void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
+  // TODO: Need to have a code constants table.
+  int64_t constant = bit_cast<int64_t, double>(value);
+  pushq(Immediate(High32Bits(constant)));
+  pushq(Immediate(Low32Bits(constant)));
+  movsd(dst, Address(RSP, 0));
+  addq(RSP, Immediate(2 * kWordSize));
+}
+
+
+void X86_64Assembler::FloatNegate(XmmRegister f) {
+  static const struct {
+    uint32_t a;
+    uint32_t b;
+    uint32_t c;
+    uint32_t d;
+  } float_negate_constant __attribute__((aligned(16))) =
+      { 0x80000000, 0x00000000, 0x80000000, 0x00000000 };
+  xorps(f, Address::Absolute(reinterpret_cast<uword>(&float_negate_constant)));
+}
+
+
+void X86_64Assembler::DoubleNegate(XmmRegister d) {
+  static const struct {
+    uint64_t a;
+    uint64_t b;
+  } double_negate_constant __attribute__((aligned(16))) =
+      {0x8000000000000000LL, 0x8000000000000000LL};
+  xorpd(d, Address::Absolute(reinterpret_cast<uword>(&double_negate_constant)));
+}
+
+
+void X86_64Assembler::DoubleAbs(XmmRegister reg) {
+  static const struct {
+    uint64_t a;
+    uint64_t b;
+  } double_abs_constant __attribute__((aligned(16))) =
+      {0x7FFFFFFFFFFFFFFFLL, 0x7FFFFFFFFFFFFFFFLL};
+  andpd(reg, Address::Absolute(reinterpret_cast<uword>(&double_abs_constant)));
+}
+
+
+void X86_64Assembler::Align(int alignment, int offset) {
+  CHECK(IsPowerOfTwo(alignment));
+  // Emit nop instructions until the real position is aligned.
+  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
+    nop();
+  }
+}
+
+
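+// Bind the label to the current buffer position and patch every forward
+// reference linked to it with the now-known 32-bit displacement.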
+void X86_64Assembler::Bind(Label* label) {
+  int bound = buffer_.Size();
+  CHECK(!label->IsBound());  // Labels can only be bound once.
+  while (label->IsLinked()) {
+    int position = label->LinkPosition();
+    int next = buffer_.Load<int32_t>(position);
+    buffer_.Store<int32_t>(position, bound - (position + 4));
+    label->position_ = next;
+  }
+  label->BindTo(bound);
+}
+
+
+void X86_64Assembler::EmitOperand(int reg_or_opcode, const Operand& operand) {
+  CHECK_GE(reg_or_opcode, 0);
+  CHECK_LT(reg_or_opcode, 8);
+  const int length = operand.length_;
+  CHECK_GT(length, 0);
+  // Emit the ModRM byte updated with the given reg value.
+  CHECK_EQ(operand.encoding_[0] & 0x38, 0);
+  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
+  // Emit the rest of the encoded operand.
+  for (int i = 1; i < length; i++) {
+    EmitUint8(operand.encoding_[i]);
+  }
+}
+
+
+void X86_64Assembler::EmitImmediate(const Immediate& imm) {
+  EmitInt32(imm.value());
+}
+
+
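+// Emit an ALU instruction (selected by the reg_or_opcode extension) with an
+// immediate, preferring the sign-extended 8-bit form, then the short RAX
+// form, then the full 32-bit immediate encoding.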
+void X86_64Assembler::EmitComplex(int reg_or_opcode,
+                               const Operand& operand,
+                               const Immediate& immediate) {
+  CHECK_GE(reg_or_opcode, 0);
+  CHECK_LT(reg_or_opcode, 8);
+  if (immediate.is_int8()) {
+    // Use sign-extended 8-bit immediate.
+    EmitUint8(0x83);
+    EmitOperand(reg_or_opcode, operand);
+    EmitUint8(immediate.value() & 0xFF);
+  } else if (operand.IsRegister(RAX)) {
+    // Use short form if the destination is RAX.
+    EmitUint8(0x05 + (reg_or_opcode << 3));
+    EmitImmediate(immediate);
+  } else {
+    EmitUint8(0x81);
+    EmitOperand(reg_or_opcode, operand);
+    EmitImmediate(immediate);
+  }
+}
+
+
+void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
+  if (label->IsBound()) {
+    int offset = label->Position() - buffer_.Size();
+    CHECK_LE(offset, 0);
+    EmitInt32(offset - instruction_size);
+  } else {
+    EmitLabelLink(label);
+  }
+}
+
+
+void X86_64Assembler::EmitLabelLink(Label* label) {
+  CHECK(!label->IsBound());
+  int position = buffer_.Size();
+  EmitInt32(label->position_);
+  label->LinkTo(position);
+}
+
+
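+// Shift by immediate: opcode 0xD1 shifts by one, 0xC1 shifts by an 8-bit
+// immediate; the shift kind is selected by the reg_or_opcode extension.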
+void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
+                                    Register reg,
+                                    const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int8());
+  if (imm.value() == 1) {
+    EmitUint8(0xD1);
+    EmitOperand(reg_or_opcode, Operand(reg));
+  } else {
+    EmitUint8(0xC1);
+    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitUint8(imm.value() & 0xFF);
+  }
+}
+
+
+void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
+                                    Register operand,
+                                    Register shifter) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK_EQ(shifter, RCX);
+  EmitUint8(0xD3);
+  EmitOperand(reg_or_opcode, Operand(operand));
+}
+
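+// Build the managed frame: push callee-save registers, reserve the rest of the
+// frame, push the method pointer, then write any entry spills to their stack slots.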
+void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                              const std::vector<ManagedRegister>& spill_regs,
+                              const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+  }
+  // Return address then method on stack.
+  addq(RSP, Immediate(-frame_size + (spill_regs.size() * kPointerSize) +
+                      kPointerSize /*method*/ + kPointerSize /*return address*/));
+  pushq(method_reg.AsX86_64().AsCpuRegister());
+
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    ManagedRegisterSpill spill = entry_spills.at(i);
+    if (spill.AsX86_64().IsCpuRegister()) {
+      if (spill.getSize() == 8) {
+        movq(Address(RSP, frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        movl(Address(RSP, frame_size + spill.getSpillOffset()), spill.AsX86_64().AsCpuRegister());
+      }
+    } else {
+      if (spill.getSize() == 8) {
+        movsd(Address(RSP, frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
+      } else {
+        CHECK_EQ(spill.getSize(), 4);
+        movss(Address(RSP, frame_size + spill.getSpillOffset()), spill.AsX86_64().AsXmmRegister());
+      }
+    }
+  }
+}
+
+void X86_64Assembler::RemoveFrame(size_t frame_size,
+                            const std::vector<ManagedRegister>& spill_regs) {
+  CHECK_ALIGNED(frame_size, kStackAlignment);
+  addq(RSP, Immediate(frame_size - (spill_regs.size() * kPointerSize) - kPointerSize));
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
+  }
+  ret();
+}
+
+void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  addq(RSP, Immediate(-adjust));
+}
+
+void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
+  CHECK_ALIGNED(adjust, kStackAlignment);
+  addq(RSP, Immediate(adjust));
+}
+
+void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (src.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (src.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      movl(Address(RSP, offs), src.AsCpuRegister());
+    } else {
+      CHECK_EQ(8u, size);
+      movq(Address(RSP, offs), src.AsCpuRegister());
+    }
+  } else if (src.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    movq(Address(RSP, offs), src.AsRegisterPairLow());
+    movq(Address(RSP, FrameOffset(offs.Int32Value()+4)),
+         src.AsRegisterPairHigh());
+  } else if (src.IsX87Register()) {
+    if (size == 4) {
+      fstps(Address(RSP, offs));
+    } else {
+      fstpl(Address(RSP, offs));
+    }
+  } else {
+    CHECK(src.IsXmmRegister());
+    if (size == 4) {
+      movss(Address(RSP, offs), src.AsXmmRegister());
+    } else {
+      movsd(Address(RSP, offs), src.AsXmmRegister());
+    }
+  }
+}
+
+void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  movq(Address(RSP, dest), src.AsCpuRegister());
+}
+
+void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  CHECK(src.IsCpuRegister());
+  movq(Address(RSP, dest), src.AsCpuRegister());
+}
+
+void X86_64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
+                                         ManagedRegister) {
+  movl(Address(RSP, dest), Immediate(imm));  // TODO(64) movq?
+}
+
+void X86_64Assembler::StoreImmediateToThread(ThreadOffset dest, uint32_t imm,
+                                          ManagedRegister) {
+  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
+}
+
+void X86_64Assembler::StoreStackOffsetToThread(ThreadOffset thr_offs,
+                                            FrameOffset fr_offs,
+                                            ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  leaq(scratch.AsCpuRegister(), Address(RSP, fr_offs));
+  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64Assembler::StoreStackPointerToThread(ThreadOffset thr_offs) {
+  gs()->movq(Address::Absolute(thr_offs, true), RSP);
+}
+
+void X86_64Assembler::StoreLabelToThread(ThreadOffset thr_offs, Label* lbl) {
+  gs()->movl(Address::Absolute(thr_offs, true), lbl);  // TODO(64) movq?
+}
+
+void X86_64Assembler::StoreSpanning(FrameOffset /*dst*/, ManagedRegister /*src*/,
+                                 FrameOffset /*in_off*/, ManagedRegister /*scratch*/) {
+  UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
+}
+
+void X86_64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    if (size == 4) {
+      CHECK_EQ(4u, size);
+      movl(dest.AsCpuRegister(), Address(RSP, src));
+    } else {
+      CHECK_EQ(8u, size);
+      movq(dest.AsCpuRegister(), Address(RSP, src));
+    }
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(0u, size);
+    movq(dest.AsRegisterPairLow(), Address(RSP, src));
+    movq(dest.AsRegisterPairHigh(), Address(RSP, FrameOffset(src.Int32Value()+4)));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      flds(Address(RSP, src));
+    } else {
+      fldl(Address(RSP, src));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      movss(dest.AsXmmRegister(), Address(RSP, src));
+    } else {
+      movsd(dest.AsXmmRegister(), Address(RSP, src));
+    }
+  }
+}
+
+void X86_64Assembler::Load(ManagedRegister mdest, ThreadOffset src, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  if (dest.IsNoRegister()) {
+    CHECK_EQ(0u, size);
+  } else if (dest.IsCpuRegister()) {
+    CHECK_EQ(4u, size);
+    gs()->movq(dest.AsCpuRegister(), Address::Absolute(src, true));
+  } else if (dest.IsRegisterPair()) {
+    CHECK_EQ(8u, size);
+    gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
+    gs()->movq(dest.AsRegisterPairHigh(), Address::Absolute(ThreadOffset(src.Int32Value()+4), true));
+  } else if (dest.IsX87Register()) {
+    if (size == 4) {
+      gs()->flds(Address::Absolute(src, true));
+    } else {
+      gs()->fldl(Address::Absolute(src, true));
+    }
+  } else {
+    CHECK(dest.IsXmmRegister());
+    if (size == 4) {
+      gs()->movss(dest.AsXmmRegister(), Address::Absolute(src, true));
+    } else {
+      gs()->movsd(dest.AsXmmRegister(), Address::Absolute(src, true));
+    }
+  }
+}
+
+void X86_64Assembler::LoadRef(ManagedRegister mdest, FrameOffset  src) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  movq(dest.AsCpuRegister(), Address(RSP, src));
+}
+
+void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base,
+                           MemberOffset offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister() && base.AsX86_64().IsCpuRegister());
+  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
+}
+
+void X86_64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
+                              Offset offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister() && base.AsX86_64().IsCpuRegister());
+  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
+}
+
+void X86_64Assembler::LoadRawPtrFromThread(ManagedRegister mdest,
+                                        ThreadOffset offs) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  CHECK(dest.IsCpuRegister());
+  gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
+}
+
+void X86_64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    movsxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    movsxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64Assembler::ZeroExtend(ManagedRegister mreg, size_t size) {
+  X86_64ManagedRegister reg = mreg.AsX86_64();
+  CHECK(size == 1 || size == 2) << size;
+  CHECK(reg.IsCpuRegister()) << reg;
+  if (size == 1) {
+    movzxb(reg.AsCpuRegister(), reg.AsByteRegister());
+  } else {
+    movzxw(reg.AsCpuRegister(), reg.AsCpuRegister());
+  }
+}
+
+void X86_64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  X86_64ManagedRegister src = msrc.AsX86_64();
+  if (!dest.Equals(src)) {
+    if (dest.IsCpuRegister() && src.IsCpuRegister()) {
+      movq(dest.AsCpuRegister(), src.AsCpuRegister());
+    } else if (src.IsX87Register() && dest.IsXmmRegister()) {
+      // Pass via stack and pop X87 register
+      subl(RSP, Immediate(16));
+      if (size == 4) {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        fstps(Address(RSP, 0));
+        movss(dest.AsXmmRegister(), Address(RSP, 0));
+      } else {
+        CHECK_EQ(src.AsX87Register(), ST0);
+        fstpl(Address(RSP, 0));
+        movsd(dest.AsXmmRegister(), Address(RSP, 0));
+      }
+      addq(RSP, Immediate(16));
+    } else {
+      // TODO: x87, SSE
+      UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
+    }
+  }
+}
+
+void X86_64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
+                           ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  movl(scratch.AsCpuRegister(), Address(RSP, src));
+  movl(Address(RSP, dest), scratch.AsCpuRegister());
+}
+
+void X86_64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                        ThreadOffset thr_offs,
+                                        ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
+  Store(fr_offs, scratch, 8);
+}
+
+void X86_64Assembler::CopyRawPtrToThread(ThreadOffset thr_offs,
+                                      FrameOffset fr_offs,
+                                      ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  Load(scratch, fr_offs, 8);
+  gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+}
+
+void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src,
+                        ManagedRegister mscratch,
+                        size_t size) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  if (scratch.IsCpuRegister() && size == 8) {
+    Load(scratch, src, 4);
+    Store(dest, scratch, 4);
+    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
+    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+  } else {
+    Load(scratch, src, size);
+    Store(dest, scratch, size);
+  }
+}
+
+void X86_64Assembler::Copy(FrameOffset /*dst*/, ManagedRegister /*src_base*/, Offset /*src_offset*/,
+                        ManagedRegister /*scratch*/, size_t /*size*/) {
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86_64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
+                        ManagedRegister scratch, size_t size) {
+  CHECK(scratch.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  pushq(Address(RSP, src));
+  popq(Address(dest_base.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                        ManagedRegister mscratch, size_t size) {
+  Register scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  movq(scratch, Address(RSP, src_base));
+  movq(scratch, Address(scratch, src_offset));
+  movq(Address(RSP, dest), scratch);
+}
+
+void X86_64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
+                        ManagedRegister src, Offset src_offset,
+                        ManagedRegister scratch, size_t size) {
+  CHECK_EQ(size, 4u);
+  CHECK(scratch.IsNoRegister());
+  pushq(Address(src.AsX86_64().AsCpuRegister(), src_offset));
+  popq(Address(dest.AsX86_64().AsCpuRegister(), dest_offset));
+}
+
+void X86_64Assembler::Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+                        ManagedRegister mscratch, size_t size) {
+  Register scratch = mscratch.AsX86_64().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  CHECK_EQ(dest.Int32Value(), src.Int32Value());
+  movq(scratch, Address(RSP, src));
+  pushq(Address(scratch, src_offset));
+  popq(Address(scratch, dest_offset));
+}
+
+void X86_64Assembler::MemoryBarrier(ManagedRegister) {
+#if ANDROID_SMP != 0
+  mfence();
+#endif
+}
+
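+// Materialize the address of a SIRT entry in out_reg; when null is allowed,
+// a zero in_reg leaves out_reg as NULL instead of the stack address.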
+void X86_64Assembler::CreateSirtEntry(ManagedRegister mout_reg,
+                                   FrameOffset sirt_offset,
+                                   ManagedRegister min_reg, bool null_allowed) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  if (in_reg.IsNoRegister()) {  // TODO(64): && null_allowed
+    // Use out_reg as indicator of NULL
+    in_reg = out_reg;
+    // TODO: movzwl
+    movl(in_reg.AsCpuRegister(), Address(RSP, sirt_offset));
+  }
+  CHECK(in_reg.IsCpuRegister());
+  CHECK(out_reg.IsCpuRegister());
+  VerifyObject(in_reg, null_allowed);
+  if (null_allowed) {
+    Label null_arg;
+    if (!out_reg.Equals(in_reg)) {
+      xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+    }
+    testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+    j(kZero, &null_arg);
+    leaq(out_reg.AsCpuRegister(), Address(RSP, sirt_offset));
+    Bind(&null_arg);
+  } else {
+    leaq(out_reg.AsCpuRegister(), Address(RSP, sirt_offset));
+  }
+}
+
+void X86_64Assembler::CreateSirtEntry(FrameOffset out_off,
+                                   FrameOffset sirt_offset,
+                                   ManagedRegister mscratch,
+                                   bool null_allowed) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  CHECK(scratch.IsCpuRegister());
+  if (null_allowed) {
+    Label null_arg;
+    movl(scratch.AsCpuRegister(), Address(RSP, sirt_offset));
+    testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    j(kZero, &null_arg);
+    leaq(scratch.AsCpuRegister(), Address(RSP, sirt_offset));
+    Bind(&null_arg);
+  } else {
+    leaq(scratch.AsCpuRegister(), Address(RSP, sirt_offset));
+  }
+  Store(out_off, scratch, 8);
+}
+
+// Given a SIRT entry, load the associated reference.
+void X86_64Assembler::LoadReferenceFromSirt(ManagedRegister mout_reg,
+                                         ManagedRegister min_reg) {
+  X86_64ManagedRegister out_reg = mout_reg.AsX86_64();
+  X86_64ManagedRegister in_reg = min_reg.AsX86_64();
+  CHECK(out_reg.IsCpuRegister());
+  CHECK(in_reg.IsCpuRegister());
+  Label null_arg;
+  if (!out_reg.Equals(in_reg)) {
+    xorl(out_reg.AsCpuRegister(), out_reg.AsCpuRegister());
+  }
+  testl(in_reg.AsCpuRegister(), in_reg.AsCpuRegister());
+  j(kZero, &null_arg);
+  movq(out_reg.AsCpuRegister(), Address(in_reg.AsCpuRegister(), 0));
+  Bind(&null_arg);
+}
+
+void X86_64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+  // TODO: not validating references
+}
+
+void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  call(Address(base.AsCpuRegister(), offset.Int32Value()));
+  // TODO: place reference map on call
+}
+
+void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Register scratch = mscratch.AsX86_64().AsCpuRegister();
+  movq(scratch, Address(RSP, base));
+  call(Address(scratch, offset));
+}
+
+void X86_64Assembler::Call(ThreadOffset offset, ManagedRegister /*mscratch*/) {
+  gs()->call(Address::Absolute(offset, true));
+}
+
+void X86_64Assembler::GetCurrentThread(ManagedRegister tr) {
+  gs()->movq(tr.AsX86_64().AsCpuRegister(),
+             Address::Absolute(Thread::SelfOffset(), true));
+}
+
+void X86_64Assembler::GetCurrentThread(FrameOffset offset,
+                                    ManagedRegister mscratch) {
+  X86_64ManagedRegister scratch = mscratch.AsX86_64();
+  gs()->movq(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset(), true));
+  movq(Address(RSP, offset), scratch.AsCpuRegister());
+}
+
+void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath(stack_adjust);
+  buffer_.EnqueueSlowPath(slow);
+  gs()->cmpl(Address::Absolute(Thread::ExceptionOffset(), true), Immediate(0));
+  j(kNotEqual, slow->Entry());
+}
+
+void X86ExceptionSlowPath::Emit(Assembler *sasm) {
+  X86_64Assembler* sp_asm = down_cast<X86_64Assembler*>(sasm);
+#define __ sp_asm->
+  __ Bind(&entry_);
+  // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    __ DecreaseFrameSize(stack_adjust_);
+  }
+  // Pass exception as argument in RAX
+  __ gs()->movq(RAX, Address::Absolute(Thread::ExceptionOffset(), true));  // TODO(64): Pass argument via RDI
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(pDeliverException), true));
+  // This call should never return.
+  __ int3();
+#undef __
+}
+
+static const char* kRegisterNames[] = {
+  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+  "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+std::ostream& operator<<(std::ostream& os, const Register& rhs) {
+  if (rhs >= RAX && rhs <= R15) {
+    os << kRegisterNames[rhs];
+  } else {
+    os << "Register[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+}  // namespace x86_64
+}  // namespace art
+
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
new file mode 100644
index 0000000..d48ba72
--- /dev/null
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -0,0 +1,657 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
+
+#include <vector>
+#include "base/macros.h"
+#include "constants_x86_64.h"
+#include "globals.h"
+#include "managed_register_x86_64.h"
+#include "offsets.h"
+#include "utils/assembler.h"
+#include "utils.h"
+
+namespace art {
+namespace x86_64 {
+
+class Immediate {
+ public:
+  explicit Immediate(int32_t value) : value_(value) {}
+
+  int32_t value() const { return value_; }
+
+  bool is_int8() const { return IsInt(8, value_); }
+  bool is_uint8() const { return IsUint(8, value_); }
+  bool is_uint16() const { return IsUint(16, value_); }
+
+ private:
+  const int32_t value_;
+
+  DISALLOW_COPY_AND_ASSIGN(Immediate);
+};
+
+
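+// Wraps the ModRM/SIB/displacement bytes of a register or memory operand;
+// Address (below) builds the usual [base + index*scale + disp] forms on top of it.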
+class Operand {
+ public:
+  uint8_t mod() const {
+    return (encoding_at(0) >> 6) & 3;
+  }
+
+  Register rm() const {
+    return static_cast<Register>(encoding_at(0) & 7);
+  }
+
+  ScaleFactor scale() const {
+    return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
+  }
+
+  Register index() const {
+    return static_cast<Register>((encoding_at(1) >> 3) & 7);
+  }
+
+  Register base() const {
+    return static_cast<Register>(encoding_at(1) & 7);
+  }
+
+  int8_t disp8() const {
+    CHECK_GE(length_, 2);
+    return static_cast<int8_t>(encoding_[length_ - 1]);
+  }
+
+  int32_t disp32() const {
+    CHECK_GE(length_, 5);
+    int32_t value;
+    memcpy(&value, &encoding_[length_ - 4], sizeof(value));
+    return value;
+  }
+
+  bool IsRegister(Register reg) const {
+    return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
+        && ((encoding_[0] & 0x07) == reg);  // Register codes match.
+  }
+
+ protected:
+  // Operand can be subclassed (e.g. Address).
+  Operand() : length_(0) { }
+
+  void SetModRM(int mod, Register rm) {
+    CHECK_EQ(mod & ~3, 0);
+    encoding_[0] = (mod << 6) | rm;
+    length_ = 1;
+  }
+
+  void SetSIB(ScaleFactor scale, Register index, Register base) {
+    CHECK_EQ(length_, 1);
+    CHECK_EQ(scale & ~3, 0);
+    encoding_[1] = (scale << 6) | (index << 3) | base;
+    length_ = 2;
+  }
+
+  void SetDisp8(int8_t disp) {
+    CHECK(length_ == 1 || length_ == 2);
+    encoding_[length_++] = static_cast<uint8_t>(disp);
+  }
+
+  void SetDisp32(int32_t disp) {
+    CHECK(length_ == 1 || length_ == 2);
+    int disp_size = sizeof(disp);
+    memmove(&encoding_[length_], &disp, disp_size);
+    length_ += disp_size;
+  }
+
+ private:
+  byte length_;
+  byte encoding_[6];
+  byte padding_;
+
+  explicit Operand(Register reg) { SetModRM(3, reg); }
+
+  // Get the operand encoding byte at the given index.
+  uint8_t encoding_at(int index) const {
+    CHECK_GE(index, 0);
+    CHECK_LT(index, length_);
+    return encoding_[index];
+  }
+
+  friend class X86_64Assembler;
+
+  DISALLOW_COPY_AND_ASSIGN(Operand);
+};
+
+
+class Address : public Operand {
+ public:
+  Address(Register base, int32_t disp) {
+    Init(base, disp);
+  }
+
+  Address(Register base, Offset disp) {
+    Init(base, disp.Int32Value());
+  }
+
+  Address(Register base, FrameOffset disp) {
+    CHECK_EQ(base, RSP);
+    Init(RSP, disp.Int32Value());
+  }
+
+  Address(Register base, MemberOffset disp) {
+    Init(base, disp.Int32Value());
+  }
+
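+  // Pick the shortest ModRM encoding for [base + disp]: no displacement when it
+  // is zero (except for RBP), an 8-bit displacement when it fits, else 32 bits.
+  // RSP as a base always needs a trailing SIB byte.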
+  void Init(Register base, int32_t disp) {
+    if (disp == 0 && base != RBP) {
+      SetModRM(0, base);
+      if (base == RSP) SetSIB(TIMES_1, RSP, base);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, base);
+      if (base == RSP) SetSIB(TIMES_1, RSP, base);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, base);
+      if (base == RSP) SetSIB(TIMES_1, RSP, base);
+      SetDisp32(disp);
+    }
+  }
+
+
+  Address(Register index, ScaleFactor scale, int32_t disp) {
+    CHECK_NE(index, RSP);  // Illegal addressing mode.
+    SetModRM(0, RSP);
+    SetSIB(scale, index, RBP);
+    SetDisp32(disp);
+  }
+
+  Address(Register base, Register index, ScaleFactor scale, int32_t disp) {
+    CHECK_NE(index, RSP);  // Illegal addressing mode.
+    if (disp == 0 && base != RBP) {
+      SetModRM(0, RSP);
+      SetSIB(scale, index, base);
+    } else if (disp >= -128 && disp <= 127) {
+      SetModRM(1, RSP);
+      SetSIB(scale, index, base);
+      SetDisp8(disp);
+    } else {
+      SetModRM(2, RSP);
+      SetSIB(scale, index, base);
+      SetDisp32(disp);
+    }
+  }
+
+  static Address Absolute(uword addr, bool has_rip = false) {
+    Address result;
+    if (has_rip) {
+      result.SetModRM(0, RSP);
+      result.SetSIB(TIMES_1, RSP, RBP);
+      result.SetDisp32(addr);
+    } else {
+      result.SetModRM(0, RBP);
+      result.SetDisp32(addr);
+    }
+    return result;
+  }
+
+  static Address Absolute(ThreadOffset addr, bool has_rip = false) {
+    return Absolute(addr.Int32Value(), has_rip);
+  }
+
+ private:
+  Address() {}
+
+  DISALLOW_COPY_AND_ASSIGN(Address);
+};
+
+
+class X86_64Assembler : public Assembler {
+ public:
+  X86_64Assembler() {}
+  virtual ~X86_64Assembler() {}
+
+  /*
+   * Emit Machine Instructions.
+   */
+  void call(Register reg);
+  void call(const Address& address);
+  void call(Label* label);
+
+  void pushq(Register reg);
+  void pushq(const Address& address);
+  void pushq(const Immediate& imm);
+
+  void popq(Register reg);
+  void popq(const Address& address);
+
+  void movq(Register dst, const Immediate& src);
+  void movl(Register dst, const Immediate& src);
+  void movq(Register dst, Register src);
+  void movl(Register dst, Register src);
+
+  void movq(Register dst, const Address& src);
+  void movl(Register dst, const Address& src);
+  void movq(const Address& dst, Register src);
+  void movl(const Address& dst, Register src);
+  void movl(const Address& dst, const Immediate& imm);
+  void movl(const Address& dst, Label* lbl);
+
+  void movzxb(Register dst, ByteRegister src);
+  void movzxb(Register dst, const Address& src);
+  void movsxb(Register dst, ByteRegister src);
+  void movsxb(Register dst, const Address& src);
+  void movb(Register dst, const Address& src);
+  void movb(const Address& dst, ByteRegister src);
+  void movb(const Address& dst, const Immediate& imm);
+
+  void movzxw(Register dst, Register src);
+  void movzxw(Register dst, const Address& src);
+  void movsxw(Register dst, Register src);
+  void movsxw(Register dst, const Address& src);
+  void movw(Register dst, const Address& src);
+  void movw(const Address& dst, Register src);
+
+  void leaq(Register dst, const Address& src);
+
+  void cmovl(Condition condition, Register dst, Register src);
+
+  void setb(Condition condition, Register dst);
+
+  void movss(XmmRegister dst, const Address& src);
+  void movss(const Address& dst, XmmRegister src);
+  void movss(XmmRegister dst, XmmRegister src);
+
+  void movd(XmmRegister dst, Register src);
+  void movd(Register dst, XmmRegister src);
+
+  void addss(XmmRegister dst, XmmRegister src);
+  void addss(XmmRegister dst, const Address& src);
+  void subss(XmmRegister dst, XmmRegister src);
+  void subss(XmmRegister dst, const Address& src);
+  void mulss(XmmRegister dst, XmmRegister src);
+  void mulss(XmmRegister dst, const Address& src);
+  void divss(XmmRegister dst, XmmRegister src);
+  void divss(XmmRegister dst, const Address& src);
+
+  void movsd(XmmRegister dst, const Address& src);
+  void movsd(const Address& dst, XmmRegister src);
+  void movsd(XmmRegister dst, XmmRegister src);
+
+  void addsd(XmmRegister dst, XmmRegister src);
+  void addsd(XmmRegister dst, const Address& src);
+  void subsd(XmmRegister dst, XmmRegister src);
+  void subsd(XmmRegister dst, const Address& src);
+  void mulsd(XmmRegister dst, XmmRegister src);
+  void mulsd(XmmRegister dst, const Address& src);
+  void divsd(XmmRegister dst, XmmRegister src);
+  void divsd(XmmRegister dst, const Address& src);
+
+  void cvtsi2ss(XmmRegister dst, Register src);
+  void cvtsi2sd(XmmRegister dst, Register src);
+
+  void cvtss2si(Register dst, XmmRegister src);
+  void cvtss2sd(XmmRegister dst, XmmRegister src);
+
+  void cvtsd2si(Register dst, XmmRegister src);
+  void cvtsd2ss(XmmRegister dst, XmmRegister src);
+
+  void cvttss2si(Register dst, XmmRegister src);
+  void cvttsd2si(Register dst, XmmRegister src);
+
+  void cvtdq2pd(XmmRegister dst, XmmRegister src);
+
+  void comiss(XmmRegister a, XmmRegister b);
+  void comisd(XmmRegister a, XmmRegister b);
+
+  void sqrtsd(XmmRegister dst, XmmRegister src);
+  void sqrtss(XmmRegister dst, XmmRegister src);
+
+  void xorpd(XmmRegister dst, const Address& src);
+  void xorpd(XmmRegister dst, XmmRegister src);
+  void xorps(XmmRegister dst, const Address& src);
+  void xorps(XmmRegister dst, XmmRegister src);
+
+  void andpd(XmmRegister dst, const Address& src);
+
+  void flds(const Address& src);
+  void fstps(const Address& dst);
+
+  void fldl(const Address& src);
+  void fstpl(const Address& dst);
+
+  void fnstcw(const Address& dst);
+  void fldcw(const Address& src);
+
+  void fistpl(const Address& dst);
+  void fistps(const Address& dst);
+  void fildl(const Address& src);
+
+  void fincstp();
+  void ffree(const Immediate& index);
+
+  void fsin();
+  void fcos();
+  void fptan();
+
+  void xchgl(Register dst, Register src);
+  void xchgl(Register reg, const Address& address);
+
+  void cmpl(Register reg, const Immediate& imm);
+  void cmpl(Register reg0, Register reg1);
+  void cmpl(Register reg, const Address& address);
+
+  void cmpl(const Address& address, Register reg);
+  void cmpl(const Address& address, const Immediate& imm);
+
+  void testl(Register reg1, Register reg2);
+  void testl(Register reg, const Immediate& imm);
+
+  void andl(Register dst, const Immediate& imm);
+  void andl(Register dst, Register src);
+
+  void orl(Register dst, const Immediate& imm);
+  void orl(Register dst, Register src);
+
+  void xorl(Register dst, Register src);
+
+  void addl(Register dst, Register src);
+  void addq(Register reg, const Immediate& imm);
+  void addl(Register reg, const Immediate& imm);
+  void addl(Register reg, const Address& address);
+
+  void addl(const Address& address, Register reg);
+  void addl(const Address& address, const Immediate& imm);
+
+  void adcl(Register dst, Register src);
+  void adcl(Register reg, const Immediate& imm);
+  void adcl(Register dst, const Address& address);
+
+  void subl(Register dst, Register src);
+  void subl(Register reg, const Immediate& imm);
+  void subl(Register reg, const Address& address);
+
+  void cdq();
+
+  void idivl(Register reg);
+
+  void imull(Register dst, Register src);
+  void imull(Register reg, const Immediate& imm);
+  void imull(Register reg, const Address& address);
+
+  void imull(Register reg);
+  void imull(const Address& address);
+
+  void mull(Register reg);
+  void mull(const Address& address);
+
+  void sbbl(Register dst, Register src);
+  void sbbl(Register reg, const Immediate& imm);
+  void sbbl(Register reg, const Address& address);
+
+  void incl(Register reg);
+  void incl(const Address& address);
+
+  void decl(Register reg);
+  void decl(const Address& address);
+
+  void shll(Register reg, const Immediate& imm);
+  void shll(Register operand, Register shifter);
+  void shrl(Register reg, const Immediate& imm);
+  void shrl(Register operand, Register shifter);
+  void sarl(Register reg, const Immediate& imm);
+  void sarl(Register operand, Register shifter);
+  void shld(Register dst, Register src);
+
+  void negl(Register reg);
+  void notl(Register reg);
+
+  void enter(const Immediate& imm);
+  void leave();
+
+  void ret();
+  void ret(const Immediate& imm);
+
+  void nop();
+  void int3();
+  void hlt();
+
+  void j(Condition condition, Label* label);
+
+  void jmp(Register reg);
+  void jmp(const Address& address);
+  void jmp(Label* label);
+
+  X86_64Assembler* lock();
+  void cmpxchgl(const Address& address, Register reg);
+
+  void mfence();
+
+  X86_64Assembler* gs();
+
+  //
+  // Macros for High-level operations.
+  //
+
+  void AddImmediate(Register reg, const Immediate& imm);
+
+  void LoadDoubleConstant(XmmRegister dst, double value);
+
+  void DoubleNegate(XmmRegister d);
+  void FloatNegate(XmmRegister f);
+
+  void DoubleAbs(XmmRegister reg);
+
+  void LockCmpxchgl(const Address& address, Register reg) {
+    lock()->cmpxchgl(address, reg);
+  }
+
+  //
+  // Misc. functionality
+  //
+  int PreferredLoopAlignment() { return 16; }
+  void Align(int alignment, int offset);
+  void Bind(Label* label);
+
+  //
+  // Overridden common assembler high-level functionality
+  //
+
+  // Emit code that will create an activation on the stack
+  virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+                          const std::vector<ManagedRegister>& callee_save_regs,
+                          const ManagedRegisterEntrySpills& entry_spills);
+
+  // Emit code that will remove an activation from the stack
+  virtual void RemoveFrame(size_t frame_size,
+                           const std::vector<ManagedRegister>& callee_save_regs);
+
+  virtual void IncreaseFrameSize(size_t adjust);
+  virtual void DecreaseFrameSize(size_t adjust);
+
+  // Store routines
+  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size);
+  virtual void StoreRef(FrameOffset dest, ManagedRegister src);
+  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src);
+
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
+                                     ManagedRegister scratch);
+
+  virtual void StoreImmediateToThread(ThreadOffset dest, uint32_t imm,
+                                      ManagedRegister scratch);
+
+  virtual void StoreStackOffsetToThread(ThreadOffset thr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister scratch);
+
+  virtual void StoreStackPointerToThread(ThreadOffset thr_offs);
+
+  void StoreLabelToThread(ThreadOffset thr_offs, Label* lbl);
+
+  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
+                             FrameOffset in_off, ManagedRegister scratch);
+
+  // Load routines
+  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size);
+
+  virtual void Load(ManagedRegister dest, ThreadOffset src, size_t size);
+
+  virtual void LoadRef(ManagedRegister dest, FrameOffset  src);
+
+  virtual void LoadRef(ManagedRegister dest, ManagedRegister base,
+                       MemberOffset offs);
+
+  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base,
+                          Offset offs);
+
+  virtual void LoadRawPtrFromThread(ManagedRegister dest,
+                                    ThreadOffset offs);
+
+  // Copying routines
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size);
+
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset thr_offs,
+                                    ManagedRegister scratch);
+
+  virtual void CopyRawPtrToThread(ThreadOffset thr_offs, FrameOffset fr_offs,
+                                  ManagedRegister scratch);
+
+  virtual void CopyRef(FrameOffset dest, FrameOffset src,
+                       ManagedRegister scratch);
+
+  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size);
+
+  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(ManagedRegister dest, Offset dest_offset,
+                    ManagedRegister src, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void MemoryBarrier(ManagedRegister);
+
+  // Sign extension
+  virtual void SignExtend(ManagedRegister mreg, size_t size);
+
+  // Zero extension
+  virtual void ZeroExtend(ManagedRegister mreg, size_t size);
+
+  // Exploit fast access in managed code to Thread::Current()
+  virtual void GetCurrentThread(ManagedRegister tr);
+  virtual void GetCurrentThread(FrameOffset dest_offset,
+                                ManagedRegister scratch);
+
+  // Set up out_reg to hold an Object** into the SIRT, or to be NULL if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the SIRT entry to see if the value is
+  // NULL.
+  virtual void CreateSirtEntry(ManagedRegister out_reg, FrameOffset sirt_offset,
+                               ManagedRegister in_reg, bool null_allowed);
+
+  // Set up out_off to hold an Object** into the SIRT, or to be NULL if the
+  // value is null and null_allowed.
+  virtual void CreateSirtEntry(FrameOffset out_off, FrameOffset sirt_offset,
+                               ManagedRegister scratch, bool null_allowed);
+
+  // src holds a SIRT entry (Object**); load this into dst.
+  virtual void LoadReferenceFromSirt(ManagedRegister dst,
+                                     ManagedRegister src);
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  virtual void VerifyObject(ManagedRegister src, bool could_be_null);
+  virtual void VerifyObject(FrameOffset src, bool could_be_null);
+
+  // Call to address held at [base+offset]
+  virtual void Call(ManagedRegister base, Offset offset,
+                    ManagedRegister scratch);
+  virtual void Call(FrameOffset base, Offset offset,
+                    ManagedRegister scratch);
+  virtual void Call(ThreadOffset offset, ManagedRegister scratch);
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust);
+
+ private:
+  inline void EmitUint8(uint8_t value);
+  inline void EmitInt32(int32_t value);
+  inline void EmitRegisterOperand(int rm, int reg);
+  inline void EmitXmmRegisterOperand(int rm, XmmRegister reg);
+  inline void EmitFixup(AssemblerFixup* fixup);
+  inline void EmitOperandSizeOverride();
+
+  void EmitOperand(int rm, const Operand& operand);
+  void EmitImmediate(const Immediate& imm);
+  void EmitComplex(int rm, const Operand& operand, const Immediate& immediate);
+  void EmitLabel(Label* label, int instruction_size);
+  void EmitLabelLink(Label* label);
+  void EmitNearLabelLink(Label* label);
+
+  void EmitGenericShift(int rm, Register reg, const Immediate& imm);
+  void EmitGenericShift(int rm, Register operand, Register shifter);
+  void rex(Register& dst, Register& src, size_t size = 4);
+  void rex_reg(Register& dst, size_t size = 4);
+  void rex_rm(Register& src, size_t size = 4);
+
+  DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
+};
+
+inline void X86_64Assembler::EmitUint8(uint8_t value) {
+  buffer_.Emit<uint8_t>(value);
+}
+
+inline void X86_64Assembler::EmitInt32(int32_t value) {
+  buffer_.Emit<int32_t>(value);
+}
+
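+// For example, EmitRegisterOperand(0, 1) emits the single ModRM byte
+// 0xC1 (0xC0 + (0 << 3) + 1), i.e. a register-direct operand.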
+inline void X86_64Assembler::EmitRegisterOperand(int rm, int reg) {
+  CHECK_GE(rm, 0);
+  CHECK_LT(rm, 8);
+  buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg);
+}
+
+inline void X86_64Assembler::EmitXmmRegisterOperand(int rm, XmmRegister reg) {
+  EmitRegisterOperand(rm, static_cast<Register>(reg));
+}
+
+inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
+  buffer_.EmitFixup(fixup);
+}
+
+inline void X86_64Assembler::EmitOperandSizeOverride() {
+  EmitUint8(0x66);
+}
+
+// Slowpath entered when Thread::Current()->exception_ is non-null
+class X86ExceptionSlowPath : public SlowPath {
+ public:
+  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
+  virtual void Emit(Assembler *sp_asm);
+ private:
+  const size_t stack_adjust_;
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
new file mode 100644
index 0000000..df0d14e
--- /dev/null
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_x86_64.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+TEST(AssemblerX86_64, CreateBuffer) {
+  AssemblerBuffer buffer;
+  AssemblerBuffer::EnsureCapacity ensured(&buffer);
+  buffer.Emit<uint8_t>(0x42);
+  ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
+  buffer.Emit<int32_t>(42);
+  ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
+}
+
+}  // namespace art
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
new file mode 100644
index 0000000..3340802
--- /dev/null
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_CONSTANTS_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_CONSTANTS_X86_64_H_
+
+#include <iosfwd>
+
+#include "arch/x86_64/registers_x86_64.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "globals.h"
+
+namespace art {
+namespace x86_64 {
+
+enum ByteRegister {
+  AL = 0,
+  CL = 1,
+  DL = 2,
+  BL = 3,
+  AH = 4,
+  CH = 5,
+  DH = 6,
+  BH = 7,
+  kNoByteRegister = -1  // Signals an illegal register.
+};
+
+
+enum XmmRegister {
+  _XMM0 = 0,
+  _XMM1 = 1,
+  _XMM2 = 2,
+  _XMM3 = 3,
+  _XMM4 = 4,
+  _XMM5 = 5,
+  _XMM6 = 6,
+  _XMM7 = 7,
+  kNumberOfXmmRegisters = 8,
+  kNoXmmRegister = -1  // Signals an illegal register.
+};
+std::ostream& operator<<(std::ostream& os, const XmmRegister& reg);
+
+enum X87Register {
+  ST0 = 0,
+  ST1 = 1,
+  ST2 = 2,
+  ST3 = 3,
+  ST4 = 4,
+  ST5 = 5,
+  ST6 = 6,
+  ST7 = 7,
+  kNumberOfX87Registers = 8,
+  kNoX87Register = -1  // Signals an illegal register.
+};
+std::ostream& operator<<(std::ostream& os, const X87Register& reg);
+
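+// Scale factors for SIB-addressed memory operands. The enum value is the log2
+// of the factor, so an index register scaled by TIMES_4 contributes
+// index * 4 to the effective address.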
+enum ScaleFactor {
+  TIMES_1 = 0,
+  TIMES_2 = 1,
+  TIMES_4 = 2,
+  TIMES_8 = 3
+};
+
+enum Condition {
+  kOverflow     =  0,
+  kNoOverflow   =  1,
+  kBelow        =  2,
+  kAboveEqual   =  3,
+  kEqual        =  4,
+  kNotEqual     =  5,
+  kBelowEqual   =  6,
+  kAbove        =  7,
+  kSign         =  8,
+  kNotSign      =  9,
+  kParityEven   = 10,
+  kParityOdd    = 11,
+  kLess         = 12,
+  kGreaterEqual = 13,
+  kLessEqual    = 14,
+  kGreater      = 15,
+
+  kZero         = kEqual,
+  kNotZero      = kNotEqual,
+  kNegative     = kSign,
+  kPositive     = kNotSign
+};
+
+
+class Instr {
+ public:
+  static const uint8_t kHltInstruction = 0xF4;
+  // We prefer not to use the int3 instruction since it conflicts with gdb.
+  static const uint8_t kBreakPointInstruction = kHltInstruction;
+
+  bool IsBreakPoint() {
+    return (*reinterpret_cast<const uint8_t*>(this)) == kBreakPointInstruction;
+  }
+
+  // Instructions are read out of a code stream. The only way to get a
+  // reference to an instruction is to convert a pointer. There is no way
+  // to allocate or create instances of class Instr.
+  // Use the At(pc) function to create references to Instr.
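+  // For example, Instr::At(pc)->IsBreakPoint() tests the byte at pc.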
+  static Instr* At(uintptr_t pc) { return reinterpret_cast<Instr*>(pc); }
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Instr);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_CONSTANTS_X86_64_H_
diff --git a/compiler/utils/x86_64/managed_register_x86_64.cc b/compiler/utils/x86_64/managed_register_x86_64.cc
new file mode 100644
index 0000000..057a894
--- /dev/null
+++ b/compiler/utils/x86_64/managed_register_x86_64.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_x86_64.h"
+
+#include "globals.h"
+
+namespace art {
+namespace x86_64 {
+
+// Define register pairs.
+// This list must be kept in sync with the RegisterPair enum.
+#define REGISTER_PAIR_LIST(P) \
+  P(RAX, RDX)                 \
+  P(RAX, RCX)                 \
+  P(RAX, RBX)                 \
+  P(RAX, RDI)                 \
+  P(RDX, RCX)                 \
+  P(RDX, RBX)                 \
+  P(RDX, RDI)                 \
+  P(RCX, RBX)                 \
+  P(RCX, RDI)                 \
+  P(RBX, RDI)
+
+
+struct RegisterPairDescriptor {
+  RegisterPair reg;  // Used to verify that the enum is in sync.
+  Register low;
+  Register high;
+};
+
+
+static const RegisterPairDescriptor kRegisterPairs[] = {
+#define REGISTER_PAIR_ENUMERATION(low, high) { low##_##high, low, high },
+  REGISTER_PAIR_LIST(REGISTER_PAIR_ENUMERATION)
+#undef REGISTER_PAIR_ENUMERATION
+};
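+
+// For example, the P(RAX, RDX) entry above expands to { RAX_RDX, RAX, RDX },
+// so kRegisterPairs[RAX_RDX] records RAX as the low and RDX as the high
+// register of the pair.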
+
+std::ostream& operator<<(std::ostream& os, const RegisterPair& reg) {
+  os << X86_64ManagedRegister::FromRegisterPair(reg);
+  return os;
+}
+
+bool X86_64ManagedRegister::Overlaps(const X86_64ManagedRegister& other) const {
+  if (IsNoRegister() || other.IsNoRegister()) return false;
+  CHECK(IsValidManagedRegister());
+  CHECK(other.IsValidManagedRegister());
+  if (Equals(other)) return true;
+  if (IsRegisterPair()) {
+    Register low = AsRegisterPairLow();
+    Register high = AsRegisterPairHigh();
+    return X86_64ManagedRegister::FromCpuRegister(low).Overlaps(other) ||
+        X86_64ManagedRegister::FromCpuRegister(high).Overlaps(other);
+  }
+  if (other.IsRegisterPair()) {
+    return other.Overlaps(*this);
+  }
+  return false;
+}
+
+
+int X86_64ManagedRegister::AllocIdLow() const {
+  CHECK(IsRegisterPair());
+  const int r = RegId() - (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
+                           kNumberOfX87RegIds);
+  CHECK_EQ(r, kRegisterPairs[r].reg);
+  return kRegisterPairs[r].low;
+}
+
+
+int X86_64ManagedRegister::AllocIdHigh() const {
+  CHECK(IsRegisterPair());
+  const int r = RegId() - (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
+                           kNumberOfX87RegIds);
+  CHECK_EQ(r, kRegisterPairs[r].reg);
+  return kRegisterPairs[r].high;
+}
+
+
+void X86_64ManagedRegister::Print(std::ostream& os) const {
+  if (!IsValidManagedRegister()) {
+    os << "No Register";
+  } else if (IsXmmRegister()) {
+    os << "XMM: " << static_cast<int>(AsXmmRegister());
+  } else if (IsX87Register()) {
+    os << "X87: " << static_cast<int>(AsX87Register());
+  } else if (IsCpuRegister()) {
+    os << "CPU: " << static_cast<int>(AsCpuRegister());
+  } else if (IsRegisterPair()) {
+    os << "Pair: " << AsRegisterPairLow() << ", " << AsRegisterPairHigh();
+  } else {
+    os << "??: " << RegId();
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const X86_64ManagedRegister& reg) {
+  reg.Print(os);
+  return os;
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
new file mode 100644
index 0000000..d68c59d
--- /dev/null
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
+#define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
+
+#include "constants_x86_64.h"
+#include "utils/managed_register.h"
+
+namespace art {
+namespace x86_64 {
+
+// Values for register pairs.
+// The registers in kReservedCpuRegistersArray in x86.cc are not used in pairs.
+// The table kRegisterPairs in managed_register_x86_64.cc must be kept in sync with this enum.
+enum RegisterPair {
+  RAX_RDX = 0,
+  RAX_RCX = 1,
+  RAX_RBX = 2,
+  RAX_RDI = 3,
+  RDX_RCX = 4,
+  RDX_RBX = 5,
+  RDX_RDI = 6,
+  RCX_RBX = 7,
+  RCX_RDI = 8,
+  RBX_RDI = 9,
+  kNumberOfRegisterPairs = 10,
+  kNoRegisterPair = -1,
+};
+
+std::ostream& operator<<(std::ostream& os, const RegisterPair& reg);
+
+const int kNumberOfCpuRegIds = kNumberOfCpuRegisters;
+const int kNumberOfCpuAllocIds = kNumberOfCpuRegisters;
+
+const int kNumberOfXmmRegIds = kNumberOfXmmRegisters;
+const int kNumberOfXmmAllocIds = kNumberOfXmmRegisters;
+
+const int kNumberOfX87RegIds = kNumberOfX87Registers;
+const int kNumberOfX87AllocIds = kNumberOfX87Registers;
+
+const int kNumberOfPairRegIds = kNumberOfRegisterPairs;
+
+const int kNumberOfRegIds = kNumberOfCpuRegIds + kNumberOfXmmRegIds +
+    kNumberOfX87RegIds + kNumberOfPairRegIds;
+const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds +
+    kNumberOfX87RegIds;
+
+// Register ids map:
+//   [0..R[  cpu registers (enum Register)
+//   [R..X[  xmm registers (enum XmmRegister)
+//   [X..S[  x87 registers (enum X87Register)
+//   [S..P[  register pairs (enum RegisterPair)
+// where
+//   R = kNumberOfCpuRegIds
+//   X = R + kNumberOfXmmRegIds
+//   S = X + kNumberOfX87RegIds
+//   P = S + kNumberOfRegisterPairs
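+//
+// For example, FromXmmRegister(_XMM1) maps to register id R + 1 and
+// FromRegisterPair(RAX_RDX) maps to register id S + 0.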
+
+// Allocation ids map:
+//   [0..R[  cpu registers (enum Register)
+//   [R..X[  xmm registers (enum XmmRegister)
+//   [X..S[  x87 registers (enum X87Register)
+// where
+//   R = kNumberOfCpuRegIds
+//   X = R + kNumberOfXmmRegIds
+//   S = X + kNumberOfX87RegIds
+
+
+// An instance of class 'ManagedRegister' represents a single cpu register (enum
+// Register), an xmm register (enum XmmRegister), an x87 register (enum
+// X87Register), or a pair of cpu registers (enum RegisterPair).
+// 'ManagedRegister::NoRegister()' provides an invalid register.
+// There is a one-to-one mapping between ManagedRegister and register id.
+class X86_64ManagedRegister : public ManagedRegister {
+ public:
+  ByteRegister AsByteRegister() const {
+    CHECK(IsCpuRegister());
+    CHECK_LT(AsCpuRegister(), RSP);  // RSP, RBP, RSI and RDI cannot be encoded as byte registers.
+    return static_cast<ByteRegister>(id_);
+  }
+
+  Register AsCpuRegister() const {
+    CHECK(IsCpuRegister());
+    return static_cast<Register>(id_);
+  }
+
+  XmmRegister AsXmmRegister() const {
+    CHECK(IsXmmRegister());
+    return static_cast<XmmRegister>(id_ - kNumberOfCpuRegIds);
+  }
+
+  X87Register AsX87Register() const {
+    CHECK(IsX87Register());
+    return static_cast<X87Register>(id_ -
+                                    (kNumberOfCpuRegIds + kNumberOfXmmRegIds));
+  }
+
+  Register AsRegisterPairLow() const {
+    CHECK(IsRegisterPair());
+    // The appropriate mapping of register ids allows the use of AllocIdLow().
+    return FromRegId(AllocIdLow()).AsCpuRegister();
+  }
+
+  Register AsRegisterPairHigh() const {
+    CHECK(IsRegisterPair());
+    // The appropriate mapping of register ids allows the use of AllocIdHigh().
+    return FromRegId(AllocIdHigh()).AsCpuRegister();
+  }
+
+  bool IsCpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
+  }
+
+  bool IsXmmRegister() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - kNumberOfCpuRegIds;
+    return (0 <= test) && (test < kNumberOfXmmRegIds);
+  }
+
+  bool IsX87Register() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - (kNumberOfCpuRegIds + kNumberOfXmmRegIds);
+    return (0 <= test) && (test < kNumberOfX87RegIds);
+  }
+
+  bool IsRegisterPair() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ -
+        (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds);
+    return (0 <= test) && (test < kNumberOfPairRegIds);
+  }
+
+  void Print(std::ostream& os) const;
+
+  // Returns true if the two managed-registers ('this' and 'other') overlap.
+  // Either managed-register may be the NoRegister. If both are the NoRegister
+  // then false is returned.
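+  // For example, FromRegisterPair(RAX_RDX) overlaps FromCpuRegister(RAX) and
+  // FromCpuRegister(RDX), but not FromCpuRegister(RBX).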
+  bool Overlaps(const X86_64ManagedRegister& other) const;
+
+  static X86_64ManagedRegister FromCpuRegister(Register r) {
+    CHECK_NE(r, kNoRegister);
+    return FromRegId(r);
+  }
+
+  static X86_64ManagedRegister FromXmmRegister(XmmRegister r) {
+    CHECK_NE(r, kNoXmmRegister);
+    return FromRegId(r + kNumberOfCpuRegIds);
+  }
+
+  static X86_64ManagedRegister FromX87Register(X87Register r) {
+    CHECK_NE(r, kNoX87Register);
+    return FromRegId(r + kNumberOfCpuRegIds + kNumberOfXmmRegIds);
+  }
+
+  static X86_64ManagedRegister FromRegisterPair(RegisterPair r) {
+    CHECK_NE(r, kNoRegisterPair);
+    return FromRegId(r + (kNumberOfCpuRegIds + kNumberOfXmmRegIds +
+                          kNumberOfX87RegIds));
+  }
+
+ private:
+  bool IsValidManagedRegister() const {
+    return (0 <= id_) && (id_ < kNumberOfRegIds);
+  }
+
+  int RegId() const {
+    CHECK(!IsNoRegister());
+    return id_;
+  }
+
+  int AllocId() const {
+    CHECK(IsValidManagedRegister() && !IsRegisterPair());
+    CHECK_LT(id_, kNumberOfAllocIds);
+    return id_;
+  }
+
+  int AllocIdLow() const;
+  int AllocIdHigh() const;
+
+  friend class ManagedRegister;
+
+  explicit X86_64ManagedRegister(int reg_id) : ManagedRegister(reg_id) {}
+
+  static X86_64ManagedRegister FromRegId(int reg_id) {
+    X86_64ManagedRegister reg(reg_id);
+    CHECK(reg.IsValidManagedRegister());
+    return reg;
+  }
+};
+
+std::ostream& operator<<(std::ostream& os, const X86_64ManagedRegister& reg);
+
+}  // namespace x86_64
+
+inline x86_64::X86_64ManagedRegister ManagedRegister::AsX86_64() const {
+  x86_64::X86_64ManagedRegister reg(id_);
+  CHECK(reg.IsNoRegister() || reg.IsValidManagedRegister());
+  return reg;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
diff --git a/compiler/utils/x86_64/managed_register_x86_64_test.cc b/compiler/utils/x86_64/managed_register_x86_64_test.cc
new file mode 100644
index 0000000..2dc7581
--- /dev/null
+++ b/compiler/utils/x86_64/managed_register_x86_64_test.cc
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "globals.h"
+#include "managed_register_x86_64.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace x86_64 {
+
+TEST(X86_64ManagedRegister, NoRegister) {
+  X86_64ManagedRegister reg = ManagedRegister::NoRegister().AsX86_64();
+  EXPECT_TRUE(reg.IsNoRegister());
+  EXPECT_TRUE(!reg.Overlaps(reg));
+}
+
+TEST(X86_64ManagedRegister, CpuRegister) {
+  X86_64ManagedRegister reg = X86_64ManagedRegister::FromCpuRegister(RAX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(RAX, reg.AsCpuRegister());
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RBX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(RBX, reg.AsCpuRegister());
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RCX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(RCX, reg.AsCpuRegister());
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RDI);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(RDI, reg.AsCpuRegister());
+}
+
+TEST(X86_64ManagedRegister, XmmRegister) {
+  X86_64ManagedRegister reg = X86_64ManagedRegister::FromXmmRegister(XMM0);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(XMM0, reg.AsXmmRegister());
+
+  reg = X86_64ManagedRegister::FromXmmRegister(XMM1);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(XMM1, reg.AsXmmRegister());
+
+  reg = X86_64ManagedRegister::FromXmmRegister(XMM7);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(XMM7, reg.AsXmmRegister());
+}
+
+TEST(X86_64ManagedRegister, X87Register) {
+  X86_64ManagedRegister reg = X86_64ManagedRegister::FromX87Register(ST0);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(ST0, reg.AsX87Register());
+
+  reg = X86_64ManagedRegister::FromX87Register(ST1);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(ST1, reg.AsX87Register());
+
+  reg = X86_64ManagedRegister::FromX87Register(ST7);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(reg.IsX87Register());
+  EXPECT_TRUE(!reg.IsRegisterPair());
+  EXPECT_EQ(ST7, reg.AsX87Register());
+}
+
+TEST(X86_64ManagedRegister, RegisterPair) {
+  X86_64ManagedRegister reg = X86_64ManagedRegister::FromRegisterPair(RAX_RDX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RAX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RDX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RAX_RCX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RAX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RCX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RAX_RBX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RAX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RBX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RAX_RDI);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RAX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RDI, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RDX_RCX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RDX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RCX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RDX_RBX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RDX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RBX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RDX_RDI);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RDX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RDI, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RCX_RBX);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RCX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RBX, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RCX_RDI);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RCX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RDI, reg.AsRegisterPairHigh());
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RBX_RDI);
+  EXPECT_TRUE(!reg.IsNoRegister());
+  EXPECT_TRUE(!reg.IsCpuRegister());
+  EXPECT_TRUE(!reg.IsXmmRegister());
+  EXPECT_TRUE(!reg.IsX87Register());
+  EXPECT_TRUE(reg.IsRegisterPair());
+  EXPECT_EQ(RBX, reg.AsRegisterPairLow());
+  EXPECT_EQ(RDI, reg.AsRegisterPairHigh());
+}
+
+TEST(X86_64ManagedRegister, Equals) {
+  X86_64ManagedRegister reg_eax = X86_64ManagedRegister::FromCpuRegister(RAX);
+  EXPECT_TRUE(reg_eax.Equals(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg_eax.Equals(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  X86_64ManagedRegister reg_xmm0 = X86_64ManagedRegister::FromXmmRegister(XMM0);
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(reg_xmm0.Equals(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg_xmm0.Equals(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  X86_64ManagedRegister reg_st0 = X86_64ManagedRegister::FromX87Register(ST0);
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(reg_st0.Equals(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg_st0.Equals(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  X86_64ManagedRegister reg_pair = X86_64ManagedRegister::FromRegisterPair(RAX_RDX);
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(reg_pair.Equals(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg_pair.Equals(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+}
+
+TEST(X86_64ManagedRegister, Overlaps) {
+  X86_64ManagedRegister reg = X86_64ManagedRegister::FromCpuRegister(RAX);
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RDX);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RDI);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromCpuRegister(RBX);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromXmmRegister(XMM0);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromX87Register(ST0);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RAX_RDX);
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RDX_RCX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RBX_RDI);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RDX_RBX)));
+
+  reg = X86_64ManagedRegister::FromRegisterPair(RDX_RCX);
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RAX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RBX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromCpuRegister(RDI)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromXmmRegister(XMM7)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST0)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromX87Register(ST7)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RAX_RDX)));
+  EXPECT_TRUE(!reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RBX_RDI)));
+  EXPECT_TRUE(reg.Overlaps(X86_64ManagedRegister::FromRegisterPair(RDX_RBX)));
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1ca8e07..1576905 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -52,6 +52,7 @@
 	gc/accounting/mod_union_table.cc \
 	gc/accounting/remembered_set.cc \
 	gc/accounting/space_bitmap.cc \
+	gc/collector/concurrent_copying.cc \
 	gc/collector/garbage_collector.cc \
 	gc/collector/immune_region.cc \
 	gc/collector/mark_sweep.cc \
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index c748ce9..abce838 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -18,9 +18,12 @@
 #include "fault_handler.h"
 #include <sys/ucontext.h>
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "thread.h"
 #include "thread-inl.h"
 
@@ -31,16 +34,38 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow(void*);
 extern "C" void art_quick_test_suspend();
 
+// Get the size of a thumb2 instruction in bytes.
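+// 32-bit Thumb2 encodings (first halfword in the 0xE800 - 0xFFFF range) give a
+// size of 4 bytes; plain 16-bit Thumb encodings give 2 bytes.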
+static uint32_t GetInstructionSize(uint8_t* pc) {
+  uint16_t instr = pc[0] | pc[1] << 8;
+  bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
+  uint32_t instr_size = is_32bit ? 4 : 2;
+  return instr_size;
+}
+
 void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintptr_t& return_pc) {
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uintptr_t* sp = reinterpret_cast<uint32_t*>(sc->arm_sp);
+  LOG(DEBUG) << "sp: " << sp;
   if (sp == nullptr) {
     return;
   }
 
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in r0.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(sp) - Thread::kStackOverflowReservedBytes);
+  if (overflow_addr == fault_addr) {
+    method = sc->arm_r0;
+  } else {
+    // The method is at the top of the stack.
+    method = sp[0];
+  }
+
   // Work out the return PC.  This will be the address of the instruction
   // following the faulting ldr/str instruction.  This is in thumb mode so
   // the instruction might be a 16 or 32 bit one.  Also, the GC map always
@@ -48,13 +73,8 @@
 
   // Need to work out the size of the instruction that caused the exception.
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-
-  uint16_t instr = ptr[0] | ptr[1] << 8;
-  bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
-  uint32_t instr_size = is_32bit ? 4 : 2;
-
-  // The method is at the top of the stack.
-  method = sp[0];
+  LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+  uint32_t instr_size = GetInstructionSize(ptr);
 
   return_pc = (sc->arm_pc + instr_size) | 1;
 }
@@ -71,9 +91,7 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
 
-  uint16_t instr = ptr[0] | ptr[1] << 8;
-  bool is_32bit = ((instr & 0xF000) == 0xF000) || ((instr & 0xF800) == 0xE800);
-  uint32_t instr_size = is_32bit ? 4 : 2;
+  uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
   LOG(DEBUG) << "Generating null pointer exception";
@@ -142,7 +160,116 @@
   return false;
 }
 
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically).  The instruction sequence
+// that generates this signal is:
+//
+// sub r12,sp,#16384
+// ldr.w r12,[r12,#0]
+//
+// The second instruction will fault if r12 is inside the protected region
+// on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region.  Because we now have
+// a gap in the stack (it skips over the protected region), we need to arrange
+// for the rest of the system to be unaware of the new stack arrangement
+// and to behave as if there is a fully valid stack.  We do this by placing
+// a unique address onto the stack followed by
+// the size of the gap.  The stack walker will detect this and skip over the
+// gap.
+
+// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
+// stack must be 8 byte aligned when making any calls.
+
+// NB. The size of the gap is the difference between the previous frame's SP and
+// the SP at which the size word is pushed.
+
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = (struct ucontext *)context;
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
+  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+
+  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
+  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
+  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+    ", fault_addr: " << fault_addr;
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    LOG(DEBUG) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  R9 contains the current Thread* so
+  // we can read the stack_end from that and subtract the size of the
+  // protected region.  This creates a gap in the stack that needs to be marked.
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+
+  uint8_t* prevsp = sp;
+  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
+  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
+
+  // We need to find the previous frame.  Remember that
+  // this has not yet been fully constructed because the SP has not been
+  // decremented.  So we need to work out the size of the spill portion of the
+  // frame.  This consists of something like:
+  //
+  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
+  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
+  //
+  // The first is encoded in the ArtMethod as the spill_mask, the second as the
+  // fp_spill_mask.  A population count on each will give the number of registers
+  // in each mask.  Each register is 4 bytes on ARM32.
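+  //
+  // For the example above, push {r5, r6, r7, lr} spills 4 core registers and
+  // vpush.f32 {s16-s21} spills 6 FP registers, so the spill size would be
+  // (4 + 6) * 4 = 40 bytes.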
+
+  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
+  uint32_t spill_mask = method->GetCoreSpillMask();
+  uint32_t numcores = __builtin_popcount(spill_mask);
+  uint32_t fp_spill_mask = method->GetFpSpillMask();
+  uint32_t numfps = __builtin_popcount(fp_spill_mask);
+  uint32_t spill_size = (numcores + numfps) * 4;
+  LOG(DEBUG) << "spill size: " << spill_size;
+  uint8_t* prevframe = prevsp + spill_size;
+  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
+
+  // NOTE: the ARM EABI requires 8 byte stack alignment.  On ARM32 a pointer is
+  // 4 bytes, so the gap marker together with the offset to the previous frame
+  // adds up to 8 bytes.  On other architectures we will need to align the stack.
+
+  // Push a marker onto the stack to tell the stack walker that there is a stack
+  // overflow and the stack is not contiguous.
+
+  // First the offset from SP to the previous frame.
+  sp -= sizeof(uint32_t);
+  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
+  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
+
+  // Now the gap marker (pointer sized).
+  sp -= sizeof(mirror::ArtMethod*);
+  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+
+  // Now establish the stack pointer for the signal return.
+  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // We need the LR to point to the GC map just after the fault instruction.
+  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
+  uint32_t instr_size = GetInstructionSize(ptr);
+  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+
+  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
+  // stack pointer to be in the overflow region.  Throwing the exception will perform
+  // a longjmp which will restore the stack pointer to the correct location for the
+  // exception catch.
+  return true;
 }
 }       // namespace art
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index ca2489c..d03a474 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -29,7 +29,7 @@
 
     // Clang's as(1) uses $0, $1, and so on for macro arguments.
     #define VAR(name,index) SYMBOL($index)
-    #define PLT_VAR(name, index) SYMBOL($index)
+    #define PLT_VAR(name, index) SYMBOL($index)@PLT
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
     #define FUNCTION_TYPE(name,index) .type $index, @function
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 5fbf8cb..0d75a89 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -142,8 +142,13 @@
 
 MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    int3
-    int3
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    // Outgoing argument set up
+    mov %rsp, %rdx                    // pass SP
+    mov %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
+    mov %rax, %rdi                    // pass arg1
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
+    int3                          // unreached
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
index 8b0dc07..b9d06b5 100644
--- a/runtime/arch/x86_64/registers_x86_64.h
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -67,7 +67,7 @@
   XMM15 = 15,
   kNumberOfFloatRegisters = 16
 };
-std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs);
+std::ostream& operator<<(std::ostream& os, const Register& rhs);
 
 }  // namespace x86_64
 }  // namespace art
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 52a1672..fdd0249 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -649,7 +649,13 @@
   sequence_ = 0;
   num_waiters_ = 0;
 #else
-  CHECK_MUTEX_CALL(pthread_cond_init, (&cond_, NULL));
+  pthread_condattr_t cond_attrs;
+  CHECK_MUTEX_CALL(pthread_condattr_init, (&cond_attrs));
+#if !defined(__APPLE__)
+  // Apple doesn't have CLOCK_MONOTONIC or pthread_condattr_setclock.
+  CHECK_MUTEX_CALL(pthread_condattr_setclock, (&cond_attrs, CLOCK_MONOTONIC));
+#endif
+  CHECK_MUTEX_CALL(pthread_cond_init, (&cond_, &cond_attrs));
 #endif
 }
 
@@ -787,17 +793,15 @@
   CHECK_GE(guard_.num_contenders_, 0);
   guard_.num_contenders_--;
 #else
-#ifdef HAVE_TIMEDWAIT_MONOTONIC
-#define TIMEDWAIT pthread_cond_timedwait_monotonic
+#if !defined(__APPLE__)
   int clock = CLOCK_MONOTONIC;
 #else
-#define TIMEDWAIT pthread_cond_timedwait
   int clock = CLOCK_REALTIME;
 #endif
   guard_.recursion_count_ = 0;
   timespec ts;
   InitTimeSpec(true, clock, ms, ns, &ts);
-  int rc = TEMP_FAILURE_RETRY(TIMEDWAIT(&cond_, &guard_.mutex_, &ts));
+  int rc = TEMP_FAILURE_RETRY(pthread_cond_timedwait(&cond_, &guard_.mutex_, &ts));
   if (rc != 0 && rc != ETIMEDOUT) {
     errno = rc;
     PLOG(FATAL) << "TimedWait failed for " << name_;
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index c769f64..a155002 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -54,7 +54,7 @@
 
 void CumulativeLogger::End() {
   MutexLock mu(Thread::Current(), lock_);
-  iterations_++;
+  ++iterations_;
 }
 
 void CumulativeLogger::Reset() {
@@ -66,13 +66,12 @@
 
 void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
-  const TimingLogger::SplitTimings& splits = logger.GetSplits();
-  for (auto it = splits.begin(), end = splits.end(); it != end; ++it) {
-    TimingLogger::SplitTiming split = *it;
+  for (const TimingLogger::SplitTiming& split : logger.GetSplits()) {
     uint64_t split_time = split.first;
     const char* split_name = split.second;
     AddPair(split_name, split_time);
   }
+  ++iterations_;
 }
 
 size_t CumulativeLogger::GetIterations() const {
@@ -161,8 +160,7 @@
 
 uint64_t TimingLogger::GetTotalNs() const {
   uint64_t total_ns = 0;
-  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
-    TimingLogger::SplitTiming split = *it;
+  for (const TimingLogger::SplitTiming& split : splits_) {
     total_ns += split.first;
   }
   return total_ns;
@@ -171,8 +169,7 @@
 void TimingLogger::Dump(std::ostream &os) const {
   uint64_t longest_split = 0;
   uint64_t total_ns = 0;
-  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
-    TimingLogger::SplitTiming split = *it;
+  for (const SplitTiming& split : splits_) {
     uint64_t split_time = split.first;
     longest_split = std::max(longest_split, split_time);
     total_ns += split_time;
@@ -181,8 +178,7 @@
   TimeUnit tu = GetAppropriateTimeUnit(longest_split);
   uint64_t divisor = GetNsToTimeUnitDivisor(tu);
   // Print formatted splits.
-  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
-    const TimingLogger::SplitTiming& split = *it;
+  for (const SplitTiming& split : splits_) {
     uint64_t split_time = split.first;
     if (!precise_ && divisor >= 1000) {
       // Make the fractional part 0.
@@ -194,7 +190,6 @@
   os << name_ << ": end, " << NsToMs(total_ns) << " ms\n";
 }
 
-
 TimingLogger::ScopedSplit::ScopedSplit(const char* label, TimingLogger* timing_logger) {
   DCHECK(label != NULL) << "New scoped split (" << label << ") with null label.";
   CHECK(timing_logger != NULL) << "New scoped split (" << label << ") without TimingLogger.";
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 24d16c4..4b6d82b 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -175,6 +175,12 @@
   va_end(args);
 }
 
+// IllegalAccessException
+
+void ThrowIllegalAccessException(const ThrowLocation* throw_location, const char* msg) {
+  ThrowException(throw_location, "Ljava/lang/IllegalAccessException;", NULL, msg);
+}
+
 // IllegalArgumentException
 
 void ThrowIllegalArgumentException(const ThrowLocation* throw_location, const char* msg) {
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 792cdef..c06763e 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -92,6 +92,11 @@
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
+// IllegalAccessException
+
+void ThrowIllegalAccessException(const ThrowLocation* throw_location, const char* msg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
+
 // IllegalArgumentException
 
 void ThrowIllegalArgumentException(const ThrowLocation* throw_location, const char* msg)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 26170de..024f830 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3843,7 +3843,6 @@
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
     for (It cur = spaces.begin(), end = spaces.end(); cur != end; ++cur) {
       if ((*cur)->IsMallocSpace()) {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 9e5f54c..c81706f 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -134,7 +134,9 @@
     LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
     CHECK(self->IsExceptionPending());
   }
-  self->ResetDefaultStackEnd();  // Return to default stack size.
+
+  bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
+  self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
 }
 
 JValue InvokeProxyInvocationHandler(ScopedObjectAccessUnchecked& soa, const char* shorty,
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9f3e25..fcb567e 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -61,8 +61,11 @@
 
 void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   bool handled = false;
+  LOG(DEBUG) << "Handling fault";
   if (IsInGeneratedCode(context)) {
+    LOG(DEBUG) << "in generated code, looking for handler";
     for (auto& handler : handlers_) {
+      LOG(DEBUG) << "invoking Action on handler " << handler;
       handled = handler->Action(sig, info, context);
       if (handled) {
         return;
@@ -71,7 +74,7 @@
   }
 
   if (!handled) {
-    LOG(INFO)<< "Caught unknown SIGSEGV in ART fault handler";
+    LOG(ERROR) << "Caught unknown SIGSEGV in ART fault handler";
     oldaction_.sa_sigaction(sig, info, context);
   }
 }
@@ -96,19 +99,23 @@
 bool FaultManager::IsInGeneratedCode(void *context) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
+  LOG(DEBUG) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
+    LOG(DEBUG) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
+    LOG(DEBUG) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
+    LOG(DEBUG) << "no lock";
     return false;
   }
 
@@ -120,7 +127,9 @@
   GetMethodAndReturnPC(context, /*out*/potential_method, /*out*/return_pc);
 
   // If we don't have a potential method, we're outta here.
+  LOG(DEBUG) << "potential method: " << potential_method;
   if (potential_method == 0) {
+    LOG(DEBUG) << "no method";
     return false;
   }
 
@@ -133,19 +142,23 @@
   // Check that the class pointer inside the object is not null and is aligned.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
+    LOG(DEBUG) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
+    LOG(DEBUG) << "not aligned";
     return false;
   }
 
 
   if (!VerifyClassClass(cls)) {
+    LOG(DEBUG) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
+    LOG(DEBUG) << "not a method";
     return false;
   }
 
@@ -153,7 +166,15 @@
   // at the return PC address.
   mirror::ArtMethod* method =
       reinterpret_cast<mirror::ArtMethod*>(potential_method);
-  return method->ToDexPc(return_pc, false) != DexFile::kDexNoIndex;
+  if (kIsDebugBuild) {
+    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method);
+    uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
+    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+  }
+  uint32_t dexpc = method->ToDexPc(return_pc, false);
+  LOG(DEBUG) << "dexpc: " << dexpc;
+  return dexpc != DexFile::kDexNoIndex;
 }
 
 //
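
For reference, the chained-handler pattern this file is instrumenting can be sketched in isolation: each registered handler gets a chance to claim a SIGSEGV, and anything unclaimed is forwarded to the previously installed action. The names below (ChainedFaultManager, FaultClaimCallback) are hypothetical, and the sketch ignores the async-signal-safety constraints the real FaultManager has to respect.

#include <signal.h>
#include <vector>

// Illustrative sketch only; these are not ART types.
typedef bool (*FaultClaimCallback)(int sig, siginfo_t* info, void* context);

class ChainedFaultManager {
 public:
  static void Init() {
    struct sigaction action = {};
    action.sa_sigaction = &Dispatch;
    sigemptyset(&action.sa_mask);
    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigaction(SIGSEGV, &action, &old_action_);  // Remember the old action for chaining.
  }
  static void AddHandler(FaultClaimCallback callback) { handlers_.push_back(callback); }

 private:
  static void Dispatch(int sig, siginfo_t* info, void* context) {
    for (FaultClaimCallback handler : handlers_) {
      if (handler(sig, info, context)) {
        return;  // Claimed: the handler patched the context and execution resumes.
      }
    }
    // Unknown fault: delegate to whatever handler was installed before us.
    if ((old_action_.sa_flags & SA_SIGINFO) != 0) {
      old_action_.sa_sigaction(sig, info, context);
    }
  }

  static std::vector<FaultClaimCallback> handlers_;
  static struct sigaction old_action_;
};

std::vector<FaultClaimCallback> ChainedFaultManager::handlers_;
struct sigaction ChainedFaultManager::old_action_;
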
diff --git a/runtime/gc/accounting/heap_bitmap-inl.h b/runtime/gc/accounting/heap_bitmap-inl.h
index 18b93d4..04e85d2 100644
--- a/runtime/gc/accounting/heap_bitmap-inl.h
+++ b/runtime/gc/accounting/heap_bitmap-inl.h
@@ -19,6 +19,8 @@
 
 #include "heap_bitmap.h"
 
+#include "space_bitmap-inl.h"
+
 namespace art {
 namespace gc {
 namespace accounting {
@@ -34,6 +36,55 @@
   }
 }
 
+inline bool HeapBitmap::Test(const mirror::Object* obj) {
+  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  if (LIKELY(bitmap != nullptr)) {
+    return bitmap->Test(obj);
+  } else {
+    return GetDiscontinuousSpaceObjectSet(obj) != nullptr;
+  }
+}
+
+inline void HeapBitmap::Clear(const mirror::Object* obj) {
+  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  if (LIKELY(bitmap != nullptr)) {
+    bitmap->Clear(obj);
+  } else {
+    ObjectSet* set = GetDiscontinuousSpaceObjectSet(obj);
+    DCHECK(set != nullptr);
+    set->Clear(obj);
+  }
+}
+
+inline void HeapBitmap::Set(const mirror::Object* obj) {
+  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  if (LIKELY(bitmap != nullptr)) {
+    bitmap->Set(obj);
+  } else {
+    ObjectSet* set = GetDiscontinuousSpaceObjectSet(obj);
+    DCHECK(set != nullptr);
+    set->Set(obj);
+  }
+}
+
+inline SpaceBitmap* HeapBitmap::GetContinuousSpaceBitmap(const mirror::Object* obj) const {
+  for (const auto& bitmap : continuous_space_bitmaps_) {
+    if (bitmap->HasAddress(obj)) {
+      return bitmap;
+    }
+  }
+  return nullptr;
+}
+
+inline ObjectSet* HeapBitmap::GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) const {
+  for (const auto& space_set : discontinuous_space_sets_) {
+    if (space_set->Test(obj)) {
+      return space_set;
+    }
+  }
+  return nullptr;
+}
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
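
The Test/Clear/Set dispatch moving into this header amounts to "find the per-space bitmap that covers the object's address, otherwise fall back to the discontinuous-space set". A rough self-contained sketch of that lookup, with hypothetical RangeBitmap/ToyHeapBitmap types standing in for SpaceBitmap/HeapBitmap:

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative only; not the ART classes above.
class RangeBitmap {
 public:
  RangeBitmap(uintptr_t begin, uintptr_t end)
      : begin_(begin), end_(end), bits_((end - begin) / kAlignment, false) {}
  bool HasAddress(const void* obj) const {
    uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
    return begin_ <= addr && addr < end_;
  }
  void Set(const void* obj) { bits_[Index(obj)] = true; }
  void Clear(const void* obj) { bits_[Index(obj)] = false; }
  bool Test(const void* obj) const { return bits_[Index(obj)]; }

 private:
  static constexpr size_t kAlignment = 8;  // Assumed object alignment.
  size_t Index(const void* obj) const {
    return (reinterpret_cast<uintptr_t>(obj) - begin_) / kAlignment;
  }
  uintptr_t begin_, end_;
  std::vector<bool> bits_;
};

class ToyHeapBitmap {
 public:
  void AddBitmap(RangeBitmap* bitmap) { bitmaps_.push_back(bitmap); }
  // Mirrors the dispatch above: locate the covering space bitmap first.
  bool Test(const void* obj) const {
    RangeBitmap* bitmap = GetCoveringBitmap(obj);
    return bitmap != nullptr && bitmap->Test(obj);
  }
  void Set(const void* obj) {
    RangeBitmap* bitmap = GetCoveringBitmap(obj);
    if (bitmap != nullptr) bitmap->Set(obj);  // ART would otherwise hit the object set.
  }

 private:
  RangeBitmap* GetCoveringBitmap(const void* obj) const {
    for (RangeBitmap* bitmap : bitmaps_) {
      if (bitmap->HasAddress(obj)) return bitmap;
    }
    return nullptr;  // Caller falls back to a discontinuous-space set in ART.
  }
  std::vector<RangeBitmap*> bitmaps_;
};
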
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index b23b12e..f729c0e 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -31,54 +31,11 @@
 
 class HeapBitmap {
  public:
-  bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
-    if (LIKELY(bitmap != nullptr)) {
-      return bitmap->Test(obj);
-    } else {
-      return GetDiscontinuousSpaceObjectSet(obj) != NULL;
-    }
-  }
-
-  void Clear(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
-    if (LIKELY(bitmap != NULL)) {
-      bitmap->Clear(obj);
-    } else {
-      ObjectSet* set = GetDiscontinuousSpaceObjectSet(obj);
-      DCHECK(set != NULL);
-      set->Clear(obj);
-    }
-  }
-
-  void Set(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
-    SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
-    if (LIKELY(bitmap != NULL)) {
-      bitmap->Set(obj);
-    } else {
-      ObjectSet* set = GetDiscontinuousSpaceObjectSet(obj);
-      DCHECK(set != NULL);
-      set->Set(obj);
-    }
-  }
-
-  SpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) {
-    for (const auto& bitmap : continuous_space_bitmaps_) {
-      if (bitmap->HasAddress(obj)) {
-        return bitmap;
-      }
-    }
-    return nullptr;
-  }
-
-  ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) {
-    for (const auto& space_set : discontinuous_space_sets_) {
-      if (space_set->Test(obj)) {
-        return space_set;
-      }
-    }
-    return nullptr;
-  }
+  bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Clear(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Set(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  SpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
+  ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) const;
 
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index 76719b6..c756127 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -32,39 +32,8 @@
                                            space::ContinuousSpace* space)
       : ModUnionTableReferenceCache(name, heap, space) {}
 
-  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
-    for (space::ContinuousSpace* space : GetHeap()->GetContinuousSpaces()) {
-      if (space->HasAddress(ref)) {
-        return !space->IsImageSpace();
-      }
-    }
-    // Assume it points to a large object.
-    // TODO: Check.
-    return true;
-  }
-};
-
-// A mod-union table to record Zygote references to the alloc space.
-class ModUnionTableToAllocspace : public ModUnionTableReferenceCache {
- public:
-  explicit ModUnionTableToAllocspace(const std::string& name, Heap* heap,
-                                     space::ContinuousSpace* space)
-      : ModUnionTableReferenceCache(name, heap, space) {}
-
-  bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
-    const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces();
-    typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-    for (It it = spaces.begin(); it != spaces.end(); ++it) {
-      space::ContinuousSpace* space = *it;
-      if (space->Contains(ref)) {
-        // The allocation space is always considered for collection whereas the Zygote space is
-        // only considered for full GC.
-        return space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect;
-      }
-    }
-    // Assume it points to a large object.
-    // TODO: Check.
-    return true;
+  bool ShouldAddReference(const mirror::Object* ref) const OVERRIDE ALWAYS_INLINE {
+    return !space_->HasAddress(ref);
   }
 };
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 314f3c5..34ca654 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -76,7 +76,7 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool /* static */) const
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Only add the reference if it is non null and fits our criteria.
     mirror::HeapReference<Object>* obj_ptr = obj->GetFieldObjectReferenceAddr(offset);
@@ -123,12 +123,12 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool /* static */) const
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::HeapReference<Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
     mirror::Object* ref = ref_ptr->AsMirrorPtr();
     // Only add the reference if it is non null and fits our criteria.
-    if (ref  != nullptr && mod_union_table_->AddReference(obj, ref)) {
+    if (ref != nullptr && mod_union_table_->ShouldAddReference(ref)) {
       // Push the address of the reference.
       references_->push_back(ref_ptr);
     }
@@ -168,10 +168,10 @@
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset, false);
-    if (ref != nullptr && mod_union_table_->AddReference(obj, ref) &&
+    if (ref != nullptr && mod_union_table_->ShouldAddReference(ref) &&
         references_.find(ref) == references_.end()) {
       Heap* heap = mod_union_table_->GetHeap();
       space::ContinuousSpace* from_space = heap->FindContinuousSpaceFromObject(obj, false);
@@ -260,8 +260,7 @@
 
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkHeapReferenceCallback* callback,
                                                           void* arg) {
-  Heap* heap = GetHeap();
-  CardTable* card_table = heap->GetCardTable();
+  CardTable* card_table = heap_->GetCardTable();
 
   std::vector<mirror::HeapReference<Object>*> cards_references;
   ModUnionReferenceVisitor add_visitor(this, &cards_references);
@@ -271,7 +270,7 @@
     cards_references.clear();
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
     uintptr_t end = start + CardTable::kCardSize;
-    auto* space = heap->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
+    auto* space = heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
     DCHECK(space != nullptr);
     SpaceBitmap* live_bitmap = space->GetLiveBitmap();
     live_bitmap->VisitMarkedRange(start, end, add_visitor);
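
The refactor in this file replaces the two-argument virtual AddReference(obj, ref) with a const ShouldAddReference(ref) predicate, so a table only has to decide whether a reference escapes its owning space. A minimal sketch under assumed names (SpaceRange, ToyModUnionTable):

#include <cstdint>
#include <vector>

// Illustrative only; not ART's ModUnionTable classes.
struct SpaceRange {
  uintptr_t begin;
  uintptr_t end;
  bool HasAddress(const void* ref) const {
    uintptr_t addr = reinterpret_cast<uintptr_t>(ref);
    return begin <= addr && addr < end;
  }
};

class ToyModUnionTable {
 public:
  explicit ToyModUnionTable(SpaceRange owning_space) : space_(owning_space) {}

  // References that stay inside the owning space never need to be remembered;
  // only pointers that escape it matter to collections of other spaces.
  bool ShouldAddReference(const void* ref) const {
    return !space_.HasAddress(ref);
  }

  // Called for every reference slot found on a dirty card.
  void VisitReference(const void** slot) {
    if (*slot != nullptr && ShouldAddReference(*slot)) {
      references_.push_back(slot);  // Remember the slot, not the referent.
    }
  }

 private:
  SpaceRange space_;
  std::vector<const void**> references_;
};
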
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index c4b020b..c3a90e2 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -117,7 +117,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Function that tells whether or not to add a reference to the table.
-  virtual bool AddReference(const mirror::Object* obj, const mirror::Object* ref) = 0;
+  virtual bool ShouldAddReference(const mirror::Object* ref) const = 0;
 
   void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
new file mode 100644
index 0000000..079eeba
--- /dev/null
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "concurrent_copying.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
new file mode 100644
index 0000000..ab26a9c
--- /dev/null
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_H_
+#define ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_H_
+
+#include "garbage_collector.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+class ConcurrentCopying : public GarbageCollector {
+ public:
+  explicit ConcurrentCopying(Heap* heap, bool generational = false,
+                             const std::string& name_prefix = "")
+      : GarbageCollector(heap,
+                         name_prefix + (name_prefix.empty() ? "" : " ") +
+                         "concurrent copying + mark sweep") {}
+
+  ~ConcurrentCopying() {}
+
+  virtual void InitializePhase() OVERRIDE {}
+  virtual void MarkingPhase() OVERRIDE {}
+  virtual void ReclaimPhase() OVERRIDE {}
+  virtual void FinishPhase() OVERRIDE {}
+  virtual GcType GetGcType() const OVERRIDE {
+    return kGcTypePartial;
+  }
+  virtual CollectorType GetCollectorType() const OVERRIDE {
+    return kCollectorTypeCC;
+  }
+  virtual void RevokeAllThreadLocalBuffers() OVERRIDE {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ConcurrentCopying);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_CONCURRENT_COPYING_H_
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 0c7565c..07951e0 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -46,8 +46,7 @@
   ResetCumulativeStatistics();
 }
 
-void GarbageCollector::HandleDirtyObjectsPhase() {
-  LOG(FATAL) << "Unreachable";
+void GarbageCollector::PausePhase() {
 }
 
 void GarbageCollector::RegisterPause(uint64_t nano_length) {
@@ -86,12 +85,13 @@
       // Pause is the entire length of the GC.
       uint64_t pause_start = NanoTime();
       ATRACE_BEGIN("Application threads suspended");
-      // Mutator lock may be already exclusively held when we do garbage collections for changing the
-      // current collector / allocator during process state updates.
+      // Mutator lock may be already exclusively held when we do garbage collections for changing
+      // the current collector / allocator during process state updates.
       if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
         // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
         RevokeAllThreadLocalBuffers();
         MarkingPhase();
+        PausePhase();
         ReclaimPhase();
         // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
       } else {
@@ -101,6 +101,7 @@
         GetHeap()->PreGcRosAllocVerification(&timings_);
         RevokeAllThreadLocalBuffers();
         MarkingPhase();
+        PausePhase();
         ReclaimPhase();
         GetHeap()->PostGcRosAllocVerification(&timings_);
         ATRACE_BEGIN("Resuming mutator threads");
@@ -125,7 +126,7 @@
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       GetHeap()->PreGcRosAllocVerification(&timings_);
-      HandleDirtyObjectsPhase();
+      PausePhase();
       RevokeAllThreadLocalBuffers();
       GetHeap()->PostGcRosAllocVerification(&timings_);
       ATRACE_END();
@@ -141,12 +142,20 @@
       FinishPhase();
       break;
     }
+    case kCollectorTypeCC: {
+      // To be implemented.
+      break;
+    }
     default: {
       LOG(FATAL) << "Unreachable collector type=" << static_cast<size_t>(collector_type);
       break;
     }
   }
-
+  // Add the current timings to the cumulative timings.
+  cumulative_timings_.AddLogger(timings_);
+  // Update cumulative statistics with how many bytes the GC iteration freed.
+  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
+  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
   total_time_ns_ += GetDurationNs();
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index f8c4579..ccfa9cf 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -129,7 +129,7 @@
   virtual void MarkingPhase() = 0;
 
   // Only called for concurrent GCs.
-  virtual void HandleDirtyObjectsPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void PausePhase();
 
   // Called with mutators running.
   virtual void ReclaimPhase() = 0;
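
With HandleDirtyObjectsPhase renamed to PausePhase and called between marking and reclamation for every collector, both the phase sequencing and the cumulative bookkeeping live in the base class. A compact sketch of that shape, using a hypothetical PhasedCollector rather than ART's GarbageCollector:

#include <chrono>
#include <cstdint>

// Illustrative only; phase names follow the header above, the rest is assumed.
class PhasedCollector {
 public:
  virtual ~PhasedCollector() {}

  void Run() {
    auto start = std::chrono::steady_clock::now();
    InitializePhase();
    MarkingPhase();   // Concurrent: mutators keep running.
    PausePhase();     // Short stop-the-world window (re-mark roots, dirty cards).
    ReclaimPhase();   // Concurrent again: sweep and free.
    FinishPhase();
    auto end = std::chrono::steady_clock::now();
    // Cumulative bookkeeping sits in the base class so every collector gets it.
    total_time_ns_ +=
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
  }

  uint64_t TotalTimeNs() const { return total_time_ns_; }

 protected:
  virtual void InitializePhase() {}
  virtual void MarkingPhase() = 0;
  virtual void PausePhase() {}   // Optional: concurrent collectors override this.
  virtual void ReclaimPhase() = 0;
  virtual void FinishPhase() {}

 private:
  uint64_t total_time_ns_ = 0;
};
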
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 1cb2adb..974952d 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -21,7 +21,7 @@
 
 #include "gc/heap.h"
 #include "mirror/art_field.h"
-#include "mirror/class.h"
+#include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 8abf5e2..91ccd64 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -27,31 +27,20 @@
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/card_table-inl.h"
-#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "indirect_reference_table.h"
-#include "intern_table.h"
-#include "jni_internal.h"
-#include "monitor.h"
 #include "mark_sweep-inl.h"
-#include "mirror/art_field.h"
 #include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/class_loader.h"
-#include "mirror/dex_cache.h"
-#include "mirror/reference-inl.h"
 #include "mirror/object-inl.h"
-#include "mirror/object_array.h"
-#include "mirror/object_array-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 #include "thread_list.h"
-#include "verifier/method_verifier.h"
 
 using ::art::mirror::ArtField;
 using ::art::mirror::Class;
@@ -163,49 +152,38 @@
   }
 }
 
-void MarkSweep::HandleDirtyObjectsPhase() {
-  TimingLogger::ScopedSplit split("(Paused)HandleDirtyObjectsPhase", &timings_);
+void MarkSweep::PausePhase() {
+  TimingLogger::ScopedSplit split("(Paused)PausePhase", &timings_);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
-
-  {
+  if (IsConcurrent()) {
+    // Handle the dirty objects if we are a concurrent GC.
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-
     // Re-mark root set.
     ReMarkRoots();
-
     // Scan dirty objects, this is only required if we are not doing concurrent GC.
     RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty);
   }
-
   ProcessReferences(self);
-
-  // Only need to do this if we have the card mark verification on, and only during concurrent GC.
-  if (GetHeap()->verify_missing_card_marks_ || GetHeap()->verify_pre_gc_heap_||
-      GetHeap()->verify_post_gc_heap_) {
+  {
+    timings_.NewSplit("SwapStacks");
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    // This second sweep makes sure that we don't have any objects in the live stack which point to
-    // freed objects. These cause problems since their references may be previously freed objects.
-    SweepArray(GetHeap()->allocation_stack_.get(), false);
-    // Since SweepArray() above resets the (active) allocation
-    // stack. Need to revoke the thread-local allocation stacks that
-    // point into it.
+    heap_->SwapStacks(self);
+    live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
+    // Need to revoke all the thread local allocation stacks since we just swapped the allocation
+    // stacks and don't want anybody to allocate into the live stack.
     RevokeAllThreadLocalAllocationStacks(self);
   }
-
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
-
-  // Ensure that nobody inserted items in the live stack after we swapped the stacks.
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
-
-  // Disallow new system weaks to prevent a race which occurs when someone adds a new system
-  // weak before we sweep them. Since this new system weak may not be marked, the GC may
-  // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
-  // reference to a string that is about to be swept.
-  Runtime::Current()->DisallowNewSystemWeaks();
+  if (IsConcurrent()) {
+    // Disallow new system weaks to prevent a race which occurs when someone adds a new system
+    // weak before we sweep them. Since this new system weak may not be marked, the GC may
+    // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
+    // reference to a string that is about to be swept.
+    Runtime::Current()->DisallowNewSystemWeaks();
+  }
 }
 
 void MarkSweep::PreCleanCards() {
@@ -227,8 +205,7 @@
     // reference write are visible to the GC before the card is scanned (this is due to locks being
     // acquired / released in the checkpoint code).
     // The other roots are also marked to help reduce the pause.
-    MarkThreadRoots(self);
-    // TODO: Only mark the dirty roots.
+    MarkRootsCheckpoint(self, false);
     MarkNonThreadRoots();
     MarkConcurrentRoots(
         static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
@@ -241,6 +218,7 @@
 
 void MarkSweep::RevokeAllThreadLocalAllocationStacks(Thread* self) {
   if (kUseThreadLocalAllocationStack) {
+    timings_.NewSplit("RevokeAllThreadLocalAllocationStacks");
     Locks::mutator_lock_->AssertExclusiveHeld(self);
     heap_->RevokeAllThreadLocalAllocationStacks(self);
   }
@@ -256,15 +234,8 @@
   // Process dirty cards and add dirty cards to mod union tables.
   heap_->ProcessCards(timings_, false);
 
-  // Need to do this before the checkpoint since we don't want any threads to add references to
-  // the live stack during the recursive mark.
-  timings_.NewSplit("SwapStacks");
-  heap_->SwapStacks(self);
-
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   MarkRoots(self);
-  live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
-  UpdateAndMarkModUnion();
   MarkReachableObjects();
   // Pre-clean dirtied cards to reduce pauses.
   PreCleanCards();
@@ -284,18 +255,8 @@
   }
 }
 
-void MarkSweep::MarkThreadRoots(Thread* self) {
-  MarkRootsCheckpoint(self);
-}
-
 void MarkSweep::MarkReachableObjects() {
-  // Mark everything allocated since the last as GC live so that we can sweep concurrently,
-  // knowing that new allocations won't be marked as live.
-  timings_.StartSplit("MarkStackAsLive");
-  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStackAsLive(live_stack);
-  live_stack->Reset();
-  timings_.EndSplit();
+  UpdateAndMarkModUnion();
   // Recursively mark all the non-image bits set in the mark bitmap.
   RecursiveMark();
 }
@@ -303,44 +264,10 @@
 void MarkSweep::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
-
-  if (!IsConcurrent()) {
-    ProcessReferences(self);
-  }
-
-  {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    SweepSystemWeaks();
-  }
-
+  SweepSystemWeaks(self);
   if (IsConcurrent()) {
     Runtime::Current()->AllowNewSystemWeaks();
-
-    TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    accounting::ObjectStack* allocation_stack = GetHeap()->allocation_stack_.get();
-    if (!kPreCleanCards) {
-      // The allocation stack contains things allocated since the start of the GC. These may have
-      // been marked during this GC meaning they won't be eligible for reclaiming in the next
-      // sticky GC. Unmark these objects so that they are eligible for reclaiming in the next
-      // sticky GC.
-      // There is a race here which is safely handled. Another thread such as the hprof could
-      // have flushed the alloc stack after we resumed the threads. This is safe however, since
-      // reseting the allocation stack zeros it out with madvise. This means that we will either
-      // read NULLs or attempt to unmark a newly allocated object which will not be marked in the
-      // first place.
-      // We can't do this if we pre-clean cards since we will unmark objects which are no longer on
-      // a dirty card since we aged cards during the pre-cleaning process.
-      mirror::Object** end = allocation_stack->End();
-      for (mirror::Object** it = allocation_stack->Begin(); it != end; ++it) {
-        const Object* obj = *it;
-        if (obj != nullptr) {
-          UnMarkObjectNonNull(obj);
-        }
-      }
-    }
   }
-
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
@@ -414,28 +341,6 @@
   reinterpret_cast<MarkSweep*>(arg)->MarkObject(ref->AsMirrorPtr());
 }
 
-inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
-  DCHECK(!immune_region_.ContainsObject(obj));
-  if (kUseBrooksPointer) {
-    // Verify all the objects have the correct Brooks pointer installed.
-    obj->AssertSelfBrooksPointer();
-  }
-  // Try to take advantage of locality of references within a space, failing this find the space
-  // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
-  if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
-    if (LIKELY(new_bitmap != NULL)) {
-      object_bitmap = new_bitmap;
-    } else {
-      MarkLargeObject(obj, false);
-      return;
-    }
-  }
-  DCHECK(object_bitmap->HasAddress(obj));
-  object_bitmap->Clear(obj);
-}
-
 inline void MarkSweep::MarkObjectNonNull(Object* obj) {
   DCHECK(obj != nullptr);
   if (kUseBrooksPointer) {
@@ -590,7 +495,7 @@
     timings_.EndSplit();
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
-    MarkThreadRoots(self);
+    MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
     // At this point the live stack should no longer have any mutators which push into it.
     MarkNonThreadRoots();
     MarkConcurrentRoots(
@@ -1020,10 +925,10 @@
   }
 }
 
-void MarkSweep::SweepSystemWeaks() {
-  Runtime* runtime = Runtime::Current();
+void MarkSweep::SweepSystemWeaks(Thread* self) {
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->SweepSystemWeaks(IsMarkedCallback, this);
+  Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
   timings_.EndSplit();
 }
 
@@ -1034,14 +939,13 @@
 }
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
-  Heap* heap = GetHeap();
-  if (!heap->GetLiveBitmap()->Test(obj)) {
-    space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  if (!heap_->GetLiveBitmap()->Test(obj)) {
+    space::LargeObjectSpace* large_object_space = heap_->GetLargeObjectsSpace();
     if (!large_object_space->GetLiveObjects()->Test(obj)) {
-      if (std::find(heap->allocation_stack_->Begin(), heap->allocation_stack_->End(), obj) ==
-          heap->allocation_stack_->End()) {
+      if (std::find(heap_->allocation_stack_->Begin(), heap_->allocation_stack_->End(), obj) ==
+          heap_->allocation_stack_->End()) {
         // Object not found!
-        heap->DumpSpaces();
+        heap_->DumpSpaces();
         LOG(FATAL) << "Found dead object " << obj;
       }
     }
@@ -1055,9 +959,14 @@
 
 class CheckpointMarkThreadRoots : public Closure {
  public:
-  explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {}
+  explicit CheckpointMarkThreadRoots(MarkSweep* mark_sweep,
+                                     bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
+      : mark_sweep_(mark_sweep),
+        revoke_ros_alloc_thread_local_buffers_at_checkpoint_(
+            revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
+  }
 
-  virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+  virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
     ATRACE_BEGIN("Marking thread roots");
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
@@ -1065,21 +974,22 @@
         << thread->GetState() << " thread " << thread << " self " << self;
     thread->VisitRoots(MarkSweep::MarkRootParallelCallback, mark_sweep_);
     ATRACE_END();
-    if (kUseThreadLocalAllocationStack) {
-      thread->RevokeThreadLocalAllocationStack();
-    }
-    if (kRevokeRosAllocThreadLocalBuffersAtCheckpoint) {
+    if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
+      ATRACE_BEGIN("RevokeRosAllocThreadLocalBuffers");
       mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
+      ATRACE_END();
     }
     mark_sweep_->GetBarrier().Pass(self);
   }
 
  private:
-  MarkSweep* mark_sweep_;
+  MarkSweep* const mark_sweep_;
+  const bool revoke_ros_alloc_thread_local_buffers_at_checkpoint_;
 };
 
-void MarkSweep::MarkRootsCheckpoint(Thread* self) {
-  CheckpointMarkThreadRoots check_point(this);
+void MarkSweep::MarkRootsCheckpoint(Thread* self,
+                                    bool revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
+  CheckpointMarkThreadRoots check_point(this, revoke_ros_alloc_thread_local_buffers_at_checkpoint);
   timings_.StartSplit("MarkRootsCheckpoint");
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   // Request the check point is run on all threads returning a count of the threads that must
@@ -1089,10 +999,10 @@
   // TODO: optimize to not release locks when there are no threads to wait for.
   Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
   Locks::mutator_lock_->SharedUnlock(self);
-  ThreadState old_state = self->SetState(kWaitingForCheckPointsToRun);
-  CHECK_EQ(old_state, kWaitingPerformingGc);
-  gc_barrier_->Increment(self, barrier_count);
-  self->SetState(kWaitingPerformingGc);
+  {
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    gc_barrier_->Increment(self, barrier_count);
+  }
   Locks::mutator_lock_->SharedLock(self);
   Locks::heap_bitmap_lock_->ExclusiveLock(self);
   timings_.EndSplit();
@@ -1108,7 +1018,7 @@
   size_t freed_objects = 0;
   size_t freed_large_objects = 0;
   // How many objects are left in the array, modified after each space is swept.
-  Object** objects = const_cast<Object**>(allocations->Begin());
+  Object** objects = allocations->Begin();
   size_t count = allocations->Size();
   // Change the order to ensure that the non-moving space last swept as an optimization.
   std::vector<space::ContinuousSpace*> sweep_spaces;
@@ -1206,6 +1116,16 @@
 }
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
+  // Ensure that nobody inserted items in the live stack after we swapped the stacks.
+  CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
+  // Mark everything allocated since the last GC as live so that we can sweep concurrently,
+  // knowing that new allocations won't be marked as live.
+  timings_.StartSplit("MarkStackAsLive");
+  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+  heap_->MarkAllocStackAsLive(live_stack);
+  live_stack->Reset();
+  timings_.EndSplit();
+
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -1326,7 +1246,7 @@
         }
         obj = mark_stack_->PopBack();
       }
-      DCHECK(obj != NULL);
+      DCHECK(obj != nullptr);
       ScanObject(obj);
     }
   }
@@ -1347,14 +1267,8 @@
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
-  Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
-  heap->PostGcVerification(this);
-  // Update the cumulative statistics.
-  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
-  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
-  // Ensure that the mark stack is empty.
-  CHECK(mark_stack_->IsEmpty());
+  heap_->PostGcVerification(this);
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
              << " other=" << other_count_;
@@ -1375,22 +1289,10 @@
     VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" <<  mark_immune_count_
         << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_;
   }
-  // Update the cumulative loggers.
-  cumulative_timings_.Start();
-  cumulative_timings_.AddLogger(timings_);
-  cumulative_timings_.End();
-  // Clear all of the spaces' mark bitmaps.
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
-    if (bitmap != nullptr &&
-        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
-      bitmap->Clear();
-    }
-  }
+  CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
-  // Reset the marked large objects.
-  space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
-  large_objects->GetMarkObjects()->Clear();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  heap_->ClearMarkedObjects();
 }
 
 void MarkSweep::RevokeAllThreadLocalBuffers() {
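
One small change above swaps the hand-paired SetState() calls around the barrier wait for a ScopedThreadStateChange block. The RAII idiom, sketched with hypothetical ToyThread/ScopedStateChange types:

// Illustrative only; not ART's Thread or ScopedThreadStateChange.
enum ToyThreadState {
  kToyRunnable,
  kToyWaitingPerformingGc,
  kToyWaitingForCheckPointsToRun,
};

class ToyThread {
 public:
  ToyThreadState SetState(ToyThreadState new_state) {
    ToyThreadState old_state = state_;
    state_ = new_state;
    return old_state;
  }

 private:
  ToyThreadState state_ = kToyRunnable;
};

class ScopedStateChange {
 public:
  ScopedStateChange(ToyThread* self, ToyThreadState new_state)
      : self_(self), old_state_(self->SetState(new_state)) {}
  ~ScopedStateChange() { self_->SetState(old_state_); }  // Restored even on early return.

 private:
  ToyThread* const self_;
  const ToyThreadState old_state_;
};

// Usage mirroring the hunk above:
//   {
//     ScopedStateChange tsc(self, kToyWaitingForCheckPointsToRun);
//     gc_barrier_->Increment(self, barrier_count);
//   }  // Previous state restored here.
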
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 84b775a..f1fd546 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -32,33 +32,22 @@
 namespace mirror {
   class Class;
   class Object;
-  template<class T> class ObjectArray;
   class Reference;
 }  // namespace mirror
 
-class StackVisitor;
 class Thread;
 enum VisitRootFlags : uint8_t;
 
 namespace gc {
 
+class Heap;
+
 namespace accounting {
-  template <typename T> class AtomicStack;
-  class MarkIfReachesAllocspaceVisitor;
-  class ModUnionClearCardVisitor;
-  class ModUnionVisitor;
-  class ModUnionTableBitmap;
-  class MarkStackChunk;
+  template<typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object*> ObjectStack;
   class SpaceBitmap;
 }  // namespace accounting
 
-namespace space {
-  class ContinuousSpace;
-}  // namespace space
-
-class Heap;
-
 namespace collector {
 
 class MarkSweep : public GarbageCollector {
@@ -69,7 +58,7 @@
 
   virtual void InitializePhase() OVERRIDE;
   virtual void MarkingPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void HandleDirtyObjectsPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void PausePhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void ReclaimPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void FinishPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MarkReachableObjects()
@@ -107,7 +96,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void MarkRootsCheckpoint(Thread* self)
+  void MarkRootsCheckpoint(Thread* self, bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -137,8 +126,8 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Update and mark references from immune spaces. Virtual as overridden by StickyMarkSweep.
-  virtual void UpdateAndMarkModUnion()
+  // Update and mark references from immune spaces.
+  void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Pre clean cards to reduce how much work is needed in the pause.
@@ -170,8 +159,8 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void SweepSystemWeaks()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  void SweepSystemWeaks(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -235,17 +224,6 @@
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  // Unmarks an object by clearing the bit inside of the corresponding bitmap, or if it is in a
-  // space set, removing the object from the set.
-  void UnMarkObjectNonNull(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  // Mark the vm thread roots.
-  void MarkThreadRoots(Thread* self)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Marks an object atomically, safe to use from multiple threads.
   void MarkObjectNonNullParallel(mirror::Object* obj);
 
@@ -311,7 +289,7 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune range, every object inside the immune range is assumed to be marked.
+  // Immune region; every object inside the immune region is assumed to be marked.
   ImmuneRegion immune_region_;
 
   // Parallel finger.
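
MarkRootsCheckpoint above posts a closure to every thread and blocks on a barrier until each thread has run it and called Pass(). A self-contained sketch of that checkpoint pattern, with a hypothetical CountingBarrier standing in for ART's Barrier:

#include <condition_variable>
#include <mutex>

// Illustrative only; not ART's Barrier/Closure.
class CountingBarrier {
 public:
  explicit CountingBarrier(int expected) : remaining_(expected) {}

  // Called by each thread after it has run the checkpoint closure.
  void Pass() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (--remaining_ == 0) {
      done_.notify_all();
    }
  }

  // Called by the requesting thread; returns once every thread has passed.
  void Wait() {
    std::unique_lock<std::mutex> lock(mutex_);
    done_.wait(lock, [this] { return remaining_ == 0; });
  }

 private:
  std::mutex mutex_;
  std::condition_variable done_;
  int remaining_;
};

struct MarkThreadRootsClosure {
  CountingBarrier* barrier;
  void Run(/* per-thread root visiting happens here */) {
    barrier->Pass();  // Tell the requester this thread has finished.
  }
};
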
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index d4f47ef..222bd63 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -25,7 +25,7 @@
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
-#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
@@ -726,8 +726,8 @@
     return obj;
   }
   if (from_space_->HasAddress(obj)) {
-    mirror::Object* forwarding_address = GetForwardingAddressInFromSpace(const_cast<Object*>(obj));
-    return forwarding_address;  // Returns either the forwarding address or nullptr.
+    // Returns either the forwarding address or nullptr.
+    return GetForwardingAddressInFromSpace(obj);
   } else if (to_space_->HasAddress(obj)) {
     // Should be unlikely.
     // Already forwarded, must be marked.
@@ -751,38 +751,12 @@
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
-
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
   from_space_ = nullptr;
-
-  // Update the cumulative statistics
-  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
-  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
-
-  // Ensure that the mark stack is empty.
   CHECK(mark_stack_->IsEmpty());
-
-  // Update the cumulative loggers.
-  cumulative_timings_.Start();
-  cumulative_timings_.AddLogger(timings_);
-  cumulative_timings_.End();
-
-  // Clear all of the spaces' mark bitmaps.
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
-    if (bitmap != nullptr &&
-        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
-      bitmap->Clear();
-    }
-  }
   mark_stack_->Reset();
-
-  // Reset the marked large objects.
-  space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
-  large_objects->GetMarkObjects()->Clear();
-
   if (generational_) {
     // Decide whether to do a whole heap collection or a bump pointer
     // only space collection at the next collection by updating
@@ -800,6 +774,9 @@
       whole_heap_collection_ = false;
     }
   }
+  // Clear all of the spaces' mark bitmaps.
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  heap_->ClearMarkedObjects();
 }
 
 void SemiSpace::RevokeAllThreadLocalBuffers() {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 52b53aa..f067cb2 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -28,37 +28,28 @@
 
 namespace art {
 
+class Thread;
+
 namespace mirror {
   class Class;
   class Object;
-  template<class T> class ObjectArray;
 }  // namespace mirror
 
-class StackVisitor;
-class Thread;
-
 namespace gc {
 
+class Heap;
+
 namespace accounting {
   template <typename T> class AtomicStack;
-  class MarkIfReachesAllocspaceVisitor;
-  class ModUnionClearCardVisitor;
-  class ModUnionVisitor;
-  class ModUnionTableBitmap;
-  class MarkStackChunk;
   typedef AtomicStack<mirror::Object*> ObjectStack;
   class SpaceBitmap;
 }  // namespace accounting
 
 namespace space {
-  class BumpPointerSpace;
   class ContinuousMemMapAllocSpace;
   class ContinuousSpace;
-  class MallocSpace;
 }  // namespace space
 
-class Heap;
-
 namespace collector {
 
 class SemiSpace : public GarbageCollector {
@@ -189,12 +180,6 @@
   void ProcessMarkStack()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
-                         mirror::Object** weak_references,
-                         mirror::Object** finalizer_references,
-                         mirror::Object** phantom_references)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
   inline mirror::Object* GetForwardingAddressInFromSpace(mirror::Object* obj) const;
 
   // Revoke all the thread-local buffers.
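
The simplified forwarding-address path in semi_space.cc returns a from-space object's copy (or nullptr if it has not been copied yet), while a to-space object is already at its final location. A toy sketch of that lookup, using an explicit map instead of ART's in-object forwarding word (names are hypothetical):

#include <cstdint>
#include <unordered_map>

// Illustrative only; not ART's SemiSpace.
struct AddressRange {
  uintptr_t begin;
  uintptr_t end;
  bool HasAddress(const void* obj) const {
    uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
    return begin <= addr && addr < end;
  }
};

class ToySemiSpace {
 public:
  ToySemiSpace(AddressRange from_space, AddressRange to_space)
      : from_space_(from_space), to_space_(to_space) {}

  void RecordCopy(const void* from_obj, void* to_obj) { forwarding_[from_obj] = to_obj; }

  // From-space: return the copy, or nullptr if not yet copied.
  void* GetMarkedForwardAddress(void* obj) const {
    if (from_space_.HasAddress(obj)) {
      auto it = forwarding_.find(obj);
      return it != forwarding_.end() ? it->second : nullptr;
    }
    if (to_space_.HasAddress(obj)) {
      return obj;  // Already forwarded, must be marked.
    }
    return obj;  // Outside both semi-spaces: unmoved in this sketch.
  }

 private:
  AddressRange from_space_;
  AddressRange to_space_;
  std::unordered_map<const void*, void*> forwarding_;
};
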
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 934b1bd..4f9dabf 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -46,10 +46,6 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  // Don't need to do anything special here since we scan all the cards which may have references
-  // to the newly allocated objects.
-  void UpdateAndMarkModUnion() OVERRIDE { }
-
  private:
   DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep);
 };
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 98c27fb..c0a6b6a 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -36,6 +36,8 @@
   kCollectorTypeGSS,
   // Heap trimming collector, doesn't do any actual collecting.
   kCollectorTypeHeapTrim,
+  // A (mostly) concurrent copying collector.
+  kCollectorTypeCC,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ff4b4ce..1a32a9a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -35,6 +35,7 @@
 #include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "gc/collector/concurrent_copying.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
 #include "gc/collector/semi_space.h"
@@ -328,6 +329,9 @@
     bool generational = post_zygote_collector_type_ == kCollectorTypeGSS;
     semi_space_collector_ = new collector::SemiSpace(this, generational);
     garbage_collectors_.push_back(semi_space_collector_);
+
+    concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
+    garbage_collectors_.push_back(concurrent_copying_collector_);
   }
 
   if (running_on_valgrind_) {
@@ -1153,13 +1157,29 @@
     ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
   }
 
+  collector::GcType tried_type = next_gc_type_;
+  if (ptr == nullptr) {
+    const bool gc_ran =
+        CollectGarbageInternal(tried_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
+    if (was_default_allocator && allocator != GetCurrentAllocator()) {
+      *klass = sirt_klass.get();
+      return nullptr;
+    }
+    if (gc_ran) {
+      ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, usable_size);
+    }
+  }
+
   // Loop through our different Gc types and try to Gc until we get enough free memory.
   for (collector::GcType gc_type : gc_plan_) {
     if (ptr != nullptr) {
       break;
     }
+    if (gc_type == tried_type) {
+      continue;
+    }
     // Attempt to run the collector, if we succeed, re-try the allocation.
-    bool gc_ran =
+    const bool gc_ran =
         CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone;
     if (was_default_allocator && allocator != GetCurrentAllocator()) {
       *klass = sirt_klass.get();
@@ -1430,7 +1450,8 @@
       break;
     }
     default: {
-      LOG(FATAL) << "Attempted to transition to invalid collector type";
+      LOG(FATAL) << "Attempted to transition to invalid collector type "
+                 << static_cast<size_t>(collector_type);
       break;
     }
   }
@@ -1460,6 +1481,7 @@
     collector_type_ = collector_type;
     gc_plan_.clear();
     switch (collector_type_) {
+      case kCollectorTypeCC:  // Fall-through.
       case kCollectorTypeSS:  // Fall-through.
       case kCollectorTypeGSS: {
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1812,12 +1834,19 @@
   if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
            current_allocator_ == kAllocatorTypeTLAB);
-    gc_type = semi_space_collector_->GetGcType();
-    CHECK(temp_space_->IsEmpty());
-    semi_space_collector_->SetFromSpace(bump_pointer_space_);
-    semi_space_collector_->SetToSpace(temp_space_);
+    if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
+      gc_type = semi_space_collector_->GetGcType();
+      semi_space_collector_->SetFromSpace(bump_pointer_space_);
+      semi_space_collector_->SetToSpace(temp_space_);
+      collector = semi_space_collector_;
+    } else if (collector_type_ == kCollectorTypeCC) {
+      gc_type = concurrent_copying_collector_->GetGcType();
+      collector = concurrent_copying_collector_;
+    } else {
+      LOG(FATAL) << "Unreachable - invalid collector type " << static_cast<size_t>(collector_type_);
+    }
     temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    collector = semi_space_collector_;
+    CHECK(temp_space_->IsEmpty());
     gc_type = collector::kGcTypeFull;
   } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
       current_allocator_ == kAllocatorTypeDlMalloc) {
@@ -2821,5 +2850,19 @@
   CHECK(remembered_sets_.find(space) == remembered_sets_.end());
 }
 
+void Heap::ClearMarkedObjects() {
+  // Clear all of the spaces' mark bitmaps.
+  for (const auto& space : GetContinuousSpaces()) {
+    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    if (space->GetLiveBitmap() != mark_bitmap) {
+      mark_bitmap->Clear();
+    }
+  }
+  // Clear the marked objects in the discontinuous space object sets.
+  for (const auto& space : GetDiscontinuousSpaces()) {
+    space->GetMarkObjects()->Clear();
+  }
+}
+
 }  // namespace gc
 }  // namespace art
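
The allocation slow path above now runs the GC type that was already scheduled (next_gc_type_), retries the allocation, and only then escalates through gc_plan_ while skipping the type it just tried. A compact sketch of that retry policy, with assumed names and heavily simplified signatures:

#include <cstddef>
#include <functional>
#include <vector>

// Illustrative only; not Heap::AllocateInternalWithGc.
enum ToyGcType { kToyGcSticky, kToyGcPartial, kToyGcFull };

void* AllocateWithGcRetry(size_t alloc_size,
                          ToyGcType next_gc_type,
                          const std::vector<ToyGcType>& gc_plan,
                          const std::function<void*(size_t)>& try_allocate,
                          const std::function<bool(ToyGcType)>& run_gc) {
  void* ptr = try_allocate(alloc_size);
  if (ptr != nullptr) {
    return ptr;
  }
  // Run the collection that was going to happen anyway, then retry.
  const ToyGcType tried_type = next_gc_type;
  if (run_gc(tried_type)) {
    ptr = try_allocate(alloc_size);
  }
  // Escalate through the plan, skipping the type that was just tried.
  for (ToyGcType gc_type : gc_plan) {
    if (ptr != nullptr) {
      break;
    }
    if (gc_type == tried_type) {
      continue;
    }
    if (run_gc(gc_type)) {
      ptr = try_allocate(alloc_size);
    }
  }
  return ptr;  // May still be null; the real code then grows the heap or throws OOME.
}
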
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a522750..5879757 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -60,6 +60,7 @@
 }  // namespace accounting
 
 namespace collector {
+  class ConcurrentCopying;
   class GarbageCollector;
   class MarkSweep;
   class SemiSpace;
@@ -254,6 +255,9 @@
   void IncrementDisableMovingGC(Thread* self);
   void DecrementDisableMovingGC(Thread* self);
 
+  // Clear all of the mark bits; skips bitmaps whose mark bits are shared with their live bits.
+  void ClearMarkedObjects() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references);
 
@@ -575,7 +579,8 @@
     return AllocatorHasAllocationStack(allocator_type);
   }
   static bool IsCompactingGC(CollectorType collector_type) {
-    return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS;
+    return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
+        collector_type == kCollectorTypeCC;
   }
   bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -691,7 +696,7 @@
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
   bool IsGcConcurrent() const ALWAYS_INLINE {
-    return collector_type_ == kCollectorTypeCMS;
+    return collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCC;
   }
 
   // All-known continuous spaces, where objects lie within fixed bounds.
@@ -935,6 +940,7 @@
 
   std::vector<collector::GarbageCollector*> garbage_collectors_;
   collector::SemiSpace* semi_space_collector_;
+  collector::ConcurrentCopying* concurrent_copying_collector_;
 
   const bool running_on_valgrind_;
   const bool use_tlab_;
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 3c65205..012267b 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -231,7 +231,7 @@
   if (!rosalloc_->DoesReleaseAllPages()) {
     VLOG(heap) << "RosAllocSpace::Trim() ";
     size_t reclaimed = 0;
-    InspectAllRosAlloc(DlmallocMadviseCallback, &reclaimed);
+    InspectAllRosAlloc(DlmallocMadviseCallback, &reclaimed, false);
     return reclaimed;
   }
   return 0;
@@ -239,8 +239,7 @@
 
 void RosAllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
                          void* arg) {
-  InspectAllRosAlloc(callback, arg);
-  callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+  InspectAllRosAlloc(callback, arg, true);
 }
 
 size_t RosAllocSpace::GetFootprint() {
@@ -268,35 +267,56 @@
 
 uint64_t RosAllocSpace::GetBytesAllocated() {
   size_t bytes_allocated = 0;
-  InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated, false);
   return bytes_allocated;
 }
 
 uint64_t RosAllocSpace::GetObjectsAllocated() {
   size_t objects_allocated = 0;
-  InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated);
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated, false);
   return objects_allocated;
 }
 
+void RosAllocSpace::InspectAllRosAllocWithSuspendAll(
+    void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+    void* arg, bool do_null_callback_at_end) NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: NO_THREAD_SAFETY_ANALYSIS.
+  Thread* self = Thread::Current();
+  ThreadList* tl = Runtime::Current()->GetThreadList();
+  tl->SuspendAll();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    rosalloc_->InspectAll(callback, arg);
+    if (do_null_callback_at_end) {
+      callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+    }
+  }
+  tl->ResumeAll();
+}
+
 void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
-                                       void* arg) NO_THREAD_SAFETY_ANALYSIS {
+                                       void* arg, bool do_null_callback_at_end) NO_THREAD_SAFETY_ANALYSIS {
   // TODO: NO_THREAD_SAFETY_ANALYSIS.
   Thread* self = Thread::Current();
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // The mutators are already suspended. For example, a call path
     // from SignalCatcher::HandleSigQuit().
     rosalloc_->InspectAll(callback, arg);
-  } else {
-    // The mutators are not suspended yet.
-    DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
-    ThreadList* tl = Runtime::Current()->GetThreadList();
-    tl->SuspendAll();
-    {
-      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-      MutexLock mu2(self, *Locks::thread_list_lock_);
-      rosalloc_->InspectAll(callback, arg);
+    if (do_null_callback_at_end) {
+      callback(NULL, NULL, 0, arg);  // Indicate end of a space.
     }
-    tl->ResumeAll();
+  } else if (Locks::mutator_lock_->IsSharedHeld(self)) {
+    // The mutators are not suspended yet and we have shared access
+    // to the mutator lock. Temporarily release the shared access by
+    // transitioning to the suspend state, and suspend the mutators.
+    self->TransitionFromRunnableToSuspended(kSuspended);
+    InspectAllRosAllocWithSuspendAll(callback, arg, do_null_callback_at_end);
+    self->TransitionFromSuspendedToRunnable();
+    Locks::mutator_lock_->AssertSharedHeld(self);
+  } else {
+    // The mutators are not suspended yet. Suspend the mutators.
+    InspectAllRosAllocWithSuspendAll(callback, arg, do_null_callback_at_end);
   }
 }
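
InspectAllRosAlloc now distinguishes three lock states: the mutator lock is held exclusively (the world is already stopped), held shared (it must be released before SuspendAll to avoid deadlock), or not held at all. A rough sketch of that policy, with the lock state passed in explicitly and hypothetical runtime hooks instead of ART's Thread/ThreadList:

#include <functional>

// Illustrative only; not ART's locking machinery.
struct ToyRuntimeHooks {
  std::function<void()> suspend_all;             // e.g. a SuspendAll equivalent
  std::function<void()> resume_all;              // e.g. a ResumeAll equivalent
  std::function<void()> release_shared_mutator;  // Runnable -> Suspended transition
  std::function<void()> reacquire_shared_mutator;
};

void InspectHeapSketch(bool mutator_lock_exclusively_held,
                       bool mutator_lock_shared_held,
                       const ToyRuntimeHooks& hooks,
                       const std::function<void()>& inspect) {
  if (mutator_lock_exclusively_held) {
    inspect();  // Mutators are already suspended (e.g. a SIGQUIT dump); just walk.
    return;
  }
  if (mutator_lock_shared_held) {
    // Holding shared access while requesting a suspend-all would deadlock, so give
    // it up for the duration, like the Runnable -> Suspended transition above.
    hooks.release_shared_mutator();
  }
  hooks.suspend_all();
  inspect();  // Walk the allocator with the world stopped.
  hooks.resume_all();
  if (mutator_lock_shared_held) {
    hooks.reacquire_shared_mutator();
  }
}
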
 
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 949ec08..900e7a9 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -124,7 +124,11 @@
                                              size_t maximum_size, bool low_memory_mode);
 
   void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
-                          void* arg)
+                          void* arg, bool do_null_callback_at_end)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
+  void InspectAllRosAllocWithSuspendAll(
+      void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+      void* arg, bool do_null_callback_at_end)
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Underlying rosalloc.
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index b7e8ac2..2445b53 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -34,7 +34,8 @@
  * check.  We can also safely assume the constructor isn't associated
  * with an interface, array, or primitive class.
  */
-static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
+static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs,
+                                       jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
   SirtRef<mirror::Class> c(soa.Self(), m->GetDeclaringClass());
@@ -67,14 +68,14 @@
   }
 
   jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
-  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, (accessible == JNI_TRUE));
 
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Constructor, newInstance, "!([Ljava/lang/Object;Z)Ljava/lang/Object;"),
 };
 
 void register_java_lang_reflect_Constructor(JNIEnv* env) {
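
The descriptor change above adds a trailing Z (boolean) to newInstance, so the native function gains a jboolean parameter; the leading '!' in ART's table appears to be its internal fast-native marker rather than part of the standard JNI descriptor grammar. A plain-JNI sketch (hypothetical names, not ART's NATIVE_METHOD macro) of how such a table pairs descriptors with native signatures:

#include <jni.h>

// Illustrative only; a real implementation would allocate the receiver and
// invoke the constructor through the reflection machinery.
static jobject NewInstanceSketch(JNIEnv* env, jobject java_constructor,
                                 jobjectArray java_args, jboolean accessible) {
  (void)env; (void)java_constructor; (void)java_args; (void)accessible;
  return nullptr;
}

static const JNINativeMethod kMethods[] = {
  // "([Ljava/lang/Object;Z)Ljava/lang/Object;": Object[] plus the new boolean.
  {"newInstance", "([Ljava/lang/Object;Z)Ljava/lang/Object;",
   reinterpret_cast<void*>(NewInstanceSketch)},
};

static bool RegisterNewInstanceSketch(JNIEnv* env, jclass constructor_class) {
  return env->RegisterNatives(constructor_class, kMethods,
                              static_cast<jint>(sizeof(kMethods) / sizeof(kMethods[0])))
      == JNI_OK;
}
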
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 48b58bf..ce622d9 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -20,6 +20,7 @@
 #include "dex_file-inl.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "object_utils.h"
 #include "reflection.h"
@@ -27,6 +28,21 @@
 
 namespace art {
 
+static bool VerifyFieldAccess(mirror::ArtField* field, mirror::Object* obj, bool is_set)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (field->IsFinal() && is_set) {
+    ThrowIllegalAccessException(nullptr, StringPrintf("Cannot set final field: %s",
+                                                      PrettyField(field).c_str()).c_str());
+    return false;
+  }
+  if (!VerifyAccess(obj, field->GetDeclaringClass(), field->GetAccessFlags())) {
+    ThrowIllegalAccessException(nullptr, StringPrintf("Cannot access field: %s",
+                                                      PrettyField(field).c_str()).c_str());
+    return false;
+  }
+  return true;
+}
+
 static bool GetFieldValue(const ScopedFastNativeObjectAccess& soa, mirror::Object* o,
                           mirror::ArtField* f, Primitive::Type field_type, bool allow_references,
                           JValue* value)
@@ -98,7 +114,7 @@
   return true;
 }
 
-static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj) {
+static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
@@ -107,6 +123,11 @@
     DCHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
+  // If field is not set to be accessible, verify it can be accessed by the caller.
+  if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, false)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
+  }
   // We now don't expect suspension unless an exception is thrown.
   // Get the field's value, boxing if necessary.
   Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
@@ -119,7 +140,7 @@
 }
 
 static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj,
-                                char dst_descriptor) {
+                                char dst_descriptor, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
@@ -128,6 +149,13 @@
     DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
+
+  // If field is not set to be accessible, verify it can be accessed by the caller.
+  if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, false)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return JValue();
+  }
+
   // We now don't expect suspension unless an exception is thrown.
   // Read the value.
   Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
@@ -147,36 +175,38 @@
   return wide_value;
 }
 
-static jboolean Field_getBoolean(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'Z').GetZ();
+static jboolean Field_getBoolean(JNIEnv* env, jobject javaField, jobject javaObj,
+                                 jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'Z', accessible).GetZ();
 }
 
-static jbyte Field_getByte(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'B').GetB();
+static jbyte Field_getByte(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'B', accessible).GetB();
 }
 
-static jchar Field_getChar(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'C').GetC();
+static jchar Field_getChar(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'C', accessible).GetC();
 }
 
-static jdouble Field_getDouble(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'D').GetD();
+static jdouble Field_getDouble(JNIEnv* env, jobject javaField, jobject javaObj,
+                               jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'D', accessible).GetD();
 }
 
-static jfloat Field_getFloat(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'F').GetF();
+static jfloat Field_getFloat(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'F', accessible).GetF();
 }
 
-static jint Field_getInt(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'I').GetI();
+static jint Field_getInt(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'I', accessible).GetI();
 }
 
-static jlong Field_getLong(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'J').GetJ();
+static jlong Field_getLong(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'J', accessible).GetJ();
 }
 
-static jshort Field_getShort(JNIEnv* env, jobject javaField, jobject javaObj) {
-  return GetPrimitiveField(env, javaField, javaObj, 'S').GetS();
+static jshort Field_getShort(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
+  return GetPrimitiveField(env, javaField, javaObj, 'S', accessible).GetS();
 }
 
 static void SetFieldValue(ScopedFastNativeObjectAccess& soa, mirror::Object* o,
@@ -223,7 +253,8 @@
   }
 }
 
-static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue) {
+static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue,
+                      jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
@@ -260,11 +291,16 @@
     DCHECK(soa.Self()->IsExceptionPending());
     return;
   }
+  // If field is not set to be accessible, verify it can be accessed by the caller.
+  if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, true)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return;
+  }
   SetFieldValue(soa, o, f, field_prim_type, true, unboxed_value);
 }
 
 static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, char src_descriptor,
-                              const JValue& new_value) {
+                              const JValue& new_value, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
@@ -286,77 +322,91 @@
     return;
   }
 
+  // If field is not set to be accessible, verify it can be accessed by the caller.
+  if ((accessible == JNI_FALSE) && !VerifyFieldAccess(f, o, true)) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return;
+  }
+
   // Write the value.
   SetFieldValue(soa, o, f, field_type, false, wide_value);
 }
 
-static void Field_setBoolean(JNIEnv* env, jobject javaField, jobject javaObj, jboolean z) {
+static void Field_setBoolean(JNIEnv* env, jobject javaField, jobject javaObj, jboolean z,
+                             jboolean accessible) {
   JValue value;
   value.SetZ(z);
-  SetPrimitiveField(env, javaField, javaObj, 'Z', value);
+  SetPrimitiveField(env, javaField, javaObj, 'Z', value, accessible);
 }
 
-static void Field_setByte(JNIEnv* env, jobject javaField, jobject javaObj, jbyte b) {
+static void Field_setByte(JNIEnv* env, jobject javaField, jobject javaObj, jbyte b,
+                          jboolean accessible) {
   JValue value;
   value.SetB(b);
-  SetPrimitiveField(env, javaField, javaObj, 'B', value);
+  SetPrimitiveField(env, javaField, javaObj, 'B', value, accessible);
 }
 
-static void Field_setChar(JNIEnv* env, jobject javaField, jobject javaObj, jchar c) {
+static void Field_setChar(JNIEnv* env, jobject javaField, jobject javaObj, jchar c,
+                          jboolean accessible) {
   JValue value;
   value.SetC(c);
-  SetPrimitiveField(env, javaField, javaObj, 'C', value);
+  SetPrimitiveField(env, javaField, javaObj, 'C', value, accessible);
 }
 
-static void Field_setDouble(JNIEnv* env, jobject javaField, jobject javaObj, jdouble d) {
+static void Field_setDouble(JNIEnv* env, jobject javaField, jobject javaObj, jdouble d,
+                            jboolean accessible) {
   JValue value;
   value.SetD(d);
-  SetPrimitiveField(env, javaField, javaObj, 'D', value);
+  SetPrimitiveField(env, javaField, javaObj, 'D', value, accessible);
 }
 
-static void Field_setFloat(JNIEnv* env, jobject javaField, jobject javaObj, jfloat f) {
+static void Field_setFloat(JNIEnv* env, jobject javaField, jobject javaObj, jfloat f,
+                           jboolean accessible) {
   JValue value;
   value.SetF(f);
-  SetPrimitiveField(env, javaField, javaObj, 'F', value);
+  SetPrimitiveField(env, javaField, javaObj, 'F', value, accessible);
 }
 
-static void Field_setInt(JNIEnv* env, jobject javaField, jobject javaObj, jint i) {
+static void Field_setInt(JNIEnv* env, jobject javaField, jobject javaObj, jint i,
+                         jboolean accessible) {
   JValue value;
   value.SetI(i);
-  SetPrimitiveField(env, javaField, javaObj, 'I', value);
+  SetPrimitiveField(env, javaField, javaObj, 'I', value, accessible);
 }
 
-static void Field_setLong(JNIEnv* env, jobject javaField, jobject javaObj, jlong j) {
+static void Field_setLong(JNIEnv* env, jobject javaField, jobject javaObj, jlong j,
+                          jboolean accessible) {
   JValue value;
   value.SetJ(j);
-  SetPrimitiveField(env, javaField, javaObj, 'J', value);
+  SetPrimitiveField(env, javaField, javaObj, 'J', value, accessible);
 }
 
-static void Field_setShort(JNIEnv* env, jobject javaField, jobject javaObj, jshort s) {
+static void Field_setShort(JNIEnv* env, jobject javaField, jobject javaObj, jshort s,
+                           jboolean accessible) {
   JValue value;
   value.SetS(s);
-  SetPrimitiveField(env, javaField, javaObj, 'S', value);
+  SetPrimitiveField(env, javaField, javaObj, 'S', value, accessible);
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Field, get,        "!(Ljava/lang/Object;)Ljava/lang/Object;"),
-  NATIVE_METHOD(Field, getBoolean, "!(Ljava/lang/Object;)Z"),
-  NATIVE_METHOD(Field, getByte,    "!(Ljava/lang/Object;)B"),
-  NATIVE_METHOD(Field, getChar,    "!(Ljava/lang/Object;)C"),
-  NATIVE_METHOD(Field, getDouble,  "!(Ljava/lang/Object;)D"),
-  NATIVE_METHOD(Field, getFloat,   "!(Ljava/lang/Object;)F"),
-  NATIVE_METHOD(Field, getInt,     "!(Ljava/lang/Object;)I"),
-  NATIVE_METHOD(Field, getLong,    "!(Ljava/lang/Object;)J"),
-  NATIVE_METHOD(Field, getShort,   "!(Ljava/lang/Object;)S"),
-  NATIVE_METHOD(Field, set,        "!(Ljava/lang/Object;Ljava/lang/Object;)V"),
-  NATIVE_METHOD(Field, setBoolean, "!(Ljava/lang/Object;Z)V"),
-  NATIVE_METHOD(Field, setByte,    "!(Ljava/lang/Object;B)V"),
-  NATIVE_METHOD(Field, setChar,    "!(Ljava/lang/Object;C)V"),
-  NATIVE_METHOD(Field, setDouble,  "!(Ljava/lang/Object;D)V"),
-  NATIVE_METHOD(Field, setFloat,   "!(Ljava/lang/Object;F)V"),
-  NATIVE_METHOD(Field, setInt,     "!(Ljava/lang/Object;I)V"),
-  NATIVE_METHOD(Field, setLong,    "!(Ljava/lang/Object;J)V"),
-  NATIVE_METHOD(Field, setShort,   "!(Ljava/lang/Object;S)V"),
+  NATIVE_METHOD(Field, get,        "!(Ljava/lang/Object;Z)Ljava/lang/Object;"),
+  NATIVE_METHOD(Field, getBoolean, "!(Ljava/lang/Object;Z)Z"),
+  NATIVE_METHOD(Field, getByte,    "!(Ljava/lang/Object;Z)B"),
+  NATIVE_METHOD(Field, getChar,    "!(Ljava/lang/Object;Z)C"),
+  NATIVE_METHOD(Field, getDouble,  "!(Ljava/lang/Object;Z)D"),
+  NATIVE_METHOD(Field, getFloat,   "!(Ljava/lang/Object;Z)F"),
+  NATIVE_METHOD(Field, getInt,     "!(Ljava/lang/Object;Z)I"),
+  NATIVE_METHOD(Field, getLong,    "!(Ljava/lang/Object;Z)J"),
+  NATIVE_METHOD(Field, getShort,   "!(Ljava/lang/Object;Z)S"),
+  NATIVE_METHOD(Field, set,        "!(Ljava/lang/Object;Ljava/lang/Object;Z)V"),
+  NATIVE_METHOD(Field, setBoolean, "!(Ljava/lang/Object;ZZ)V"),
+  NATIVE_METHOD(Field, setByte,    "!(Ljava/lang/Object;BZ)V"),
+  NATIVE_METHOD(Field, setChar,    "!(Ljava/lang/Object;CZ)V"),
+  NATIVE_METHOD(Field, setDouble,  "!(Ljava/lang/Object;DZ)V"),
+  NATIVE_METHOD(Field, setFloat,   "!(Ljava/lang/Object;FZ)V"),
+  NATIVE_METHOD(Field, setInt,     "!(Ljava/lang/Object;IZ)V"),
+  NATIVE_METHOD(Field, setLong,    "!(Ljava/lang/Object;JZ)V"),
+  NATIVE_METHOD(Field, setShort,   "!(Ljava/lang/Object;SZ)V"),
 };
 
 void register_java_lang_reflect_Field(JNIEnv* env) {
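The added Z in every descriptor is the same boolean, supplied by the Java-side declarations (the libcore half of this change is not shown in this diff). With accessible == JNI_FALSE, VerifyFieldAccess rejects writes to final fields and any access the caller could not perform directly; setAccessible(true) skips both checks, which is what test 046-reflect below now expects. A short illustrative sketch, with made-up class names:

import java.lang.reflect.Field;

class Target {
    public final int cantTouchThis = 77;
    private int secret = 42;
}

public class FieldAccessDemo {
    public static void main(String[] args) throws Exception {
        Target t = new Target();

        Field f = Target.class.getDeclaredField("cantTouchThis");
        try {
            f.setInt(t, 99);          // final field + !accessible -> IllegalAccessException
        } catch (IllegalAccessException expected) {
            System.out.println("set-final rejected");
        }
        f.setAccessible(true);
        f.setInt(t, 87);              // checks skipped once accessible

        Field s = Target.class.getDeclaredField("secret");
        try {
            s.getInt(t);              // private field + !accessible -> IllegalAccessException
        } catch (IllegalAccessException expected) {
            System.out.println("get-private rejected");
        }
    }
}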
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index abb73b6..22e81e4 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -29,10 +29,10 @@
 
 namespace art {
 
-static jobject Method_invoke(JNIEnv* env,
-                             jobject javaMethod, jobject javaReceiver, jobject javaArgs) {
+static jobject Method_invoke(JNIEnv* env, jobject javaMethod, jobject javaReceiver,
+                             jobject javaArgs, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
+  return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs, (accessible == JNI_TRUE));
 }
 
 static jobject Method_getExceptionTypesNative(JNIEnv* env, jobject javaMethod) {
@@ -56,7 +56,7 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;"),
+  NATIVE_METHOD(Method, invoke, "!(Ljava/lang/Object;[Ljava/lang/Object;Z)Ljava/lang/Object;"),
   NATIVE_METHOD(Method, getExceptionTypesNative, "!()[Ljava/lang/Class;"),
 };
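Method.invoke follows the same pattern: a denied call now surfaces as IllegalAccessException before the target runs, while an exception thrown by the target is still wrapped in InvocationTargetException, the distinction test 100-reflect2 below now checks for. A small sketch with illustrative names:

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

class Service {
    private void internal() { }
    public void fails() { throw new ArithmeticException("surprise!"); }
}

public class InvokeDemo {
    public static void main(String[] args) throws Exception {
        Service svc = new Service();

        Method internal = Service.class.getDeclaredMethod("internal");
        try {
            internal.invoke(svc);              // access check fails: IllegalAccessException
        } catch (IllegalAccessException expected) {
            System.out.println("access denied");
        }

        Method fails = Service.class.getMethod("fails");
        try {
            fails.invoke(svc);                 // target throws: wrapped exception
        } catch (InvocationTargetException expected) {
            System.out.println("target threw " + expected.getCause());
        }
    }
}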
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9b1c013..e2086f1 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -107,6 +107,8 @@
     return gc::kCollectorTypeSS;
   } else if (option == "GSS") {
     return gc::kCollectorTypeGSS;
+  } else if (option == "CC") {
+    return gc::kCollectorTypeCC;
   } else {
     return gc::kCollectorTypeNone;
   }
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 7f39e70..6ed61f6 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -26,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object_array.h"
 #include "mirror/object_array-inl.h"
+#include "nth_caller_visitor.h"
 #include "object_utils.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
@@ -461,7 +462,7 @@
 }
 
 jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod,
-                     jobject javaReceiver, jobject javaArgs) {
+                     jobject javaReceiver, jobject javaArgs, bool accessible) {
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
   mirror::Class* declaring_class = m->GetDeclaringClass();
@@ -499,6 +500,13 @@
     return NULL;
   }
 
+  // If method is not set to be accessible, verify it can be accessed by the caller.
+  if (!accessible && !VerifyAccess(receiver, declaring_class, m->GetAccessFlags())) {
+    ThrowIllegalAccessException(nullptr, StringPrintf("Cannot access method: %s",
+                                                      PrettyMethod(m).c_str()).c_str());
+    return nullptr;
+  }
+
   // Invoke the method.
   JValue result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
@@ -786,4 +794,30 @@
   return UnboxPrimitive(&throw_location, o, dst_class, nullptr, unboxed_value);
 }
 
+bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) {
+  NthCallerVisitor visitor(Thread::Current(), 2);
+  visitor.WalkStack();
+  mirror::Class* caller_class = visitor.caller->GetDeclaringClass();
+
+  if ((((access_flags & kAccPublic) != 0) && declaring_class->IsPublic()) ||
+      caller_class == declaring_class) {
+    return true;
+  }
+  if ((access_flags & kAccPrivate) != 0) {
+    return false;
+  }
+  if ((access_flags & kAccProtected) != 0) {
+    if (obj != nullptr && !obj->InstanceOf(caller_class) &&
+        !declaring_class->IsInSamePackage(caller_class)) {
+      return false;
+    } else if (declaring_class->IsAssignableFrom(caller_class)) {
+      return true;
+    }
+  }
+  if (!declaring_class->IsInSamePackage(caller_class)) {
+    return false;
+  }
+  return true;
+}
+
 }  // namespace art
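For readability, the ordering VerifyAccess applies can be paraphrased in Java as below. This is a sketch of the same decision sequence, not code from the patch: it takes the caller class as a parameter instead of walking the stack (NthCallerVisitor at depth 2) and ignores class loaders in the package comparison.

import java.lang.reflect.Modifier;

final class AccessCheckSketch {
    static boolean verifyAccess(Object obj, Class<?> declaringClass, int accessFlags,
                                Class<?> callerClass) {
        if (((accessFlags & Modifier.PUBLIC) != 0 && Modifier.isPublic(declaringClass.getModifiers()))
                || callerClass == declaringClass) {
            return true;                                  // public member of a public class, or self-access
        }
        if ((accessFlags & Modifier.PRIVATE) != 0) {
            return false;                                 // private: declaring class only
        }
        if ((accessFlags & Modifier.PROTECTED) != 0) {
            if (obj != null && !callerClass.isInstance(obj)
                    && !samePackage(declaringClass, callerClass)) {
                return false;                             // protected instance member, unrelated receiver
            }
            if (declaringClass.isAssignableFrom(callerClass)) {
                return true;                              // protected and the caller is a subclass
            }
        }
        return samePackage(declaringClass, callerClass);  // package-private and remaining protected cases
    }

    // Simplified package comparison; the runtime also compares class loaders.
    static boolean samePackage(Class<?> a, Class<?> b) {
        String an = a.getName(), bn = b.getName();
        return an.substring(0, an.lastIndexOf('.') + 1)
                 .equals(bn.substring(0, bn.lastIndexOf('.') + 1));
    }
}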
diff --git a/runtime/reflection.h b/runtime/reflection.h
index 325998f..d9a7228 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -68,12 +68,15 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 jobject InvokeMethod(const ScopedObjectAccess& soa, jobject method, jobject receiver,
-                     jobject args)
+                     jobject args, bool accessible)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 bool VerifyObjectIsClass(mirror::Object* o, mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_REFLECTION_H_
diff --git a/runtime/stack.cc b/runtime/stack.cc
index c33d1ab..ab3bd85 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -16,6 +16,7 @@
 
 #include "stack.h"
 
+#include "base/hex_dump.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -23,6 +24,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
+#include "thread.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "verify_object-inl.h"
@@ -30,6 +32,14 @@
 
 namespace art {
 
+// Define a piece of memory, the address of which can be used as a marker
+// for the gap in the stack added during stack overflow handling.
+static uint32_t stack_overflow_object;
+
+// The stack overflow gap marker is simply a valid unique address.
+void* stack_overflow_gap_marker = &stack_overflow_object;
+
+
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -294,20 +304,56 @@
   CHECK_EQ(cur_depth_, 0U);
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
+
+  bool kDebugStackWalk = false;
+  bool kDebugStackWalkVeryVerbose = false;            // The name says it all.
+
+  if (kDebugStackWalk) {
+    LOG(INFO) << "walking stack";
+  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
+    if (kDebugStackWalkVeryVerbose) {
+      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
+      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
+    }
+
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        SanityCheckFrame();
-        bool should_continue = VisitFrame();
-        if (UNLIKELY(!should_continue)) {
-          return;
+        // Check for a stack overflow gap marker.
+        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
+          // Marker for a stack overflow.  This is followed by the offset from the
+          // current SP to the next frame.  There is a gap in the stack here.  Jump
+          // the gap silently.
+          // Caveat coder: the layout of the overflow marker depends on the architecture.
+          //   The first element is address sized (8 bytes on a 64 bit machine).  The second
+          //   element is 32 bits.  So be careful with those address calculations.
+
+          // Get the address of the offset, just beyond the marker pointer.
+          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
+          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
+          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
+          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
+            reinterpret_cast<byte*>(gapsizeaddr) + gap);
+          if (kDebugStackWalk) {
+            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
+                next_frame;
+          }
+          cur_quick_frame_ = next_frame;
+          method = *next_frame;
+          CHECK(method != nullptr);
+        } else {
+          SanityCheckFrame();
+          bool should_continue = VisitFrame();
+          if (UNLIKELY(!should_continue)) {
+            return;
+          }
         }
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
@@ -317,6 +363,9 @@
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
+        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -349,6 +398,10 @@
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
+        if (kDebugStackWalkVeryVerbose) {
+          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
+          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
+        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
diff --git a/runtime/stack.h b/runtime/stack.h
index 4ee5de1..ab903d6 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -100,6 +100,14 @@
   kVRegNonSpecialTempBaseReg = -3,
 };
 
+// Special object used to mark the gap in the stack placed when a stack
+// overflow fault occurs during implicit stack checking.  This is not
+// a real object - it is used simply as a valid address to which a
+// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
+// into the stack during the stack overflow signal handling to mark the gap
+// in which the memory is protected against read and write.
+extern void* stack_overflow_gap_marker;
+
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 38e4204..3692b9f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -215,10 +215,16 @@
     stack_size = PTHREAD_STACK_MIN;
   }
 
-  // It's likely that callers are trying to ensure they have at least a certain amount of
-  // stack space, so we should add our reserved space on top of what they requested, rather
-  // than implicitly take it away from them.
-  stack_size += Thread::kStackOverflowReservedBytes;
+  if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+    // It's likely that callers are trying to ensure they have at least a certain amount of
+    // stack space, so we should add our reserved space on top of what they requested, rather
+    // than implicitly take it away from them.
+    stack_size += Thread::kStackOverflowReservedBytes;
+  } else {
+    // If we are going to use implicit stack checks, allocate space for the protected
+    // region at the bottom of the stack.
+    stack_size += Thread::kStackOverflowImplicitCheckSize;
+  }
 
   // Some systems require the stack size to be a multiple of the system page size, so round up.
   stack_size = RoundUp(stack_size, kPageSize);
@@ -226,6 +232,39 @@
   return stack_size;
 }
 
+// Install a protected region in the stack.  This is used to trigger a SIGSEGV when a stack
+// overflow occurs.  It is located right below the stack_end_.  Just below that
+// is the StackOverflow reserved region used when creating the StackOverflow
+// exception.
+void Thread::InstallImplicitProtection(bool is_main_stack) {
+  byte* pregion = stack_end_;
+
+  constexpr uint32_t kMarker = 0xdadadada;
+  uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
+  if (*marker == kMarker) {
+    // The region has already been set up.
+    return;
+  }
+  // Add marker so that we can detect a second attempt to do this.
+  *marker = kMarker;
+
+  pregion -= kStackOverflowProtectedSize;
+
+  // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
+  // need to do this on the main stack.
+  if (is_main_stack) {
+    memset(pregion, 0x55, kStackOverflowProtectedSize);
+  }
+  VLOG(threads) << "installing stack protected region at " << std::hex <<
+      static_cast<void*>(pregion) << " to " <<
+      static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
+
+  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
+        << strerror(errno);
+  }
+}
+
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
   CHECK(java_peer != nullptr);
   Thread* self = static_cast<JNIEnvExt*>(env)->self;
@@ -472,7 +511,22 @@
 #endif
 
   // Set stack_end_ to the bottom of the stack saving space of stack overflows
-  ResetDefaultStackEnd();
+  bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  ResetDefaultStackEnd(implicit_stack_check);
+
+  // Install the protected region if we are doing implicit overflow checks.
+  if (implicit_stack_check) {
+    if (is_main_thread) {
+      // The main thread has a 16K protected region at the bottom.  We need to
+      // install our own region, so we move the limits of the stack to make
+      // room for it.
+      constexpr uint32_t kDelta = 16 * KB;
+      stack_begin_ += kDelta;
+      stack_end_ += kDelta;
+      stack_size_ -= kDelta;
+    }
+    InstallImplicitProtection(is_main_thread);
+  }
 
   // Sanity check.
   int stack_variable;
@@ -967,6 +1021,7 @@
       pthread_self_(0),
       no_thread_suspension_(0),
       last_no_thread_suspension_cause_(nullptr),
+      suspend_trigger_(reinterpret_cast<uintptr_t*>(&suspend_trigger_)),
       thread_exit_check_count_(0),
       thread_local_start_(nullptr),
       thread_local_pos_(nullptr),
diff --git a/runtime/thread.h b/runtime/thread.h
index 32875e6..63d22c5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -112,6 +112,14 @@
   static constexpr size_t kStackOverflowReservedUsableBytes =
       kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
+  // For implicit overflow checks we reserve an extra piece of memory at the bottom
+  // of the stack (lowest memory).  The higher portion of the memory
+  // is protected against reads and the lower is available for use while
+  // throwing the StackOverflow exception.
+  static constexpr size_t kStackOverflowProtectedSize = 32 * KB;
+  static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
+    kStackOverflowReservedBytes;
+
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
   static void CreateNativeThread(JNIEnv* env, jobject peer, size_t stack_size, bool daemon);
@@ -461,12 +469,21 @@
   void SetStackEndForStackOverflow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Set the stack end to that to be used during regular execution
-  void ResetDefaultStackEnd() {
+  void ResetDefaultStackEnd(bool implicit_overflow_check) {
     // Our stacks grow down, so we want stack_end_ to be near there, but reserving enough room
     // to throw a StackOverflowError.
-    stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    if (implicit_overflow_check) {
+      // For implicit checks we also need to add in the protected region above the
+      // overflow region.
+      stack_end_ = stack_begin_ + kStackOverflowImplicitCheckSize;
+    } else {
+      stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
+    }
   }
 
+  // Install the protected region for implicit stack checks.
+  void InstallImplicitProtection(bool is_main_stack);
+
   bool IsHandlingStackOverflow() const {
     return stack_end_ == stack_begin_;
   }
diff --git a/test/046-reflect/expected.txt b/test/046-reflect/expected.txt
index fcfaf2f..55b0dbe 100644
--- a/test/046-reflect/expected.txt
+++ b/test/046-reflect/expected.txt
@@ -81,7 +81,8 @@
  Field type is int
  Access flags are 0x11
   cantTouchThis is 77
-  cantTouchThis is now 99
+  as expected: set-final throws exception
+  cantTouchThis is still 77
   public final int Target.cantTouchThis accessible=false
   public final int Target.cantTouchThis accessible=true
   cantTouchThis is now 87
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index dfb0d8f..d60fcb4 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -335,11 +335,12 @@
             System.out.println("  cantTouchThis is " + intVal);
             try {
                 field.setInt(instance, 99);
+                System.out.println("ERROR: set-final did not throw exception");
             } catch (IllegalAccessException iae) {
-                System.out.println("ERROR: set-final failed");
+                System.out.println("  as expected: set-final throws exception");
             }
             intVal = field.getInt(instance);
-            System.out.println("  cantTouchThis is now " + intVal);
+            System.out.println("  cantTouchThis is still " + intVal);
 
             System.out.println("  " + field + " accessible=" + field.isAccessible());
             field.setAccessible(true);
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 3b9a475..c9b93ba 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -16,6 +16,7 @@
 
 import other.PublicClass;
 import java.lang.reflect.Field;
+import java.lang.reflect.Method;
 
 /*
  * Test field access through reflection.
@@ -192,6 +193,11 @@
   /* package */ static float samePackagePackageFloatStaticField = 63.0f;
   /* package */ static double samePackagePackageDoubleStaticField = 64.0;
   /* package */ static Object samePackagePackageObjectStaticField = "65";
+
+  public void samePublicMethod() { }
+  protected void sameProtectedMethod() { }
+  private void samePrivateMethod() { }
+  /* package */ void samePackageMethod() { }
 }
 
 /*
@@ -510,23 +516,32 @@
     for (int round = 0; round < 3; round++) {
       Object validInst;
       Field[] fields;
+      Method[] methods;
       boolean same_package = false;
+      boolean protected_class = false;
       switch (round) {
         case 0:
           validInst = new SamePackage();
           fields = SamePackage.class.getDeclaredFields();
           check(fields.length == 72);
+          methods = SamePackage.class.getDeclaredMethods();
+          check(methods.length == 4);
           same_package = true;
           break;
         case 1:
           validInst = new PublicClass();
           fields = PublicClass.class.getDeclaredFields();
           check(fields.length == 72);
+          methods = PublicClass.class.getDeclaredMethods();
+          check(methods.length == 4);
           break;
         default:
           validInst = new PublicClass();
           fields = PublicClass.class.getSuperclass().getDeclaredFields();
           check(fields.length == 72);
+          methods = PublicClass.class.getSuperclass().getDeclaredMethods();
+          check(methods.length == 4);
+          protected_class = true;
           break;
       }
       for (Field f : fields) {
@@ -540,16 +555,15 @@
         // Check access or lack of to field.
         Class<?> subClassAccessExceptionClass = null;
         if (f.getName().contains("Private") ||
-            (!same_package && f.getName().contains("Package"))) {
-          // ART deliberately doesn't throw IllegalAccessException.
-          // subClassAccessExceptionClass = IllegalAccessException.class;
+            (!same_package && f.getName().contains("Package")) ||
+            (!same_package && f.getName().contains("Protected"))) {
+          subClassAccessExceptionClass = IllegalAccessException.class;
         }
         Class<?> mainClassAccessExceptionClass = null;
         if (f.getName().contains("Private") ||
             (!same_package && f.getName().contains("Package")) ||
             (!same_package && f.getName().contains("Protected"))) {
-          // ART deliberately doesn't throw IllegalAccessException.
-          // mainClassAccessExceptionClass = IllegalAccessException.class;
+          mainClassAccessExceptionClass = IllegalAccessException.class;
         }
 
         this.getValue(f, validInst, typeChar, subClassAccessExceptionClass);
@@ -588,6 +602,16 @@
           }
         }
       }
+
+      for (Method m : methods) {
+        Class<?> subClassAccessExceptionClass = null;
+        if (protected_class || m.getName().contains("Private") ||
+            (!same_package && m.getName().contains("Package")) ||
+            (!same_package && m.getName().contains("Protected"))) {
+          subClassAccessExceptionClass = IllegalAccessException.class;
+        }
+        this.invoke(m, validInst, subClassAccessExceptionClass);
+      }
     }
     System.out.println("good");
   }
@@ -598,7 +622,6 @@
    */
   public Object getValue(Field field, Object obj, char type,
       Class expectedException) {
-
     Object result = null;
     try {
       switch (type) {
@@ -657,4 +680,29 @@
 
     return result;
   }
+
+  public Object invoke(Method method, Object obj, Class expectedException) {
+    Object result = null;
+    try {
+      result = method.invoke(obj);
+      /* success; expected? */
+      if (expectedException != null) {
+        System.err.println("ERROR: call succeeded for method " + method + "', was expecting " +
+                           expectedException);
+        Thread.dumpStack();
+      }
+    } catch (Exception ex) {
+      if (expectedException == null) {
+        System.err.println("ERROR: call failed unexpectedly: " + ex.getClass());
+        ex.printStackTrace();
+      } else {
+        if (!expectedException.equals(ex.getClass())) {
+          System.err.println("ERROR: incorrect exception: wanted " + expectedException.getName() +
+                             ", got " + ex.getClass());
+          ex.printStackTrace();
+        }
+      }
+    }
+    return result;
+  }
 }
diff --git a/test/064-field-access/src/other/ProtectedClass.java b/test/064-field-access/src/other/ProtectedClass.java
index 779aa1d..756c97f 100644
--- a/test/064-field-access/src/other/ProtectedClass.java
+++ b/test/064-field-access/src/other/ProtectedClass.java
@@ -97,4 +97,10 @@
  /* package */ static float otherProtectedClassPackageFloatStaticField = 63.0f;
  /* package */ static double otherProtectedClassPackageDoubleStaticField = 64.0;
  /* package */ static Object otherProtectedClassPackageObjectStaticField = "65";
+
+    public void otherPublicMethod() { }
+    protected void otherProtectedMethod() { }
+    private void otherPrivateMethod() { }
+    /* package */ void otherPackageMethod() { }
+
 }
diff --git a/test/064-field-access/src/other/PublicClass.java b/test/064-field-access/src/other/PublicClass.java
index 1653973..dbcb4fd 100644
--- a/test/064-field-access/src/other/PublicClass.java
+++ b/test/064-field-access/src/other/PublicClass.java
@@ -97,4 +97,9 @@
  /* package */ static float otherPublicClassPackageFloatStaticField = -63.0f;
  /* package */ static double otherPublicClassPackageDoubleStaticField = -64.0;
  /* package */ static Object otherPublicClassPackageObjectStaticField = "-65";
+
+    public void otherPublicMethod() { }
+    protected void otherProtectedMethod() { }
+    private void otherPrivateMethod() { }
+    /* package */ void otherPackageMethod() { }
 }
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index 3d87ebc..bed0689 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -22,10 +22,7 @@
 30
 62
 14
-java.lang.IllegalArgumentException: Invalid primitive conversion from int to short
-	at java.lang.reflect.Field.set(Native Method)
-	at Main.testFieldReflection(Main.java:121)
-	at Main.main(Main.java:269)
+got expected IllegalArgumentException
 true (class java.lang.Boolean)
 6 (class java.lang.Byte)
 z (class java.lang.Character)
@@ -66,12 +63,6 @@
 true 0 1 2.0  hello world 3.0 4 5 6
 null (null)
 []
-java.lang.reflect.InvocationTargetException
-	at java.lang.reflect.Method.invoke(Native Method)
-	at Main.testMethodReflection(Main.java:210)
-	at Main.main(Main.java:270)
-Caused by: java.lang.ArithmeticException: surprise!
-	at Main.thrower(Main.java:218)
-	... 3 more
+got expected InvocationTargetException
  (class java.lang.String)
 yz (class java.lang.String)
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 0404591..0cc1488 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -119,8 +119,9 @@
     try {
       f = Main.class.getDeclaredField("s");
       f.set(null, Integer.valueOf(14));
+      System.out.println("************* should have thrown!");
     } catch (IllegalArgumentException expected) {
-      expected.printStackTrace();
+      System.out.println("got expected IllegalArgumentException");
     }
 
     f = Main.class.getDeclaredField("z");
@@ -209,8 +210,8 @@
       System.out.println(Arrays.toString(m.getParameterTypes()));
       show(m.invoke(null));
       System.out.println("************* should have thrown!");
-    } catch (Exception expected) {
-      expected.printStackTrace();
+    } catch (InvocationTargetException expected) {
+      System.out.println("got expected InvocationTargetException");
     }
   }
 
diff --git a/test/401-optimizing-compiler/expected.txt b/test/401-optimizing-compiler/expected.txt
new file mode 100644
index 0000000..7b3a018
--- /dev/null
+++ b/test/401-optimizing-compiler/expected.txt
@@ -0,0 +1,3 @@
+In static method
+Forced GC
+java.lang.Error: Error
diff --git a/test/401-optimizing-compiler/info.txt b/test/401-optimizing-compiler/info.txt
new file mode 100644
index 0000000..2a89eb5
--- /dev/null
+++ b/test/401-optimizing-compiler/info.txt
@@ -0,0 +1,2 @@
+A set of tests for the optimizing compiler. Coverage will grow incrementally as the
+optimizing compiler handles more language features.
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
new file mode 100644
index 0000000..2609e0f
--- /dev/null
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+
+public class Main {
+  public static void main(String[] args) {
+    Error error = null;
+    try {
+      $opt$TestInvokeStatic();
+    } catch (Error e) {
+      error = e;
+    }
+    System.out.println(error);
+  }
+
+  public static void $opt$TestInvokeStatic() {
+    printStaticMethod();
+    forceGCStaticMethod();
+    throwStaticMethod();
+  }
+
+  public static void printStaticMethod() {
+    System.out.println("In static method");
+  }
+
+  public static void forceGCStaticMethod() {
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+    System.out.println("Forced GC");
+  }
+
+  public static void throwStaticMethod() {
+    throw new Error("Error");
+  }
+}
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index 2a2aa70..9e30f65 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -18,6 +18,7 @@
 QUIET="n"
 DEV_MODE="n"
 INVOKE_WITH=""
+FLAGS=""
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
@@ -31,6 +32,11 @@
         fi
         LIB="$1"
         shift
+    elif [ "x$1" = "x-Xcompiler-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} -Xcompiler-option $option"
+        shift
     elif [ "x$1" = "x--boot" ]; then
         shift
         BOOT_OPT="$1"
@@ -141,7 +147,7 @@
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
 cmdline="cd $DEX_LOCATION && mkdir dalvik-cache && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
-    $INVOKE_WITH $gdb /system/bin/dalvikvm $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
+    $INVOKE_WITH $gdb /system/bin/dalvikvm $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"
 fi
diff --git a/test/run-test b/test/run-test
index cc15e58..8ff0915 100755
--- a/test/run-test
+++ b/test/run-test
@@ -36,7 +36,7 @@
 tmp_dir="/tmp/${test_dir}"
 
 export JAVA="java"
-export JAVAC="javac -g -source 1.5 -target 1.5"
+export JAVAC="javac -g"
 export RUN="${progdir}/etc/push-and-run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
diff --git a/tools/art b/tools/art
index aa53a39..c9c0d4f 100755
--- a/tools/art
+++ b/tools/art
@@ -46,12 +46,15 @@
 PROG_DIR="$(cd "${PROG_NAME%/*}" ; pwd -P)"
 ANDROID_BUILD_TOP="$(cd "${PROG_DIR}/../../../../" ; pwd -P)/"
 ANDROID_HOST_OUT=$PROG_DIR/..
+ANDROID_DATA=$PWD/android-data$$
 
-mkdir -p /tmp/android-data/dalvik-cache
-ANDROID_DATA=/tmp/android-data \
+mkdir -p $ANDROID_DATA/dalvik-cache
+ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_HOST_OUT \
   LD_LIBRARY_PATH=$ANDROID_HOST_OUT/lib \
-  exec $invoke_with $ANDROID_HOST_OUT/bin/dalvikvm $lib \
-    -Xbootclasspath:$ANDROID_HOST_OUT/core-hostdex.jar \
+  $invoke_with $ANDROID_HOST_OUT/bin/dalvikvm $lib \
     -Ximage:$ANDROID_HOST_OUT/framework/core.art \
      "$@"
+EXIT_STATUS=$?
+rm -rf $ANDROID_DATA
+exit $EXIT_STATUS