Merge "Make test-art pass with heap reference poisoning enabled."
diff --git a/Android.mk b/Android.mk
index a179a97..b87f0d3 100644
--- a/Android.mk
+++ b/Android.mk
@@ -174,14 +174,14 @@
 define declare-test-art-host-run-test
 .PHONY: test-art-host-run-test-default-$(1)
 test-art-host-run-test-default-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test --host $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host $(1)
 	@echo test-art-host-run-test-default-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_DEFAULT_TARGETS += test-art-host-run-test-default-$(1)
 
 .PHONY: test-art-host-run-test-interpreter-$(1)
 test-art-host-run-test-interpreter-$(1): test-art-host-dependencies $(DX) $(HOST_OUT_EXECUTABLES)/jasmin
-	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test --host --interpreter $(1)
+	DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) art/test/run-test $(DALVIKVM_FLAGS) --host --interpreter $(1)
 	@echo test-art-host-run-test-interpreter-$(1) PASSED
 
 TEST_ART_HOST_RUN_TEST_INTERPRETER_TARGETS += test-art-host-run-test-interpreter-$(1)
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 219f1e2..d80d039 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -17,7 +17,7 @@
 ifndef ANDROID_COMMON_MK
 ANDROID_COMMON_MK = true
 
-ART_SUPPORTED_ARCH := arm mips x86 x86_64
+ART_SUPPORTED_ARCH := arm arm64 mips x86 x86_64
 
 ifeq (,$(filter $(TARGET_ARCH),$(ART_SUPPORTED_ARCH)))
 $(warning unsupported TARGET_ARCH=$(TARGET_ARCH))
diff --git a/compiler/Android.mk b/compiler/Android.mk
index bcd120b..4eb9ff5 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -66,6 +66,7 @@
 	driver/compiler_driver.cc \
 	driver/dex_compilation_unit.cc \
 	jni/quick/arm/calling_convention_arm.cc \
+	jni/quick/arm64/calling_convention_arm64.cc \
 	jni/quick/mips/calling_convention_mips.cc \
 	jni/quick/x86/calling_convention_x86.cc \
 	jni/quick/calling_convention.cc \
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 49c1283..6aa85d4 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -300,6 +300,10 @@
       // for ARM, do a runtime check to make sure that the features we are passed from
       // the build match the features we actually determine at runtime.
       ASSERT_EQ(instruction_set_features, runtime_features);
+#elif defined(__aarch64__)
+      instruction_set = kArm64;
+      // TODO: arm64 compilation support.
+      compiler_options_->SetCompilerFilter(CompilerOptions::kInterpretOnly);
 #elif defined(__mips__)
       instruction_set = kMips;
 #elif defined(__i386__)
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 17c2e94..344f3ef 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -86,6 +86,8 @@
     case kArm:
     case kThumb2:
       return RoundUp(offset, kArmAlignment);
+    case kArm64:
+      return RoundUp(offset, kArm64Alignment);
     case kMips:
       return RoundUp(offset, kMipsAlignment);
     case kX86:  // Fall-through.
@@ -100,6 +102,7 @@
 size_t CompiledCode::CodeDelta() const {
   switch (instruction_set_) {
     case kArm:
+    case kArm64:
     case kMips:
     case kX86:
       return 0;
@@ -117,6 +120,7 @@
                                       InstructionSet instruction_set) {
   switch (instruction_set) {
     case kArm:
+    case kArm64:
     case kMips:
     case kX86:
       return code_pointer;
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 147e840..718468f 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -328,7 +328,6 @@
   kThrowArrayBounds,
   kThrowConstantArrayBounds,
   kThrowNoSuchMethod,
-  kThrowStackOverflow,
 };
 
 enum DividePattern {
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5a26064..7890d81 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -157,9 +157,9 @@
   cu.compiler_driver = &driver;
   cu.class_linker = class_linker;
   cu.instruction_set = driver.GetInstructionSet();
-  cu.target64 = cu.instruction_set == kX86_64;
+  cu.target64 = (cu.instruction_set == kX86_64) || (cu.instruction_set == kArm64);
   cu.compiler = compiler;
-  // TODO: x86_64 is not yet implemented.
+  // TODO: x86_64 & arm64 are not yet implemented.
   DCHECK((cu.instruction_set == kThumb2) ||
          (cu.instruction_set == kX86) ||
          (cu.instruction_set == kMips));
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index bba3d40..94f0ca4 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -358,23 +358,60 @@
      */
     NewLIR1(kThumb2VPushCS, num_fp_spills_);
   }
+
+  // TODO: 64-bit will need different code.
+  const int frame_size_without_spills = frame_size_ - spill_count * 4;
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
-      OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_ - (spill_count * 4));
-      GenRegRegCheck(kCondUlt, rARM_LR, r12, kThrowStackOverflow);
-      OpRegCopy(rARM_SP, rARM_LR);     // Establish stack
+      class StackOverflowSlowPath : public LIRSlowPath {
+       public:
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
+            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
+              sp_displace_(sp_displace) {
+        }
+        void Compile() OVERRIDE {
+          m2l_->ResetRegPool();
+          m2l_->ResetDefTracking();
+          GenerateTargetLabel();
+          if (restore_lr_) {
+            m2l_->LoadWordDisp(kArmRegSP, sp_displace_ - 4, kArmRegLR);
+          }
+          m2l_->OpRegImm(kOpAdd, kArmRegSP, sp_displace_);
+          m2l_->ClobberCallerSave();
+          ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
+          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
+          // codegen and target are in thumb2 mode.
+          m2l_->LoadWordDisp(rARM_SELF, func_offset.Int32Value(), rARM_PC);
+        }
+
+       private:
+        const bool restore_lr_;
+        const size_t sp_displace_;
+      };
+      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
+        OpRegRegImm(kOpSub, rARM_LR, rARM_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rARM_LR, r12, nullptr);
+        // Need to restore LR since we used it as a temp.
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true,
+                                                     frame_size_without_spills));
+        OpRegCopy(rARM_SP, rARM_LR);     // Establish stack
+      } else {
+        // If the frame is small enough we are guaranteed to have enough space that remains to
+        // handle signals on the user stack.
+        OpRegRegImm(kOpSub, rARM_SP, rARM_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rARM_SP, r12, nullptr);
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
+      }
     } else {
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      uint32_t full_frame_size = frame_size_ - (spill_count * 4);
-
-      OpRegImm(kOpSub, rARM_SP, full_frame_size);
+      OpRegImm(kOpSub, rARM_SP, frame_size_without_spills);
       LoadWordDisp(rARM_SP, 0, rARM_LR);
       MarkPossibleStackOverflowException();
     }
   } else {
-    OpRegImm(kOpSub, rARM_SP, frame_size_ - (spill_count * 4));
+    OpRegImm(kOpSub, rARM_SP, frame_size_without_spills);
   }
 
   FlushIns(ArgLocs, rl_method);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 882a3bb..1a7f2fc 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -923,7 +923,13 @@
     } else {
       int reg_offset = AllocTemp();
       LoadConstant(reg_offset, encoded_disp);
-      load = LoadBaseIndexed(rBase, reg_offset, r_dest, 0, size);
+      if (ARM_FPREG(r_dest)) {
+        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+        OpRegReg(kOpAdd, reg_offset, rBase);
+        load = LoadBaseDispBody(reg_offset, 0, r_dest, r_dest_hi, size, s_reg);
+      } else {
+        load = LoadBaseIndexed(rBase, reg_offset, r_dest, 0, size);
+      }
       FreeTemp(reg_offset);
     }
   }
@@ -1037,7 +1043,13 @@
     } else {
       int r_scratch = AllocTemp();
       LoadConstant(r_scratch, encoded_disp);
-      store = StoreBaseIndexed(rBase, r_scratch, r_src, 0, size);
+      if (ARM_FPREG(r_src)) {
+        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
+        OpRegReg(kOpAdd, r_scratch, rBase);
+        store = StoreBaseDispBody(r_scratch, 0, r_src, r_src_hi, size);
+      } else {
+        store = StoreBaseIndexed(rBase, r_scratch, r_src, 0, size);
+      }
       FreeTemp(r_scratch);
     }
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 9e5ec6e..60f8796 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -17,6 +17,7 @@
 #include "dex/compiler_internals.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
+#include "gc_map_builder.h"
 #include "mapping_table.h"
 #include "mir_to_lir-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
@@ -677,84 +678,6 @@
   }
 }
 
-class NativePcToReferenceMapBuilder {
- public:
-  NativePcToReferenceMapBuilder(std::vector<uint8_t>* table,
-                                size_t entries, uint32_t max_native_offset,
-                                size_t references_width) : entries_(entries),
-                                references_width_(references_width), in_use_(entries),
-                                table_(table) {
-    // Compute width in bytes needed to hold max_native_offset.
-    native_offset_width_ = 0;
-    while (max_native_offset != 0) {
-      native_offset_width_++;
-      max_native_offset >>= 8;
-    }
-    // Resize table and set up header.
-    table->resize((EntryWidth() * entries) + sizeof(uint32_t));
-    CHECK_LT(native_offset_width_, 1U << 3);
-    (*table)[0] = native_offset_width_ & 7;
-    CHECK_LT(references_width_, 1U << 13);
-    (*table)[0] |= (references_width_ << 3) & 0xFF;
-    (*table)[1] = (references_width_ >> 5) & 0xFF;
-    CHECK_LT(entries, 1U << 16);
-    (*table)[2] = entries & 0xFF;
-    (*table)[3] = (entries >> 8) & 0xFF;
-  }
-
-  void AddEntry(uint32_t native_offset, const uint8_t* references) {
-    size_t table_index = TableIndex(native_offset);
-    while (in_use_[table_index]) {
-      table_index = (table_index + 1) % entries_;
-    }
-    in_use_[table_index] = true;
-    SetCodeOffset(table_index, native_offset);
-    DCHECK_EQ(native_offset, GetCodeOffset(table_index));
-    SetReferences(table_index, references);
-  }
-
- private:
-  size_t TableIndex(uint32_t native_offset) {
-    return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_;
-  }
-
-  uint32_t GetCodeOffset(size_t table_index) {
-    uint32_t native_offset = 0;
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      native_offset |= (*table_)[table_offset + i] << (i * 8);
-    }
-    return native_offset;
-  }
-
-  void SetCodeOffset(size_t table_index, uint32_t native_offset) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    for (size_t i = 0; i < native_offset_width_; i++) {
-      (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
-    }
-  }
-
-  void SetReferences(size_t table_index, const uint8_t* references) {
-    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
-    memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_);
-  }
-
-  size_t EntryWidth() const {
-    return native_offset_width_ + references_width_;
-  }
-
-  // Number of entries in the table.
-  const size_t entries_;
-  // Number of bytes used to encode the reference bitmap.
-  const size_t references_width_;
-  // Number of bytes used to encode a native offset.
-  size_t native_offset_width_;
-  // Entries that are in use.
-  std::vector<bool> in_use_;
-  // The table we're building.
-  std::vector<uint8_t>* const table_;
-};
-
 void Mir2Lir::CreateNativeGcMap() {
   DCHECK(!encoded_mapping_table_.empty());
   MappingTable mapping_table(&encoded_mapping_table_[0]);
@@ -771,9 +694,9 @@
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
   DCHECK_EQ(gc_map_raw.size(), dex_gc_map.RawSize());
   // Compute native offset to references size.
-  NativePcToReferenceMapBuilder native_gc_map_builder(&native_gc_map_,
-                                                      mapping_table.PcToDexSize(),
-                                                      max_native_offset, dex_gc_map.RegWidth());
+  GcMapBuilder native_gc_map_builder(&native_gc_map_,
+                                     mapping_table.PcToDexSize(),
+                                     max_native_offset, dex_gc_map.RegWidth());
 
   for (auto it = mapping_table.PcToDexBegin(), end = mapping_table.PcToDexEnd(); it != end; ++it) {
     uint32_t native_offset = it.NativePcOffset();
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 71cc0d9..8c3a11f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -629,8 +629,6 @@
     int v1 = lab->operands[2];
     int v2 = lab->operands[3];
     const bool target_x86 = cu_->instruction_set == kX86;
-    const bool target_arm = cu_->instruction_set == kArm || cu_->instruction_set == kThumb2;
-    const bool target_mips = cu_->instruction_set == kMips;
     switch (lab->operands[0]) {
       case kThrowNullPointer:
         func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowNullPointer);
@@ -688,34 +686,6 @@
         func_offset =
           QUICK_ENTRYPOINT_OFFSET(pThrowNoSuchMethod);
         break;
-      case kThrowStackOverflow: {
-        func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
-        // Restore stack alignment
-        int r_tgt = 0;
-        const int spill_size = (num_core_spills_ + num_fp_spills_) * 4;
-        if (target_x86) {
-          // - 4 to leave link register on stack.
-          OpRegImm(kOpAdd, TargetReg(kSp), frame_size_ - 4);
-          ClobberCallerSave();
-        } else if (target_arm) {
-          r_tgt = r12;
-          LoadWordDisp(TargetReg(kSp), spill_size - 4, TargetReg(kLr));
-          OpRegImm(kOpAdd, TargetReg(kSp), spill_size);
-          ClobberCallerSave();
-          LoadWordDisp(rARM_SELF, func_offset.Int32Value(), r_tgt);
-        } else {
-          DCHECK(target_mips);
-          DCHECK_EQ(num_fp_spills_, 0);  // FP spills currently don't happen on mips.
-          // LR is offset 0 since we push in reverse order.
-          LoadWordDisp(TargetReg(kSp), 0, TargetReg(kLr));
-          OpRegImm(kOpAdd, TargetReg(kSp), spill_size);
-          ClobberCallerSave();
-          r_tgt = CallHelperSetup(func_offset);  // Doesn't clobber LR.
-          DCHECK_NE(r_tgt, TargetReg(kLr));
-        }
-        CallHelper(r_tgt, func_offset, false /* MarkSafepointPC */, false /* UseLink */);
-        continue;
-      }
       default:
         LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];
     }
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 234299e..95fd6e7 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -317,12 +317,36 @@
   SpillCoreRegs();
   /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
   DCHECK_EQ(num_fp_spills_, 0);
+  const int frame_sub = frame_size_ - spill_count * 4;
   if (!skip_overflow_check) {
-    OpRegRegImm(kOpSub, new_sp, rMIPS_SP, frame_size_ - (spill_count * 4));
-    GenRegRegCheck(kCondUlt, new_sp, check_reg, kThrowStackOverflow);
+    class StackOverflowSlowPath : public LIRSlowPath {
+     public:
+      StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
+          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+      }
+      void Compile() OVERRIDE {
+        m2l_->ResetRegPool();
+        m2l_->ResetDefTracking();
+        GenerateTargetLabel();
+        // LR is offset 0 since we push in reverse order.
+        m2l_->LoadWordDisp(kMipsRegSP, 0, kMipsRegLR);
+        m2l_->OpRegImm(kOpAdd, kMipsRegSP, sp_displace_);
+        m2l_->ClobberCallerSave();
+        ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
+        int r_tgt = m2l_->CallHelperSetup(func_offset);  // Doesn't clobber LR.
+        m2l_->CallHelper(r_tgt, func_offset, false /* MarkSafepointPC */, false /* UseLink */);
+      }
+
+     private:
+      const size_t sp_displace_;
+    };
+    OpRegRegImm(kOpSub, new_sp, rMIPS_SP, frame_sub);
+    LIR* branch = OpCmpBranch(kCondUlt, new_sp, check_reg, nullptr);
+    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * 4));
+    // TODO: avoid copy for small frame sizes.
     OpRegCopy(rMIPS_SP, new_sp);     // Establish stack
   } else {
-    OpRegImm(kOpSub, rMIPS_SP, frame_size_ - (spill_count * 4));
+    OpRegImm(kOpSub, rMIPS_SP, frame_sub);
   }
 
   FlushIns(ArgLocs, rl_method);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 72fc922..68e2b6d 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -213,12 +213,37 @@
   /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
   DCHECK_EQ(num_fp_spills_, 0);
   if (!skip_overflow_check) {
+    class StackOverflowSlowPath : public LIRSlowPath {
+     public:
+      StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
+          : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), sp_displace_(sp_displace) {
+      }
+      void Compile() OVERRIDE {
+        m2l_->ResetRegPool();
+        m2l_->ResetDefTracking();
+        GenerateTargetLabel();
+        m2l_->OpRegImm(kOpAdd, kX86RegSP, sp_displace_);
+        m2l_->ClobberCallerSave();
+        ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
+        // There is no link register to restore on x86.
+        m2l_->CallHelper(0, func_offset, false /* MarkSafepointPC */, false /* UseLink */);
+      }
+
+     private:
+      const size_t sp_displace_;
+    };
+    // TODO: for large frames we should do something like:
+    // spill ebp
+    // lea ebp, [esp + frame_size]
+    // cmp ebp, fs:[stack_end_]
+    // jcc stack_overflow_exception
+    // mov esp, ebp
+    // in case a signal arrives that is not using an alternate signal stack, since the large frame
+    // may have moved us outside of the reserved area at the end of the stack.
     // cmp rX86_SP, fs:[stack_end_]; jcc throw_launchpad
-    LIR* tgt = RawLIR(0, kPseudoThrowTarget, kThrowStackOverflow, 0, 0, 0, 0);
     OpRegThreadMem(kOpCmp, rX86_SP, Thread::StackEndOffset());
-    OpCondBranch(kCondUlt, tgt);
-    // Remember branch target - will process later
-    throw_launchpads_.Insert(tgt);
+    LIR* branch = OpCondBranch(kCondUlt, nullptr);
+    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_ - 4));
   }
 
   FlushIns(ArgLocs, rl_method);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e601a1b..59754d5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1871,7 +1871,7 @@
   if ((access_flags & kAccNative) != 0) {
     // Are we interpreting only and have support for generic JNI down calls?
     if ((compiler_options_->GetCompilerFilter() == CompilerOptions::kInterpretOnly) &&
-        (instruction_set_ == kX86_64)) {
+        (instruction_set_ == kX86_64 || instruction_set_ == kArm64)) {
       // Leaving this empty will trigger the generic JNI version
     } else {
       compiled_method = compiler_->JniCompile(*this, access_flags, method_idx, dex_file);
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index a6daa5d..f6a324f 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -372,6 +372,11 @@
       elf_header.e_flags = EF_ARM_EABI_VER5;
       break;
     }
+    case kArm64: {
+      elf_header.e_machine = EM_AARCH64;
+      elf_header.e_flags = 0;
+      break;
+    }
     case kX86: {
       elf_header.e_machine = EM_386;
       elf_header.e_flags = 0;
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
new file mode 100644
index 0000000..5a7a9e0
--- /dev/null
+++ b/compiler/gc_map_builder.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_GC_MAP_BUILDER_H_
+#define ART_COMPILER_GC_MAP_BUILDER_H_
+
+#include <vector>
+
+#include "gc_map.h"
+
+namespace art {
+
+class GcMapBuilder {
+ public:
+  GcMapBuilder(std::vector<uint8_t>* table,
+               size_t entries, uint32_t max_native_offset,
+               size_t references_width) : entries_(entries),
+               references_width_(references_width), in_use_(entries),
+               table_(table) {
+    // Compute width in bytes needed to hold max_native_offset.
+    native_offset_width_ = 0;
+    while (max_native_offset != 0) {
+      native_offset_width_++;
+      max_native_offset >>= 8;
+    }
+    // Resize table and set up header.
+    table->resize((EntryWidth() * entries) + sizeof(uint32_t));
+    CHECK_LT(native_offset_width_, 1U << 3);
+    (*table)[0] = native_offset_width_ & 7;
+    CHECK_LT(references_width_, 1U << 13);
+    (*table)[0] |= (references_width_ << 3) & 0xFF;
+    (*table)[1] = (references_width_ >> 5) & 0xFF;
+    CHECK_LT(entries, 1U << 16);
+    (*table)[2] = entries & 0xFF;
+    (*table)[3] = (entries >> 8) & 0xFF;
+  }
+
+  void AddEntry(uint32_t native_offset, const uint8_t* references) {
+    size_t table_index = TableIndex(native_offset);
+    while (in_use_[table_index]) {
+      table_index = (table_index + 1) % entries_;
+    }
+    in_use_[table_index] = true;
+    SetCodeOffset(table_index, native_offset);
+    DCHECK_EQ(native_offset, GetCodeOffset(table_index));
+    SetReferences(table_index, references);
+  }
+
+ private:
+  size_t TableIndex(uint32_t native_offset) {
+    return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_;
+  }
+
+  uint32_t GetCodeOffset(size_t table_index) {
+    uint32_t native_offset = 0;
+    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
+    for (size_t i = 0; i < native_offset_width_; i++) {
+      native_offset |= (*table_)[table_offset + i] << (i * 8);
+    }
+    return native_offset;
+  }
+
+  void SetCodeOffset(size_t table_index, uint32_t native_offset) {
+    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
+    for (size_t i = 0; i < native_offset_width_; i++) {
+      (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
+    }
+  }
+
+  void SetReferences(size_t table_index, const uint8_t* references) {
+    size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
+    memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_);
+  }
+
+  size_t EntryWidth() const {
+    return native_offset_width_ + references_width_;
+  }
+
+  // Number of entries in the table.
+  const size_t entries_;
+  // Number of bytes used to encode the reference bitmap.
+  const size_t references_width_;
+  // Number of bytes used to encode a native offset.
+  size_t native_offset_width_;
+  // Entries that are in use.
+  std::vector<bool> in_use_;
+  // The table we're building.
+  std::vector<uint8_t>* const table_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_GC_MAP_BUILDER_H_
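
(Illustration only, not part of this change.) A minimal sketch of driving the extracted GcMapBuilder, assuming only the constructor and AddEntry() signatures shown above; the offsets and reference bitmaps below are made up for the example:

    #include <vector>
    #include "gc_map_builder.h"  // the new header above

    std::vector<uint8_t> BuildTinyGcMap() {
      std::vector<uint8_t> table;
      // Two entries, max native offset 0x40 (fits in one byte), 1-byte reference bitmaps.
      art::GcMapBuilder builder(&table, /*entries=*/2, /*max_native_offset=*/0x40,
                                /*references_width=*/1);
      const uint8_t refs_at_0x10 = 0x03;  // hypothetical: vregs 0 and 1 hold references
      const uint8_t refs_at_0x40 = 0x04;  // hypothetical: vreg 2 holds a reference
      builder.AddEntry(0x10, &refs_at_0x10);
      builder.AddEntry(0x40, &refs_at_0x40);
      return table;  // 4-byte header followed by two (offset byte, bitmap byte) entries
    }
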
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
new file mode 100644
index 0000000..c4d0d45
--- /dev/null
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "calling_convention_arm64.h"
+#include "utils/arm64/managed_register_arm64.h"
+
+namespace art {
+namespace arm64 {
+
+// Calling convention
+
+ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+  return Arm64ManagedRegister::FromCoreRegister(IP0);  // X16
+}
+
+ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() {
+  return Arm64ManagedRegister::FromCoreRegister(IP0);  // X16
+}
+
+static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
+  if (shorty[0] == 'F') {
+    return Arm64ManagedRegister::FromSRegister(S0);
+  } else if (shorty[0] == 'D') {
+    return Arm64ManagedRegister::FromDRegister(D0);
+  } else if (shorty[0] == 'J') {
+    return Arm64ManagedRegister::FromCoreRegister(X0);
+  } else if (shorty[0] == 'V') {
+    return Arm64ManagedRegister::NoRegister();
+  } else {
+    return Arm64ManagedRegister::FromWRegister(W0);
+  }
+}
+
+ManagedRegister Arm64ManagedRuntimeCallingConvention::ReturnRegister() {
+  return ReturnRegisterForShorty(GetShorty());
+}
+
+ManagedRegister Arm64JniCallingConvention::ReturnRegister() {
+  return ReturnRegisterForShorty(GetShorty());
+}
+
+ManagedRegister Arm64JniCallingConvention::IntReturnRegister() {
+  return Arm64ManagedRegister::FromWRegister(W0);
+}
+
+// Managed runtime calling convention
+
+ManagedRegister Arm64ManagedRuntimeCallingConvention::MethodRegister() {
+  return Arm64ManagedRegister::FromCoreRegister(X0);
+}
+
+bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
+  return false;  // Everything moved to stack on entry.
+}
+
+bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
+  return true;
+}
+
+ManagedRegister Arm64ManagedRuntimeCallingConvention::CurrentParamRegister() {
+  LOG(FATAL) << "Should not reach here";
+  return ManagedRegister::NoRegister();
+}
+
+FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
+  CHECK(IsCurrentParamOnStack());
+  FrameOffset result =
+      FrameOffset(displacement_.Int32Value() +   // displacement
+                  kPointerSize +                 // Method*
+                  (itr_slots_ * kPointerSize));  // offset into in args
+  return result;
+}
+
+const std::vector<ManagedRegister>& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
+  // We spill the argument registers on ARM64 to free them up for scratch use; we then assume
+  // all arguments are on the stack.
+  if (entry_spills_.size() == 0) {
+    // TODO Need fp regs spilled too.
+    //
+    size_t num_spills = NumArgs();
+
+    // TODO: Floating point arguments need spilling too.
+    if (num_spills > 0) {
+      entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X1));
+      if (num_spills > 1) {
+        entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X2));
+        if (num_spills > 2) {
+          entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X3));
+          if (num_spills > 3) {
+            entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X5));
+            if (num_spills > 4) {
+              entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X6));
+              if (num_spills > 5) {
+                entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X7));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return entry_spills_;
+}
+// JNI calling convention
+
+Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
+                                                 const char* shorty)
+    : JniCallingConvention(is_static, is_synchronized, shorty) {
+  // TODO: This needs to be converted to 64-bit.
+  // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
+  // or jclass for static methods and the JNIEnv. We start at the aligned register r2.
+//  size_t padding = 0;
+//  for (size_t cur_arg = IsStatic() ? 0 : 1, cur_reg = 2; cur_arg < NumArgs(); cur_arg++) {
+//    if (IsParamALongOrDouble(cur_arg)) {
+//      if ((cur_reg & 1) != 0) {
+//        padding += 4;
+//        cur_reg++;  // additional bump to ensure alignment
+//      }
+//      cur_reg++;  // additional bump to skip extra long word
+//    }
+//    cur_reg++;  // bump the iterator for every argument
+//  }
+//  padding_ =0;
+
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X22));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X23));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X24));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X25));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X26));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X27));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X28));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X29));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X30));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D8));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D9));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D10));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D11));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D12));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D13));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D14));
+  callee_save_regs_.push_back(Arm64ManagedRegister::FromDRegister(D15));
+}
+
+uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
+  // Compute spill mask to agree with callee saves initialized in the constructor
+  uint32_t result = 0;
+  result = 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25
+      | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return result;
+}
+
+ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
+  return Arm64ManagedRegister::FromCoreRegister(X9);
+}
+
+size_t Arm64JniCallingConvention::FrameSize() {
+  // Method*, LR and callee save area size, local reference segment state
+  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kPointerSize;
+  // References plus 2 words for SIRT header
+  size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
+  // Plus return value spill area size
+  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
+}
+
+size_t Arm64JniCallingConvention::OutArgSize() {
+  return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize + padding_,
+                 kStackAlignment);
+}
+
+// JniCallingConvention ABI follows AAPCS where longs and doubles must occur
+// in even register numbers and stack slots
+void Arm64JniCallingConvention::Next() {
+  JniCallingConvention::Next();
+  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+  if ((itr_args_ >= 2) &&
+      (arg_pos < NumArgs()) &&
+      IsParamALongOrDouble(arg_pos)) {
+    // itr_slots_ needs to be an even number, according to AAPCS.
+    if ((itr_slots_ & 0x1u) != 0) {
+      itr_slots_++;
+    }
+  }
+}
+
+bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
+  return itr_slots_ < 4;
+}
+
+bool Arm64JniCallingConvention::IsCurrentParamOnStack() {
+  return !IsCurrentParamInRegister();
+}
+
+// TODO: What about floating point arguments?
+
+static const Register kJniArgumentRegisters[] = {
+  X0, X1, X2, X3, X4, X5, X6, X7
+};
+ManagedRegister Arm64JniCallingConvention::CurrentParamRegister() {
+  CHECK_LT(itr_slots_, 4u);
+  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+  // TODO: Floating point & 64-bit registers.
+  if ((itr_args_ >= 2) && IsParamALongOrDouble(arg_pos)) {
+    CHECK_EQ(itr_slots_, 2u);
+    return Arm64ManagedRegister::FromCoreRegister(X1);
+  } else {
+    return
+      Arm64ManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
+  }
+}
+
+FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() {
+  CHECK_GE(itr_slots_, 4u);
+  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kPointerSize);
+  CHECK_LT(offset, OutArgSize());
+  return FrameOffset(offset);
+}
+
+size_t Arm64JniCallingConvention::NumberOfOutgoingStackArgs() {
+  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
+  // regular argument parameters and this
+  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
+  // count JNIEnv*, less the arguments passed in registers
+  return static_args + param_args + 1 - 4;
+}
+
+}  // namespace arm64
+}  // namespace art
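
(Illustration only, not part of this change.) A quick sanity check of the CoreSpillMask() value above: it should record exactly the twelve callee-save core registers pushed in the constructor (X19..X29 plus LR). The standalone register numbers 19..30 used here are an assumption mirroring the arm64 register enum:

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t mask = 0;
      for (int reg = 19; reg <= 30; ++reg) {  // X19..X29 and LR (X30), by assumption
        mask |= 1u << reg;
      }
      // Twelve bits set, matching the twelve FromCoreRegister() entries in callee_save_regs_.
      assert(std::bitset<32>(mask).count() == 12u);
      return 0;
    }
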
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
new file mode 100644
index 0000000..b4d0502
--- /dev/null
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
+#define ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
+
+#include "jni/quick/calling_convention.h"
+
+namespace art {
+namespace arm64 {
+
+class Arm64ManagedRuntimeCallingConvention : public ManagedRuntimeCallingConvention {
+ public:
+  Arm64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
+      : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty) {}
+  virtual ~Arm64ManagedRuntimeCallingConvention() {}
+  // Calling convention
+  virtual ManagedRegister ReturnRegister();
+  virtual ManagedRegister InterproceduralScratchRegister();
+  // Managed runtime calling convention
+  virtual ManagedRegister MethodRegister();
+  virtual bool IsCurrentParamInRegister();
+  virtual bool IsCurrentParamOnStack();
+  virtual ManagedRegister CurrentParamRegister();
+  virtual FrameOffset CurrentParamStackOffset();
+  virtual const std::vector<ManagedRegister>& EntrySpills();
+
+ private:
+  std::vector<ManagedRegister> entry_spills_;
+
+  DISALLOW_COPY_AND_ASSIGN(Arm64ManagedRuntimeCallingConvention);
+};
+
+class Arm64JniCallingConvention : public JniCallingConvention {
+ public:
+  explicit Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  virtual ~Arm64JniCallingConvention() {}
+  // Calling convention
+  virtual ManagedRegister ReturnRegister();
+  virtual ManagedRegister IntReturnRegister();
+  virtual ManagedRegister InterproceduralScratchRegister();
+  // JNI calling convention
+  virtual void Next();  // Override default behavior for AAPCS
+  virtual size_t FrameSize();
+  virtual size_t OutArgSize();
+  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
+    return callee_save_regs_;
+  }
+  virtual ManagedRegister ReturnScratchRegister() const;
+  virtual uint32_t CoreSpillMask() const;
+  virtual uint32_t FpSpillMask() const {
+    return 0;  // Floats aren't spilled in JNI down call
+  }
+  virtual bool IsCurrentParamInRegister();
+  virtual bool IsCurrentParamOnStack();
+  virtual ManagedRegister CurrentParamRegister();
+  virtual FrameOffset CurrentParamStackOffset();
+
+ protected:
+  virtual size_t NumberOfOutgoingStackArgs();
+
+ private:
+  // TODO: these values aren't unique and can be shared amongst instances
+  std::vector<ManagedRegister> callee_save_regs_;
+
+  // Padding to ensure longs and doubles are not split in AAPCS
+  size_t padding_;
+
+  DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_COMPILER_JNI_QUICK_ARM64_CALLING_CONVENTION_ARM64_H_
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index ac962af..5856df4 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -18,6 +18,7 @@
 
 #include "base/logging.h"
 #include "jni/quick/arm/calling_convention_arm.h"
+#include "jni/quick/arm64/calling_convention_arm64.h"
 #include "jni/quick/mips/calling_convention_mips.h"
 #include "jni/quick/x86/calling_convention_x86.h"
 #include "utils.h"
@@ -37,6 +38,8 @@
     case kArm:
     case kThumb2:
       return new arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+    case kArm64:
+      return new arm64::Arm64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
     case kMips:
       return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
     case kX86:
@@ -91,6 +94,8 @@
     case kArm:
     case kThumb2:
       return new arm::ArmJniCallingConvention(is_static, is_synchronized, shorty);
+    case kArm64:
+      return new arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty);
     case kMips:
       return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
     case kX86:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b2a69d8..bb6ac84 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -18,7 +18,11 @@
 
 #include "code_generator_arm.h"
 #include "code_generator_x86.h"
+#include "dex/verified_method.h"
+#include "driver/dex_compilation_unit.h"
+#include "gc_map_builder.h"
 #include "utils/assembler.h"
+#include "verifier/dex_gc_map.h"
 
 namespace art {
 
@@ -110,4 +114,14 @@
   }
 }
 
+void CodeGenerator::BuildNativeGCMap(
+    std::vector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+  const std::vector<uint8_t>& gc_map_raw =
+      dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
+  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
+
+  GcMapBuilder builder(data, 0, 0, dex_gc_map.RegWidth());
+}
+
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e95bb21..63f8cbf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -25,6 +25,8 @@
 
 namespace art {
 
+class DexCompilationUnit;
+
 class CodeAllocator {
  public:
   CodeAllocator() { }
@@ -135,7 +137,8 @@
 
   void BuildMappingTable(std::vector<uint8_t>* vector) const { }
   void BuildVMapTable(std::vector<uint8_t>* vector) const { }
-  void BuildNativeGCMap(std::vector<uint8_t>* vector) const { }
+  void BuildNativeGCMap(
+      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
  protected:
   explicit CodeGenerator(HGraph* graph)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cc36bbe..334b185 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,7 @@
 #include "code_generator.h"
 #include "compilers.h"
 #include "driver/compiler_driver.h"
+#include "driver/dex_compilation_unit.h"
 #include "nodes.h"
 #include "utils/arena_allocator.h"
 
@@ -34,12 +35,12 @@
 
   virtual uint8_t* Allocate(size_t size) {
     size_ = size;
-    memory_.reserve(size);
+    memory_.resize(size);
     return &memory_[0];
   }
 
   size_t GetSize() const { return size_; }
-  std::vector<uint8_t>* GetMemory() { return &memory_; }
+  const std::vector<uint8_t>& GetMemory() const { return memory_; }
 
  private:
   std::vector<uint8_t> memory_;
@@ -57,6 +58,10 @@
                                                uint32_t method_idx,
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
+  DexCompilationUnit dex_compilation_unit(
+    nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
+    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
+
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraphBuilder builder(&arena);
@@ -79,11 +84,11 @@
   std::vector<uint8_t> vmap_table;
   codegen->BuildVMapTable(&vmap_table);
   std::vector<uint8_t> gc_map;
-  codegen->BuildNativeGCMap(&gc_map);
+  codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
 
   return new CompiledMethod(driver,
                             instruction_set,
-                            *allocator.GetMemory(),
+                            allocator.GetMemory(),
                             codegen->GetFrameSize(),
                             0, /* GPR spill mask, unused */
                             0, /* FPR spill mask, unused */
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 3e13e44..4dffef9 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -18,6 +18,7 @@
 
 #include "jni_internal.h"
 #include "utils/arm/assembler_arm.h"
+#include "utils/arm64/assembler_arm64.h"
 #include "utils/mips/assembler_mips.h"
 #include "utils/x86/assembler_x86.h"
 
@@ -53,6 +54,46 @@
 }
 }  // namespace arm
 
+namespace arm64 {
+static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
+                                                    ThreadOffset offset) {
+  UniquePtr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64)));
+
+  switch (abi) {
+    case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
+      // FIXME: IPx is used by VIXL; using it here is unsafe.
+      __ Call(Arm64ManagedRegister::FromCoreRegister(X0), Offset(offset.Int32Value()),
+          Arm64ManagedRegister::FromCoreRegister(IP1));
+
+      break;
+    case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
+
+      __ LoadRawPtr(Arm64ManagedRegister::FromCoreRegister(IP1),
+                      Arm64ManagedRegister::FromCoreRegister(X0),
+                      Offset(JNIEnvExt::SelfOffset().Int32Value()));
+
+      // FIXME: IPx is used by VIXL; using it here is unsafe.
+      __ Call(Arm64ManagedRegister::FromCoreRegister(IP1), Offset(offset.Int32Value()),
+                Arm64ManagedRegister::FromCoreRegister(IP0));
+
+      break;
+    case kPortableAbi:  // X18 holds Thread*.
+    case kQuickAbi:  // Fall-through.
+      __ Call(Arm64ManagedRegister::FromCoreRegister(TR), Offset(offset.Int32Value()),
+                Arm64ManagedRegister::FromCoreRegister(IP0));
+
+      break;
+  }
+
+  size_t cs = assembler->CodeSize();
+  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  assembler->FinalizeInstructions(code);
+
+  return entry_stub.release();
+}
+}  // namespace arm64
+
 namespace mips {
 static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
                                                     ThreadOffset offset) {
@@ -123,6 +164,8 @@
     case kArm:
     case kThumb2:
       return arm::CreateTrampoline(abi, offset);
+    case kArm64:
+      return arm64::CreateTrampoline(abi, offset);
     case kMips:
       return mips::CreateTrampoline(abi, offset);
     case kX86:
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 70df252..2bada3f 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_ARM64_ASSEMBLER_ARM64_H_
 
 #include <vector>
+#include <stdint.h>
 
 #include "base/logging.h"
 #include "constants_arm64.h"
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 5df37cc..80f17f5 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -24,7 +24,7 @@
 namespace art {
 namespace arm64 {
 
-const int kNumberOfCoreRegIds = kNumberOfCoreRegisters;
+const int kNumberOfCoreRegIds = 32;
 const int kNumberOfWRegIds = kNumberOfWRegisters;
 const int kNumberOfDRegIds = kNumberOfDRegisters;
 const int kNumberOfSRegIds = kNumberOfSRegisters;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index f02c20f..cd4fc12 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -38,6 +38,9 @@
 namespace arm {
   class ArmAssembler;
 }
+namespace arm64 {
+  class Arm64Assembler;
+}
 namespace mips {
   class MipsAssembler;
 }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 908d995..72effde 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -837,6 +837,8 @@
       StringPiece instruction_set_str = option.substr(strlen("--instruction-set=")).data();
       if (instruction_set_str == "arm") {
         instruction_set = kThumb2;
+      } else if (instruction_set_str == "arm64") {
+        instruction_set = kArm64;
       } else if (instruction_set_str == "mips") {
         instruction_set = kMips;
       } else if (instruction_set_str == "x86") {
@@ -1020,8 +1022,8 @@
   }
 
   if (compiler_filter_string == NULL) {
-    if (instruction_set == kX86_64) {
-      // TODO: currently x86-64 is only interpreted.
+    if (instruction_set == kX86_64 || instruction_set == kArm64) {
+      // TODO: currently x86-64 and arm64 are only interpreted.
       compiler_filter_string = "interpret-only";
     } else if (image) {
       compiler_filter_string = "speed";
diff --git a/runtime/Android.mk b/runtime/Android.mk
index cca7d03..1ca8e07 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -212,6 +212,16 @@
 	arch/arm/thread_arm.cc \
 	arch/arm/fault_handler_arm.cc
 
+LIBART_TARGET_SRC_FILES_arm64 := \
+	arch/arm64/context_arm64.cc \
+	arch/arm64/entrypoints_init_arm64.cc \
+	arch/arm64/jni_entrypoints_arm64.S \
+	arch/arm64/portable_entrypoints_arm64.S \
+	arch/arm64/quick_entrypoints_arm64.S \
+	arch/arm64/thread_arm64.cc \
+	monitor_pool.cc \
+	arch/arm64/fault_handler_arm64.cc
+
 LIBART_TARGET_SRC_FILES_x86 := \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
@@ -241,13 +251,9 @@
 	arch/mips/thread_mips.cc \
 	arch/mips/fault_handler_mips.cc
 
-ifeq ($(TARGET_ARCH),arm64)
-$(info TODOArm64: $(LOCAL_PATH)/Android.mk Add Arm64 specific runtime files)
-else
 ifeq ($(TARGET_ARCH),mips64)
 $(info TODOMips64: $(LOCAL_PATH)/Android.mk Add mips64 specific runtime files)
 endif # TARGET_ARCH != mips64
-endif # TARGET_ARCH != arm64
 
 ifeq (,$(filter $(TARGET_ARCH),$(ART_SUPPORTED_ARCH)))
 $(warning unsupported TARGET_ARCH=$(TARGET_ARCH))
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
new file mode 100644
index 0000000..634f777
--- /dev/null
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_S_
+#define ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_S_
+
+#include "asm_support_arm64.h"
+
+.cfi_sections   .debug_frame
+
+.macro ENTRY name
+    .type \name, #function
+    .global \name
+    /* Cache alignment for function entry */
+    .balign 16
+\name:
+    .cfi_startproc
+.endm
+
+.macro END name
+    .cfi_endproc
+    .size \name, .-\name
+.endm
+
+.macro UNIMPLEMENTED name
+    ENTRY \name
+    brk 0
+    END \name
+.endm
+
+#endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_S_
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
new file mode 100644
index 0000000..44c3e60
--- /dev/null
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
+#define ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
+
+#include "asm_support.h"
+
+// TODO: Thread offsets need to be checked on AArch64.
+
+// Offset of field Runtime::callee_save_methods_[kSaveAll]
+#define RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET 320
+// Offset of field Runtime::callee_save_methods_[kRefsOnly]
+#define RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET 328
+// Offset of field Runtime::callee_save_methods_[kRefsAndArgs]
+#define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 336
+
+// Register holding Thread::Current().
+#define xSELF x18
+// Frame Pointer
+#define xFP   x29
+// Link Register
+#define xLR   x30
+// Define the intraprocedural linkage temporary registers.
+#define xIP0 x16
+#define xIP1 x17
+// Offset of field Thread::suspend_count_ verified in InitCpu
+#define THREAD_FLAGS_OFFSET 0
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
+// Offset of field Thread::exception_ verified in InitCpu
+#define THREAD_EXCEPTION_OFFSET 16
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 112
+
+#endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
new file mode 100644
index 0000000..3d63c36
--- /dev/null
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include "context_arm64.h"
+
+#include "mirror/art_method.h"
+#include "mirror/object-inl.h"
+#include "stack.h"
+#include "thread.h"
+
+
+namespace art {
+namespace arm64 {
+
+static const uint64_t gZero = 0;
+
+void Arm64Context::Reset() {
+  for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
+    gprs_[i] = NULL;
+  }
+  for (size_t i = 0; i < kNumberOfDRegisters; i++) {
+    fprs_[i] = NULL;
+  }
+  gprs_[SP] = &sp_;
+  gprs_[LR] = &pc_;
+  // Initialize registers with easy to spot debug values.
+  sp_ = Arm64Context::kBadGprBase + SP;
+  pc_ = Arm64Context::kBadGprBase + LR;
+}
+
+void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
+  mirror::ArtMethod* method = fr.GetMethod();
+  uint32_t core_spills = method->GetCoreSpillMask();
+  uint32_t fp_core_spills = method->GetFpSpillMask();
+  size_t spill_count = __builtin_popcount(core_spills);
+  size_t fp_spill_count = __builtin_popcount(fp_core_spills);
+  size_t frame_size = method->GetFrameSizeInBytes();
+
+  if (spill_count > 0) {
+    // Lowest number spill is farthest away, walk registers and fill into context.
+    int j = 1;
+    for (size_t i = 0; i < kNumberOfCoreRegisters; i++) {
+      if (((core_spills >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count  - j, frame_size);
+        j++;
+      }
+    }
+  }
+
+  if (fp_spill_count > 0) {
+    // Lowest number spill is farthest away, walk registers and fill into context.
+    int j = 1;
+    for (size_t i = 0; i < kNumberOfDRegisters; i++) {
+      if (((fp_core_spills >> i) & 1) != 0) {
+        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size);
+        j++;
+      }
+    }
+  }
+}
+
+void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
+  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+  DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since it is never reset.
+  DCHECK(gprs_[reg] != NULL);
+  *gprs_[reg] = value;
+}
+
+void Arm64Context::SmashCallerSaves() {
+  // This needs to be 0 because we want a null/zero return value.
+  gprs_[X0] = const_cast<uint64_t*>(&gZero);
+  gprs_[X1] = NULL;
+  gprs_[X2] = NULL;
+  gprs_[X3] = NULL;
+  gprs_[X4] = NULL;
+  gprs_[X5] = NULL;
+  gprs_[X6] = NULL;
+  gprs_[X7] = NULL;
+  gprs_[X8] = NULL;
+  gprs_[X9] = NULL;
+  gprs_[X10] = NULL;
+  gprs_[X11] = NULL;
+  gprs_[X12] = NULL;
+  gprs_[X13] = NULL;
+  gprs_[X14] = NULL;
+  gprs_[X15] = NULL;
+
+  fprs_[D8] = NULL;
+  fprs_[D9] = NULL;
+  fprs_[D10] = NULL;
+  fprs_[D11] = NULL;
+  fprs_[D12] = NULL;
+  fprs_[D13] = NULL;
+  fprs_[D14] = NULL;
+  fprs_[D15] = NULL;
+}
+
+extern "C" void art_quick_do_long_jump(uint64_t*, uint64_t*);
+
+void Arm64Context::DoLongJump() {
+  uint64_t gprs[32];
+  uint64_t fprs[32];
+
+  for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
+    gprs[i] = gprs_[i] != NULL ? *gprs_[i] : Arm64Context::kBadGprBase + i;
+  }
+  for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
+    fprs[i] = fprs_[i] != NULL ? *fprs_[i] : Arm64Context::kBadGprBase + i;
+  }
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(Thread::Current()), gprs[TR]);
+  art_quick_do_long_jump(gprs, fprs);
+}
+
+}  // namespace arm64
+}  // namespace art
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
new file mode 100644
index 0000000..d40e291
--- /dev/null
+++ b/runtime/arch/arm64/context_arm64.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_CONTEXT_ARM64_H_
+#define ART_RUNTIME_ARCH_ARM64_CONTEXT_ARM64_H_
+
+#include "arch/context.h"
+#include "base/logging.h"
+#include "registers_arm64.h"
+
+namespace art {
+namespace arm64 {
+
+class Arm64Context : public Context {
+ public:
+  Arm64Context() {
+    Reset();
+  }
+
+  ~Arm64Context() {}
+
+  void Reset();
+
+  void FillCalleeSaves(const StackVisitor& fr);
+
+  void SetSP(uintptr_t new_sp) {
+    SetGPR(SP, new_sp);
+  }
+
+  void SetPC(uintptr_t new_lr) {
+    SetGPR(LR, new_lr);
+  }
+
+  virtual uintptr_t* GetGPRAddress(uint32_t reg) {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+    return gprs_[reg];
+  }
+
+  uintptr_t GetGPR(uint32_t reg) {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+    return *gprs_[reg];
+  }
+
+  void SetGPR(uint32_t reg, uintptr_t value);
+  void SmashCallerSaves();
+  void DoLongJump();
+
+ private:
+  // Pointers to register locations, initialized to NULL or the specific registers below.
+  uintptr_t* gprs_[kNumberOfCoreRegisters];
+  uint64_t* fprs_[kNumberOfDRegisters];
+  // Hold values for sp and pc if they are not located within a stack frame.
+  uintptr_t sp_, pc_;
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_ARM64_CONTEXT_ARM64_H_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
new file mode 100644
index 0000000..2a5c7d1
--- /dev/null
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entrypoints/interpreter/interpreter_entrypoints.h"
+#include "entrypoints/portable/portable_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/math_entrypoints.h"
+
+namespace art {
+
+// Interpreter entrypoints.
+extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper& mh,
+                                                 const DexFile::CodeItem* code_item,
+                                                 ShadowFrame* shadow_frame, JValue* result);
+extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
+                                           const DexFile::CodeItem* code_item,
+                                           ShadowFrame* shadow_frame, JValue* result);
+
+// Portable entrypoints.
+extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
+
+// Cast entrypoints.
+extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+                                            const mirror::Class* ref_class);
+extern "C" void art_quick_check_cast(void*, void*);
+
+// DexCache entrypoints.
+extern "C" void* art_quick_initialize_static_storage(uint32_t, void*);
+extern "C" void* art_quick_initialize_type(uint32_t, void*);
+extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
+extern "C" void* art_quick_resolve_string(void*, uint32_t);
+
+// Exception entrypoints.
+extern "C" void* GetAndClearException(Thread*);
+
+// Field entrypoints.
+extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
+extern "C" int art_quick_set32_static(uint32_t, int32_t);
+extern "C" int art_quick_set64_instance(uint32_t, void*, int64_t);
+extern "C" int art_quick_set64_static(uint32_t, int64_t);
+extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
+extern "C" int art_quick_set_obj_static(uint32_t, void*);
+extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
+extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
+extern "C" int64_t art_quick_get64_static(uint32_t);
+extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
+extern "C" void* art_quick_get_obj_static(uint32_t);
+
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
+extern "C" void art_quick_handle_fill_data(void*, void*);
+
+// Lock entrypoints.
+extern "C" void art_quick_lock_object(void*);
+extern "C" void art_quick_unlock_object(void*);
+
+// Math entrypoints.
+extern int32_t CmpgDouble(double a, double b);
+extern int32_t CmplDouble(double a, double b);
+extern int32_t CmpgFloat(float a, float b);
+extern int32_t CmplFloat(float a, float b);
+
+// Single-precision FP arithmetic.
+extern "C" float fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
+
+// Double-precision FP arithmetic.
+extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+
+// Long long arithmetic - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
+extern "C" int64_t art_quick_mul_long(int64_t, int64_t);
+extern "C" uint64_t art_quick_shl_long(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_shr_long(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
+
+// Intrinsic entrypoints.
+extern "C" int32_t __memcmp16(void*, void*, int32_t);
+extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
+extern "C" int32_t art_quick_string_compareto(void*, void*);
+
+// Invoke entrypoints.
+extern "C" void art_quick_imt_conflict_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_to_interpreter_bridge(mirror::ArtMethod*);
+extern "C" void art_quick_invoke_direct_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_interface_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_static_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
+
+// Thread entrypoints.
+extern void CheckSuspendFromCode(Thread* thread);
+extern "C" void art_quick_test_suspend();
+
+// Throw entrypoints.
+extern "C" void art_quick_deliver_exception(void*);
+extern "C" void art_quick_throw_array_bounds(int32_t index, int32_t limit);
+extern "C" void art_quick_throw_div_zero();
+extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow(void*);
+
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+
+// Generic JNI downcall
+extern "C" void art_quick_generic_jni_trampoline(mirror::ArtMethod*);
+
+void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
+                     PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
+  // Interpreter
+  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
+  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
+
+  // JNI
+  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
+
+  // Portable
+  ppoints->pPortableResolutionTrampoline = art_portable_resolution_trampoline;
+  ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
+
+  // Alloc
+  ResetQuickAllocEntryPoints(qpoints);
+
+  // Cast
+  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+  qpoints->pCheckCast = art_quick_check_cast;
+
+  // DexCache
+  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
+  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
+  qpoints->pInitializeType = art_quick_initialize_type;
+  qpoints->pResolveString = art_quick_resolve_string;
+
+  // Field
+  qpoints->pSet32Instance = art_quick_set32_instance;
+  qpoints->pSet32Static = art_quick_set32_static;
+  qpoints->pSet64Instance = art_quick_set64_instance;
+  qpoints->pSet64Static = art_quick_set64_static;
+  qpoints->pSetObjInstance = art_quick_set_obj_instance;
+  qpoints->pSetObjStatic = art_quick_set_obj_static;
+  qpoints->pGet32Instance = art_quick_get32_instance;
+  qpoints->pGet64Instance = art_quick_get64_instance;
+  qpoints->pGetObjInstance = art_quick_get_obj_instance;
+  qpoints->pGet32Static = art_quick_get32_static;
+  qpoints->pGet64Static = art_quick_get64_static;
+  qpoints->pGetObjStatic = art_quick_get_obj_static;
+
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
+  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
+
+  // JNI
+  qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  qpoints->pJniMethodEnd = JniMethodEnd;
+  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+  qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
+
+  // Locks
+  qpoints->pLockObject = art_quick_lock_object;
+  qpoints->pUnlockObject = art_quick_unlock_object;
+
+  // Math
+  // TODO: NULL entrypoints are not needed for ARM64 - generate inline.
+  qpoints->pCmpgDouble = CmpgDouble;
+  qpoints->pCmpgFloat = CmpgFloat;
+  qpoints->pCmplDouble = CmplDouble;
+  qpoints->pCmplFloat = CmplFloat;
+  qpoints->pFmod = fmod;
+  qpoints->pSqrt = sqrt;
+  qpoints->pL2d = NULL;
+  qpoints->pFmodf = fmodf;
+  qpoints->pL2f = NULL;
+  qpoints->pD2iz = NULL;
+  qpoints->pF2iz = NULL;
+  qpoints->pIdivmod = NULL;
+  qpoints->pD2l = NULL;
+  qpoints->pF2l = NULL;
+  qpoints->pLdiv = NULL;
+  qpoints->pLmod = NULL;
+  qpoints->pLmul = art_quick_mul_long;
+  qpoints->pShlLong = art_quick_shl_long;
+  qpoints->pShrLong = art_quick_shr_long;
+  qpoints->pUshrLong = art_quick_ushr_long;
+
+  // Intrinsics
+  qpoints->pIndexOf = art_quick_indexof;
+  qpoints->pMemcmp16 = __memcmp16;
+  qpoints->pStringCompareTo = art_quick_string_compareto;
+  qpoints->pMemcpy = memcpy;
+
+  // Invocation
+  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
+  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
+  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
+  qpoints->pInvokeDirectTrampolineWithAccessCheck = art_quick_invoke_direct_trampoline_with_access_check;
+  qpoints->pInvokeInterfaceTrampolineWithAccessCheck = art_quick_invoke_interface_trampoline_with_access_check;
+  qpoints->pInvokeStaticTrampolineWithAccessCheck = art_quick_invoke_static_trampoline_with_access_check;
+  qpoints->pInvokeSuperTrampolineWithAccessCheck = art_quick_invoke_super_trampoline_with_access_check;
+  qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
+
+  // Thread
+  qpoints->pCheckSuspend = CheckSuspendFromCode;
+  qpoints->pTestSuspend = art_quick_test_suspend;
+
+  // Throws
+  qpoints->pDeliverException = art_quick_deliver_exception;
+  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
+  qpoints->pThrowDivZero = art_quick_throw_div_zero;
+  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
+  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
+  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+}
+
+}  // namespace art
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
new file mode 100644
index 0000000..419e5af
--- /dev/null
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "fault_handler.h"
+#include <sys/ucontext.h>
+#include "base/macros.h"
+#include "globals.h"
+#include "base/logging.h"
+#include "base/hex_dump.h"
+
+
+//
+// ARM64 specific fault handler functions.
+//
+
+namespace art {
+
+void FaultManager::GetMethodAndReturnPC(void* context, uintptr_t& method, uintptr_t& return_pc) {
+}
+
+bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
+  return false;
+}
+
+bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  return false;
+}
+
+bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
+  return false;
+}
+}       // namespace art
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
new file mode 100644
index 0000000..d2ed692
--- /dev/null
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_arm64.S"
+
+    /*
+     * Jni dlsym lookup stub.
+     */
+    .extern artFindNativeMethod
+UNIMPLEMENTED art_jni_dlsym_lookup_stub
+
+    /*
+     * Entry point of native methods when JNI bug compatibility is enabled.
+     */
+    .extern artWorkAroundAppJniBugs
+UNIMPLEMENTED art_work_around_app_jni_bugs
+
diff --git a/runtime/arch/arm64/portable_entrypoints_arm64.S b/runtime/arch/arm64/portable_entrypoints_arm64.S
new file mode 100644
index 0000000..e136885
--- /dev/null
+++ b/runtime/arch/arm64/portable_entrypoints_arm64.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_arm64.S"
+
+    /*
+     * Portable invocation stub.
+     */
+UNIMPLEMENTED art_portable_invoke_stub
+
+UNIMPLEMENTED art_portable_proxy_invoke_handler
+
+UNIMPLEMENTED art_portable_resolution_trampoline
+
+UNIMPLEMENTED art_portable_to_interpreter_bridge
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
new file mode 100644
index 0000000..2d64e7f
--- /dev/null
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_arm64.S"
+
+#include "arch/quick_alloc_entrypoints.S"
+
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     */
+.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    adrp x9, :got:_ZN3art7Runtime9instance_E
+    ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    // Our registers aren't intermixed - just spill in order.
+    ldr x9, [x9]  // x9 = art::Runtime::instance_ (a Runtime*).
+
+    // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kSaveAll].
+    ldr x9, [x9, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
+
+    sub sp, sp, #368
+    .cfi_adjust_cfa_offset 368
+
+    // FP args
+    stp d0, d1,   [sp, #8]
+    stp d2, d3, [sp, #24]
+    stp d4, d5, [sp, #40]
+    stp d6, d7, [sp, #56]
+
+    // FP callee-saves
+    stp d8, d9,   [sp, #72]
+    stp d10, d11, [sp, #88]
+    stp d12, d13, [sp, #104]
+    stp d14, d15, [sp, #120]
+
+    stp d16, d17,   [sp, #136]
+    stp d18, d19,   [sp, #152]
+    stp d20, d21,   [sp, #168]
+    stp d22, d23,   [sp, #184]
+    stp d24, d25,   [sp, #200]
+    stp d26, d27,   [sp, #216]
+    stp d28, d29,   [sp, #232]
+    stp d30, d31,   [sp, #248]
+
+
+    // Callee saved.
+    stp xSELF, x19, [sp, #264]
+    stp x20, x21, [sp, #280]
+    stp x22, x23, [sp, #296]
+    stp x24, x25, [sp, #312]
+    stp x26, x27, [sp, #328]
+    stp x28, xFP, [sp, #344]    // Save FP.
+    str xLR, [sp, #360]
+
+    .cfi_offset x18,72
+    .cfi_offset x19,80
+    .cfi_offset x20,88
+    .cfi_offset x21,96
+    .cfi_offset x22,104
+    .cfi_offset x23,112
+    .cfi_offset x24,120
+    .cfi_offset x25,128
+    .cfi_offset x26,136
+    .cfi_offset x27,144
+    .cfi_offset x28,152
+    .cfi_offset x29,160
+    .cfi_offset x30,168
+
+    // Store the save-all callee-save method at the bottom of the frame.
+    str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAll].
+
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
+     */
+.macro SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+    brk 0
+.endm
+
+.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    brk 0
+.endm
+
+.macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+    brk 0
+.endm
+
+
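+// Frame layout produced by SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL (derived from the
+// stores below):
+//   [sp, #0]    ArtMethod*  (stored by the wrapping macro or trampoline)
+//   [sp, #16]   d0 - d15
+//   [sp, #144]  x1 - x7, xSELF
+//   [sp, #208]  x19 - x28
+//   [sp, #288]  xFP, xLR
+// for a total frame size of 304 bytes.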
+.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    sub sp, sp, #304
+    .cfi_adjust_cfa_offset 304
+
+    stp d0, d1,   [sp, #16]
+    stp d2, d3,   [sp, #32]
+    stp d4, d5,   [sp, #48]
+    stp d6, d7,   [sp, #64]
+    stp d8, d9,   [sp, #80]
+    stp d10, d11, [sp, #96]
+    stp d12, d13, [sp, #112]
+    stp d14, d15, [sp, #128]
+
+    stp x1,  x2, [sp, #144]
+    stp x3,  x4, [sp, #160]
+    stp x5,  x6, [sp, #176]
+    stp x7,  xSELF, [sp, #192]
+    stp x19, x20, [sp, #208]
+    stp x21, x22, [sp, #224]
+    stp x23, x24, [sp, #240]
+    stp x25, x26, [sp, #256]
+    stp x27, x28, [sp, #272]
+    stp xFP, xLR, [sp, #288]
+
+    .cfi_offset x1,144
+    .cfi_offset x2,152
+    .cfi_offset x3,160
+    .cfi_offset x4,168
+    .cfi_offset x5,176
+    .cfi_offset x6,184
+    .cfi_offset x7,192
+    .cfi_offset x18,200
+    .cfi_offset x19,208
+    .cfi_offset x20,216
+    .cfi_offset x21,224
+    .cfi_offset x22,232
+    .cfi_offset x23,240
+    .cfi_offset x24,248
+    .cfi_offset x25,256
+    .cfi_offset x26,264
+    .cfi_offset x27,272
+    .cfi_offset x28,280
+    .cfi_offset x29,288
+    .cfi_offset x30,296
+.endm
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
+     *
+     * TODO This is probably too conservative - saving FP & LR.
+     */
+.macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    adrp x9, :got:_ZN3art7Runtime9instance_E
+    ldr x9, [x9, #:got_lo12:_ZN3art7Runtime9instance_E]
+
+    // Our registers aren't intermixed - just spill in order.
+    ldr x9, [x9]  // x9 = art::Runtime::instance_ (a Runtime*).
+
+    // x9 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsAndArgs].
+    ldr x9, [x9, RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
+
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+
+    str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+.endm
+
+.macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    ldp d0, d1,   [sp, #16]
+    ldp d2, d3,   [sp, #32]
+    ldp d4, d5,   [sp, #48]
+    ldp d6, d7,   [sp, #64]
+    ldp d8, d9,   [sp, #80]
+    ldp d10, d11, [sp, #96]
+    ldp d12, d13, [sp, #112]
+    ldp d14, d15, [sp, #128]
+
+    // args.
+    ldp x1,  x2, [sp, #144]
+    ldp x3,  x4, [sp, #160]
+    ldp x5,  x6, [sp, #176]
+    ldp x7,  xSELF, [sp, #192]
+    ldp x19, x20, [sp, #208]
+    ldp x21, x22, [sp, #224]
+    ldp x23, x24, [sp, #240]
+    ldp x25, x26, [sp, #256]
+    ldp x27, x28, [sp, #272]
+    ldp xFP, xLR, [sp, #288]
+
+    add sp, sp, #304
+    .cfi_adjust_cfa_offset -304
+.endm
+
+.macro RETURN_IF_RESULT_IS_ZERO
+    brk 0
+.endm
+
+.macro RETURN_IF_RESULT_IS_NON_ZERO
+    brk 0
+.endm
+
+    /*
+     * Macro that sets up the callee save frame and calls through to
+     * artDeliverPendingExceptionFromCode, where the pending exception is
+     * Thread::Current()->exception_.
+     */
+.macro DELIVER_PENDING_EXCEPTION
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov x0, xSELF
+    mov x1, sp
+
+    // Point of no return.
+    b artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*, SP)
+    brk 0  // Unreached
+.endm
+
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+    ldr x9, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+    cbnz x9, 1f
+    ret
+1:
+    DELIVER_PENDING_EXCEPTION
+.endm
+
+.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
+    .extern \cxx_name
+ENTRY \c_name
+    brk 0
+END \c_name
+.endm
+
+.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
+    .extern \cxx_name
+ENTRY \c_name
+    brk 0
+END \c_name
+.endm
+
+.macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
+    .extern \cxx_name
+ENTRY \c_name
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    brk 0
+END \c_name
+.endm
+
+    /*
+     * Called by managed code, saves callee saves and then calls artThrowException
+     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver a NullPointerException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArithmeticException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
+     * index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
+
+    /*
+     * Called by managed code to create and deliver a StackOverflowError.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
+
+    /*
+     * Called by managed code to create and deliver a NoSuchMethodError.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
+
+    /*
+     * TODO arm64 specifics need to be fleshed out.
+     * All generated callsites for interface invokes and invocation slow paths will load arguments
+     * as usual - except instead of loading x0 with the target Method*, x0 will contain
+     * the method_idx.  This wrapper will save x1-x3, load the caller's Method*, align the
+     * stack and call the appropriate C helper.
+     * NOTE: "this" is the first visible argument of the target, and so can be found in x1.
+     *
+     * The helper will attempt to locate the target and return a result consisting of the
+     * target Method* in x0 and method->code_ in x1.
+     *
+     * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the
+     * thread and we branch to another stub to deliver it.
+     *
+     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
+     * pointing back to the original caller.
+     */
+.macro INVOKE_TRAMPOLINE c_name, cxx_name
+    .extern \cxx_name
+ENTRY \c_name
+    brk 0
+END \c_name
+.endm
+
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
+
+INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
+
+/*
+ *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
+ *                                       uint32_t  *args,     x1
+ *                                       uint32_t argsize,    w2
+ *                                       Thread *self,        x3
+ *                                       JValue *result,      x4
+ *                                       char   *shorty);     x5
+ *  +----------------------+
+ *  |                      |
+ *  |  C/C++ frame         |
+ *  |       LR''           |
+ *  |       FP''           | <- SP'
+ *  +----------------------+
+ *  +----------------------+
+ *  |        SP'           |
+ *  |        X5            |
+ *  |        X4            |        Saved registers
+ *  |        LR'           |
+ *  |        FP'           | <- FP
+ *  +----------------------+
+ *  | uint32_t out[n-1]    |
+ *  |    :      :          |        Outs
+ *  | uint32_t out[0]      |
+ *  | ArtMethod* NULL      | <- SP
+ *  +----------------------+
+ *
+ * Outgoing registers:
+ *  x0    - Method*
+ *  x1-x7 - integer parameters.
+ *  d0-d7 - Floating point parameters.
+ *  xSELF = self
+ *  SP = & of ArtMethod*
+ *  x1 = "this" pointer.
+ *
+ */
+ENTRY art_quick_invoke_stub
+    // Spill registers as per AAPCS64 calling convention.
+
+SAVE_SIZE=5*8   // x4, x5, SP, LR & FP saved.
+SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
+
+    mov x9, sp     // Save stack pointer.
+
+    mov x10, xFP   // Save frame pointer
+    .cfi_register x29,x10
+    add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame.
+
+    sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* +  args
+
+    and x11, x11, # ~0xf  // Enforce 16 byte stack alignment.
+
+    sub xFP, x9, #SAVE_SIZE   // Calculate new FP. Don't store here until SP moved.
+    .cfi_def_cfa_register x29
+
+    mov sp, x11        // set new SP.
+
+    str x9, [xFP, #32]     // Save old stack pointer.
+
+    .cfi_offset x9, 32
+
+    stp x4, x5, [xFP, #16]  // Save result and shorty addresses.
+
+    .cfi_offset x4, 16
+    .cfi_offset x5, 24
+
+    stp x10, xLR, [xFP]   // Store lr & old fp @ fp
+
+    .cfi_offset x30, 0
+    .cfi_offset x10, 8
+
+    mov xSELF, x3       // Move thread pointer into SELF register.
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X1 - source address
+    // W2 - args length
+    // X10 - destination address.
+    add x9, sp, #8     // Destination is just above the NULL Method* slot at the bottom of the stack.
+
+    // w2 = argsize parameter.
+.LcopyParams:
+    cmp w2, #0
+    beq .LendCopyParams
+    sub w2, w2, #4      // Need 65536 bytes of range.
+    ldr w10, [x1, x2]
+    str w10, [x9, x2]
+
+    b .LcopyParams
+
+.LendCopyParams:
+
+    // Store NULL into Method* at bottom of frame.
+    str xzr, [sp]
+
+    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
+    // Parse the passed shorty to determine which register to load.
+    // Load addresses for routines that load WXSD registers.
+    adr  x11, .LstoreW2
+    adr  x12, .LstoreX2
+    adr  x13, .LstoreS0
+    adr  x14, .LstoreD0
+
+    // Initialize routine offsets to 0 for integers and floats.
+    // x8 for integers, x15 for floating point.
+    mov x8, #0
+    mov x15, #0
+
+    add x10, x5, #1         // Load shorty address, plus one to skip return value.
+    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.
+
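+    // Worked example (illustration only): for shorty "VILF", i.e. void f(int, Object, float),
+    // the loop below loads the int into w2, the reference into w3 and the float into s0;
+    // "this" is already in w1 from the load above.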
+    // Loop to fill registers.
+.LfillRegisters:
+    ldrb w17, [x10], #1       // Load next character in signature, and increment.
+    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.
+
+    cmp  w17, #'F' // is this a float?
+    bne .LisDouble
+
+    cmp x15, # 8*12         // Skip this load if all registers full.
+    beq .LfillRegisters
+
+    add x17, x13, x15       // Calculate subroutine to jump to.
+    br  x17
+
+.LisDouble:
+    cmp w17, #'D'           // is this a double?
+    bne .LisLong
+
+    cmp x15, # 8*12         // Skip this load if all registers full.
+    beq .LfillRegisters
+
+
+    add x17, x14, x15       // Calculate subroutine to jump to.
+    br x17
+
+.LisLong:
+    cmp w17, #'J'           // is this a long?
+    bne .LisOther
+
+    cmp x8, # 7*12          // Skip this load if all registers full.
+    beq .LfillRegisters
+
+    add x17, x12, x8        // Calculate subroutine to jump to.
+    br x17
+
+
+.LisOther:                  // Everything else takes one vReg.
+    cmp x8, # 7*12          // Skip this load if all registers full.
+    beq .LfillRegisters
+    add x17, x11, x8        // Calculate subroutine to jump to.
+    br x17
+
+// Macro for loading a parameter into a register.
+//  counter - the register with offset into these tables
+//  size - the size of the register - 4 or 8 bytes.
+//  register - the name of the register to be loaded.
+//  return - the label to branch back to after the load.
+.macro LOADREG counter size register return
+    ldr \register , [x9], #\size
+    add \counter, \counter, 12
+    b \return
+.endm
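+// For illustration, "LOADREG x8 4 w2 .LfillRegisters" expands to
+//     ldr w2 , [x9], #4
+//     add x8, x8, 12
+//     b .LfillRegisters
+// i.e. it pops the next 4-byte argument into w2 and advances the integer routine offset by
+// 12 bytes (the size of one expanded LOADREG).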
+
+// Store ints.
+.LstoreW2:
+    LOADREG x8 4 w2 .LfillRegisters
+    LOADREG x8 4 w3 .LfillRegisters
+    LOADREG x8 4 w4 .LfillRegisters
+    LOADREG x8 4 w5 .LfillRegisters
+    LOADREG x8 4 w6 .LfillRegisters
+    LOADREG x8 4 w7 .LfillRegisters
+
+// Store longs.
+.LstoreX2:
+    LOADREG x8 8 x2 .LfillRegisters
+    LOADREG x8 8 x3 .LfillRegisters
+    LOADREG x8 8 x4 .LfillRegisters
+    LOADREG x8 8 x5 .LfillRegisters
+    LOADREG x8 8 x6 .LfillRegisters
+    LOADREG x8 8 x7 .LfillRegisters
+
+// Store singles.
+.LstoreS0:
+    LOADREG x15 4 s0 .LfillRegisters
+    LOADREG x15 4 s1 .LfillRegisters
+    LOADREG x15 4 s2 .LfillRegisters
+    LOADREG x15 4 s3 .LfillRegisters
+    LOADREG x15 4 s4 .LfillRegisters
+    LOADREG x15 4 s5 .LfillRegisters
+    LOADREG x15 4 s6 .LfillRegisters
+    LOADREG x15 4 s7 .LfillRegisters
+
+// Store doubles.
+.LstoreD0:
+    LOADREG x15 8 d0 .LfillRegisters
+    LOADREG x15 8 d1 .LfillRegisters
+    LOADREG x15 8 d2 .LfillRegisters
+    LOADREG x15 8 d3 .LfillRegisters
+    LOADREG x15 8 d4 .LfillRegisters
+    LOADREG x15 8 d5 .LfillRegisters
+    LOADREG x15 8 d6 .LfillRegisters
+    LOADREG x15 8 d7 .LfillRegisters
+
+
+.LcallFunction:
+
+    // load method-> METHOD_QUICK_CODE_OFFSET
+    ldr x9, [x0, #METHOD_QUICK_CODE_OFFSET]
+    // Branch to method.
+    blr x9
+
+    // Restore return value address and shorty address.
+    ldp x4, x5, [xFP, #16]
+    .cfi_restore x4
+    .cfi_restore x5
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x5]
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Lexit_art_quick_invoke_stub
+
+    cmp w10, #'D'
+    bne .Lreturn_is_float
+    str d0, [x4]
+    b .Lexit_art_quick_invoke_stub
+
+.Lreturn_is_float:
+    cmp w10, #'F'
+    bne .Lreturn_is_int
+    str s0, [x4]
+    b .Lexit_art_quick_invoke_stub
+
+    // Just store x0. Doesn't matter if it is 64 or 32 bits.
+.Lreturn_is_int:
+    str x0, [x4]
+
+.Lexit_art_quick_invoke_stub:
+    ldr x2, [x29, #32]   // Restore stack pointer.
+    mov sp, x2
+    .cfi_restore sp
+
+    ldp x29, x30, [x29]    // Restore old frame pointer and link register.
+    .cfi_restore x29
+    .cfi_restore x30
+
+    ret
+END art_quick_invoke_stub
+
+/*  extern"C"
+ *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
+ *                                       uint32_t  *args,     x1
+ *                                       uint32_t argsize,    w2
+ *                                       Thread *self,        x3
+ *                                       JValue *result,      x4
+ *                                       char   *shorty);     x5
+ */
+ENTRY art_quick_invoke_static_stub
+    // Spill registers as per AAPCS64 calling convention.
+
+SAVE_SIZE=5*8   // x4, x5, SP, LR & FP saved
+SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
+
+    mov x9, sp     // Save stack pointer.
+
+    mov x10, xFP   // Save frame pointer
+    .cfi_register x29,x10
+    add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame.
+
+    sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* +  args
+
+    and x11, x11, # ~0xf  // Enforce 16 byte stack alignment.
+
+    sub xFP, x9, #SAVE_SIZE   // Calculate new FP. Don't store here until SP moved.
+
+    mov sp, x11        // set new SP.
+
+    .cfi_def_cfa_register x29
+
+    str x9, [xFP, #32]     // Save old stack pointer.
+
+    .cfi_offset x9, 32
+
+    stp x4, x5, [xFP, #16]  // Save result and shorty addresses.
+
+    .cfi_offset x4, 16
+    .cfi_offset x5, 24
+
+    stp x10, xLR, [x29]   // Store lr & old fp @ fp
+
+    .cfi_offset x30, 0
+    .cfi_offset x10, 8
+
+    mov xSELF, x3       // Move thread pointer into SELF register.
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X1 - source address
+    // W2 - args length
+    // X10 - destination address.
+    add x9, sp, #8     // Destination is just above the NULL Method* slot at the bottom of the stack.
+
+    // w2 = argsize parameter.
+.LcopyParams2:
+    cmp w2, #0
+    beq .LendCopyParams2
+    sub w2, w2, #4      // Need 65536 bytes of range.
+    ldr w10, [x1, x2]
+    str w10, [x9, x2]
+
+    b .LcopyParams2
+
+.LendCopyParams2:
+
+    // Store NULL into Method* at bottom of frame.
+    str xzr, [sp]
+
+    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
+    // Parse the passed shorty to determine which register to load.
+    // Load addresses for routines that load WXSD registers.
+    adr  x11, .LstoreW1_2
+    adr  x12, .LstoreX1_2
+    adr  x13, .LstoreS0_2
+    adr  x14, .LstoreD0_2
+
+    // Initialize routine offsets to 0 for integers and floats.
+    // x8 for integers, x15 for floating point.
+    mov x8, #0
+    mov x15, #0
+
+    add x10, x5, #1     // Load shorty address, plus one to skip return value.
+
+    // Loop to fill registers.
+.LfillRegisters2:
+    ldrb w17, [x10], #1         // Load next character in signature, and increment.
+    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.
+
+    cmp  w17, #'F'          // is this a float?
+    bne .LisDouble2
+
+    cmp x15, # 8*12         // Skip this load if all registers full.
+    beq .LfillRegisters2
+
+    add x17, x13, x15       // Calculate subroutine to jump to.
+    br  x17
+
+.LisDouble2:
+    cmp w17, #'D'           // is this a double?
+    bne .LisLong2
+
+    cmp x15, # 8*12         // Skip this load if all registers full.
+    beq .LfillRegisters2
+
+
+    add x17, x14, x15       // Calculate subroutine to jump to.
+    br x17
+
+.LisLong2:
+    cmp w17, #'J'           // is this a long?
+    bne .LisOther2
+
+    cmp x8, # 7*12          // Skip this load if all registers full.
+    beq .LfillRegisters2
+
+    add x17, x12, x8        // Calculate subroutine to jump to.
+    br x17
+
+
+.LisOther2:                 // Everything else takes one vReg.
+    cmp x8, # 7*12          // Skip this load if all registers full.
+    beq .LfillRegisters2
+    add x17, x11, x8        // Calculate subroutine to jump to.
+    br x17
+
+// Store ints.
+.LstoreW1_2:
+    LOADREG x8 4 w1 .LfillRegisters2
+    LOADREG x8 4 w2 .LfillRegisters2
+    LOADREG x8 4 w3 .LfillRegisters2
+    LOADREG x8 4 w4 .LfillRegisters2
+    LOADREG x8 4 w5 .LfillRegisters2
+    LOADREG x8 4 w6 .LfillRegisters2
+    LOADREG x8 4 w7 .LfillRegisters2
+
+// Store longs.
+.LstoreX1_2:
+    LOADREG x8 8 x1 .LfillRegisters2
+    LOADREG x8 8 x2 .LfillRegisters2
+    LOADREG x8 8 x3 .LfillRegisters2
+    LOADREG x8 8 x4 .LfillRegisters2
+    LOADREG x8 8 x5 .LfillRegisters2
+    LOADREG x8 8 x6 .LfillRegisters2
+    LOADREG x8 8 x7 .LfillRegisters2
+
+// Store singles.
+.LstoreS0_2:
+    LOADREG x15 4 s0 .LfillRegisters2
+    LOADREG x15 4 s1 .LfillRegisters2
+    LOADREG x15 4 s2 .LfillRegisters2
+    LOADREG x15 4 s3 .LfillRegisters2
+    LOADREG x15 4 s4 .LfillRegisters2
+    LOADREG x15 4 s5 .LfillRegisters2
+    LOADREG x15 4 s6 .LfillRegisters2
+    LOADREG x15 4 s7 .LfillRegisters2
+
+// Store doubles.
+.LstoreD0_2:
+    LOADREG x15 8 d0 .LfillRegisters2
+    LOADREG x15 8 d1 .LfillRegisters2
+    LOADREG x15 8 d2 .LfillRegisters2
+    LOADREG x15 8 d3 .LfillRegisters2
+    LOADREG x15 8 d4 .LfillRegisters2
+    LOADREG x15 8 d5 .LfillRegisters2
+    LOADREG x15 8 d6 .LfillRegisters2
+    LOADREG x15 8 d7 .LfillRegisters2
+
+
+.LcallFunction2:
+
+    // load method-> METHOD_QUICK_CODE_OFFSET.
+    ldr x9, [x0, #METHOD_QUICK_CODE_OFFSET]
+    // Branch to method.
+    blr x9
+
+    // Restore return value address and shorty address.
+    ldp x4, x5, [xFP, #16]
+    .cfi_restore x4
+    .cfi_restore x5
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x5]
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Lexit_art_quick_invoke_stub2
+
+    cmp w10, #'D'
+    bne .Lreturn_is_float2
+    str d0, [x4]
+    b .Lexit_art_quick_invoke_stub2
+
+.Lreturn_is_float2:
+    cmp w10, #'F'
+    bne .Lreturn_is_int2
+    str s0, [x4]
+    b .Lexit_art_quick_invoke_stub2
+
+    // Just store x0. Doesn't matter if it is 64 or 32 bits.
+.Lreturn_is_int2:
+    str x0, [x4]
+
+.Lexit_art_quick_invoke_stub2:
+
+    ldr x2, [xFP, #32]   // Restore stack pointer.
+    mov sp, x2
+    .cfi_restore sp
+
+    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
+    .cfi_restore x29
+    .cfi_restore x30
+
+    ret
+END art_quick_invoke_static_stub
+
+
+    /*
+     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
+     */
+
+ENTRY art_quick_do_long_jump
+    // Load FPRs
+    ldp d0, d1, [x1], #16
+    ldp d2, d3, [x1], #16
+    ldp d4, d5, [x1], #16
+    ldp d6, d7, [x1], #16
+    ldp d8, d9, [x1], #16
+    ldp d10, d11, [x1], #16
+    ldp d12, d13, [x1], #16
+    ldp d14, d15, [x1], #16
+    ldp d16, d17, [x1], #16
+    ldp d18, d19, [x1], #16
+    ldp d20, d21, [x1], #16
+    ldp d22, d23, [x1], #16
+    ldp d24, d25, [x1], #16
+    ldp d26, d27, [x1], #16
+    ldp d28, d29, [x1], #16
+    ldp d30, d31, [x1]
+
+    // Load GPRs
+    // TODO: lots of those are smashed, could optimize.
+    add x0, x0, #30*8
+    ldp x30, x1, [x0], #-16
+    ldp x28, x29, [x0], #-16
+    ldp x26, x27, [x0], #-16
+    ldp x24, x25, [x0], #-16
+    ldp x22, x23, [x0], #-16
+    ldp x20, x21, [x0], #-16
+    ldp x18, x19, [x0], #-16
+    ldp x16, x17, [x0], #-16
+    ldp x14, x15, [x0], #-16
+    ldp x12, x13, [x0], #-16
+    ldp x10, x11, [x0], #-16
+    ldp x8, x9, [x0], #-16
+    ldp x6, x7, [x0], #-16
+    ldp x4, x5, [x0], #-16
+    ldp x2, x3, [x0], #-16
+    mov sp, x1
+
+    // TODO: Is it really OK to use LR for the target PC?
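+    // Note: Arm64Context::Reset() stores the target PC through the LR slot (gprs_[LR] = &pc_),
+    // so LR already holds the desired PC at this point.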
+    mov x0, #0
+    mov x1, #0
+    br  xLR
+END art_quick_do_long_jump
+
+UNIMPLEMENTED art_quick_handle_fill_data
+
+UNIMPLEMENTED art_quick_lock_object
+UNIMPLEMENTED art_quick_unlock_object
+UNIMPLEMENTED art_quick_check_cast
+UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
+UNIMPLEMENTED art_quick_aput_obj_with_bound_check
+UNIMPLEMENTED art_quick_aput_obj
+UNIMPLEMENTED art_quick_initialize_static_storage
+UNIMPLEMENTED art_quick_initialize_type
+UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+UNIMPLEMENTED art_quick_get32_static
+UNIMPLEMENTED art_quick_get64_static
+UNIMPLEMENTED art_quick_get_obj_static
+UNIMPLEMENTED art_quick_get32_instance
+UNIMPLEMENTED art_quick_get64_instance
+UNIMPLEMENTED art_quick_get_obj_instance
+UNIMPLEMENTED art_quick_set32_static
+UNIMPLEMENTED art_quick_set64_static
+UNIMPLEMENTED art_quick_set_obj_static
+UNIMPLEMENTED art_quick_set32_instance
+UNIMPLEMENTED art_quick_set64_instance
+UNIMPLEMENTED art_quick_set_obj_instance
+UNIMPLEMENTED art_quick_resolve_string
+
+// Macro to facilitate adding new allocation entrypoints.
+.macro TWO_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    brk 0
+END \name
+.endm
+
+// Macro to facilitate adding new array allocation entrypoints.
+.macro THREE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    brk 0
+END \name
+.endm
+
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
+
+UNIMPLEMENTED art_quick_test_suspend
+
+/*
+ * Returned by ClassLinker::GetOatCodeFor.
+ */
+UNIMPLEMENTED art_quick_proxy_invoke_handler
+
+UNIMPLEMENTED art_quick_imt_conflict_trampoline
+
+
+ENTRY art_quick_resolution_trampoline
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    mov x2, xSELF
+    mov x3, sp
+    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
+    mov x9, x0           // Remember returned code pointer in x9.
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    cbz x9, 1f
+    br x0
+1:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
+END art_quick_resolution_trampoline
+
+/*
+ * Generic JNI frame layout:
+ *
+ * #-------------------#
+ * |                   |
+ * | caller method...  |
+ * #-------------------#    <--- SP on entry
+ * | Return X30/LR     |
+ * | X29/FP            |    callee save
+ * | X28               |    callee save
+ * | X27               |    callee save
+ * | X26               |    callee save
+ * | X25               |    callee save
+ * | X24               |    callee save
+ * | X23               |    callee save
+ * | X22               |    callee save
+ * | X21               |    callee save
+ * | X20               |    callee save
+ * | X19               |    callee save
+ * | X7                |    arg7
+ * | X6                |    arg6
+ * | X5                |    arg5
+ * | X4                |    arg4
+ * | X3                |    arg3
+ * | X2                |    arg2
+ * | X1                |    arg1
+ * | D15               |    callee save
+ * | D14               |    callee save
+ * | D13               |    callee save
+ * | D12               |    callee save
+ * | D11               |    callee save
+ * | D10               |    callee save
+ * | D9                |    callee save
+ * | D8                |    callee save
+ * | D7                |    float arg 8
+ * | D6                |    float arg 7
+ * | D5                |    float arg 6
+ * | D4                |    float arg 5
+ * | D3                |    float arg 4
+ * | D2                |    float arg 3
+ * | D1                |    float arg 2
+ * | D0                |    float arg 1
+ * | X0/Method*        |  <- X0
+ * #-------------------#
+ * | local ref cookie  | // 4B
+ * |   SIRT size       | // 4B
+ * #-------------------#
+ * | JNI Call Stack    |
+ * #-------------------#    <--- SP on native call
+ * |                   |
+ * | Stack for Regs    |    The trampoline assembly will pop these values
+ * |                   |    into registers for native call
+ * #-------------------#
+ * | Native code ptr   |
+ * #-------------------#
+ * | Free scratch      |
+ * #-------------------#
+ * | Ptr to (1)        |    <--- SP
+ * #-------------------#
+ */
+    /*
+     * Called to do a generic JNI down-call
+     */
+ENTRY art_quick_generic_jni_trampoline
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
+    str x0, [sp, #0]  // Store native ArtMethod* to bottom of stack.
+
+    // Save SP, so we can have static CFI info.
+    mov x28, sp
+    .cfi_def_cfa_register x28
+
+    // This looks the same, but is different: this will be updated to point to the bottom
+    // of the frame when the SIRT is inserted.
+    mov xFP, sp
+
+    mov x8, #5120
+    sub sp, sp, x8
+
+    // prepare for artQuickGenericJniTrampoline call
+    // (Thread*,  SP)
+    //    x0      x1   <= C calling convention
+    //   xSELF    xFP  <= where they are
+
+    mov x0, xSELF   // Thread*
+    mov x1, xFP
+    bl artQuickGenericJniTrampoline  // (Thread*, sp)
+
+    // Get the updated pointer. This is the bottom of the frame _with_ SIRT.
+    ldr xFP, [sp]
+    add x9, sp, #8
+
+    cmp x0, #0
+    b.mi .Lentry_error      // Check for error, negative value.
+
+    // release part of the alloca.
+    add x9, x9, x0
+
+    // Get the code pointer
+    ldr xIP0, [x9, #0]
+
+    // Load parameters from frame into registers.
+    // TODO Check with artQuickGenericJniTrampoline.
+    //      Also, check again AAPCS64 - the stack arguments are interleaved.
+    ldp x0, x1, [x9, #8]
+    ldp x2, x3, [x9, #24]
+    ldp x4, x5, [x9, #40]
+    ldp x6, x7, [x9, #56]
+
+    ldp d0, d1, [x9, #72]
+    ldp d2, d3, [x9, #88]
+    ldp d4, d5, [x9, #104]
+    ldp d6, d7, [x9, #120]
+
+    add sp, x9, #136
+
+    blr xIP0           // native call.
+
+    // Restore self pointer.
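+    // (xSELF was spilled at offset 200 by SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL:
+    //  stp x7, xSELF, [sp, #192].)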
+    ldr xSELF, [x28, #200]
+
+    // result sign extension is handled in C code
+    // prepare for artQuickGenericJniEndTrampoline call
+    // (Thread*,  SP, result, result_f)
+    //   x0       x1   x2       x3       <= C calling convention
+    mov x5, x0      // Save return value
+    mov x0, xSELF   // Thread register
+    mov x1, xFP     // Stack pointer
+    mov x2, x5      // Result (from saved)
+    fmov x3, d0     // d0 will contain floating point result, but needs to go into x3
+
+    bl artQuickGenericJniEndTrampoline
+
+    // Tear down the alloca.
+    mov sp, x28
+    .cfi_def_cfa_register sp
+
+    // Restore self pointer.
+    ldr xSELF, [x28, #200]
+
+    // Pending exceptions possible.
+    ldr x1, [xSELF, THREAD_EXCEPTION_OFFSET]
+    cbnz x1, .Lexception_in_native
+
+    // Tear down the callee-save frame.
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    // store into fpr, for when it's a fpr return...
+    fmov d0, x0
+    ret
+
+.Lentry_error:
+    mov sp, x28
+    .cfi_def_cfa_register sp
+    ldr xSELF, [x28, #200]
+.Lexception_in_native:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
+
+END art_quick_generic_jni_trampoline
+
+/*
+ * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
+ * of a quick call:
+ * x0 = method being called/to bridge to.
+ * x1..x7, d0..d7 = arguments to that method.
+ */
+ENTRY art_quick_to_interpreter_bridge
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
+
+    //  x0 will contain mirror::ArtMethod* method.
+    mov x1, xSELF                          // How to get Thread::Current() ???
+    mov x2, sp
+
+    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
+    //                                      mirror::ArtMethod** sp)
+    bl   artQuickToInterpreterBridge
+
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
+
+    fmov d0, x0
+
+    RETURN_OR_DELIVER_PENDING_EXCEPTION
+END art_quick_to_interpreter_bridge
+
+UNIMPLEMENTED art_quick_instrumentation_entry
+UNIMPLEMENTED art_quick_instrumentation_exit
+UNIMPLEMENTED art_quick_deoptimize
+UNIMPLEMENTED art_quick_mul_long
+UNIMPLEMENTED art_quick_shl_long
+UNIMPLEMENTED art_quick_shr_long
+UNIMPLEMENTED art_quick_ushr_long
+UNIMPLEMENTED art_quick_indexof
+UNIMPLEMENTED art_quick_string_compareto
diff --git a/runtime/arch/arm64/registers_arm64.cc b/runtime/arch/arm64/registers_arm64.cc
index c5bb06b..87901e3 100644
--- a/runtime/arch/arm64/registers_arm64.cc
+++ b/runtime/arch/arm64/registers_arm64.cc
@@ -25,18 +25,18 @@
   "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
   "x10", "x11", "x12", "x13", "x14", "x15", "ip0", "ip1", "x18", "x19",
   "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "fp",
-  "lr", "xzr", "sp"
+  "lr", "sp", "xzr"
 };
 
 static const char* kWRegisterNames[] = {
   "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9",
   "w10", "w11", "w12", "w13", "w14", "w15", "w16", "w17", "w18", "w19",
   "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "w29",
-  "w30", "wzr"
+  "w30", "wsp", "wxr"
 };
 
 std::ostream& operator<<(std::ostream& os, const Register& rhs) {
-  if (rhs >= X0 && rhs <= SP) {
+  if (rhs >= X0 && rhs <= XZR) {
     os << kRegisterNames[rhs];
   } else {
     os << "XRegister[" << static_cast<int>(rhs) << "]";
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index e9460e4..ca904bc 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -61,10 +61,10 @@
   IP1 = 17,     // Used as scratch by ART JNI Assembler.
   FP  = 29,
   LR  = 30,
-  XZR = 31,
-  SP  = 32,     // SP is X31 and overlaps with XRZ but we encode it as a
+  SP  = 31,     // SP is X31 and overlaps with XZR but we encode it as a
                 // special register, due to the different instruction semantics.
-  kNumberOfCoreRegisters = 33,
+  XZR = 32,     // FIXME: This needs to be reconciled with the JNI assembler.
+  kNumberOfCoreRegisters = 32,
   kNoRegister = -1,
 };
 std::ostream& operator<<(std::ostream& os, const Register& rhs);
@@ -103,6 +103,7 @@
   W29 = 29,
   W30 = 30,
   W31 = 31,
+  WSP = 31,
   WZR = 31,
   kNumberOfWRegisters = 32,
   kNoWRegister = -1,
diff --git a/runtime/arch/arm64/thread_arm64.cc b/runtime/arch/arm64/thread_arm64.cc
new file mode 100644
index 0000000..4eebb85
--- /dev/null
+++ b/runtime/arch/arm64/thread_arm64.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "thread.h"
+
+#include "asm_support_arm64.h"
+#include "base/logging.h"
+
+namespace art {
+
+void Thread::InitCpu() {
+  CHECK_EQ(THREAD_FLAGS_OFFSET, OFFSETOF_MEMBER(Thread, state_and_flags_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
+}
+
+void Thread::CleanupCpu() {
+  // Do nothing.
+}
+
+}  // namespace art
diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc
index 5eaf809..b1700bb 100644
--- a/runtime/arch/context.cc
+++ b/runtime/arch/context.cc
@@ -18,6 +18,8 @@
 
 #if defined(__arm__)
 #include "arm/context_arm.h"
+#elif defined(__aarch64__)
+#include "arm64/context_arm64.h"
 #elif defined(__mips__)
 #include "mips/context_mips.h"
 #elif defined(__i386__)
@@ -33,6 +35,8 @@
 Context* Context::Create() {
 #if defined(__arm__)
   return new arm::ArmContext();
+#elif defined(__aarch64__)
+  return new arm64::Arm64Context();
 #elif defined(__mips__)
   return new mips::MipsContext();
 #elif defined(__i386__)
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index acc6f46..f3ec713 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -33,6 +33,8 @@
 #define EF_MIPS_CPIC 4
 #define STV_DEFAULT 0
 
+#define EM_AARCH64 183
+
 #define DT_BIND_NOW 24
 #define DT_INIT_ARRAY 25
 #define DT_FINI_ARRAY 26
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 55fd301..f1b15b5 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -53,7 +53,8 @@
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 96
 #define PORTABLE_STACK_ARG_SKIP 0
 #else
-#error "Unsupported architecture"
+// TODO: the portable path is not yet supported on aarch64, so the error below is disabled for now.
+// #error "Unsupported architecture"
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 0
 #define PORTABLE_STACK_ARG_SKIP 0
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 184e5e9..20432c6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -68,6 +68,38 @@
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * kBytesPerGprSpillLocation;
   }
+#elif defined(__aarch64__)
+  // The callee save frame is pointed to by SP.
+  // | argN       |  |
+  // | ...        |  |
+  // | arg4       |  |
+  // | arg3 spill |  |  Caller's frame
+  // | arg2 spill |  |
+  // | arg1 spill |  |
+  // | Method*    | ---
+  // | LR         |
+  // | X28        |
+  // |  :         |
+  // | X19        |
+  // | X7         |
+  // | :          |
+  // | X1         |
+  // | D15        |
+  // |  :         |
+  // | D0         |
+  // |            |    padding
+  // | Method*    |  <- sp
+  static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
+  static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
+  static constexpr size_t kBytesPerFprSpillLocation = 8;  // FPR spill size is 8 bytes.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 144;  // Offset of first GPR arg.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 296;  // Offset of return address.
+  static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize = 304;  // Frame size.
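+  // These values mirror SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL in
+  // runtime/arch/arm64/quick_entrypoints_arm64.S: d0 is stored at [sp, #16], x1 at [sp, #144],
+  // xLR at [sp, #296], and the frame is 304 bytes in total.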
+  static size_t GprIndexToGprOffset(uint32_t gpr_index) {
+    return gpr_index * kBytesPerGprSpillLocation;
+  }
 #elif defined(__mips__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -888,6 +920,17 @@
   static constexpr bool kMultiRegistersWidened = false;
   static constexpr bool kAlignLongOnStack = true;
   static constexpr bool kAlignDoubleOnStack = true;
+#elif defined(__aarch64__)
+  static constexpr bool kNativeSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr size_t kNumNativeGprArgs = 8;  // 8 arguments passed in GPRs.
+  static constexpr size_t kNumNativeFprArgs = 8;  // 8 arguments passed in FPRs.
+
+  static constexpr size_t kRegistersNeededForLong = 1;
+  static constexpr size_t kRegistersNeededForDouble = 1;
+  static constexpr bool kMultiRegistersAligned = false;
+  static constexpr bool kMultiRegistersWidened = false;
+  static constexpr bool kAlignLongOnStack = false;
+  static constexpr bool kAlignDoubleOnStack = false;
 #elif defined(__mips__)
   // TODO: These are all dummy values!
   static constexpr bool kNativeSoftFloatAbi = true;  // This is a soft float ABI.
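
The arm64 constants in the aarch64 block above describe one contiguous callee-save frame: Method* at sp, 8 bytes of padding, D0-D15, then X1-X7, X19-X28, X18 and FP (the last two appear in the spill masks set up in runtime.cc below rather than in the diagram), and finally LR. A small consistency sketch of that arithmetic, assuming 8-byte spill slots (standalone code, not ART's):

    #include <cstddef>

    // Values copied from the aarch64 block of quick_trampoline_entrypoints.cc.
    constexpr size_t kBytesPerSpill = 8;    // 64-bit GPR/FPR spill slots (assumed).
    constexpr size_t kFpr1Offset    = 16;   // D0 sits above Method* plus 8 bytes of padding.
    constexpr size_t kGpr1Offset    = 144;  // X1 follows the 16 saved D registers.
    constexpr size_t kLrOffset      = 296;  // LR is the topmost saved register.
    constexpr size_t kFrameSize     = 304;

    constexpr size_t kNumSavedFprs = 16;             // D0-D15.
    constexpr size_t kNumSavedGprsBelowLr = 7 + 12;  // X1-X7, X19-X28, X18, FP.

    static_assert(kGpr1Offset == kFpr1Offset + kNumSavedFprs * kBytesPerSpill,
                  "GPR spills start right after the FPR spill area");
    static_assert(kLrOffset == kGpr1Offset + kNumSavedGprsBelowLr * kBytesPerSpill,
                  "LR sits on top of the other saved GPRs");
    static_assert(kFrameSize == kLrOffset + kBytesPerSpill,
                  "the frame ends just past the saved LR");
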
diff --git a/runtime/globals.h b/runtime/globals.h
index 5bc4b91..9c6fa0d 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -49,6 +49,10 @@
 // but ARM ELF requires 8.
 static constexpr size_t kArmAlignment = 8;
 
+// ARM64 instruction alignment. AArch64 requires code to be 4-byte aligned.
+// AArch64 ELF requires at least 4-byte alignment.
+static constexpr size_t kArm64Alignment = 4;
+
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?
 static constexpr size_t kMipsAlignment = 8;
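
These alignment constants are used to round code offsets up to the required boundary; for power-of-two alignments the usual idiom is (offset + alignment - 1) & ~(alignment - 1). A generic sketch of that idiom (not ART's own RoundUp helper), using kArm64Alignment's value of 4 as the example:

    #include <cstddef>

    // Standard round-up-to-power-of-two idiom; 4 (kArm64Alignment) qualifies.
    constexpr size_t RoundUpPow2(size_t offset, size_t alignment) {
      return (offset + alignment - 1) & ~(alignment - 1);
    }

    static_assert(RoundUpPow2(0, 4) == 0, "already aligned");
    static_assert(RoundUpPow2(6, 4) == 8, "6 rounds up to the next 4-byte boundary");
    static_assert(RoundUpPow2(8, 4) == 8, "exact multiples are unchanged");
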
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index e8a0891..7814f36 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -37,7 +37,7 @@
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
 extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                       const char*);
-#ifdef __x86_64__
+#ifdef __LP64__
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 #endif
@@ -282,7 +282,7 @@
                                                   : GetEntryPointFromPortableCompiledCode());
       }
       if (!IsPortableCompiled()) {
-#ifdef __x86_64__
+#ifdef __LP64__
         if (!IsStatic()) {
           (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
         } else {
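
Both the aarch64 and x86_64 Android targets use the LP64 data model, so __LP64__ is the natural predicate for routing static-method calls through art_quick_invoke_static_stub on every 64-bit target rather than keying on __x86_64__ alone. A trivial standalone check of that assumption:

    #if defined(__LP64__)
    // aarch64 and x86_64 builds take this branch: 64-bit pointers.
    static_assert(sizeof(void*) == 8, "LP64 targets have 64-bit pointers");
    #else
    // arm, x86 and mips builds stay on the 32-bit path.
    static_assert(sizeof(void*) == 4, "ILP32 targets have 32-bit pointers");
    #endif
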
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 51edc85..eaa27de 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -30,6 +30,7 @@
 #include <fcntl.h>
 
 #include "arch/arm/registers_arm.h"
+#include "arch/arm64/registers_arm64.h"
 #include "arch/mips/registers_mips.h"
 #include "arch/x86/registers_x86.h"
 #include "arch/x86_64/registers_x86_64.h"
@@ -1035,6 +1036,46 @@
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
     method->SetFpSpillMask(fp_spills);
+  } else if (instruction_set == kArm64) {
+      // Callee saved registers
+      uint32_t ref_spills = (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
+                            (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
+                            (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
+                            (1 << art::arm64::X28);
+      // X0 is the method pointer. Not saved.
+      uint32_t arg_spills = (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
+                            (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
+                            (1 << art::arm64::X7);
+      // TODO: This is conservative. Only the kSaveAll frame should include the thread register.
+      // The thread register is not preserved under AAPCS64.
+      // LR is always saved.
+      uint32_t all_spills = 0;  // (1 << art::arm64::LR);
+      uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
+                             (type == kSaveAll ? all_spills : 0) | (1 << art::arm64::FP) |
+                             (1 << art::arm64::X18) | (1 << art::arm64::LR);
+
+      // Save callee-saved floating point registers. Rest are scratch/parameters.
+      uint32_t fp_arg_spills = (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
+                               (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
+                               (1 << art::arm64::D6) | (1 << art::arm64::D7);
+      uint32_t fp_ref_spills = (1 << art::arm64::D8) | (1 << art::arm64::D9) | (1 << art::arm64::D10) |
+                               (1 << art::arm64::D11) | (1 << art::arm64::D12) | (1 << art::arm64::D13) |
+                               (1 << art::arm64::D14) | (1 << art::arm64::D15);
+      uint32_t fp_all_spills = fp_arg_spills |
+                               (1 << art::arm64::D16) | (1 << art::arm64::D17) | (1 << art::arm64::D18) |
+                               (1 << art::arm64::D19) | (1 << art::arm64::D20) | (1 << art::arm64::D21) |
+                               (1 << art::arm64::D22) | (1 << art::arm64::D23) | (1 << art::arm64::D24) |
+                               (1 << art::arm64::D25) | (1 << art::arm64::D26) | (1 << art::arm64::D27) |
+                               (1 << art::arm64::D28) | (1 << art::arm64::D29) | (1 << art::arm64::D30) |
+                               (1 << art::arm64::D31);
+      uint32_t fp_spills = fp_ref_spills | (type == kRefsAndArgs ? fp_arg_spills : 0) |
+                           (type == kSaveAll ? fp_all_spills : 0);
+      size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+                                   __builtin_popcount(fp_spills) /* fprs */ +
+                                   1 /* Method* */) * kPointerSize, kStackAlignment);
+      method->SetFrameSizeInBytes(frame_size);
+      method->SetCoreSpillMask(core_spills);
+      method->SetFpSpillMask(fp_spills);
   } else {
     UNIMPLEMENTED(FATAL) << instruction_set;
   }
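
For the kRefsAndArgs case the masks built above contain 20 core registers (X1-X7, X19-X28, X18, FP, LR) and 16 FP registers (D0-D15), so the popcount-based computation yields (20 + 16 + 1) * 8 = 296 bytes, rounded up to 16-byte stack alignment: 304 bytes, matching kQuickCalleeSaveFrame_RefAndArgs_FrameSize in the quick trampoline. A standalone sketch of that arithmetic (the 8-byte kPointerSize and 16-byte kStackAlignment values are assumed for arm64):

    #include <cstddef>

    constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

    constexpr size_t kPointerSize = 8;      // Assumed arm64 value.
    constexpr size_t kStackAlignment = 16;  // Assumed arm64 value.

    // Popcounts of the kRefsAndArgs masks built above:
    //   core: X1-X7 (7) + X19-X28 (10) + X18 + FP + LR (3) = 20
    //   fp:   D0-D7 (8) + D8-D15 (8)                       = 16
    constexpr size_t kRefsAndArgsGprs = 20;
    constexpr size_t kRefsAndArgsFprs = 16;

    static_assert(RoundUp((kRefsAndArgsGprs + kRefsAndArgsFprs + 1 /* Method* */) * kPointerSize,
                          kStackAlignment) == 304,
                  "matches kQuickCalleeSaveFrame_RefAndArgs_FrameSize");
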
diff --git a/runtime/thread.h b/runtime/thread.h
index fdf976d..6cbd3d9 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -101,6 +101,12 @@
 #else
   static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
 #endif
+  // How many of the reserved bytes are set aside for handling incoming signals.
+  static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
+  // How many of the reserved bytes we may temporarily use during stack overflow checks as an
+  // optimization.
+  static constexpr size_t kStackOverflowReservedUsableBytes =
+      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
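
With the default 16 KB reservation above, carving out the 2 KB signal cushion leaves 14 KB that the stack-overflow checks may use. A trivial sketch of that arithmetic (KB assumed to be 1024, as in ART's globals):

    #include <cstddef>

    constexpr size_t KB = 1024;  // Assumed to match ART's definition.

    constexpr size_t kStackOverflowReservedBytes = 16 * KB;
    constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
    constexpr size_t kStackOverflowReservedUsableBytes =
        kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;

    static_assert(kStackOverflowReservedUsableBytes == 14 * KB,
                  "16 KB reservation minus the 2 KB signal cushion");
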
diff --git a/test/083-compiler-regressions/expected.txt b/test/083-compiler-regressions/expected.txt
index ddd11f3..90d8634 100644
--- a/test/083-compiler-regressions/expected.txt
+++ b/test/083-compiler-regressions/expected.txt
@@ -15,3 +15,4 @@
 longDivTest passes
 longModTest passes
 testIfCcz passes
+ManyFloatArgs passes
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 3b4d586..c5d675f 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -46,6 +46,7 @@
         ZeroTests.longDivTest();
         ZeroTests.longModTest();
         MirOpSelectTests.testIfCcz();
+        ManyFloatArgs();
     }
 
     public static void returnConstantTest() {
@@ -276,6 +277,303 @@
         }
     }
 
+    static double TooManyArgs(
+          long l00,
+          long l01,
+          long l02,
+          long l03,
+          long l04,
+          long l05,
+          long l06,
+          long l07,
+          long l08,
+          long l09,
+          long l10,
+          long l11,
+          long l12,
+          long l13,
+          long l14,
+          long l15,
+          long l16,
+          long l17,
+          long l18,
+          long l19,
+          long l20,
+          long l21,
+          long l22,
+          long l23,
+          long l24,
+          long l25,
+          long l26,
+          long l27,
+          long l28,
+          long l29,
+          long l30,
+          long l31,
+          long l32,
+          long l33,
+          long l34,
+          long l35,
+          long l36,
+          long l37,
+          long l38,
+          long l39,
+          long l40,
+          long l41,
+          long l42,
+          long l43,
+          long l44,
+          long l45,
+          long l46,
+          long l47,
+          long l48,
+          long l49,
+          long ll00,
+          long ll01,
+          long ll02,
+          long ll03,
+          long ll04,
+          long ll05,
+          long ll06,
+          long ll07,
+          long ll08,
+          long ll09,
+          long ll10,
+          long ll11,
+          long ll12,
+          long ll13,
+          long ll14,
+          long ll15,
+          long ll16,
+          long ll17,
+          long ll18,
+          long ll19,
+          double d01,
+          double d02,
+          double d03,
+          double d04,
+          double d05,
+          double d06,
+          double d07,
+          double d08,
+          double d09,
+          double d10,
+          double d11,
+          double d12,
+          double d13,
+          double d14,
+          double d15,
+          double d16,
+          double d17,
+          double d18,
+          double d19,
+          double d20,
+          double d21,
+          double d22,
+          double d23,
+          double d24,
+          double d25,
+          double d26,
+          double d27,
+          double d28,
+          double d29,
+          double d30,
+          double d31,
+          double d32,
+          double d33,
+          double d34,
+          double d35,
+          double d36,
+          double d37,
+          double d38,
+          double d39,
+          double d40,
+          double d41,
+          double d42,
+          double d43,
+          double d44,
+          double d45,
+          double d46,
+          double d47,
+          double d48,
+          double d49) {
+        double res = 0.0;
+        double t01 = d49;
+        double t02 = 02.0 + t01;
+        double t03 = 03.0 + t02;
+        double t04 = 04.0 + t03;
+        double t05 = 05.0 + t04;
+        double t06 = 06.0 + t05;
+        double t07 = 07.0 + t06;
+        double t08 = 08.0 + t07;
+        double t09 = 09.0 + t08;
+        double t10 = 10.0 + t09;
+        double t11 = 11.0 + t10;
+        double t12 = 12.0 + t11;
+        double t13 = 13.0 + t12;
+        double t14 = 14.0 + t13;
+        double t15 = 15.0 + t14;
+        double t16 = 16.0 + t15;
+        double t17 = 17.0 + t16;
+        double t18 = 18.0 + t17;
+        double t19 = 19.0 + t18;
+        double t20 = 20.0 + t19;
+        double t21 = 21.0 + t20;
+        double t22 = 22.0 + t21;
+        double t23 = 23.0 + t22;
+        double t24 = 24.0 + t23;
+        double t25 = 25.0 + t24;
+        double t26 = 26.0 + t25;
+        double t27 = 27.0 + t26;
+        double t28 = 28.0 + t27;
+        double t29 = 29.0 + t28;
+        double t30 = 30.0 + t29;
+        double t31 = 31.0 + t30;
+        double t32 = 32.0 + t31;
+        double t33 = 33.0 + t32;
+        double t34 = 34.0 + t33;
+        double t35 = 35.0 + t34;
+        double t36 = 36.0 + t35;
+        double t37 = 37.0 + t36;
+        double t38 = 38.0 + t37;
+        double t39 = 39.0 + t38;
+        double t40 = 40.0 + t39;
+        double tt02 = 02.0 + t40;
+        double tt03 = 03.0 + tt02;
+        double tt04 = 04.0 + tt03;
+        double tt05 = 05.0 + tt04;
+        double tt06 = 06.0 + tt05;
+        double tt07 = 07.0 + tt06;
+        double tt08 = 08.0 + tt07;
+        double tt09 = 09.0 + tt08;
+        double tt10 = 10.0 + tt09;
+        double tt11 = 11.0 + tt10;
+        double tt12 = 12.0 + tt11;
+        double tt13 = 13.0 + tt12;
+        double tt14 = 14.0 + tt13;
+        double tt15 = 15.0 + tt14;
+        double tt16 = 16.0 + tt15;
+        double tt17 = 17.0 + tt16;
+        double tt18 = 18.0 + tt17;
+        double tt19 = 19.0 + tt18;
+        double tt20 = 20.0 + tt19;
+        double tt21 = 21.0 + tt20;
+        double tt22 = 22.0 + tt21;
+        double tt23 = 23.0 + tt22;
+        double tt24 = 24.0 + tt23;
+        double tt25 = 25.0 + tt24;
+        double tt26 = 26.0 + tt25;
+        double tt27 = 27.0 + tt26;
+        double tt28 = 28.0 + tt27;
+        double tt29 = 29.0 + tt28;
+        double tt30 = 30.0 + tt29;
+        double tt31 = 31.0 + tt30;
+        double tt32 = 32.0 + tt31;
+        double tt33 = 33.0 + tt32;
+        double tt34 = 34.0 + tt33;
+        double tt35 = 35.0 + tt34;
+        double tt36 = 36.0 + tt35;
+        double tt37 = 37.0 + tt36;
+        double tt38 = 38.0 + tt37;
+        double tt39 = 39.0 + tt38;
+        double tt40 = 40.0 + tt39;
+        double ttt02 = 02.0 + tt40;
+        double ttt03 = 03.0 + ttt02;
+        double ttt04 = 04.0 + ttt03;
+        double ttt05 = 05.0 + ttt04;
+        double ttt06 = 06.0 + ttt05;
+        double ttt07 = 07.0 + ttt06;
+        double ttt08 = 08.0 + ttt07;
+        double ttt09 = 09.0 + ttt08;
+        double ttt10 = 10.0 + ttt09;
+        double ttt11 = 11.0 + ttt10;
+        double ttt12 = 12.0 + ttt11;
+        double ttt13 = 13.0 + ttt12;
+        double ttt14 = 14.0 + ttt13;
+        double ttt15 = 15.0 + ttt14;
+        double ttt16 = 16.0 + ttt15;
+        double ttt17 = 17.0 + ttt16;
+        double ttt18 = 18.0 + ttt17;
+        double ttt19 = 19.0 + ttt18;
+        double ttt20 = 20.0 + ttt19;
+        double ttt21 = 21.0 + ttt20;
+        double ttt22 = 22.0 + ttt21;
+        double ttt23 = 23.0 + ttt22;
+        double ttt24 = 24.0 + ttt23;
+        double ttt25 = 25.0 + ttt24;
+        double ttt26 = 26.0 + ttt25;
+        double ttt27 = 27.0 + ttt26;
+        double ttt28 = 28.0 + ttt27;
+        double ttt29 = 29.0 + ttt28;
+        double ttt30 = 30.0 + ttt29;
+        double ttt31 = 31.0 + ttt30;
+        // Repeatedly use some doubles from the middle of the pack to trigger promotion from frame-passed args.
+        for (int i = 0; i < 100; i++) {
+            res += d40;
+            res += d41;
+            res += d42;
+            res += d43;
+            res += d44;
+            res += d45;
+            res += d46;
+            res += d47;
+            res += d48;
+        }
+        for (int i = 0; i < 100; i++) {
+            res += d40;
+            res += d41;
+            res += d42;
+            res += d43;
+            res += d44;
+            res += d45;
+            res += d46;
+            res += d47;
+            res += d48;
+        }
+        for (int i = 0; i < 100; i++) {
+            res += d40;
+            res += d41;
+            res += d42;
+            res += d43;
+            res += d44;
+            res += d45;
+            res += d46;
+            res += d47;
+            res += d48;
+        }
+        for (int i = 0; i < 100; i++) {
+            res += d40;
+            res += d41;
+            res += d42;
+            res += d43;
+            res += d44;
+            res += d45;
+            res += d46;
+            res += d47;
+            res += d48;
+        }
+        return res + tt40;
+    }
+
+    public static void ManyFloatArgs() {
+        double res = TooManyArgs(
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
+                                 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0,
+                                 35.0, 36.0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49);
+        if ((long)res == 160087) {
+            System.out.println("ManyFloatArgs passes");
+        } else {
+            System.out.println("ManyFloatArgs fails, expected 160087, got: " + res);
+        }
+    }
     static long largeFrame() {
         int i0 = 0;
         long l0 = 0;
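
The 160087 expected by ManyFloatArgs decomposes as follows: each of the four loops adds d40 through d48 (40 + 41 + ... + 48 = 396) one hundred times, contributing 4 * 100 * 396 = 158400, and the returned tt40 is d49 (49) plus two chained sums of 2 + 3 + ... + 40 = 819 each, i.e. 1687. A standalone sketch that reproduces the constant (plain C++, separate from the Java test):

    #include <cstdio>

    int main() {
      long loop_sum = 0;
      for (int v = 40; v <= 48; ++v) loop_sum += v;  // d40..d48 sum to 396.
      long res = 4L * 100L * loop_sum;               // Four loops of 100 iterations: 158400.

      long chain = 0;
      for (int v = 2; v <= 40; ++v) chain += v;      // 2 + 3 + ... + 40 = 819.
      long tt40 = 49 + 2 * chain;                    // d49 plus the t and tt chains: 1687.

      printf("%ld\n", res + tt40);                   // Prints 160087.
      return 0;
    }
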
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
index de6ba3f..a844e82 100755
--- a/test/etc/host-run-test-jar
+++ b/test/etc/host-run-test-jar
@@ -17,6 +17,7 @@
 INVOKE_WITH=""
 DEV_MODE="n"
 QUIET="n"
+COMPILER_OPTIONS=""
 
 while true; do
     if [ "x$1" = "x--quiet" ]; then
@@ -65,6 +66,11 @@
     elif [ "x$1" = "x--no-optimize" ]; then
         OPTIMIZE="n"
         shift
+    elif [ "x$1" = "x-Xcompiler-option" ]; then
+        shift
+        option="$1"
+        COMPILER_OPTIONS="${COMPILER_OPTIONS} -Xcompiler-option $option"
+        shift
     elif [ "x$1" = "x--" ]; then
         shift
         break
@@ -121,4 +127,4 @@
 fi
 
 cd $ANDROID_BUILD_TOP
-$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
+$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $JNI_OPTS $COMPILER_OPTIONS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main "$@"
diff --git a/test/run-test b/test/run-test
index ea60f51..cc15e58 100755
--- a/test/run-test
+++ b/test/run-test
@@ -93,6 +93,11 @@
         image="$1"
         run_args="${run_args} --image $image"
         shift
+    elif [ "x$1" = "x-Xcompiler-option" ]; then
+        shift
+        option="$1"
+        run_args="${run_args} -Xcompiler-option $option"
+        shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
@@ -221,21 +226,22 @@
         echo '  Omitting the test name or specifying "-" will use the' \
              "current directory."
         echo "  Runtime Options:"
-        echo "    -O             Run non-debug rather than debug build (off by default)."
-        echo "    --debug        Wait for a debugger to attach."
-        echo "    --gdb          Run under gdb; incompatible with some tests."
-        echo "    --build-only   Build test files only (off by default)."
-        echo "    --interpreter  Enable interpreter only mode (off by default)."
-        echo "    --no-verify    Turn off verification (on by default)."
-        echo "    --no-optimize  Turn off optimization (on by default)."
-        echo "    --no-precise   Turn off precise GC (on by default)."
-        echo "    --zygote       Spawn the process from the Zygote." \
+        echo "    -O                   Run non-debug rather than debug build (off by default)."
+        echo "    -Xcompiler-option    Pass an option to the compiler."
+        echo "    --debug              Wait for a debugger to attach."
+        echo "    --gdb                Run under gdb; incompatible with some tests."
+        echo "    --build-only         Build test files only (off by default)."
+        echo "    --interpreter        Enable interpreter only mode (off by default)."
+        echo "    --no-verify          Turn off verification (on by default)."
+        echo "    --no-optimize        Turn off optimization (on by default)."
+        echo "    --no-precise         Turn off precise GC (on by default)."
+        echo "    --zygote             Spawn the process from the Zygote." \
              "If used, then the"
-        echo "                   other runtime options are ignored."
-        echo "    --host         Use the host-mode virtual machine."
-        echo "    --invoke-with  Pass --invoke-with option to runtime."
-        echo "    --dalvik       Use Dalvik (off by default)."
-        echo "    --jvm          Use a host-local RI virtual machine."
+        echo "                         other runtime options are ignored."
+        echo "    --host               Use the host-mode virtual machine."
+        echo "    --invoke-with        Pass --invoke-with option to runtime."
+        echo "    --dalvik             Use Dalvik (off by default)."
+        echo "    --jvm                Use a host-local RI virtual machine."
         echo "    --output-path [path] Location where to store the build" \
              "files."
     ) 1>&2