Merge "Clean up Add/Remove space."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 2358391..429c523 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -49,6 +49,7 @@
 	runtime/gtest_test.cc \
 	runtime/indenter_test.cc \
 	runtime/indirect_reference_table_test.cc \
+	runtime/instruction_set_test.cc \
 	runtime/intern_table_test.cc \
 	runtime/leb128_test.cc \
 	runtime/mem_map_test.cc \
@@ -105,7 +106,8 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
 	$(COMPILER_GTEST_COMMON_SRC_FILES) \
-	compiler/utils/x86/assembler_x86_test.cc
+	compiler/utils/x86/assembler_x86_test.cc \
+	compiler/utils/x86_64/assembler_x86_64_test.cc
 
 ART_HOST_GTEST_EXECUTABLES :=
 ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) :=
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index c3f9b67..b030bb4 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -555,8 +555,8 @@
   return offset;
 }
 
-static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset) {
-  unsigned int element_size = sizeof(void*);
+static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
+                                            unsigned int element_size) {
   // Align to natural pointer size.
   offset = (offset + (element_size - 1)) & ~(element_size - 1);
   for (; lir != NULL; lir = lir->next) {
@@ -726,9 +726,10 @@
 /* Determine the offset of each literal field */
 int Mir2Lir::AssignLiteralOffset(CodeOffset offset) {
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset);
-  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset);
+  unsigned int ptr_size = GetInstructionSetPointerSize(cu_->instruction_set);
+  offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset, ptr_size);
+  offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset, ptr_size);
+  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset, ptr_size);
   return offset;
 }
 
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 05313a9..93a23a6 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -455,14 +455,14 @@
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
-      if (direct_code != static_cast<unsigned int>(-1)) {
+      if (direct_code != static_cast<uintptr_t>(-1)) {
         if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         }
       } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
-      if (direct_method != static_cast<unsigned int>(-1)) {
+      if (direct_method != static_cast<uintptr_t>(-1)) {
         cg->LoadConstant(cg->TargetReg(kArg0), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
@@ -483,7 +483,7 @@
                       cg->TargetReg(kArg0));
       // Set up direct code if known.
       if (direct_code != 0) {
-        if (direct_code != static_cast<unsigned int>(-1)) {
+        if (direct_code != static_cast<uintptr_t>(-1)) {
           cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
         } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index fe60959..78bdb4d 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -143,7 +143,7 @@
 
 static std::string DumpDirectory() {
   if (kIsTargetBuild) {
-    return GetDalvikCacheOrDie(GetAndroidData());
+    return GetDalvikCacheOrDie("llvm-dump");
   }
   return "/tmp";
 }
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
new file mode 100644
index 0000000..ce1c4de
--- /dev/null
+++ b/compiler/utils/assembler_test.h
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
+#define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
+
+#include "assembler.h"
+
+#include "gtest/gtest.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+template<typename Ass, typename Reg, typename Imm>
+class AssemblerTest : public testing::Test {
+ public:
+  Ass* GetAssembler() {
+    return assembler_.get();
+  }
+
+  typedef std::string (*TestFn)(Ass* assembler);
+
+  void DriverFn(TestFn f, std::string test_name) {
+    Driver(f(assembler_.get()), test_name);
+  }
+
+  // This driver assumes the assembler has already been called.
+  void DriverStr(std::string assembly_string, std::string test_name) {
+    Driver(assembly_string, test_name);
+  }
+
+  std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
+    const std::vector<Reg*> registers = GetRegisters();
+    std::string str;
+    for (auto reg : registers) {
+      (assembler_.get()->*f)(*reg);
+      std::string base = fmt;
+
+      size_t reg_index = base.find("{reg}");
+      if (reg_index != std::string::npos) {
+        std::ostringstream sreg;
+        sreg << *reg;
+        std::string reg_string = sreg.str();
+        base.replace(reg_index, 5, reg_string);
+      }
+
+      if (str.size() > 0) {
+        str += "\n";
+      }
+      str += base;
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
+    const std::vector<Reg*> registers = GetRegisters();
+    std::string str;
+    for (auto reg1 : registers) {
+      for (auto reg2 : registers) {
+        (assembler_.get()->*f)(*reg1, *reg2);
+        std::string base = fmt;
+
+        size_t reg1_index = base.find("{reg1}");
+        if (reg1_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << *reg1;
+          std::string reg_string = sreg.str();
+          base.replace(reg1_index, 6, reg_string);
+        }
+
+        size_t reg2_index = base.find("{reg2}");
+        if (reg2_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << *reg2;
+          std::string reg_string = sreg.str();
+          base.replace(reg2_index, 6, reg_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+    const std::vector<Reg*> registers = GetRegisters();
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+    for (auto reg : registers) {
+      for (int64_t imm : imms) {
+        Imm* new_imm = CreateImmediate(imm);
+        (assembler_.get()->*f)(*reg, *new_imm);
+        delete new_imm;
+        std::string base = fmt;
+
+        size_t reg_index = base.find("{reg}");
+        if (reg_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << *reg;
+          std::string reg_string = sreg.str();
+          base.replace(reg_index, 5, reg_string);
+        }
+
+        size_t imm_index = base.find("{imm}");
+        if (imm_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << imm;
+          std::string imm_string = sreg.str();
+          base.replace(imm_index, 5, imm_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) {
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+    for (int64_t imm : imms) {
+      Imm* new_imm = CreateImmediate(imm);
+      (assembler_.get()->*f)(*new_imm);
+      delete new_imm;
+      std::string base = fmt;
+
+      size_t imm_index = base.find("{imm}");
+      if (imm_index != std::string::npos) {
+        std::ostringstream sreg;
+        sreg << imm;
+        std::string imm_string = sreg.str();
+        base.replace(imm_index, 5, imm_string);
+      }
+
+      if (str.size() > 0) {
+        str += "\n";
+      }
+      str += base;
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    if (!FileExists(GetAssemblerCommand())) {
+      return false;
+    }
+    LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
+
+    if (!FileExists(GetObjdumpCommand())) {
+      return false;
+    }
+    LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
+
+    // Disassembly is optional.
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() != 0) {
+      if (!FileExists(disassembler)) {
+        return false;
+      }
+      LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
+    } else {
+      LOG(INFO) << "No disassembler given.";
+    }
+
+    return true;
+  }
+
+ protected:
+  void SetUp() OVERRIDE {
+    assembler_.reset(new Ass());
+
+    SetUpHelpers();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., register vectors.
+  virtual void SetUpHelpers() {}
+
+  virtual std::vector<Reg*> GetRegisters() = 0;
+
+  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
+  virtual std::string GetArchitectureString() = 0;
+
+  // Get the name of the assembler, e.g., "as" by default.
+  virtual std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches to the assembler command. Default none.
+  virtual std::string GetAssemblerParameters() {
+    return "";
+  }
+
+  // Return the host assembler command for this test.
+  virtual std::string GetAssemblerCommand() {
+    // Already resolved it once? Then reuse the cached command (tool path + parameters).
+    if (resolved_assembler_cmd_.length() != 0) {
+      return resolved_assembler_cmd_;
+    }
+
+    std::string line = FindTool(GetAssemblerCmdName());
+    if (line.length() == 0) {
+      return line;  // FindTool() came back empty.
+    }
+
+    resolved_assembler_cmd_ = line + GetAssemblerParameters();
+
+    return line;  // NOTE(review): first call yields the bare path, without the parameters.
+  }
+
+  // Get the name of the objdump, e.g., "objdump" by default.
+  virtual std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the objdump command. Default is " -h".
+  virtual std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Return the host objdump command for this test.
+  virtual std::string GetObjdumpCommand() {
+    // Already resolved it once? Then reuse the cached command (tool path + parameters).
+    if (resolved_objdump_cmd_.length() != 0) {
+      return resolved_objdump_cmd_;
+    }
+
+    std::string line = FindTool(GetObjdumpCmdName());
+    if (line.length() == 0) {
+      return line;  // FindTool() came back empty.
+    }
+
+    resolved_objdump_cmd_ = line + GetObjdumpParameters();
+
+    return line;  // NOTE(review): first call yields the bare path, without the parameters.
+  }
+
+  // Get the name of the disassembler, e.g., "objdump" by default.
+  virtual std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the disassemble command. As it's a binary, one needs to pass the architecture
+  // and such to objdump, so it's architecture-specific and there is no default.
+  virtual std::string GetDisassembleParameters() = 0;
+
+  // Return the host disassembler command for this test.
+  virtual std::string GetDisassembleCommand() {
+    // Already resolved it once? Then reuse the cached command (tool path + parameters).
+    if (resolved_disassemble_cmd_.length() != 0) {
+      return resolved_disassemble_cmd_;
+    }
+
+    std::string line = FindTool(GetDisassembleCmdName());
+    if (line.length() == 0) {
+      return line;  // FindTool() came back empty.
+    }
+
+    resolved_disassemble_cmd_ = line + GetDisassembleParameters();
+
+    return line;  // NOTE(review): first call yields the bare path, without the parameters.
+  }
+
+  // Create sample immediates. 0, -1 and 0x12 are always produced; each wider positive/negative
+  // pair is added only while imm_bytes reaches its width of 2, 4, 6 and 8 bytes respectively.
+  virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) {
+    static const int64_t kMagnitudes[] = {
+      0x1234, 0x12345678, 0x123456789ABC, 0x123456789ABCDEF0
+    };
+
+    std::vector<int64_t> res;
+    res.push_back(0);
+    res.push_back(-1);
+    res.push_back(0x12);
+
+    // Entry i of kMagnitudes needs 2 * (i + 1) bytes; stop at the first width that does not fit.
+    size_t width = 2;
+    for (int64_t magnitude : kMagnitudes) {
+      if (imm_bytes < width) {
+        break;
+      }
+      res.push_back(magnitude);
+      res.push_back(-magnitude);
+      width += 2;
+    }
+    return res;
+  }
+
+  // Create an immediate from the specific value.
+  virtual Imm* CreateImmediate(int64_t imm_value) = 0;
+
+ private:
+  // Driver() assembles and compares the results. If the results are not equal and we have a
+  // disassembler, disassemble both and check whether they have the same mnemonics (in which case
+  // we just warn).
+  void Driver(std::string assembly_text, std::string test_name) {
+    EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
+
+    NativeAssemblerResult res;
+    Compile(assembly_text, &res, test_name);
+
+    EXPECT_TRUE(res.ok) << res.error_msg;
+    if (!res.ok) {
+      // No way of continuing.
+      return;
+    }
+
+    size_t cs = assembler_->CodeSize();
+    UniquePtr<std::vector<uint8_t> > data(new std::vector<uint8_t>(cs));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+
+    if (*data == *res.code) {
+      Clean(&res);
+    } else {
+      if (DisassembleBinaries(*data, *res.code, test_name)) {
+        if (data->size() > res.code->size()) {
+          LOG(WARNING) << "Assembly code is not identical, but disassembly of machine code is "
+              "equal: this implies sub-optimal encoding! Our code size=" << data->size() <<
+              ", gcc size=" << res.code->size();
+        } else {
+          LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the "
+              "same.";
+        }
+      } else {
+        // This will output the assembly.
+        EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical.";
+      }
+    }
+  }
+
+  // Structure to store intermediates and results.
+  struct NativeAssemblerResult {
+    bool ok;
+    std::string error_msg;
+    std::string base_name;
+    UniquePtr<std::vector<uint8_t>> code;
+    uintptr_t length;
+  };
+
+  // Compile the assembly file from_file to a binary file to_file. Returns true on success.
+  bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) {
+    bool have_assembler = FileExists(GetAssemblerCommand());
+    EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand();
+    if (!have_assembler) {
+      return false;
+    }
+
+    std::vector<std::string> args;
+
+    args.push_back(GetAssemblerCommand());
+    args.push_back("-o");
+    args.push_back(to_file);
+    args.push_back(from_file);
+
+    return Exec(args, error_msg);
+  }
+
+  // Runs objdump -h on the binary file and extracts the first line with .text.
+  // Returns "" on failure.
+  std::string Objdump(std::string file) {
+    bool have_objdump = FileExists(GetObjdumpCommand());
+    EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
+    if (!have_objdump) {
+      return "";
+    }
+
+    std::string error_msg;
+    std::vector<std::string> args;
+
+    args.push_back(GetObjdumpCommand());
+    args.push_back(file);
+    args.push_back(">");
+    args.push_back(file+".dump");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+    }
+
+    std::ifstream dump(file+".dump");
+
+    std::string line;
+    bool found = false;
+    while (std::getline(dump, line)) {
+      if (line.find(".text") != line.npos) {
+        found = true;
+        break;
+      }
+    }
+
+    dump.close();
+
+    if (found) {
+      return line;
+    } else {
+      return "";
+    }
+  }
+
+  // Disassemble both binaries and compare the text.
+  bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as,
+                           std::string test_name) {
+    std::string disassembler = GetDisassembleCommand();
+    if (disassembler.length() == 0) {
+      LOG(WARNING) << "No dissassembler command.";
+      return false;
+    }
+
+    std::string data_name = WriteToFile(data, test_name + ".ass");
+    std::string error_msg;
+    if (!DisassembleBinary(data_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      return false;
+    }
+
+    std::string as_name = WriteToFile(as, test_name + ".gcc");
+    if (!DisassembleBinary(as_name, &error_msg)) {
+      LOG(INFO) << "Error disassembling: " << error_msg;
+      std::remove(data_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove(as_name.c_str());
+      return false;
+    }
+
+    bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
+
+    if (result) {
+      std::remove(data_name.c_str());
+      std::remove(as_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove((as_name + ".dis").c_str());
+    }
+
+    return result;
+  }
+
+  bool DisassembleBinary(std::string file, std::string* error_msg) {
+    std::vector<std::string> args;
+
+    args.push_back(GetDisassembleCommand());
+    args.push_back(file);
+    args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
+    args.push_back(">");
+    args.push_back(file+".dis");
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
+
+    return Exec(args, error_msg);
+  }
+
+  // Dumps buffer into "<tmpnam>---<test_name>.o" and returns that path.
+  std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) {
+    const std::string object_path = GetTmpnam() + std::string("---") + test_name + ".o";
+    std::ofstream object_out(object_path);
+    object_out.write(reinterpret_cast<char*>(buffer.data()), buffer.size());
+    object_out.close();
+    return object_path;
+  }
+
+  // Returns true iff both files could be opened and hold byte-identical contents.
+  bool CompareFiles(std::string f1, std::string f2) {
+    std::ifstream f1_in(f1);
+    std::ifstream f2_in(f2);
+    std::istreambuf_iterator<char> it1(f1_in);
+    std::istreambuf_iterator<char> it2(f2_in);
+    const std::istreambuf_iterator<char> end;
+    // Advance in lockstep up to the first difference or the end of either file. The three-iterator
+    // std::equal would overrun a shorter second file and accept a longer one.
+    for (; it1 != end && it2 != end && *it1 == *it2; ++it1, ++it2) {}
+    // Equal only when both streams were exhausted together.
+    return f1_in.is_open() && f2_in.is_open() && it1 == end && it2 == end;
+  }
+
+  // Compile the given assembly code and extract the binary, if possible. Put result into res.
+  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+    res->ok = false;
+    res->code.reset(nullptr);
+
+    res->base_name = GetTmpnam() + std::string("---") + test_name;
+
+    // TODO: Lots of error checking.
+
+    std::ofstream s_out(res->base_name + ".S");
+    s_out << assembly_code;
+    s_out.close();
+
+    if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(),
+                  &res->error_msg)) {
+      res->error_msg = "Could not compile.";
+      return false;
+    }
+
+    std::string odump = Objdump(res->base_name + ".o");
+    if (odump.length() == 0) {
+      res->error_msg = "Objdump failed.";
+      return false;
+    }
+
+    std::istringstream iss(odump);
+    std::istream_iterator<std::string> start(iss);
+    std::istream_iterator<std::string> end;
+    std::vector<std::string> tokens(start, end);
+
+    if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) {
+      res->error_msg = "Objdump output not recognized: too few tokens.";
+      return false;
+    }
+
+    if (tokens[1] != ".text") {
+      res->error_msg = "Objdump output not recognized: .text not second token.";
+      return false;
+    }
+
+    std::string lengthToken = "0x" + tokens[2];
+    std::istringstream(lengthToken) >> std::hex >> res->length;
+
+    std::string offsetToken = "0x" + tokens[5];
+    uintptr_t offset;
+    std::istringstream(offsetToken) >> std::hex >> offset;
+
+    std::ifstream obj(res->base_name + ".o");
+    obj.seekg(offset);
+    res->code.reset(new std::vector<uint8_t>(res->length));
+    obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length);
+    obj.close();
+
+    res->ok = true;
+    return true;
+  }
+
+  // Remove temporary files.
+  void Clean(const NativeAssemblerResult* res) {
+    std::remove((res->base_name + ".S").c_str());
+    std::remove((res->base_name + ".o").c_str());
+    std::remove((res->base_name + ".o.dump").c_str());
+  }
+
+  // Check whether the file named by a command string exists. The string may carry parameters
+  // after the program path: everything from the first space that follows the last slash is
+  // stripped before checking, so directories containing spaces still work.
+  static bool FileExists(std::string file) {
+    if (file.length() == 0) {
+      return false;
+    }
+
+    // Need to strip any options.
+    size_t last_slash = file.find_last_of('/');
+    if (last_slash == std::string::npos) {
+      // No slash, start looking at the start.
+      last_slash = 0;
+    }
+    size_t space_index = file.find(' ', last_slash);
+
+    if (space_index == std::string::npos) {
+      std::ifstream infile(file.c_str());
+      return infile.good();
+    } else {
+      // substr takes a length: space_index characters is exactly the path before the space.
+      std::string copy = file.substr(0, space_index);
+      struct stat buf;
+      return stat(copy.c_str(), &buf) == 0;
+    }
+  }
+
+  static std::string GetGCCRootPath() {
+    return "prebuilts/gcc/linux-x86";
+  }
+
+  // Root path with trailing '/': $ANDROID_BUILD_TOP if set, else the cwd ("" if getcwd fails).
+  static std::string GetRootPath() {
+    if (const char* env_root = getenv("ANDROID_BUILD_TOP")) {
+      return std::string(env_root) + "/";
+    }
+    char cwd[1024];
+    if (getcwd(cwd, sizeof(cwd)) == nullptr) {
+      return std::string("");
+    }
+    return std::string(cwd) + "/";
+  }
+
+  std::string FindTool(std::string tool_name) {
+    // Find the current tool. Wild-card pattern is "arch-string*tool-name".
+    std::string gcc_path = GetRootPath() + GetGCCRootPath();
+    std::vector<std::string> args;
+    args.push_back("find");
+    args.push_back(gcc_path);
+    args.push_back("-name");
+    args.push_back(GetArchitectureString() + "*" + tool_name);
+    args.push_back("|");
+    args.push_back("sort");
+    args.push_back("|");
+    args.push_back("tail");
+    args.push_back("-n");
+    args.push_back("1");
+    std::string tmp_file = GetTmpnam();
+    args.push_back(">");
+    args.push_back(tmp_file);
+    std::string sh_args = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(sh_args);
+
+    std::string error_msg;
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+      return "";
+    }
+
+    std::ifstream in(tmp_file.c_str());
+    std::string line;
+    if (!std::getline(in, line)) {
+      in.close();
+      std::remove(tmp_file.c_str());
+      return "";
+    }
+    in.close();
+    std::remove(tmp_file.c_str());
+    return line;
+  }
+
+  // Returns the tmpnam() result cached in tmpnam_, generating it on first use.
+  std::string GetTmpnam() {
+    if (tmpnam_.empty()) {
+      tmpnam_.assign(tmpnam(nullptr));
+    }
+    return tmpnam_;
+  }
+
+  UniquePtr<Ass> assembler_;
+
+  std::string resolved_assembler_cmd_;
+  std::string resolved_objdump_cmd_;
+  std::string resolved_disassemble_cmd_;
+  std::string tmpnam_;
+
+  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 9507e12..8eaeae1 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -77,6 +77,7 @@
 
 void X86_64Assembler::pushq(const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
   if (imm.is_int8()) {
     EmitUint8(0x6A);
     EmitUint8(imm.value() & 0xFF);
@@ -104,9 +105,17 @@
 
 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst);
-  EmitUint8(0xB8 + dst.LowBits());
-  EmitImmediate(imm);
+  if (imm.is_int32()) {
+    // 32 bit. Note: sign-extends.
+    EmitRex64(dst);
+    EmitUint8(0xC7);
+    EmitRegisterOperand(0, dst.LowBits());
+    EmitInt32(static_cast<int32_t>(imm.value()));
+  } else {
+    EmitRex64(dst);
+    EmitUint8(0xB8 + dst.LowBits());
+    EmitInt64(imm.value());
+  }
 }
 
 
@@ -120,7 +129,8 @@
 
 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst, src);
+  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
+  EmitRex64(src, dst);
   EmitUint8(0x89);
   EmitRegisterOperand(src.LowBits(), dst.LowBits());
 }
@@ -843,6 +853,14 @@
 }
 
 
+void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg0, reg1);
+  EmitUint8(0x3B);
+  EmitOperand(reg0.LowBits(), Operand(reg1));
+}
+
+
 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -945,6 +963,14 @@
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
+
+void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
+  EmitRex64(dst);
+  EmitComplex(6, Operand(dst), imm);
+}
+
 #if 0
 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
   // REX.WRXB
@@ -1007,11 +1033,21 @@
 
 void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // addq only supports 32b immediate.
   EmitRex64(reg);
   EmitComplex(0, Operand(reg), imm);
 }
 
 
+void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
+  EmitRex64(src, dst);
+  EmitUint8(0x01);
+  EmitRegisterOperand(src.LowBits(), dst.LowBits());
+}
+
+
 void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
@@ -1042,6 +1078,22 @@
 }
 
 
+void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // subq only supports 32b immediate.
+  EmitRex64(reg);
+  EmitComplex(5, Operand(reg), imm);
+}
+
+
+void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x2B);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+
 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
@@ -1299,13 +1351,15 @@
   EmitUint8(0xF0);
 }
 
+
 X86_64Assembler* X86_64Assembler::gs() {
-  // TODO: fs is a prefix and not an instruction
+  // TODO: gs is a prefix and not an instruction
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x65);
   return this;
 }
 
+
 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
   int value = imm.value();
   if (value != 0) {
@@ -1318,6 +1372,18 @@
 }
 
 
+void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
+  if (dst.NeedsRex() || dst.AsRegister() > 3) {
+    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
+  }
+  EmitUint8(0x0F);
+  EmitUint8(0x90 + condition);
+  EmitUint8(0xC0 + dst.LowBits());
+}
+
+
 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
   int64_t constant = bit_cast<int64_t, double>(value);
@@ -1398,7 +1464,11 @@
 
 
 void X86_64Assembler::EmitImmediate(const Immediate& imm) {
-  EmitInt32(imm.value());
+  if (imm.is_int32()) {
+    EmitInt32(static_cast<int32_t>(imm.value()));
+  } else {
+    EmitInt64(imm.value());
+  }
 }
 
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 4738dcb..87fb359 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -31,16 +31,21 @@
 
 class Immediate {
  public:
-  explicit Immediate(int32_t value) : value_(value) {}
+  explicit Immediate(int64_t value) : value_(value) {}
 
-  int32_t value() const { return value_; }
+  int64_t value() const { return value_; }
 
   bool is_int8() const { return IsInt(8, value_); }
   bool is_uint8() const { return IsUint(8, value_); }
   bool is_uint16() const { return IsUint(16, value_); }
+  bool is_int32() const {
+    // This does not work on 32b machines: return IsInt(32, value_);
+    int64_t limit = static_cast<int64_t>(1) << 31;
+    return (-limit <= value_) && (value_ < limit);
+  }
 
  private:
-  const int32_t value_;
+  const int64_t value_;
 
   DISALLOW_COPY_AND_ASSIGN(Immediate);
 };
@@ -368,10 +373,11 @@
   void cmpl(CpuRegister reg, const Immediate& imm);
   void cmpl(CpuRegister reg0, CpuRegister reg1);
   void cmpl(CpuRegister reg, const Address& address);
-
   void cmpl(const Address& address, CpuRegister reg);
   void cmpl(const Address& address, const Immediate& imm);
 
+  void cmpq(CpuRegister reg0, CpuRegister reg1);
+
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
@@ -382,19 +388,24 @@
   void orl(CpuRegister dst, CpuRegister src);
 
   void xorl(CpuRegister dst, CpuRegister src);
+  void xorq(CpuRegister dst, const Immediate& imm);
 
   void addl(CpuRegister dst, CpuRegister src);
-  void addq(CpuRegister reg, const Immediate& imm);
   void addl(CpuRegister reg, const Immediate& imm);
   void addl(CpuRegister reg, const Address& address);
-
   void addl(const Address& address, CpuRegister reg);
   void addl(const Address& address, const Immediate& imm);
 
+  void addq(CpuRegister reg, const Immediate& imm);
+  void addq(CpuRegister dst, CpuRegister src);
+
   void subl(CpuRegister dst, CpuRegister src);
   void subl(CpuRegister reg, const Immediate& imm);
   void subl(CpuRegister reg, const Address& address);
 
+  void subq(CpuRegister reg, const Immediate& imm);
+  void subq(CpuRegister dst, CpuRegister src);
+
   void cdq();
 
   void idivl(CpuRegister reg);
@@ -442,6 +453,8 @@
 
   X86_64Assembler* gs();
 
+  void setcc(Condition condition, CpuRegister dst);
+
   //
   // Macros for High-level operations.
   //
@@ -586,6 +599,7 @@
  private:
   void EmitUint8(uint8_t value);
   void EmitInt32(int32_t value);
+  void EmitInt64(int64_t value);
   void EmitRegisterOperand(uint8_t rm, uint8_t reg);
   void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
   void EmitFixup(AssemblerFixup* fixup);
@@ -634,6 +648,10 @@
   buffer_.Emit<int32_t>(value);
 }
 
+inline void X86_64Assembler::EmitInt64(int64_t value) {
+  buffer_.Emit<int64_t>(value);
+}
+
 inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
   CHECK_GE(rm, 0);
   CHECK_LT(rm, 8);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index df0d14e..7201d04 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -16,7 +16,7 @@
 
 #include "assembler_x86_64.h"
 
-#include "gtest/gtest.h"
+#include "utils/assembler_test.h"
 
 namespace art {
 
@@ -29,4 +29,175 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
+class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64::CpuRegister,
+                                                 x86_64::Immediate> {  // x86-64 test fixture.
+ protected:
+  // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
+  std::string GetArchitectureString() OVERRIDE {
+    return "x86_64";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {  // NOTE(review): objdump-style flags?
+    return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn";
+  }
+
+  void SetUpHelpers() OVERRIDE {  // Fills registers_ with the 16 registers listed below.
+    if (registers_.size() == 0) {  // Populate only once.
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RAX));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RBX));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RCX));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RDX));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RBP));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RSP));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RSI));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::RDI));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R8));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R9));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R10));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R11));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R12));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R13));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R14));
+      registers_.push_back(new x86_64::CpuRegister(x86_64::R15));
+    }
+  }
+
+  std::vector<x86_64::CpuRegister*> GetRegisters() OVERRIDE {
+    return registers_;  // Returned by value: the caller gets a copy of the pointer list.
+  }
+
+  x86_64::Immediate* CreateImmediate(int64_t imm_value) OVERRIDE {
+    return new x86_64::Immediate(imm_value);  // Heap-allocated; not freed in this class.
+  }
+
+ private:
+  std::vector<x86_64::CpuRegister*> registers_;  // new'ed in SetUpHelpers(), never freed here.
+};
+
+
+TEST_F(AssemblerX86_64Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+
+TEST_F(AssemblerX86_64Test, PushqRegs) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::pushq, "pushq %{reg}"), "pushq");
+}
+
+TEST_F(AssemblerX86_64Test, PushqImm) {
+  DriverStr(RepeatI(&x86_64::X86_64Assembler::pushq, 4U, "pushq ${imm}"), "pushqi");
+}
+
+
+TEST_F(AssemblerX86_64Test, MovqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::movq, "movq %{reg2}, %{reg1}"), "movq");
+}
+
+TEST_F(AssemblerX86_64Test, MovqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::movq, 8U, "movq ${imm}, %{reg}"), "movqi");
+}
+
+
+TEST_F(AssemblerX86_64Test, AddqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::addq, "addq %{reg2}, %{reg1}"), "addq");
+}
+
+TEST_F(AssemblerX86_64Test, AddqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::addq, 4U, "addq ${imm}, %{reg}"), "addqi");
+}
+
+
+TEST_F(AssemblerX86_64Test, SubqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::subq, "subq %{reg2}, %{reg1}"), "subq");
+}
+
+TEST_F(AssemblerX86_64Test, SubqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::subq, 4U, "subq ${imm}, %{reg}"), "subqi");
+}
+
+
+TEST_F(AssemblerX86_64Test, CmpqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
+}
+
+
+TEST_F(AssemblerX86_64Test, XorqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
+}
+
+
+// Emits setcc for each of the 15 conditions listed below on each of the 16 registers and returns
+// the matching assembly source text, one "set<cc> %<byte reg>" line per emitted instruction.
+std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) {
+  /* From Condition; the numeric value doubles as the index into suffixes[] below.
+  kOverflow     =  0,
+  kNoOverflow   =  1,
+  kBelow        =  2,
+  kAboveEqual   =  3,
+  kEqual        =  4,
+  kNotEqual     =  5,
+  kBelowEqual   =  6,
+  kAbove        =  7,
+  kSign         =  8,
+  kNotSign      =  9,
+  kParityEven   = 10,
+  kParityOdd    = 11,
+  kLess         = 12,
+  kGreaterEqual = 13,
+  kLessEqual    = 14,
+  */
+  std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po",
+                               "l", "ge", "le" };
+
+  // Hold the registers by value: the list is local to this function, so nothing needs freeing.
+  std::vector<x86_64::CpuRegister> registers;
+  registers.push_back(x86_64::CpuRegister(x86_64::RAX));
+  registers.push_back(x86_64::CpuRegister(x86_64::RBX));
+  registers.push_back(x86_64::CpuRegister(x86_64::RCX));
+  registers.push_back(x86_64::CpuRegister(x86_64::RDX));
+  registers.push_back(x86_64::CpuRegister(x86_64::RBP));
+  registers.push_back(x86_64::CpuRegister(x86_64::RSP));
+  registers.push_back(x86_64::CpuRegister(x86_64::RSI));
+  registers.push_back(x86_64::CpuRegister(x86_64::RDI));
+  registers.push_back(x86_64::CpuRegister(x86_64::R8));
+  registers.push_back(x86_64::CpuRegister(x86_64::R9));
+  registers.push_back(x86_64::CpuRegister(x86_64::R10));
+  registers.push_back(x86_64::CpuRegister(x86_64::R11));
+  registers.push_back(x86_64::CpuRegister(x86_64::R12));
+  registers.push_back(x86_64::CpuRegister(x86_64::R13));
+  registers.push_back(x86_64::CpuRegister(x86_64::R14));
+  registers.push_back(x86_64::CpuRegister(x86_64::R15));
+
+  std::string byte_regs[16];  // Byte-register name for each register, indexed by AsRegister().
+  byte_regs[x86_64::RAX] = "al";
+  byte_regs[x86_64::RBX] = "bl";
+  byte_regs[x86_64::RCX] = "cl";
+  byte_regs[x86_64::RDX] = "dl";
+  byte_regs[x86_64::RBP] = "bpl";
+  byte_regs[x86_64::RSP] = "spl";
+  byte_regs[x86_64::RSI] = "sil";
+  byte_regs[x86_64::RDI] = "dil";
+  byte_regs[x86_64::R8] = "r8b";
+  byte_regs[x86_64::R9] = "r9b";
+  byte_regs[x86_64::R10] = "r10b";
+  byte_regs[x86_64::R11] = "r11b";
+  byte_regs[x86_64::R12] = "r12b";
+  byte_regs[x86_64::R13] = "r13b";
+  byte_regs[x86_64::R14] = "r14b";
+  byte_regs[x86_64::R15] = "r15b";
+
+  std::ostringstream str;
+  for (auto& reg : registers) {
+    for (size_t i = 0; i < 15; ++i) {
+      assembler->setcc(static_cast<x86_64::Condition>(i), reg);
+      str << "set" << suffixes[i] << " %" << byte_regs[reg.AsRegister()] << "\n";
+    }
+  }
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, SetCC) {
+  DriverFn(&setcc_test_fn, "setcc");
+}
+
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index d3e56da..823b818 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1101,6 +1101,9 @@
   DexFileToMethodInlinerMap method_inliner_map;
   CompilerCallbacksImpl callbacks(&verification_results, &method_inliner_map);
   runtime_options.push_back(std::make_pair("compilercallbacks", &callbacks));
+  runtime_options.push_back(
+      std::make_pair("imageinstructionset",
+                     reinterpret_cast<const void*>(GetInstructionSetString(instruction_set))));
 
   Dex2Oat* p_dex2oat;
   if (!Dex2Oat::Create(&p_dex2oat,
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index d6d2058..4e4a512 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -1360,7 +1360,7 @@
               } else if (Rn.r == 15) {
                 intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                 lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-                args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+                args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
               }
             } else if (op3 == 3) {
               // LDRSH.W Rt, [Rn, #imm12]      - 111 11 00 11 011 nnnn tttt iiiiiiiiiiii
@@ -1373,7 +1373,7 @@
               } else if (Rn.r == 15) {
                 intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                 lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-                args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+                args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
               }
             }
           }
@@ -1430,7 +1430,7 @@
             } else if (Rn.r == 15) {
               intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
               lit_adr = RoundDown(lit_adr, 4) + 4 + imm12;
-              args << "  ; " << reinterpret_cast<void*>(*reinterpret_cast<int32_t*>(lit_adr));
+              args << StringPrintf("  ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr));
             }
           } else if (op4 == 0) {
             // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 47c6d28..c285088 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -92,6 +92,9 @@
 #ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #endif
+#ifdef HEAP_REFERENCE_SIZE
+#undef HEAP_REFERENCE_SIZE
+#endif
 }
 
 
@@ -147,6 +150,9 @@
 #ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #endif
+#ifdef HEAP_REFERENCE_SIZE
+#undef HEAP_REFERENCE_SIZE
+#endif
 }
 
 
@@ -202,6 +208,9 @@
 #ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #endif
+#ifdef HEAP_REFERENCE_SIZE
+#undef HEAP_REFERENCE_SIZE
+#endif
 }
 
 
@@ -257,6 +266,9 @@
 #ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #endif
+#ifdef HEAP_REFERENCE_SIZE
+#undef HEAP_REFERENCE_SIZE
+#endif
 }
 
 
@@ -312,10 +324,16 @@
 #ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
 #endif
+#ifdef HEAP_REFERENCE_SIZE
+#undef HEAP_REFERENCE_SIZE
+#endif
 }
 
 
-TEST_F(ArchTest, ThreadOffsets) {
+// The following tests are all for the running architecture, so its asm_support header is
+// included once below and its macros are not undefined again after each test.
+
+
 #if defined(__arm__)
 #include "arch/arm/asm_support_arm.h"
 #undef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
@@ -342,6 +360,8 @@
 #endif
 #endif
 
+
+TEST_F(ArchTest, ThreadOffsets) {
   // Ugly hack, change when possible.
 #ifdef __LP64__
 #define POINTER_SIZE 8
@@ -376,70 +396,10 @@
 #else
   LOG(INFO) << "No Thread ID Offset found.";
 #endif
-
-
-  // Undefine everything for the next test
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#endif
 }
 
 
 TEST_F(ArchTest, CalleeSaveMethodOffsets) {
-#if defined(__arm__)
-#include "arch/arm/asm_support_arm.h"
-#undef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
-#elif defined(__aarch64__)
-#include "arch/arm64/asm_support_arm64.h"
-#undef ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
-#elif defined(__mips__)
-#include "arch/mips/asm_support_mips.h"
-#undef ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
-#elif defined(__i386__)
-#include "arch/x86/asm_support_x86.h"
-#undef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
-#elif defined(__x86_64__)
-#include "arch/x86_64/asm_support_x86_64.h"
-#undef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
-#else
-  // This happens for the host test.
-#ifdef __LP64__
-#include "arch/x86_64/asm_support_x86_64.h"
-#undef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
-#else
-#include "arch/x86/asm_support_x86.h"
-#undef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
-#endif
-#endif
-
-
 #if defined(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET)
   EXPECT_EQ(Runtime::GetCalleeSaveMethodOffset(Runtime::kSaveAll),
             static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET));
@@ -460,38 +420,15 @@
 #else
   LOG(INFO) << "No Runtime Refs-and-Args Offset found.";
 #endif
+}
 
 
-  // Undefine everything for the next test
-#ifdef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#undef RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET
-#endif
-#ifdef THREAD_SELF_OFFSET
-#undef THREAD_SELF_OFFSET
-#endif
-#ifdef THREAD_CARD_TABLE_OFFSET
-#undef THREAD_CARD_TABLE_OFFSET
-#endif
-#ifdef THREAD_EXCEPTION_OFFSET
-#undef THREAD_EXCEPTION_OFFSET
-#endif
-#ifdef THREAD_ID_OFFSET
-#undef THREAD_ID_OFFSET
-#endif
-#ifdef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#undef FRAME_SIZE_SAVE_ALL_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
-#endif
-#ifdef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
-#undef FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE
+TEST_F(ArchTest, HeapReferenceSize) {
+#if defined(HEAP_REFERENCE_SIZE)
+  EXPECT_EQ(sizeof(mirror::HeapReference<mirror::Object>),
+            static_cast<size_t>(HEAP_REFERENCE_SIZE));
+#else
+  LOG(INFO) << "No expected HeapReference Size found.";
 #endif
 }
 
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index a73d522..9e1db96 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -32,4 +32,7 @@
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 48
 
+// Expected size of a heap reference
+#define HEAP_REFERENCE_SIZE 4
+
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index b18e415..e55885f 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -41,4 +41,7 @@
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 176
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 304
 
+// Expected size of a heap reference
+#define HEAP_REFERENCE_SIZE 4
+
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2083051..85a2c9e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -970,7 +970,21 @@
     br  xLR
 END art_quick_do_long_jump
 
-UNIMPLEMENTED art_quick_handle_fill_data
+    /*
+     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
+     * failure.
+     */
+    .extern artHandleFillArrayDataFromCode
+// TODO: xSELF -> x19.
+ENTRY art_quick_handle_fill_data           // x0 and x1 are not touched before the call below.
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // Save callee saves in case exception allocation triggers GC.
+    mov    x2, xSELF                       // Pass Thread::Current.
+    mov    x3, sp                          // Pass SP.
+    bl     artHandleFillArrayDataFromCode  // (Array*, const DexFile::Payload*, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_handle_fill_data
 
 UNIMPLEMENTED art_quick_lock_object
 UNIMPLEMENTED art_quick_unlock_object
@@ -1026,9 +1040,116 @@
     brk 0                             // We should not return here...
 END art_quick_check_cast
 
-UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
-UNIMPLEMENTED art_quick_aput_obj_with_bound_check
-UNIMPLEMENTED art_quick_aput_obj
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * x0 = array, x1 = index, x2 = value
+     *
+     * Currently all values should fit into w0/w1/w2, and w1 always will as indices are 32b. We
+     * assume, though, that the upper 32b are zeroed out. At least for x1/w1 we can do better by
+     * using index-zero-extension in load/stores.
+     *
+     * Temporaries: x3, x4
+     * TODO: x4 OK? ip seems wrong here.
+     */
+ENTRY art_quick_aput_obj_with_null_and_bound_check
+    tst x0, x0                                    // Is the array reference null?
+    bne art_quick_aput_obj_with_bound_check       // Non-null: go on to the bounds check.
+    b art_quick_throw_null_pointer_exception      // Null: tail-branch to the throw stub.
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+    ldr w3, [x0, #ARRAY_LENGTH_OFFSET]            // w3 = array length (32b load, zero-extends to x3)
+    cmp w3, w1
+    bhi art_quick_aput_obj                        // length > index (unsigned): in bounds
+    mov x0, x1                                    // Out of bounds: x0 = index
+    mov x1, x3                                    //                x1 = length
+    b art_quick_throw_array_bounds
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+    cbz x2, .Ldo_aput_null                               // Null value: store without a type check
+    ldr w3, [x0, #CLASS_OFFSET]                          // w3 = array's class, heap reference = 32b
+                                                         // This also zero-extends to x3
+    ldr w4, [x2, #CLASS_OFFSET]                          // w4 = value's class, heap reference = 32b
+                                                         // This also zero-extends to x4
+    ldr w3, [x3, #CLASS_COMPONENT_TYPE_OFFSET]           // w3 = component type (32b heap reference)
+                                                         // This also zero-extends to x3
+    cmp w3, w4  // value's type == array's component type - trivial assignability
+    bne .Lcheck_assignability
+.Ldo_aput:
+    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET                // x3 = array + data offset
+                                                         // "Compress" = do nothing
+    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b, so index * 4
+    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]           // x3 = value at the thread's card table slot
+    lsr x0, x0, #7                                       // x0 = array address >> 7
+    strb w3, [x3, x0]                                    // Store the low byte of x3 at x3 + x0
+    ret
+.Ldo_aput_null:
+    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET                // x3 = array + data offset
+                                                         // "Compress" = do nothing
+    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
+    ret                                                  // No card table store on this path
+.Lcheck_assignability:
+    // Store arguments and link register
+    sub sp, sp, #48                     // Stack needs to be 16b aligned on calls
+    .cfi_adjust_cfa_offset 48
+    stp x0, x1, [sp]
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp x2, xSELF, [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x18, 24             // Slot written with xSELF by the stp above
+    str xLR, [sp, #32]
+    .cfi_rel_offset x30, 32             // Slot written with xLR by the str above
+
+    // Call runtime code
+    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
+    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
+    bl artIsAssignableFromCode
+
+    // A zero result takes the array-store-exception path
+    cbz x0, .Lthrow_array_store_exception
+
+    // Restore
+    ldp x0, x1, [sp]
+    .cfi_restore x0
+    .cfi_restore x1
+    ldp x2, xSELF, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x18
+    ldr xLR, [sp, #32]
+    .cfi_restore x30
+    add sp, sp, #48
+    .cfi_adjust_cfa_offset -48
+
+    add x3, x0, #OBJECT_ARRAY_DATA_OFFSET                 // Same store sequence as .Ldo_aput
+                                                          // "Compress" = do nothing
+    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
+    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
+    lsr x0, x0, #7
+    strb w3, [x3, x0]
+    ret
+.Lthrow_array_store_exception:
+    ldp x0, x1, [sp]                    // Reload the saved arguments before throwing
+    .cfi_restore x0
+    .cfi_restore x1
+    ldp x2, xSELF, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x18
+    ldr xLR, [sp, #32]
+    .cfi_restore x30
+    add sp, sp, #48
+    .cfi_adjust_cfa_offset -48
+
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    mov x1, x2                    // Pass value.
+    mov x2, xSELF                 // Pass Thread::Current.
+    mov x3, sp                    // Pass SP.
+    b artThrowArrayStoreException // (Object*, Object*, Thread*, SP).
+    brk 0                         // Unreached.
+END art_quick_aput_obj
+
 UNIMPLEMENTED art_quick_initialize_static_storage
 UNIMPLEMENTED art_quick_initialize_type
 UNIMPLEMENTED art_quick_initialize_type_and_verify_access
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 2b4a745..4db5ea6 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -30,4 +30,7 @@
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 64
 
+// Expected size of a heap reference
+#define HEAP_REFERENCE_SIZE 4
+
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 20dc53b..7027b32 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -122,13 +122,13 @@
         "pushq $0\n\t"                 // 16B alignment padding
         ".cfi_adjust_cfa_offset 16\n\t"
         "call *%%rax\n\t"              // Call the stub
-        "addq $16, %%rsp"              // Pop nullptr and padding
-        // ".cfi_adjust_cfa_offset -16\n\t"
+        "addq $16, %%rsp\n\t"              // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
         : "=a" (result)
           // Use the result from rax
         : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
           // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -273,7 +273,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
 extern "C" void art_quick_aput_obj_with_null_and_bound_check(void);
 // Do not check non-checked ones, we'd need handlers and stuff...
 #endif
@@ -281,7 +281,7 @@
 TEST_F(StubTest, APutObj) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
   Thread* self = Thread::Current();
   // Create an object
   ScopedObjectAccess soa(self);
@@ -296,7 +296,7 @@
 
   // Build a string array of size 1
   SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
-            mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.get(), 1));
+            mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.get(), 10));
 
   // Build a string -> should be assignable
   SirtRef<mirror::Object> str_obj(soa.Self(),
@@ -308,7 +308,7 @@
   // Play with it...
 
   // 1) Success cases
-  // 1.1) Assign str_obj to array[0]
+  // 1.1) Assign str_obj to array[0..3]
 
   EXPECT_FALSE(self->IsExceptionPending());
 
@@ -316,13 +316,51 @@
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(str_obj.get(), array->Get(0));
 
-  // 1.2) Assign null to array[0]
+  Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(str_obj.get()),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(str_obj.get(), array->Get(1));
+
+  Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(str_obj.get()),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(str_obj.get(), array->Get(2));
+
+  Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(str_obj.get()),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(str_obj.get(), array->Get(3));
+
+  // 1.2) Assign null to array[0..3]
 
   Invoke3(reinterpret_cast<size_t>(array.get()), 0U, reinterpret_cast<size_t>(nullptr),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(nullptr, array->Get(0));
+
+  Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(nullptr),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(nullptr, array->Get(1));
+
+  Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(nullptr),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(nullptr, array->Get(2));
+
+  Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(nullptr),
+          reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_EQ(nullptr, array->Get(3));
 
   // TODO: Check _which_ exception is thrown. Then make 3) check that it's the right check order.
 
@@ -347,7 +385,7 @@
 
   // 2.3) Index > 0
 
-  Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(str_obj.get()),
+  Invoke3(reinterpret_cast<size_t>(array.get()), 10U, reinterpret_cast<size_t>(str_obj.get()),
           reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
 
   EXPECT_TRUE(self->IsExceptionPending());
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 642d9a3..d7c88ba 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -86,7 +86,7 @@
     // Symbols.
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
-    #define PLT_SYMBOL(name) name
+    #define PLT_SYMBOL(name) name  // ## @PLT  // TODO: Disabled for old clang 3.3
 #else
     // Mac OS' symbols have an _ prefix.
     #define SYMBOL(name) _ ## name
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index fd5ed5a..531ed77 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -32,4 +32,7 @@
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 32
 
+// Expected size of a heap reference
+#define HEAP_REFERENCE_SIZE 4
+
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index ad65033..34c8b82 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -103,6 +103,8 @@
     .globl VAR(c_name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(c_name, 0):
+    // Have a local entrypoint that's not globl
+VAR(c_name, 0)_local:
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 109533b..29633fb 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -39,4 +39,7 @@
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176
 
+// Expected size of a heap reference
+#define HEAP_REFERENCE_SIZE 4
+
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index bc9907b..4fefd20 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -174,7 +174,6 @@
 
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %rsp, %rsi                    // pass SP
@@ -197,7 +196,6 @@
 
 MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     // Outgoing argument set up
     movq %rsp, %rcx                    // pass SP
@@ -696,14 +694,112 @@
     int3                              // unreached
 END_FUNCTION art_quick_check_cast
 
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
-     * eax = array, ecx = index, edx = value
+     *
+     * Currently all the parameters should fit into the 32b portions of the registers. Index always
+     * will. So we optimize for a tighter encoding. The 64b versions are in comments.
+     *
+     * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
      */
-UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
-UNIMPLEMENTED art_quick_aput_obj_with_bound_check
-UNIMPLEMENTED art_quick_aput_obj
+DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+    testl %edi, %edi                                  // Is the array reference null?
+//  testq %rdi, %rdi
+    jnz art_quick_aput_obj_with_bound_check_local     // Non-null: go on to the bounds check.
+    jmp art_quick_throw_null_pointer_exception_local  // Null: jump to the throw stub.
+END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+
+
+DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+    movl ARRAY_LENGTH_OFFSET(%edi), %ebx      // ebx = array length; ebx is used as scratch here
+//  movl ARRAY_LENGTH_OFFSET(%rdi), %ebx      // This zero-extends, so value(%rbx)=value(%ebx)
+    cmpl %ebx, %esi
+    jb art_quick_aput_obj_local               // index < length (unsigned): in bounds
+    mov %esi, %edi                            // Out of bounds: edi = index
+//  mov %rsi, %rdi
+    mov %ebx, %esi                            //                esi = length
+//  mov %rbx, %rsi
+    jmp art_quick_throw_array_bounds_local
+END_FUNCTION art_quick_aput_obj_with_bound_check
+
+
+DEFINE_FUNCTION art_quick_aput_obj
+    testl %edx, %edx                // store of null
+//  test %rdx, %rdx
+    jz .Ldo_aput_null
+    movl CLASS_OFFSET(%edi), %ebx                 // ebx = array's class; ebx is used as scratch
+//  movq CLASS_OFFSET(%rdi), %rbx
+    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx  // ebx = array's component type
+//  movq CLASS_COMPONENT_TYPE_OFFSET(%rbx), %rbx
+    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+//  cmpq CLASS_OFFSET(%rdx), %rbx
+    jne .Lcheck_assignability
+.Ldo_aput:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)  // 32b store at data offset + index * 4
+//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx      // rdx = value at the thread's card table slot
+    shrl LITERAL(7), %edi                        // edi = array address >> 7
+//  shrq LITERAL(7), %rdi
+    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
+    ret
+.Ldo_aput_null:
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)  // No card table store on this path
+//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    ret
+.Lcheck_assignability:
+    // Save arguments.
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    subq LITERAL(8), %rsp        // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+
+                                  // "Uncompress" = do nothing, as already zero-extended on load.
+    movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
+    movq %rbx, %rdi               // Pass arg1 = array's component type.
+
+    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
+
+    // A zero result takes the array-store-exception path.
+    testq %rax, %rax
+    jz   .Lthrow_array_store_exception
+
+    // Restore arguments.
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    POP  rdx
+    POP  rsi
+    POP  rdi
+
+    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)  // Same store sequence as .Ldo_aput
+//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
+    shrl LITERAL(7), %edi
+//  shrq LITERAL(7), %rdi
+    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
+//  movb %dl, (%rdx, %rdi)
+    ret
+.Lthrow_array_store_exception:
+    // Restore arguments.
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    POP  rdx
+    POP  rsi
+    POP  rdi
+
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.
+
+    // Outgoing argument set up.
+    movq %rsp, %rcx                         // Pass arg 4 = SP.
+    movq %rdx, %rsi                         // Pass arg 2 = value.
+    movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
+                                            // Pass arg 1 = array.
+
+    call PLT_SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+    int3                          // unreached
+END_FUNCTION art_quick_aput_obj
 
 // TODO: This is quite silly on X86_64 now.
 DEFINE_FUNCTION art_quick_memcpy
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 583e5e5..c9e3c11 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -919,7 +919,9 @@
   }
 
   std::string cache_error_msg;
-  std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location));
+  const std::string dalvik_cache(GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA)));
+  std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location,
+                                                         dalvik_cache.c_str()));
   dex_file = VerifyAndOpenDexFileFromOatFile(cache_location, dex_location, &cache_error_msg,
                                              &open_failed);
   if (dex_file != nullptr) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a0d5fca..26a0527 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -89,7 +89,7 @@
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
-           const std::string& image_file_name,
+           const std::string& image_file_name, const InstructionSet image_instruction_set,
            CollectorType foreground_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
@@ -186,7 +186,8 @@
   // Requested begin for the alloc space, to follow the mapped image and oat files
   byte* requested_alloc_space_begin = nullptr;
   if (!image_file_name.empty()) {
-    space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str());
+    space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str(),
+                                                               image_instruction_set);
     CHECK(image_space != nullptr) << "Failed to create space for " << image_file_name;
     AddSpace(image_space);
     // Oat files referenced by image files immediately follow them in memory, ensure alloc space
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 2592983..d3b5cdc 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -31,6 +31,7 @@
 #include "gc/collector_type.h"
 #include "globals.h"
 #include "gtest/gtest.h"
+#include "instruction_set.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -140,6 +141,7 @@
                 size_t max_free, double target_utilization,
                 double foreground_heap_growth_multiplier, size_t capacity,
                 const std::string& original_image_file_name,
+                const InstructionSet image_instruction_set,
                 CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 91d8820..3de1ba4 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -24,7 +24,6 @@
 #include "mirror/object-inl.h"
 #include "oat_file.h"
 #include "os.h"
-#include "runtime.h"
 #include "space-inl.h"
 #include "utils.h"
 
@@ -99,7 +98,8 @@
   return Exec(arg_vector, error_msg);
 }
 
-ImageSpace* ImageSpace::Create(const char* original_image_file_name) {
+ImageSpace* ImageSpace::Create(const char* original_image_file_name,
+                               const InstructionSet image_isa) {
   if (OS::FileExists(original_image_file_name)) {
     // If the /system file exists, it should be up-to-date, don't try to generate
     std::string error_msg;
@@ -112,7 +112,9 @@
   // If the /system file didn't exist, we need to use one from the dalvik-cache.
   // If the cache file exists, try to open, but if it fails, regenerate.
   // If it does not exist, generate.
-  std::string image_file_name(GetDalvikCacheFilenameOrDie(original_image_file_name));
+  const std::string dalvik_cache = GetDalvikCacheOrDie(GetInstructionSetString(image_isa));
+  std::string image_file_name(GetDalvikCacheFilenameOrDie(original_image_file_name,
+                                                          dalvik_cache.c_str()));
   std::string error_msg;
   if (OS::FileExists(image_file_name.c_str())) {
     space::ImageSpace* image_space = ImageSpace::Init(image_file_name.c_str(), true, &error_msg);
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index f6daf89..1652ec9 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 
 #include "gc/accounting/space_bitmap.h"
+#include "runtime.h"
 #include "space.h"
 
 namespace art {
@@ -34,15 +35,16 @@
     return kSpaceTypeImageSpace;
   }
 
-  // Create a Space from an image file. Cannot be used for future
-  // allocation or collected.
+  // Create a Space from an image file for a specified instruction
+  // set. Cannot be used for future allocation or collected.
   //
   // Create also opens the OatFile associated with the image file so
   // that it be contiguously allocated with the image before the
   // creation of the alloc space. The ReleaseOatFile will later be
   // used to transfer ownership of the OatFile to the ClassLinker when
   // it is initialized.
-  static ImageSpace* Create(const char* image) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static ImageSpace* Create(const char* image, const InstructionSet image_isa)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Releases the OatFile from the ImageSpace so it can be transfer to
   // the caller, presumably the ClassLinker.
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index 73d4279..cbcd2e0 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -21,6 +21,64 @@
 
 namespace art {
 
+// Returns the canonical lower-case name of |isa|. kThumb2 has no name of its own and is
+// reported as "arm"; a value outside the enum is a LOG(FATAL) error.
+const char* GetInstructionSetString(const InstructionSet isa) {
+  const char* name = nullptr;
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      name = "arm";
+      break;
+    case kArm64:
+      name = "arm64";
+      break;
+    case kX86:
+      name = "x86";
+      break;
+    case kX86_64:
+      name = "x86_64";
+      break;
+    case kMips:
+      name = "mips";
+      break;
+    case kNone:
+      name = "none";
+      break;
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      break;
+  }
+  return name;
+}
+
+// Inverse of GetInstructionSetString(): parses |isa_str| back into an InstructionSet.
+// "arm" always yields kArm (never kThumb2). |isa_str| must be non-null, and an
+// unrecognized name is a LOG(FATAL) error.
+InstructionSet GetInstructionSetFromString(const char* isa_str) {
+  CHECK(isa_str != nullptr);
+
+  static const struct {
+    const char* name;
+    InstructionSet isa;
+  } kNamedIsas[] = {
+    { "arm", kArm },
+    { "arm64", kArm64 },
+    { "x86", kX86 },
+    { "x86_64", kX86_64 },
+    { "mips", kMips },
+    { "none", kNone },
+  };
+  for (const auto& candidate : kNamedIsas) {
+    if (strcmp(candidate.name, isa_str) == 0) {
+      return candidate.isa;
+    }
+  }
+
+  LOG(FATAL) << "Unknown ISA " << isa_str;
+  return kNone;
+}
+
 size_t GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index c746e06..4bc35a7 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -35,6 +35,9 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+const char* GetInstructionSetString(const InstructionSet isa);
+InstructionSet GetInstructionSetFromString(const char* instruction_set);
+
 size_t GetInstructionSetPointerSize(InstructionSet isa);
 size_t GetInstructionSetAlignment(InstructionSet isa);
 bool Is64BitInstructionSet(InstructionSet isa);
diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc
new file mode 100644
index 0000000..cd6337c
--- /dev/null
+++ b/runtime/instruction_set_test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class InstructionSetTest : public CommonRuntimeTest {};
+
+// Each accepted name parses to the matching InstructionSet.
+TEST_F(InstructionSetTest, GetInstructionSetFromString) {
+  EXPECT_EQ(kArm, GetInstructionSetFromString("arm"));
+  EXPECT_EQ(kArm64, GetInstructionSetFromString("arm64"));
+  EXPECT_EQ(kX86, GetInstructionSetFromString("x86"));
+  EXPECT_EQ(kX86_64, GetInstructionSetFromString("x86_64"));
+  EXPECT_EQ(kMips, GetInstructionSetFromString("mips"));
+  EXPECT_EQ(kNone, GetInstructionSetFromString("none"));
+}
+
+// Each InstructionSet prints as its name; kThumb2 shares "arm" with kArm.
+TEST_F(InstructionSetTest, GetInstructionSetString) {
+  EXPECT_STREQ("arm", GetInstructionSetString(kArm));
+  EXPECT_STREQ("arm", GetInstructionSetString(kThumb2));
+  EXPECT_STREQ("arm64", GetInstructionSetString(kArm64));
+  EXPECT_STREQ("x86", GetInstructionSetString(kX86));
+  EXPECT_STREQ("x86_64", GetInstructionSetString(kX86_64));
+  EXPECT_STREQ("mips", GetInstructionSetString(kMips));
+  EXPECT_STREQ("none", GetInstructionSetString(kNone));
+}
+
+// Every ISA with a name of its own must survive a string round trip, whichever ISA the test
+// happens to run on. kThumb2 is the exception: it prints as "arm", which parses as kArm.
+TEST_F(InstructionSetTest, TestRoundTrip) {
+  EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
+
+  const InstructionSet kUniquelyNamed[] = { kArm, kArm64, kX86, kX86_64, kMips, kNone };
+  for (const InstructionSet isa : kUniquelyNamed) {
+    EXPECT_EQ(isa, GetInstructionSetFromString(GetInstructionSetString(isa)));
+  }
+  EXPECT_EQ(kArm, GetInstructionSetFromString(GetInstructionSetString(kThumb2)));
+}
+
+}  // namespace art
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 953d3a6..d9c1309 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -260,16 +260,15 @@
 #endif
 }
 
-static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
-    jstring javaPkgname, jboolean defer) {
+static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename,
+    const char* pkgname, const char* instruction_set, const jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kReasonLogging = true;  // Logging of reason for returning JNI_TRUE.
 
-  ScopedUtfChars filename(env, javaFilename);
-  if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) {
-    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
+  if ((filename == nullptr) || !OS::FileExists(filename)) {
+    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
-    const char* message = (filename.c_str() == nullptr) ? "<empty file name>" : filename.c_str();
+    const char* message = (filename == nullptr) ? "<empty file name>" : filename;
     env->ThrowNew(fnfe.get(), message);
     return JNI_FALSE;
   }
@@ -278,11 +277,14 @@
   // fact that code is running at all means that this should be true.
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
+  // TODO: We're assuming that the 64 and 32 bit runtimes have identical
+  // class paths. isDexOptNeeded will not necessarily be called on a runtime
+  // that has the same instruction set as the file being dexopted.
   const std::vector<const DexFile*>& boot_class_path = class_linker->GetBootClassPath();
   for (size_t i = 0; i < boot_class_path.size(); i++) {
-    if (boot_class_path[i]->GetLocation() == filename.c_str()) {
+    if (boot_class_path[i]->GetLocation() == filename) {
       if (kVerboseLogging) {
-        LOG(INFO) << "DexFile_isDexOptNeeded ignoring boot class path file: " << filename.c_str();
+        LOG(INFO) << "DexFile_isDexOptNeeded ignoring boot class path file: " << filename;
       }
       return JNI_FALSE;
     }
@@ -293,12 +295,11 @@
   // If the 'defer' argument is true then this will be retried later.  In this case we
   // need to make sure that the profile file copy is not made so that we will get the
   // same result second time.
-  if (javaPkgname != NULL) {
-    ScopedUtfChars pkgname(env, javaPkgname);
-    std::string profile_file = GetDalvikCacheOrDie(GetAndroidData()) + std::string("/profiles/") +
-    pkgname.c_str();
-
-    std::string profile_cache_dir = GetDalvikCacheOrDie(GetAndroidData()) + "/profile-cache";
+  if (pkgname != nullptr) {
+    const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */)
+        + std::string("/") + pkgname;
+    const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache",
+                                                              false /* create_if_absent */);
 
     // Make the profile cache if it doesn't exist.
     mkdir(profile_cache_dir.c_str(), 0700);
@@ -306,7 +307,7 @@
     // The previous profile file (a copy of the profile the last time this was run) is
     // in the dalvik-cache directory because this is owned by system.  The profiles
     // directory is owned by install so system cannot write files in there.
-    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname.c_str();
+    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname;
 
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
@@ -377,41 +378,41 @@
   }
 
   // Check if we have an odex file next to the dex file.
-  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename.c_str()));
+  std::string odex_filename(OatFile::DexFilenameToOdexFilename(filename));
   std::string error_msg;
   UniquePtr<const OatFile> oat_file(OatFile::Open(odex_filename, odex_filename, NULL, false,
                                                   &error_msg));
   if (oat_file.get() == nullptr) {
     if (kVerboseLogging) {
-      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename.c_str()
+      LOG(INFO) << "DexFile_isDexOptNeeded failed to open oat file '" << filename
           << "': " << error_msg;
     }
     error_msg.clear();
   } else {
-    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename.c_str(), NULL,
+    const art::OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(filename, NULL,
                                                                            kReasonLogging);
     if (oat_dex_file != nullptr) {
       uint32_t location_checksum;
       // If its not possible to read the classes.dex assume up-to-date as we won't be able to
       // compile it anyway.
-      if (!DexFile::GetChecksum(filename.c_str(), &location_checksum, &error_msg)) {
+      if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded ignoring precompiled stripped file: "
-              << filename.c_str() << ": " << error_msg;
+              << filename << ": " << error_msg;
         }
         return JNI_FALSE;
       }
-      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum,
+      if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
                                               &error_msg)) {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << odex_filename
-              << " has an up-to-date checksum compared to " << filename.c_str();
+              << " has an up-to-date checksum compared to " << filename;
         }
         return JNI_FALSE;
       } else {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded found precompiled file " << odex_filename
-              << " with an out-of-date checksum compared to " << filename.c_str()
+              << " with an out-of-date checksum compared to " << filename
               << ": " << error_msg;
         }
         error_msg.clear();
@@ -420,12 +421,14 @@
   }
 
   // Check if we have an oat file in the cache
-  std::string cache_location(GetDalvikCacheFilenameOrDie(filename.c_str()));
-  oat_file.reset(OatFile::Open(cache_location, filename.c_str(), NULL, false, &error_msg));
+  const std::string cache_dir(GetDalvikCacheOrDie(instruction_set));
+  const std::string cache_location(
+      GetDalvikCacheFilenameOrDie(filename, cache_dir.c_str()));
+  oat_file.reset(OatFile::Open(cache_location, filename, NULL, false, &error_msg));
   if (oat_file.get() == nullptr) {
     if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-          << " does not exist for " << filename.c_str() << ": " << error_msg;
+          << " does not exist for " << filename << ": " << error_msg;
     }
     return JNI_TRUE;
   }
@@ -458,19 +461,19 @@
   }
 
   uint32_t location_checksum;
-  if (!DexFile::GetChecksum(filename.c_str(), &location_checksum, &error_msg)) {
+  if (!DexFile::GetChecksum(filename, &location_checksum, &error_msg)) {
     if (kReasonLogging) {
-      LOG(ERROR) << "DexFile_isDexOptNeeded failed to compute checksum of " << filename.c_str()
+      LOG(ERROR) << "DexFile_isDexOptNeeded failed to compute checksum of " << filename
             << " (error " << error_msg << ")";
     }
     return JNI_TRUE;
   }
 
-  if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum,
+  if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename, location_checksum,
                                            &error_msg)) {
     if (kReasonLogging) {
       LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-          << " has out-of-date checksum compared to " << filename.c_str()
+          << " has out-of-date checksum compared to " << filename
           << " (error " << error_msg << ")";
     }
     return JNI_TRUE;
@@ -478,15 +481,37 @@
 
   if (kVerboseLogging) {
     LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-              << " is up-to-date for " << filename.c_str();
+              << " is up-to-date for " << filename;
   }
   CHECK(error_msg.empty()) << error_msg;
   return JNI_FALSE;
 }
 
+// JNI entry point for DexFile.isDexOptNeededInternal(String, String, String, boolean).
+// Converts the Java strings to UTF-8 and delegates to IsDexOptNeededInternal().
+// javaPkgname may be null (the profile comparison is then skipped); a null
+// javaInstructionSet is rejected here and JNI_FALSE is returned.
+static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
+    jstring javaPkgname, jstring javaInstructionSet, jboolean defer) {
+  ScopedUtfChars filename(env, javaFilename);
+  NullableScopedUtfChars pkgname(env, javaPkgname);
+  ScopedUtfChars instruction_set(env, javaInstructionSet);
+  if (instruction_set.c_str() == nullptr) {
+    // ScopedUtfChars has already raised NullPointerException for the null jstring. Return
+    // now: passing nullptr on would trip CHECK(subdir != nullptr) in GetDalvikCacheOrDie()
+    // and abort the runtime instead of reporting the error to the Java caller.
+    return JNI_FALSE;
+  }
+
+  return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(),
+                                instruction_set.c_str(), defer);
+}
+
 // public API, NULL pkgname
-static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass c, jstring javaFilename) {
-  return DexFile_isDexOptNeededInternal(env, c, javaFilename, NULL, false);
+static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
+  const char* instruction_set = GetInstructionSetString(kRuntimeISA);
+  ScopedUtfChars filename(env, javaFilename);
+  return IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */,
+                                instruction_set, false /* defer */);
 }
 
 
@@ -495,7 +511,7 @@
   NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
-  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Z)Z"),
+  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Z"),
   NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)J"),
 };
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index c0dc94b..9cf8785 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -246,6 +246,7 @@
   profile_clock_source_ = kDefaultProfilerClockSource;
 
   verify_ = true;
+  image_isa_ = kRuntimeISA;
 
   // Default to explicit checks.  Switch off with -implicit-checks:.
   // or setprop dalvik.vm.implicit_checks check1,check2,...
@@ -412,6 +413,9 @@
     } else if (option == "compilercallbacks") {
       compiler_callbacks_ =
           reinterpret_cast<CompilerCallbacks*>(const_cast<void*>(options[i].second));
+    } else if (option == "imageinstructionset") {
+      image_isa_ = GetInstructionSetFromString(
+          reinterpret_cast<const char*>(options[i].second));
     } else if (option == "-Xzygote") {
       is_zygote_ = true;
     } else if (option == "-Xint") {
@@ -673,7 +677,7 @@
     background_collector_type_ = collector_type_;
   }
   return true;
-}
+}  // NOLINT(readability/fn_size)
 
 void ParsedOptions::Exit(int status) {
   hook_exit_(status);
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index c02eb1d..e0b0fb5 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -85,6 +85,7 @@
   bool profile_start_immediately_;
   ProfilerClockSource profile_clock_source_;
   bool verify_;
+  InstructionSet image_isa_;
 
   static constexpr uint32_t kExplicitNullCheck = 1;
   static constexpr uint32_t kExplicitSuspendCheck = 2;
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index a91fdf1..aee0d64 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -19,6 +19,7 @@
 #include "catch_block_stack_visitor.h"
 #include "deoptimize_stack_visitor.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "mirror/art_method-inl.h"
 #include "sirt_ref-inl.h"
 
 namespace art {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6bbfcee..20df78e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -567,6 +567,7 @@
                        options->foreground_heap_growth_multiplier_,
                        options->heap_maximum_size_,
                        options->image_,
+                       options->image_isa_,
                        options->collector_type_,
                        options->background_collector_type_,
                        options->parallel_gc_threads_,
diff --git a/runtime/utils.cc b/runtime/utils.cc
index c4d1a78..ee2cca4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1167,14 +1167,20 @@
   return android_data;
 }
 
-std::string GetDalvikCacheOrDie(const char* android_data) {
-  std::string dalvik_cache(StringPrintf("%s/dalvik-cache", android_data));
-
-  if (!OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache, "/tmp/")) {
-      int result = mkdir(dalvik_cache.c_str(), 0700);
+std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+  CHECK(subdir != nullptr);
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const std::string dalvik_cache = dalvik_cache_root + subdir;
+  if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
+    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+      int result = mkdir(dalvik_cache_root.c_str(), 0700);
+      if (result != 0 && errno != EEXIST) {
+        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
+        return "";
+      }
+      result = mkdir(dalvik_cache.c_str(), 0700);
       if (result != 0) {
-        LOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
+        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
         return "";
       }
     } else {
@@ -1185,8 +1191,7 @@
   return dalvik_cache;
 }
 
-std::string GetDalvikCacheFilenameOrDie(const char* location) {
-  std::string dalvik_cache(GetDalvikCacheOrDie(GetAndroidData()));
+std::string GetDalvikCacheFilenameOrDie(const char* location, const char* cache_location) {
   if (location[0] != '/') {
     LOG(FATAL) << "Expected path in location to be absolute: "<< location;
   }
@@ -1196,7 +1201,7 @@
     cache_file += DexFile::kClassesDex;
   }
   std::replace(cache_file.begin(), cache_file.end(), '/', '@');
-  return dalvik_cache + "/" + cache_file;
+  return StringPrintf("%s/%s", cache_location, cache_file.c_str());
 }
 
 bool IsZipMagic(uint32_t magic) {
diff --git a/runtime/utils.h b/runtime/utils.h
index 6ab1013..4b2f230 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -394,11 +394,14 @@
 // Find $ANDROID_DATA, /data, or abort.
 const char* GetAndroidData();
 
-// Returns the dalvik-cache location, or dies trying.
-std::string GetDalvikCacheOrDie(const char* android_data);
+// Returns the dalvik-cache location, or dies trying. subdir will be
+// appended to the cache location.
+std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
 
-// Returns the dalvik-cache location for a DexFile or OatFile, or dies trying.
-std::string GetDalvikCacheFilenameOrDie(const char* location);
+// Returns the absolute dalvik-cache path for a DexFile or OatFile, or
+// dies trying. The path returned will be rooted at cache_location.
+std::string GetDalvikCacheFilenameOrDie(const char* file_location,
+                                        const char* cache_location);
 
 // Check whether the given magic matches a known file type.
 bool IsZipMagic(uint32_t magic);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 2c1aae8..d425620 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -338,18 +338,18 @@
   EXPECT_FALSE(EndsWith("oo", "foo"));
 }
 
-void CheckGetDalvikCacheFilenameOrDie(const char* in, const char* out) {
-  std::string expected(getenv("ANDROID_DATA"));
-  expected += "/dalvik-cache/";
-  expected += out;
-  EXPECT_STREQ(expected.c_str(), GetDalvikCacheFilenameOrDie(in).c_str());
-}
-
 TEST_F(UtilsTest, GetDalvikCacheFilenameOrDie) {
-  CheckGetDalvikCacheFilenameOrDie("/system/app/Foo.apk", "system@app@Foo.apk@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", "data@app@foo-1.apk@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/system/framework/core.jar", "system@framework@core.jar@classes.dex");
-  CheckGetDalvikCacheFilenameOrDie("/system/framework/boot.art", "system@framework@boot.art");
+  // The cache directory is an explicit argument, so the expected paths are fixed strings
+  // rather than being derived from $ANDROID_DATA.
+  const char* const cache_dir = "/foo";
+  EXPECT_STREQ("/foo/system@app@Foo.apk@classes.dex",
+               GetDalvikCacheFilenameOrDie("/system/app/Foo.apk", cache_dir).c_str());
+  EXPECT_STREQ("/foo/data@app@foo-1.apk@classes.dex",
+               GetDalvikCacheFilenameOrDie("/data/app/foo-1.apk", cache_dir).c_str());
+  EXPECT_STREQ("/foo/system@framework@core.jar@classes.dex",
+               GetDalvikCacheFilenameOrDie("/system/framework/core.jar", cache_dir).c_str());
+  EXPECT_STREQ("/foo/system@framework@boot.art",
+               GetDalvikCacheFilenameOrDie("/system/framework/boot.art", cache_dir).c_str());
 }
 
 TEST_F(UtilsTest, ExecSuccess) {
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index 93d7e79..e0d2f1d 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -140,7 +140,7 @@
 fi
 
 if [ "$GDB" = "y" ]; then
-    gdb="/data/gdbserver$TARGET_SUFFIX :5039"
+    gdb="gdbserver$TARGET_SUFFIX :5039"
     gdbargs="$exe"
 fi
 
@@ -150,7 +150,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir dalvik-cache && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"