Merge "Optionally add debug symbols to ELF files made by quick compiler."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4f9f312..6d2f5d1 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -84,6 +84,7 @@
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_x86.cc \
+	optimizing/code_generator_x86_64.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index f05cb66..b8332ad 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -18,6 +18,7 @@
 
 #include "code_generator_arm.h"
 #include "code_generator_x86.h"
+#include "code_generator_x86_64.h"
 #include "dex/verified_method.h"
 #include "driver/dex_compilation_unit.h"
 #include "gc_map_builder.h"
@@ -221,7 +222,7 @@
       return new (allocator) x86::CodeGeneratorX86(graph);
     }
     case kX86_64: {
-      return new (allocator) x86::CodeGeneratorX86(graph);
+      return new (allocator) x86_64::CodeGeneratorX86_64(graph);
     }
     default:
       return nullptr;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 82fa639..83621e0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -67,8 +67,7 @@
     // Note that this follows the current calling convention.
     return GetFrameSize()
         + kVRegSize  // Art method
-        + (parameter->GetIndex() - graph_->GetNumberOfVRegs() + graph_->GetNumberOfInVRegs())
-          * kVRegSize;
+        + parameter->GetIndex() * kVRegSize;
   }
 
   virtual void GenerateFrameEntry() = 0;
@@ -158,10 +157,10 @@
     return registers_[index];
   }
 
-  uint8_t GetStackOffsetOf(size_t index, size_t word_size) const {
+  size_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
-    // Add word_size for the method pointer.
-    return index * kVRegSize + word_size;
+    // Add one for the method pointer; size_t keeps offsets above 255 from wrapping.
+    return (index + 1) * kVRegSize;
   }
 
  private:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d61df36..212a6dc 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -15,14 +15,14 @@
  */
 
 #include "code_generator_arm.h"
-#include "utils/assembler.h"
-#include "utils/arm/assembler_arm.h"
-#include "utils/arm/managed_register_arm.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "thread.h"
+#include "utils/assembler.h"
+#include "utils/arm/assembler_arm.h"
+#include "utils/arm/managed_register_arm.h"
 
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
@@ -48,7 +48,8 @@
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
-      instruction_visitor_(graph, this) {}
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetArena(), this) {}  // Gets the graph's arena and this codegen.
 
 static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
   return blocked_registers + kNumberOfAllocIds;
@@ -106,6 +107,9 @@
   // Reserve thread register.
   blocked_registers[TR] = true;
 
+  // Reserve temp register.
+  blocked_registers[IP] = true;
+
   // TODO: We currently don't use Quick's callee saved registers.
   blocked_registers[R5] = true;
   blocked_registers[R6] = true;
@@ -161,7 +165,7 @@
   uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kArmWordSize  // ART method
+    return GetFrameSize() + kVRegSize  // ART method
                           + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
@@ -210,7 +214,7 @@
       if (index < calling_convention.GetNumberOfRegisters()) {
         return ArmCoreLocation(calling_convention.GetRegisterAt(index));
       } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -223,7 +227,7 @@
       } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
         return Location::QuickParameter(index);
       } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -254,8 +258,8 @@
     if (source.IsRegister()) {
       __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
     } else {
-      __ ldr(R0, Address(SP, source.GetStackIndex()));
-      __ str(R0, Address(SP, destination.GetStackIndex()));
+      __ ldr(IP, Address(SP, source.GetStackIndex()));
+      __ str(IP, Address(SP, destination.GetStackIndex()));
     }
   }
 }
@@ -274,7 +278,7 @@
       __ Mov(destination.AsArm().AsRegisterPairLow(),
              calling_convention.GetRegisterAt(argument_index));
       __ ldr(destination.AsArm().AsRegisterPairHigh(),
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       if (destination.AsArm().AsRegisterPair() == R1_R2) {
@@ -291,12 +295,12 @@
     if (source.IsRegister()) {
       __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow());
       __ str(source.AsArm().AsRegisterPairHigh(),
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
       __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
-      __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+      __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
@@ -314,14 +318,14 @@
       __ str(calling_convention.GetRegisterAt(argument_index),
              Address(SP, destination.GetStackIndex()));
       __ ldr(R0,
-             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
       __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ ldr(R0, Address(SP, source.GetStackIndex()));
-      __ str(R0, Address(SP, destination.GetStackIndex()));
-      __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
-      __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+      __ ldr(IP, Address(SP, source.GetStackIndex()));
+      __ str(IP, Address(SP, destination.GetStackIndex()));
+      __ ldr(IP, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+      __ str(IP, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
     }
   }
 }
@@ -332,8 +336,8 @@
     if (location.IsRegister()) {
       __ LoadImmediate(location.AsArm().AsCoreRegister(), value);
     } else {
-      __ LoadImmediate(R0, value);
-      __ str(R0, Address(SP, location.GetStackIndex()));
+      __ LoadImmediate(IP, value);
+      __ str(IP, Address(SP, location.GetStackIndex()));
     }
   } else if (instruction->AsLongConstant() != nullptr) {
     int64_t value = instruction->AsLongConstant()->GetValue();
@@ -341,10 +345,10 @@
       __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value));
       __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value));
     } else {
-      __ LoadImmediate(R0, Low32Bits(value));
-      __ str(R0, Address(SP, location.GetStackIndex()));
-      __ LoadImmediate(R0, High32Bits(value));
-      __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize)));
+      __ LoadImmediate(IP, Low32Bits(value));
+      __ str(IP, Address(SP, location.GetStackIndex()));
+      __ LoadImmediate(IP, High32Bits(value));
+      __ str(IP, Address(SP, location.GetHighStackIndex(kArmWordSize)));
     }
   } else if (instruction->AsLoadLocal() != nullptr) {
     uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
@@ -493,7 +497,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);  // Emit into Out().
 }
 
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
@@ -564,7 +568,7 @@
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(ArmCoreLocation(R0));
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
   for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -811,15 +815,93 @@
 }
 
 void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";  // This visitor is not expected to run for phis.
 }
 
 void LocationsBuilderARM::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  LOG(FATAL) << "Unreachable";  // The locations builder is not expected to see parallel moves.
 }
 
 void InstructionCodeGeneratorARM::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);  // Delegate to the move resolver.
+}
+
+ArmAssembler* ParallelMoveResolverARM::GetAssembler() const {
+  return codegen_->GetAssembler();  // The `__` macro in this class's methods resolves to this.
+}
+
+void ParallelMoveResolverARM::EmitMove(size_t index) {  // Emits the move stored at `index`.
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister()) {
+    if (destination.IsRegister()) {
+      __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ StoreToOffset(kStoreWord, source.AsArm().AsCoreRegister(),
+                       SP, destination.GetStackIndex());
+    }
+  } else if (source.IsStackSlot()) {
+    if (destination.IsRegister()) {
+      __ LoadFromOffset(kLoadWord, destination.AsArm().AsCoreRegister(),
+                        SP, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());  // Stage the word in IP.
+      __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+    }
+  } else {
+    LOG(FATAL) << "Unimplemented";  // Only core registers and single stack slots are handled.
+  }
+}
+
+void ParallelMoveResolverARM::Exchange(Register reg, int mem) {  // Swaps reg with [SP + mem].
+  __ Mov(IP, reg);  // Keep the register's value in IP while reg is overwritten by the load.
+  __ LoadFromOffset(kLoadWord, reg, SP, mem);
+  __ StoreToOffset(kStoreWord, IP, SP, mem);
+}
+
+void ParallelMoveResolverARM::Exchange(int mem1, int mem2) {  // Swaps two SP-relative words.
+  ScratchRegisterScope ensure_scratch(this, IP, R0, codegen_->GetNumberOfCoreRegisters());
+  int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;  // One word if spilled.
+  __ LoadFromOffset(kLoadWord, static_cast<Register>(ensure_scratch.GetRegister()),
+                    SP, mem1 + stack_offset);
+  __ LoadFromOffset(kLoadWord, IP, SP, mem2 + stack_offset);  // Both words loaded before any store.
+  __ StoreToOffset(kStoreWord, static_cast<Register>(ensure_scratch.GetRegister()),
+                   SP, mem2 + stack_offset);
+  __ StoreToOffset(kStoreWord, IP, SP, mem1 + stack_offset);
+}
+
+void ParallelMoveResolverARM::EmitSwap(size_t index) {  // Swaps the operands of the move at `index`.
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    DCHECK_NE(source.AsArm().AsCoreRegister(), IP);
+    DCHECK_NE(destination.AsArm().AsCoreRegister(), IP);
+    __ Mov(IP, source.AsArm().AsCoreRegister());  // IP is the temporary, hence the checks above.
+    __ Mov(source.AsArm().AsCoreRegister(), destination.AsArm().AsCoreRegister());
+    __ Mov(destination.AsArm().AsCoreRegister(), IP);
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(source.AsArm().AsCoreRegister(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(destination.AsArm().AsCoreRegister(), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    Exchange(source.GetStackIndex(), destination.GetStackIndex());
+  } else {
+    LOG(FATAL) << "Unimplemented";  // Only core registers and single stack slots are handled.
+  }
+}
+
+void ParallelMoveResolverARM::SpillScratch(int reg) {
+  __ Push(static_cast<Register>(reg));  // Save the register's current value on the stack.
+}
+
+void ParallelMoveResolverARM::RestoreScratch(int reg) {
+  __ Pop(static_cast<Register>(reg));  // Reload the register from the top of the stack.
 }
 
 }  // namespace arm
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ac5ef21..712a24c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -19,6 +19,7 @@
 
 #include "code_generator.h"
 #include "nodes.h"
+#include "parallel_move_resolver.h"
 #include "utils/arm/assembler_arm32.h"
 
 namespace art {
@@ -59,6 +60,27 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };
 
+class ParallelMoveResolverARM : public ParallelMoveResolver {  // ARM parallel-move emitter.
+ public:
+  ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
+      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+  virtual void EmitMove(size_t index) OVERRIDE;  // Emits the move at `index`.
+  virtual void EmitSwap(size_t index) OVERRIDE;  // Emits a swap for the move at `index`.
+  virtual void SpillScratch(int reg) OVERRIDE;  // Pushes `reg`.
+  virtual void RestoreScratch(int reg) OVERRIDE;  // Pops `reg`.
+
+  ArmAssembler* GetAssembler() const;  // Assembler of the owning code generator.
+
+ private:
+  void Exchange(Register reg, int mem);  // Swaps a register with an SP-relative word.
+  void Exchange(int mem1, int mem2);  // Swaps two SP-relative words.
+
+  CodeGeneratorARM* const codegen_;  // Set at construction.
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
+};
+
 class LocationsBuilderARM : public HGraphVisitor {
  public:
   explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -145,6 +167,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  ParallelMoveResolverARM* GetMoveResolver() {  // Resolver used when visiting HParallelMove.
+    return &move_resolver_;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
@@ -153,6 +179,7 @@
 
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
+  ParallelMoveResolverARM move_resolver_;
   Arm32Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c7dca86..342a191 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -157,7 +157,7 @@
   uint16_t number_of_in_vregs = GetGraph()->GetNumberOfInVRegs();
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
-    return GetFrameSize() + kX86WordSize  // ART method
+    return GetFrameSize() + kVRegSize  // ART method
                           + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
@@ -221,7 +221,7 @@
       if (index < calling_convention.GetNumberOfRegisters()) {
         return X86CpuLocation(calling_convention.GetRegisterAt(index));
       } else {
-        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -234,7 +234,7 @@
       } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
         return Location::QuickParameter(index);
       } else {
-        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index));
       }
     }
 
@@ -286,7 +286,7 @@
       __ movl(destination.AsX86().AsRegisterPairLow(),
               calling_convention.GetRegisterAt(argument_index));
       __ movl(destination.AsX86().AsRegisterPairHigh(), Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
+          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(destination.AsX86().AsRegisterPairLow(), Address(ESP, source.GetStackIndex()));
@@ -298,14 +298,14 @@
     uint32_t argument_index = destination.GetQuickParameterIndex();
     if (source.IsRegister()) {
       __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow());
-      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)),
+      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)),
               source.AsX86().AsRegisterPairHigh());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(calling_convention.GetRegisterAt(argument_index),
               Address(ESP, source.GetStackIndex()));
       __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)));
+      __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
   } else {
     if (source.IsRegister()) {
@@ -318,7 +318,7 @@
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(argument_index));
       __ pushl(Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
+          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
       __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
     } else {
       DCHECK(source.IsDoubleStackSlot());
@@ -847,7 +847,7 @@
 
 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src) {
   ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, src + stack_offset));
   __ movl(Address(ESP, dst + stack_offset), static_cast<Register>(ensure_scratch.GetRegister()));
@@ -879,7 +879,10 @@
 }
 
 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
-  ScratchRegisterScope ensure_scratch(this, reg, codegen_->GetNumberOfCoreRegisters());
+  Register suggested_scratch = reg == EAX ? EBX : EAX;
+  ScratchRegisterScope ensure_scratch(
+      this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
   __ movl(Address(ESP, mem + stack_offset), reg);
@@ -889,9 +892,12 @@
 
 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch1(
-      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+
+  Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
   ScratchRegisterScope ensure_scratch2(
-      this, ensure_scratch1.GetRegister(), codegen_->GetNumberOfCoreRegisters());
+      this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+
   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
   __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
new file mode 100644
index 0000000..ef17ca7
--- /dev/null
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -0,0 +1,708 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86_64.h"
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/array.h"
+#include "mirror/art_method.h"
+#include "mirror/object_reference.h"
+#include "thread.h"
+#include "utils/assembler.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/managed_register_x86_64.h"
+
+#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
+
+namespace art {
+
+x86_64::X86_64ManagedRegister Location::AsX86_64() const {
+  return reg().AsX86_64();  // View this location's managed register as an x86-64 one.
+}
+
+namespace x86_64 {
+
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;  // The return PC.
+static constexpr int kCurrentMethodStackOffset = 0;  // RSP-relative slot of the current method.
+
+void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
+  stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));  // `reg` as a CPU register.
+}
+
+void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+  stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));  // `reg` as an XMM one.
+}
+
+static Location X86_64CpuLocation(Register reg) {  // Wraps a CPU register in a Location.
+  return Location::RegisterLocation(X86_64ManagedRegister::FromCpuRegister(reg));
+}
+
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
+      : CodeGenerator(graph, kNumberOfRegIds),
+        location_builder_(graph, this),  // Both visitors keep a pointer back to this codegen.
+        instruction_visitor_(graph, this) {}
+
+InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),  // Cache the code generator's assembler.
+        codegen_(codegen) {}
+
+ManagedRegister CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type,
+                                                          bool* blocked_registers) const {
+  switch (type) {  // Picks a free register suitable for a value of `type`.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {  // All of these, long included, get a single CPU register.
+      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters);
+      return X86_64ManagedRegister::FromCpuRegister(static_cast<Register>(reg));
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << type;  // No XMM allocation yet.
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  return ManagedRegister::NoRegister();
+}
+
+void CodeGeneratorX86_64::SetupBlockedRegisters(bool* blocked_registers) const {
+  // Registers that are marked as blocked for this code generator.
+  static constexpr Register kAlwaysBlocked[] = {
+    // Stack register is always reserved.
+    RSP,
+    // TODO: We currently don't use Quick's callee saved registers.
+    RBX, RBP, R12, R13, R14, R15,
+  };
+  for (Register reg : kAlwaysBlocked) {
+    blocked_registers[reg] = true;
+  }
+}
+
+void CodeGeneratorX86_64::ComputeFrameSize(size_t number_of_spill_slots) {
+  // Frame = spill slots + filler + ART method + return PC, rounded up to the stack alignment.
+  const size_t spill_area = number_of_spill_slots * kVRegSize;
+  const size_t filler = kVRegSize;
+  const size_t art_method = kVRegSize;
+  const size_t pushed_at_entry = kNumberOfPushedRegistersAtEntry * kX86_64WordSize;
+  const size_t unaligned_size = spill_area + filler + art_method + pushed_at_entry;
+  SetFrameSize(RoundUp(unaligned_size, kStackAlignment));
+}
+
+void CodeGeneratorX86_64::GenerateFrameEntry() {  // Reserves the frame, then stores RDI at its base.
+  // Create a fake register to mimic Quick.
+  static const int kFakeReturnRegister = 16;
+  core_spill_mask_ |= (1 << kFakeReturnRegister);
+
+  // The return PC is already on the stack, so its word is left out of the RSP adjustment.
+  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+  __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+}
+
+void CodeGeneratorX86_64::GenerateFrameExit() {  // Undoes the RSP adjustment made at frame entry.
+  __ addq(CpuRegister(RSP),
+          Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
+}
+
+void CodeGeneratorX86_64::Bind(Label* label) {
+  __ Bind(label);  // Forwarded to the assembler.
+}
+
+void InstructionCodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
+  __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));  // 32-bit load from the slot.
+}
+
+int32_t CodeGeneratorX86_64::GetStackSlot(HLocal* local) const {
+  const uint16_t vreg = local->GetRegNumber();
+  const uint16_t vreg_count = GetGraph()->GetNumberOfVRegs();
+  const uint16_t in_vreg_count = GetGraph()->GetNumberOfInVRegs();
+  const bool is_parameter = vreg >= vreg_count - in_vreg_count;
+  if (!is_parameter) {
+    // A temporary of this method: it is addressed inside this method's own frame.
+    return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86_64WordSize)
+                          - kVRegSize
+                          - (vreg_count * kVRegSize)
+                          + (vreg * kVRegSize);
+  }
+  // A parameter: it sits in the caller's frame, past this frame and the ART method slot.
+  return GetFrameSize() + kVRegSize
+                        + (vreg - vreg_count + in_vreg_count) * kVRegSize;
+}
+
+Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
+  // kPrimLong maps to a double stack slot; kPrimInt and kPrimNot map to a single one.
+  switch (load->GetType()) {
+    case Primitive::kPrimLong:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented type " << load->GetType();
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << load->GetType();
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location();
+}
+
+void CodeGeneratorX86_64::Move(Location destination, Location source) {  // Copies source to destination.
+  if (source.Equals(destination)) {
+    return;  // Nothing to emit when both locations are the same.
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsStackSlot()) {  // Single slot: 32-bit load.
+      __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {  // Double slot: 64-bit load.
+      DCHECK(source.IsDoubleStackSlot());
+      __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+    }
+  } else if (destination.IsStackSlot()) {
+    if (source.IsRegister()) {
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+    } else {  // Stack to stack goes through RAX, overwriting whatever RAX held.
+      DCHECK(source.IsStackSlot());
+      __ movl(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX));
+    }
+  } else {
+    DCHECK(destination.IsDoubleStackSlot());
+    if (source.IsRegister()) {
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+    } else {  // 64-bit stack to stack, again staged through RAX.
+      DCHECK(source.IsDoubleStackSlot());
+      __ movq(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX));
+    }
+  }
+}
+
+void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+  if (instruction->AsIntConstant() != nullptr) {  // Int constant: written as an immediate.
+    Immediate imm(instruction->AsIntConstant()->GetValue());
+    if (location.IsRegister()) {
+      __ movq(location.AsX86_64().AsCpuRegister(), imm);
+    } else {
+      __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
+    }
+  } else if (instruction->AsLongConstant() != nullptr) {
+    int64_t value = instruction->AsLongConstant()->GetValue();
+    if (location.IsRegister()) {
+      __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value));
+    } else {  // Stack destination: the 64-bit value is staged in RAX first.
+      __ movq(CpuRegister(RAX), Immediate(value));
+      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(RAX));
+    }
+  } else if (instruction->AsLoadLocal() != nullptr) {  // Copy from the local's stack slot.
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      case Primitive::kPrimLong:
+        Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
+    }
+  } else {
+    // This can currently only happen when the instruction that requests the move
+    // is the next to be compiled.
+    DCHECK_EQ(instruction->GetNext(), move_for);
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+      case Primitive::kPrimLong:
+        Move(location, instruction->GetLocations()->Out());  // Copy from the producer's output.
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+    }
+  }
+}
+
+void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
+  got->SetLocations(nullptr);  // A goto has no location summary.
+}
+
+void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
+  HBasicBlock* successor = got->GetSuccessor();
+  if (GetGraph()->GetExitBlock() == successor) {
+    codegen_->GenerateFrameExit();  // Jumping to the exit block: emit the frame exit instead.
+  } else if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+    __ jmp(codegen_->GetLabelOf(successor));  // No jump is emitted when GoesToNextBlock is true.
+  }
+}
+
+void LocationsBuilderX86_64::VisitExit(HExit* exit) {
+  exit->SetLocations(nullptr);  // The exit instruction has no location summary.
+}
+
+void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
+  if (kIsDebugBuild) {  // Debug builds only: emit a comment and a breakpoint instruction.
+    __ Comment("Unreachable");
+    __ int3();
+  }
+}
+
+void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  locations->SetInAt(0, X86_64CpuLocation(RAX));  // The condition input is fixed to RAX.
+  if_instr->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
+  // TODO: Generate the input as a condition, instead of materializing in a register.
+  __ cmpl(if_instr->GetLocations()->InAt(0).AsX86_64().AsCpuRegister(), Immediate(0));
+  __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));  // Zero takes the false branch.
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
+    __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+  }
+}
+
+void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
+  local->SetLocations(nullptr);  // A local declaration is given no location summary.
+}
+
+void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
+  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());  // No code; debug check only.
+}
+
+void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
+  local->SetLocations(nullptr);  // A load of a local is given no location summary.
+}
+
+void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load) {
+  // Intentionally empty: nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  switch (store->InputAt(1)->GetType()) {  // The type of input 1 selects the slot kind.
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:  // Input 1 is placed directly in the local's stack slot.
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:  // Same, but using a double stack slot.
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+  }
+  store->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store) {  // Emits no code.
+}
+
+void LocationsBuilderX86_64::VisitEqual(HEqual* equal) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
+  locations->SetInAt(0, X86_64CpuLocation(RAX));
+  locations->SetInAt(1, X86_64CpuLocation(RCX));
+  locations->SetOut(X86_64CpuLocation(RAX));  // The output shares RAX with input 0.
+  equal->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* equal) {
+  LocationSummary* locations = equal->GetLocations();
+  CpuRegister out = locations->Out().AsX86_64().AsCpuRegister();
+  __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+          locations->InAt(1).AsX86_64().AsCpuRegister());
+  // setcc writes only the low byte of `out`. The output shares RAX with input 0 (see
+  // LocationsBuilderX86_64::VisitEqual), so the upper bits still hold that input: zero-extend
+  // the byte so that the materialized result is exactly 0 or 1.
+  __ setcc(kEqual, out);
+  __ movzxb(out, out);
+}
+
+void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());  // The output must end up in a register.
+  constant->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
+  // Emits nothing here: the constant will be generated at its use site.
+}
+
+void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
+  // TODO: Support constant locations.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::RequiresRegister());  // The output must end up in a register.
+  constant->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
+  // Emits nothing here: the constant will be generated at its use site.
+}
+
+void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
+  ret->SetLocations(nullptr);  // A void return is given no location summary.
+}
+
+void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
+  codegen_->GenerateFrameExit();  // The frame exit is emitted before the ret instruction.
+  __ ret();
+}
+
+void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
+  switch (ret->InputAt(0)->GetType()) {  // Input 0 is the returned value.
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:  // Every supported type is pinned to RAX.
+      locations->SetInAt(0, X86_64CpuLocation(RAX));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+  }
+  ret->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
+  if (kIsDebugBuild) {  // Debug builds only: verify that the returned value is in RAX.
+    switch (ret->InputAt(0)->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+      case Primitive::kPrimLong:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX);
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+    }
+  }
+  codegen_->GenerateFrameExit();  // The frame exit is emitted before the ret instruction.
+  __ ret();
+}
+
+static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+
+class InvokeRuntimeCallingConvention : public CallingConvention<Register> {  // RDI, RSI, RDX.
+ public:
+  InvokeRuntimeCallingConvention()  // Built on kRuntimeParameterCoreRegisters.
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+  switch (type) {  // Each call consumes the next argument position for `type`.
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {  // One register if any is left, otherwise one stack slot.
+      uint32_t index = gp_index_++;
+      stack_index_++;  // A stack slot is counted even when a register is used.
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        return X86_64CpuLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
+      }
+    }
+
+    case Primitive::kPrimLong: {  // One register if any is left, otherwise a double stack slot.
+      uint32_t index = gp_index_;
+      stack_index_ += 2;  // A long always accounts for two stack slots.
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        gp_index_ += 1;
+        return X86_64CpuLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        gp_index_ += 2;  // NOTE(review): differs from '+= 1' above; registers are exhausted here.
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
+      }
+    }
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented parameter type " << type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+  return Location();  // Only reached after one of the LOG(FATAL) cases above.
+}
+
+void LocationsBuilderX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
+  locations->AddTemp(X86_64CpuLocation(RDI));  // Temp 0; the code generator loads the callee in it.
+
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  for (size_t i = 0; i < invoke->InputCount(); ++i) {  // Arguments follow the dex convention.
+    HInstruction* input = invoke->InputAt(i);
+    locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
+  }
+
+  switch (invoke->GetType()) {  // The invoke's own type is the callee's return type.
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      locations->SetOut(X86_64CpuLocation(RAX));
+      break;
+
+    case Primitive::kPrimVoid:  // No output location is set for void calls.
+      break;
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      break;
+  }
+
+  invoke->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  CpuRegister reg = invoke->GetLocations()->GetTemp(0).AsX86_64().AsCpuRegister();
+  uint32_t entry_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t entry_offset = invoke->GetIndexInDexCache() * entry_size +
+      mirror::Array::DataOffset(entry_size).SizeValue();
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // reg = current method
+  LoadCurrentMethod(reg);
+  // reg = reg->dex_cache_resolved_methods_
+  __ movl(reg, Address(reg, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // reg = callee, i.e. the cache entry at byte offset entry_offset
+  __ movl(reg, Address(reg, entry_offset));
+  // Call through the callee's quick-compiled-code entry point.
+  __ call(Address(reg, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+
+  codegen_->RecordPcInfo(invoke->GetDexPc());
+}
+
+void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {  // Fixed registers: inputs in RAX and RCX, output in RAX.
+      locations->SetInAt(0, X86_64CpuLocation(RAX));
+      locations->SetInAt(1, X86_64CpuLocation(RCX));
+      locations->SetOut(X86_64CpuLocation(RAX));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+  }
+  add->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
+  LocationSummary* locations = add->GetLocations();
+  switch (add->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {  // Both types are added with the 64-bit addq.
+      DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+                locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+      __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(),  // Input 0 is also the output.
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitSub(HSub* sub) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
+  switch (sub->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {  // Fixed registers: inputs in RAX and RCX, output in RAX.
+      locations->SetInAt(0, X86_64CpuLocation(RAX));
+      locations->SetInAt(1, X86_64CpuLocation(RCX));
+      locations->SetOut(X86_64CpuLocation(RAX));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+  }
+  sub->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
+  LocationSummary* locations = sub->GetLocations();
+  switch (sub->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {  // Both types are subtracted with the 64-bit subq.
+      DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+                locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+      __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(),  // Input 0 is also the output.
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+  }
+}
+
+void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetOut(X86_64CpuLocation(RAX));  // The new object is expected in RAX.
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
+  InvokeRuntimeCallingConvention calling_convention;  // Arg 0: type index; arg 1: method.
+  LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
+  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+
+  __ gs()->call(Address::Absolute(  // Calls the pAllocObjectWithAccessCheck entrypoint via gs.
+      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocObjectWithAccessCheck), true));
+
+  codegen_->RecordPcInfo(instruction->GetDexPc());
+}
+
+void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {  // Stack-passed: offset the slot index by the frame size.
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);  // Register locations are used unchanged.
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitParameterValue(HParameterValue* instruction) {
+  // Intentionally empty: nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderX86_64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, X86_64CpuLocation(RAX));
+  locations->SetOut(X86_64CpuLocation(RAX));  // The output shares RAX with the input.
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+  __ xorq(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1));  // Flips bit 0 in place.
+}
+
+void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());  // Inputs are left unconstrained.
+  }
+  locations->SetOut(Location::Any());  // So is the output.
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction) {
+  LOG(FATAL) << "Unimplemented";  // Generating code for a phi is a fatal error here.
+}
+
+void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";  // Parallel moves are a fatal error in this builder.
+}
+
+void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
+  LOG(FATAL) << "Unimplemented";  // Parallel moves are a fatal error in this generator.
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
new file mode 100644
index 0000000..ac7ee9f
--- /dev/null
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+
+#include "code_generator.h"
+#include "nodes.h"
+#include "utils/x86_64/assembler_x86_64.h"
+
+namespace art {
+namespace x86_64 {
+
+static constexpr size_t kX86_64WordSize = 8;  // Returned by CodeGeneratorX86_64::GetWordSize().
+
+static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
+
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeDexCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeDexCallingConvention()  // Built on kParameterCoreRegisters (RSI, RDX, RCX, R8, R9).
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {  // Hands out argument locations one call at a time.
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);  // Location of the next argument of `type`.
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  uint32_t gp_index_;  // Index of the next core register to hand out.
+  uint32_t stack_index_;  // Number of stack slots accounted for so far.
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
+class CodeGeneratorX86_64;
+
+class LocationsBuilderX86_64 : public HGraphVisitor {  // Sets each instruction's locations.
+ public:
+  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name)     \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)  // Declares one Visit method per instruction.
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  CodeGeneratorX86_64* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;  // Used by VisitParameterValue.
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
+};
+
+class InstructionCodeGeneratorX86_64 : public HGraphVisitor {  // Emits code per instruction.
+ public:
+  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name)     \
+  virtual void Visit##name(H##name* instr);
+
+  FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)  // Declares one Visit method per instruction.
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void LoadCurrentMethod(CpuRegister reg);  // Loads the current method into `reg`.
+
+  X86_64Assembler* GetAssembler() const { return assembler_; }
+
+ private:
+  X86_64Assembler* const assembler_;
+  CodeGeneratorX86_64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
+};
+
+class CodeGeneratorX86_64 : public CodeGenerator {  // The x86-64 CodeGenerator implementation.
+ public:
+  explicit CodeGeneratorX86_64(HGraph* graph);
+  virtual ~CodeGeneratorX86_64() {}
+
+  virtual void ComputeFrameSize(size_t number_of_spill_slots) OVERRIDE;
+  virtual void GenerateFrameEntry() OVERRIDE;
+  virtual void GenerateFrameExit() OVERRIDE;
+  virtual void Bind(Label* label) OVERRIDE;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+
+  virtual size_t GetWordSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
+  virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
+    return &location_builder_;
+  }
+
+  virtual HGraphVisitor* GetInstructionVisitor() OVERRIDE {
+    return &instruction_visitor_;
+  }
+
+  virtual X86_64Assembler* GetAssembler() OVERRIDE {
+    return &assembler_;
+  }
+
+  int32_t GetStackSlot(HLocal* local) const;  // Stack slot used for `local`.
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  virtual size_t GetNumberOfRegisters() const OVERRIDE {
+    return kNumberOfRegIds;
+  }
+
+  virtual size_t GetNumberOfCoreRegisters() const OVERRIDE {
+    return kNumberOfCpuRegisters;
+  }
+
+  virtual size_t GetNumberOfFloatingPointRegisters() const OVERRIDE {
+    return kNumberOfFloatRegisters;
+  }
+
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE;
+  virtual ManagedRegister AllocateFreeRegister(
+      Primitive::Type type, bool* blocked_registers) const OVERRIDE;
+  virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+ private:
+  // Helper method to move a value between two locations (private overload of Move).
+  void Move(Location destination, Location source);
+
+  LocationsBuilderX86_64 location_builder_;  // Returned by GetLocationBuilder().
+  InstructionCodeGeneratorX86_64 instruction_visitor_;  // Returned by GetInstructionVisitor().
+  X86_64Assembler assembler_;  // Returned by GetAssembler().
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 8ee775c..c3baf1a 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -47,6 +47,17 @@
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
+  typedef int32_t (*EntryPoint)();  // The generated code is called with no arguments.
+  int32_t result = reinterpret_cast<EntryPoint>(allocator.GetMemory())();
+  if (has_result) {
+    CHECK_EQ(result, expected);  // The return value is only checked when has_result is set.
+  }
+}
+#endif
+
 static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -55,24 +66,23 @@
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
   InternalCodeAllocator allocator;
+
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
   codegen->CompileBaseline(&allocator);
-  typedef int32_t (*fptr)();
 #if defined(__i386__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
 #endif
+
   codegen = CodeGenerator::Create(&arena, graph, kArm);
   codegen->CompileBaseline(&allocator);
 #if defined(__arm__)
-  CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
-  int32_t result = reinterpret_cast<fptr>(allocator.GetMemory())();
-  if (has_result) {
-    CHECK_EQ(result, expected);
-  }
+  Run(allocator, has_result, expected);
+#endif
+
+  codegen = CodeGenerator::Create(&arena, graph, kX86_64);
+  codegen->CompileBaseline(&allocator);
+#if defined(__x86_64__)
+  Run(allocator, has_result, expected);
 #endif
 }
 
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 3c60d3c..40a39ad 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -150,6 +150,7 @@
 
   arm::ArmManagedRegister AsArm() const;
   x86::X86ManagedRegister AsX86() const;
+  x86_64::X86_64ManagedRegister AsX86_64() const;
 
   Kind GetKind() const {
     return KindField::Decode(value_);
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 4a1b6ce..cadd3c5 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -163,7 +163,11 @@
   return false;
 }
 
-int ParallelMoveResolver::AllocateScratchRegister(int blocked, int register_count, bool* spilled) {
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+                                                  int register_count,
+                                                  int if_scratch,
+                                                  bool* spilled) {
+  DCHECK_NE(blocked, if_scratch);
   int scratch = -1;
   for (int reg = 0; reg < register_count; ++reg) {
     if ((blocked != reg) &&
@@ -175,11 +179,7 @@
 
   if (scratch == -1) {
     *spilled = true;
-    for (int reg = 0; reg < register_count; ++reg) {
-      if (blocked != reg) {
-        scratch = reg;
-      }
-    }
+    scratch = if_scratch;
   } else {
     *spilled = false;
   }
@@ -189,11 +189,11 @@
 
 
 ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
-    ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+    ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
     : resolver_(resolver),
       reg_(kNoRegister),
       spilled_(false) {
-  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, &spilled_);
+  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);
 
   if (spilled_) {
     resolver->SpillScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e1189d8..fcc1de6 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,7 +42,10 @@
  protected:
   class ScratchRegisterScope : public ValueObject {
    public:
-    ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int number_of_registers);
+    ScratchRegisterScope(ParallelMoveResolver* resolver,
+                         int blocked,
+                         int if_scratch,
+                         int number_of_registers);
     ~ScratchRegisterScope();
 
     int GetRegister() const { return reg_; }
@@ -55,7 +58,7 @@
   };
 
   bool IsScratchLocation(Location loc);
-  int AllocateScratchRegister(int blocked, int register_count, bool* spilled);
+  int AllocateScratchRegister(int blocked, int register_count, int if_scratch, bool* spilled);
 
   // Emit a move.
   virtual void EmitMove(size_t index) = 0;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c2a4769..348e9d4 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -651,7 +651,9 @@
     // Move must happen after the instruction.
     DCHECK(!at->IsControlFlow());
     move = at->GetNext()->AsParallelMove();
-    if (move == nullptr || IsInputMove(move)) {
+    // This is a parallel move for connecting siblings in the same block. We need to
+    // differentiate it from moves for connecting blocks, and from input moves.
+    if (move == nullptr || move->GetLifetimePosition() != position) {
       move = new (allocator_) HParallelMove(allocator_);
       move->SetLifetimePosition(position);
       at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -660,7 +662,9 @@
     // Move must happen before the instruction.
     HInstruction* previous = at->GetPrevious();
     if (previous != nullptr && previous->AsParallelMove() != nullptr) {
-      if (IsInputMove(previous)) {
+      // This is a parallel move for connecting siblings in the same block. We need to
+      // differentiate it from moves for connecting blocks, and from input moves.
+      if (previous->GetLifetimePosition() != position) {
         previous = previous->GetPrevious();
       }
     }
@@ -684,8 +688,12 @@
   HInstruction* last = block->GetLastInstruction();
   HInstruction* previous = last->GetPrevious();
   HParallelMove* move;
-  if (previous == nullptr || previous->AsParallelMove() == nullptr) {
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from output moves.
+  if (previous == nullptr || previous->AsParallelMove() == nullptr
+      || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) {
     move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(block->GetLifetimeEnd());
     block->InsertInstructionBefore(move, last);
   } else {
     move = previous->AsParallelMove();
@@ -700,7 +708,9 @@
 
   HInstruction* first = block->GetFirstInstruction();
   HParallelMove* move = first->AsParallelMove();
-  if (move == nullptr || IsInputMove(move)) {
+  // This is a parallel move for connecting blocks. We need to differentiate
+  // it from moves for connecting siblings in the same block, and from input moves.
+  if (move == nullptr || move->GetLifetimePosition() != block->GetLifetimeStart()) {
     move = new (allocator_) HParallelMove(allocator_);
     move->SetLifetimePosition(block->GetLifetimeStart());
     block->InsertInstructionBefore(move, first);
@@ -718,9 +728,14 @@
     return;
   }
 
+  size_t position = instruction->GetLifetimePosition() + 1;
   HParallelMove* move = instruction->GetNext()->AsParallelMove();
-  if (move == nullptr || IsInputMove(move)) {
+  // This is a parallel move for moving the output of an instruction. We need
+  // to differentiate it from input moves, moves for connecting siblings in the
+  // same block, and moves for connecting blocks.
+  if (move == nullptr || move->GetLifetimePosition() != position) {
     move = new (allocator_) HParallelMove(allocator_);
+    move->SetLifetimePosition(position);
     instruction->GetBlock()->InsertInstructionBefore(move, instruction->GetNext());
   }
   move->AddMove(new (allocator_) MoveOperands(source, destination));
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 1b5585f..8b7c4f1 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -65,7 +65,7 @@
 
   static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
   static bool Supports(InstructionSet instruction_set) {
-    return instruction_set == kX86;
+    return instruction_set == kX86 || instruction_set == kArm;
   }
 
   size_t GetNumberOfSpillSlots() const {
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index dd8e221..d704788 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1288,8 +1288,8 @@
      */
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
-UNIMPLEMENTED art_quick_initialize_type
-UNIMPLEMENTED art_quick_initialize_type_and_verify_access
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
 ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 59311bc..7785bc3 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1784,9 +1784,87 @@
   ASSERT_FALSE(self->IsExceptionPending());
   EXPECT_EQ(static_cast<size_t>(JNI_TRUE), result);
 #else
-  LOG(INFO) << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA;
+  LOG(INFO) << "Skipping imt as I don't know how to do that on " << kRuntimeISA;
   // Force-print to std::cout so it's also outside the logcat.
-  std::cout << "Skipping memcpy as I don't know how to do that on " << kRuntimeISA << std::endl;
+  std::cout << "Skipping imt as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+extern "C" void art_quick_indexof(void);
+#endif
+
+TEST_F(StubTest, StringIndexOf) {
+#if defined(__arm__) || defined(__aarch64__)
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  // Create some strings and the characters to search for.
+  // Use arrays so we can index into them and use a matrix for expected results.
+  // Setup: All strings are allocated directly from the C strings below.
+  // TODO: Shared backing arrays.
+  static constexpr size_t kStringCount = 7;
+  const char* c_str[kStringCount] = { "", "a", "ba", "cba", "dcba", "edcba", "asdfghjkl" };
+  static constexpr size_t kCharCount = 5;
+  const char c_char[kCharCount] = { 'a', 'b', 'c', 'd', 'e' };
+
+  StackHandleScope<kStringCount> hs(self);
+  Handle<mirror::String> s[kStringCount];
+
+  for (size_t i = 0; i < kStringCount; ++i) {
+    s[i] = hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), c_str[i]));
+  }
+
+  // Matrix of expectations. First component is the string, second the character, third the
+  // start index (offset by 1). The expected values are computed with String::FastIndexOf, so
+  // this test relies on FastIndexOf being correct.
+  static constexpr size_t kMaxLen = 9;
+  DCHECK_LE(strlen(c_str[kStringCount-1]), kMaxLen) << "Please fix the indexof test.";
+
+  // Last dimension: start, offset by 1 (so that a start of -1 is covered at index 0).
+  int32_t expected[kStringCount][kCharCount][kMaxLen + 3];
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kCharCount; ++y) {
+      for (size_t z = 0; z <= kMaxLen + 2; ++z) {
+        expected[x][y][z] = s[x]->FastIndexOf(c_char[y], static_cast<int32_t>(z) - 1);
+      }
+    }
+  }
+
+  // Play with it...
+
+  for (size_t x = 0; x < kStringCount; ++x) {
+    for (size_t y = 0; y < kCharCount; ++y) {
+      for (size_t z = 0; z <= kMaxLen + 2; ++z) {
+        int32_t start = static_cast<int32_t>(z) - 1;
+
+        // Call the indexof stub with string x, character y and start index `start`.
+        size_t result = Invoke3(reinterpret_cast<size_t>(s[x].Get()), c_char[y], start,
+                                reinterpret_cast<uintptr_t>(&art_quick_indexof), self);
+
+        EXPECT_FALSE(self->IsExceptionPending());
+
+        // The result is a 32b signed integer
+        union {
+          size_t r;
+          int32_t i;
+        } conv;
+        conv.r = result;
+
+        EXPECT_EQ(expected[x][y][z], conv.i) << "Wrong result for " << c_str[x] << " / " <<
+            c_char[y] << " @ " << start;
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping indexof as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping indexof as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
 
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 61ea870..3000217 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -111,17 +111,17 @@
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_METHOD(var, idx, error_string, error_val)                  \
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
   const DexFile::MethodId* var  = CheckLoadMethodId(idx, error_string); \
   if (UNLIKELY(var == nullptr)) {                                       \
-    return error_val;                                                   \
+    error_stmt;                                                         \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_FIELD(var, idx, fmt, error_val)                \
+#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
   const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
   if (UNLIKELY(var == nullptr)) {                           \
-    return error_val;                                       \
+    error_stmt;                                             \
   }
 
 bool DexFileVerifier::Verify(const DexFile* dex_file, const byte* begin, size_t size,
@@ -1378,42 +1378,48 @@
   return true;
 }
 
-uint32_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr) {
+uint16_t DexFileVerifier::FindFirstClassDataDefiner(const byte* ptr, bool* success) {
   ClassDataItemIterator it(*dex_file_, ptr);
+  *success = true;  // Only the LOAD_* error statements below set this to false.
 
   if (it.HasNextStaticField() || it.HasNextInstanceField()) {
-    LOAD_FIELD(field, it.GetMemberIndex(), "first_class_data_definer field_id", 0x10000U)
+    LOAD_FIELD(field, it.GetMemberIndex(), "first_class_data_definer field_id",
+               *success = false; return DexFile::kDexNoIndex16)
     return field->class_idx_;
   }
 
   if (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
-    LOAD_METHOD(method, it.GetMemberIndex(), "first_class_data_definer method_id", 0x10000U)
+    LOAD_METHOD(method, it.GetMemberIndex(), "first_class_data_definer method_id",
+                *success = false; return DexFile::kDexNoIndex16)
     return method->class_idx_;
   }
 
   return DexFile::kDexNoIndex16;
 }
 
-uint32_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr) {
+uint16_t DexFileVerifier::FindFirstAnnotationsDirectoryDefiner(const byte* ptr, bool* success) {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr);
+  *success = true;
+
   if (item->fields_size_ != 0) {
     DexFile::FieldAnnotationsItem* field_items = (DexFile::FieldAnnotationsItem*) (item + 1);
-    LOAD_FIELD(field, field_items[0].field_idx_, "first_annotations_dir_definer field_id", 0x10000U)
+    LOAD_FIELD(field, field_items[0].field_idx_, "first_annotations_dir_definer field_id",
+               *success = false; return DexFile::kDexNoIndex16)
     return field->class_idx_;
   }
 
   if (item->methods_size_ != 0) {
     DexFile::MethodAnnotationsItem* method_items = (DexFile::MethodAnnotationsItem*) (item + 1);
     LOAD_METHOD(method, method_items[0].method_idx_, "first_annotations_dir_definer method id",
-                0x10000U)
+                *success = false; return DexFile::kDexNoIndex16)
     return method->class_idx_;
   }
 
   if (item->parameters_size_ != 0) {
     DexFile::ParameterAnnotationsItem* parameter_items = (DexFile::ParameterAnnotationsItem*) (item + 1);
     LOAD_METHOD(method, parameter_items[0].method_idx_, "first_annotations_dir_definer method id",
-                0x10000U)
+                *success = false; return DexFile::kDexNoIndex16)
     return method->class_idx_;
   }
 
@@ -1699,8 +1705,9 @@
   // Check that references in class_data_item are to the right class.
   if (item->class_data_off_ != 0) {
     const byte* data = begin_ + item->class_data_off_;
-    uint32_t data_definer = FindFirstClassDataDefiner(data);
-    if (data_definer >= 0x10000U) {
+    bool success;
+    uint16_t data_definer = FindFirstClassDataDefiner(data, &success);
+    if (!success) {
       return false;
     }
     if (UNLIKELY((data_definer != item->class_idx_) && (data_definer != DexFile::kDexNoIndex16))) {
@@ -1712,8 +1719,9 @@
   // Check that references in annotations_directory_item are to right class.
   if (item->annotations_off_ != 0) {
     const byte* data = begin_ + item->annotations_off_;
-    uint32_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data);
-    if (annotations_definer >= 0x10000U) {
+    bool success;
+    uint16_t annotations_definer = FindFirstAnnotationsDirectoryDefiner(data, &success);
+    if (!success) {
       return false;
     }
     if (UNLIKELY((annotations_definer != item->class_idx_) &&
@@ -1777,13 +1785,14 @@
 
 bool DexFileVerifier::CheckInterClassDataItem() {
   ClassDataItemIterator it(*dex_file_, ptr_);
-  uint32_t defining_class = FindFirstClassDataDefiner(ptr_);
-  if (defining_class >= 0x10000U) {
+  bool success;
+  uint16_t defining_class = FindFirstClassDataDefiner(ptr_, &success);
+  if (!success) {
     return false;
   }
 
   for (; it.HasNextStaticField() || it.HasNextInstanceField(); it.Next()) {
-    LOAD_FIELD(field, it.GetMemberIndex(), "inter_class_data_item field_id", false)
+    LOAD_FIELD(field, it.GetMemberIndex(), "inter_class_data_item field_id", return false)
     if (UNLIKELY(field->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for class_data_item field");
       return false;
@@ -1794,7 +1803,7 @@
     if (code_off != 0 && !CheckOffsetToTypeMap(code_off, DexFile::kDexTypeCodeItem)) {
       return false;
     }
-    LOAD_METHOD(method, it.GetMemberIndex(), "inter_class_data_item method_id", false)
+    LOAD_METHOD(method, it.GetMemberIndex(), "inter_class_data_item method_id", return false)
     if (UNLIKELY(method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for class_data_item method");
       return false;
@@ -1808,8 +1817,9 @@
 bool DexFileVerifier::CheckInterAnnotationsDirectoryItem() {
   const DexFile::AnnotationsDirectoryItem* item =
       reinterpret_cast<const DexFile::AnnotationsDirectoryItem*>(ptr_);
-  uint32_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_);
-  if (defining_class >= 0x10000U) {
+  bool success;
+  uint16_t defining_class = FindFirstAnnotationsDirectoryDefiner(ptr_, &success);
+  if (!success) {
     return false;
   }
 
@@ -1823,7 +1833,8 @@
       reinterpret_cast<const DexFile::FieldAnnotationsItem*>(item + 1);
   uint32_t field_count = item->fields_size_;
   for (uint32_t i = 0; i < field_count; i++) {
-    LOAD_FIELD(field, field_item->field_idx_, "inter_annotations_directory_item field_id", false)
+    LOAD_FIELD(field, field_item->field_idx_, "inter_annotations_directory_item field_id",
+               return false)
     if (UNLIKELY(field->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for field_annotation");
       return false;
@@ -1840,7 +1851,7 @@
   uint32_t method_count = item->methods_size_;
   for (uint32_t i = 0; i < method_count; i++) {
     LOAD_METHOD(method, method_item->method_idx_, "inter_annotations_directory_item method_id",
-                false)
+                return false)
     if (UNLIKELY(method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for method_annotation");
       return false;
@@ -1857,7 +1868,7 @@
   uint32_t parameter_count = item->parameters_size_;
   for (uint32_t i = 0; i < parameter_count; i++) {
     LOAD_METHOD(parameter_method, parameter_item->method_idx_,
-                "inter_annotations_directory_item parameter method_id", false)
+                "inter_annotations_directory_item parameter method_id", return false)
     if (UNLIKELY(parameter_method->class_idx_ != defining_class)) {
       ErrorStringPrintf("Mismatched defining class for parameter_annotation");
       return false;
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 7489dcd..f845993 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -72,10 +72,10 @@
 
   bool CheckOffsetToTypeMap(size_t offset, uint16_t type);
 
-  // Note: the result type of the following methods is wider than that of the underlying index
-  // (16b vs 32b). This is so that we can define an error value (anything >= 2^16).
-  uint32_t FindFirstClassDataDefiner(const byte* ptr);
-  uint32_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr);
+  // Note: kDexNoIndex16 (0xFFFF) is itself a valid return value here, so errors loading an
+  // index are reported through the additional out parameter instead.
+  uint16_t FindFirstClassDataDefiner(const byte* ptr, bool* success);
+  uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr, bool* success);
 
   bool CheckInterStringIdItem();
   bool CheckInterTypeIdItem();
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 1ff5c19..edba502 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -145,27 +145,28 @@
   };
 
   enum VerifyFlag {
-    kVerifyNone            = 0x00000,
-    kVerifyRegA            = 0x00001,
-    kVerifyRegAWide        = 0x00002,
-    kVerifyRegB            = 0x00004,
-    kVerifyRegBField       = 0x00008,
-    kVerifyRegBMethod      = 0x00010,
-    kVerifyRegBNewInstance = 0x00020,
-    kVerifyRegBString      = 0x00040,
-    kVerifyRegBType        = 0x00080,
-    kVerifyRegBWide        = 0x00100,
-    kVerifyRegC            = 0x00200,
-    kVerifyRegCField       = 0x00400,
-    kVerifyRegCNewArray    = 0x00800,
-    kVerifyRegCType        = 0x01000,
-    kVerifyRegCWide        = 0x02000,
-    kVerifyArrayData       = 0x04000,
-    kVerifyBranchTarget    = 0x08000,
-    kVerifySwitchTargets   = 0x10000,
-    kVerifyVarArg          = 0x20000,
-    kVerifyVarArgRange     = 0x40000,
-    kVerifyError           = 0x80000,
+    kVerifyNone            = 0x000000,
+    kVerifyRegA            = 0x000001,
+    kVerifyRegAWide        = 0x000002,
+    kVerifyRegB            = 0x000004,
+    kVerifyRegBField       = 0x000008,
+    kVerifyRegBMethod      = 0x000010,
+    kVerifyRegBNewInstance = 0x000020,
+    kVerifyRegBString      = 0x000040,
+    kVerifyRegBType        = 0x000080,
+    kVerifyRegBWide        = 0x000100,
+    kVerifyRegC            = 0x000200,
+    kVerifyRegCField       = 0x000400,
+    kVerifyRegCNewArray    = 0x000800,
+    kVerifyRegCType        = 0x001000,
+    kVerifyRegCWide        = 0x002000,
+    kVerifyArrayData       = 0x004000,
+    kVerifyBranchTarget    = 0x008000,
+    kVerifySwitchTargets   = 0x010000,
+    kVerifyVarArg          = 0x020000,
+    kVerifyVarArgRange     = 0x040000,
+    kVerifyRuntimeOnly     = 0x080000,  // Opcode only expected at runtime, not in the compiler.
+    kVerifyError           = 0x100000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
@@ -493,18 +494,23 @@
   }
 
   int GetVerifyTypeArgumentB() const {
-    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegB | kVerifyRegBField | kVerifyRegBMethod |
-             kVerifyRegBNewInstance | kVerifyRegBString | kVerifyRegBType | kVerifyRegBWide));
+    return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegB | kVerifyRegBField |
+        kVerifyRegBMethod | kVerifyRegBNewInstance | kVerifyRegBString | kVerifyRegBType |
+        kVerifyRegBWide));
   }
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-             kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
   }
 
   int GetVerifyExtraFlags() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyArrayData | kVerifyBranchTarget |
-             kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError));
+        kVerifySwitchTargets | kVerifyVarArg | kVerifyVarArgRange | kVerifyError));
+  }
+
+  bool GetVerifyIsRuntimeOnly() const {
+    return (kInstructionVerifyFlags[Opcode()] & kVerifyRuntimeOnly) != 0;
   }
 
   // Get the dex PC of this instruction as a offset in code units from the beginning of insns.
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index f43e42f..4cda58b 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -245,14 +245,14 @@
   V(0xE0, SHL_INT_LIT8, "shl-int/lit8", k22b, true, kNone, kContinue | kShl | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
   V(0xE1, SHR_INT_LIT8, "shr-int/lit8", k22b, true, kNone, kContinue | kShr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
   V(0xE2, USHR_INT_LIT8, "ushr-int/lit8", k22b, true, kNone, kContinue | kUshr | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB) \
-  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB) \
-  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg) \
-  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange) \
+  V(0xE3, IGET_QUICK, "iget-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE4, IGET_WIDE_QUICK, "iget-wide-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE5, IGET_OBJECT_QUICK, "iget-object-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE6, IPUT_QUICK, "iput-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE7, IPUT_WIDE_QUICK, "iput-wide-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegAWide | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE8, IPUT_OBJECT_QUICK, "iput-object-quick", k22c, false, kFieldRef, kContinue | kThrow | kStore | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
+  V(0xE9, INVOKE_VIRTUAL_QUICK, "invoke-virtual-quick", k35c, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArg | kVerifyRuntimeOnly) \
+  V(0xEA, INVOKE_VIRTUAL_RANGE_QUICK, "invoke-virtual/range-quick", k3rc, false, kMethodRef, kContinue | kThrow | kInvoke, kVerifyVarArgRange | kVerifyRuntimeOnly) \
   V(0xEB, UNUSED_EB, "unused-eb", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xEC, UNUSED_EC, "unused-ec", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xED, UNUSED_ED, "unused-ed", k10x, false, kUnknown, 0, kVerifyError) \
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 7e3810c..9512a5a 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -340,16 +340,7 @@
   if (Runtime::Current()->GetProfilerOptions().IsEnabled() && (pkgname != nullptr)) {
     const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */)
         + std::string("/") + pkgname;
-    const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache",
-                                                              false /* create_if_absent */);
-
-    // Make the profile cache if it doesn't exist.
-    mkdir(profile_cache_dir.c_str(), 0700);
-
-    // The previous profile file (a copy of the profile the last time this was run) is
-    // in the dalvik-cache directory because this is owned by system.  The profiles
-    // directory is owned by install so system cannot write files in there.
-    std::string prev_profile_file = profile_cache_dir + std::string("/") + pkgname;
+    const std::string prev_profile_file = profile_file + std::string("@old");
 
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ccf478c..717381c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -574,7 +574,7 @@
       break;
   }
 
-  if (implicit_checks_supported &&
+  if (!options->interpreter_only_ && implicit_checks_supported &&
       (options->explicit_checks_ != (ParsedOptions::kExplicitSuspendCheck |
           ParsedOptions::kExplicitNullCheck |
           ParsedOptions::kExplicitStackOverflowCheck) || kEnableJavaStackTraceHandler)) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index e5dcbb0..1d04151 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -731,6 +731,10 @@
       result = false;
       break;
   }
+  if (inst->GetVerifyIsRuntimeOnly() && Runtime::Current()->IsCompiler()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "opcode only expected at runtime " << inst->Name();
+    result = false;
+  }
   return result;
 }
 
@@ -2135,8 +2139,8 @@
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
       bool is_super = (inst->Opcode() == Instruction::INVOKE_SUPER ||
                        inst->Opcode() == Instruction::INVOKE_SUPER_RANGE);
-      mirror::ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL,
-                                                                   is_range, is_super);
+      mirror::ArtMethod* called_method = VerifyInvocationArgs(inst, METHOD_VIRTUAL, is_range,
+                                                              is_super);
       const RegType* return_type = nullptr;
       if (called_method != nullptr) {
         Thread* self = Thread::Current();
@@ -3009,6 +3013,26 @@
   // Resolve the method. This could be an abstract or concrete method depending on what sort of call
   // we're making.
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+
+  // As the method may not have been resolved, make this static check against what we expect.
+  const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
+  uint32_t shorty_idx = dex_file_->GetProtoId(method_id.proto_idx_).shorty_idx_;
+  uint32_t shorty_len;
+  const char* descriptor = dex_file_->StringDataAndUtf16LengthByIdx(shorty_idx, &shorty_len);
+  int32_t sig_registers = method_type == METHOD_STATIC ? 0 : 1;
+  for (size_t i = 1; i < shorty_len; i++) {
+    if (descriptor[i] == 'J' || descriptor[i] == 'D') {
+      sig_registers += 2;
+    } else {
+      sig_registers++;
+    }
+  }
+  if (inst->VRegA() != sig_registers) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, expected " << inst->VRegA() <<
+        " arguments, found " << sig_registers;
+    return nullptr;
+  }
+
   mirror::ArtMethod* res_method = ResolveMethodAndCheckAccess(method_idx, method_type);
   if (res_method == NULL) {  // error or class is unresolved
     return NULL;
diff --git a/test/404-optimizing-allocator/src/Main.java b/test/404-optimizing-allocator/src/Main.java
index 60477f9..7b31820 100644
--- a/test/404-optimizing-allocator/src/Main.java
+++ b/test/404-optimizing-allocator/src/Main.java
@@ -27,7 +27,8 @@
     expectEquals(10, $opt$reg$TestMultipleLive());
     expectEquals(1, $opt$reg$TestWithBreakAndContinue());
     expectEquals(-15, $opt$reg$testSpillInIf(5, 6, 7));
-    expectEquals(-567, $opt$reg$TestAgressiveLive(1, 2, 3, 4, 5, 6, 7));
+    expectEquals(-567, $opt$reg$TestAgressiveLive1(1, 2, 3, 4, 5, 6, 7));
+    expectEquals(-77, $opt$reg$TestAgressiveLive2(1, 2, 3, 4, 5, 6, 7));
   }
 
   public static int $opt$reg$TestLostCopy() {
@@ -125,7 +126,7 @@
     return a - b - c - d - e;
   }
 
-  public static int $opt$reg$TestAgressiveLive(int a, int b, int c, int d, int e, int f, int g) {
+  public static int $opt$reg$TestAgressiveLive1(int a, int b, int c, int d, int e, int f, int g) {
     int h = a - b;
     int i = c - d;
     int j = e - f;
@@ -146,6 +147,17 @@
     return a - b - c - d - e - f - g - h - i - j - k;
   }
 
+  public static int $opt$reg$TestAgressiveLive2(int a, int b, int c, int d, int e, int f, int g) {
+    int h = a - b;
+    int i = c - d;
+    int j = e - f;
+    int k = 42 + g - a;
+    do {  // h changes in the loop while a..g and i..k are still needed by the return below.
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
   public static void expectEquals(int expected, int value) {
     if (expected != value) {
       throw new Error("Expected: " + expected + ", got: " + value);