Upgrade to 3.29

Update V8 to 3.29.88.17 and update makefiles to support building on
all the relevant platforms.

Bug: 17370214

Change-Id: Ia3407c157fd8d72a93e23d8318ccaf6ecf77fa4e
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index ec28da4..96f28f9 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -32,96 +32,187 @@
 
 // The original source code covered by the above license has been
 // modified significantly by Google Inc.
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
 
-#include "v8.h"
+#include "src/v8.h"
 
-#if defined(V8_TARGET_ARCH_ARM)
+#if V8_TARGET_ARCH_ARM
 
-#include "arm/assembler-arm-inl.h"
-#include "serialize.h"
+#include "src/arm/assembler-arm-inl.h"
+#include "src/base/bits.h"
+#include "src/base/cpu.h"
+#include "src/macro-assembler.h"
+#include "src/serialize.h"
 
 namespace v8 {
 namespace internal {
 
-#ifdef DEBUG
-bool CpuFeatures::initialized_ = false;
-#endif
-unsigned CpuFeatures::supported_ = 0;
-unsigned CpuFeatures::found_by_runtime_probing_ = 0;
-
-
 // Get the CPU features enabled by the build. For cross compilation the
-// preprocessor symbols CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP_INSTRUCTIONS
+// preprocessor symbols CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS
 // can be defined to enable ARMv7 and VFPv3 instructions when building the
 // snapshot.
-static uint64_t CpuFeaturesImpliedByCompiler() {
-  uint64_t answer = 0;
+static unsigned CpuFeaturesImpliedByCompiler() {
+  unsigned answer = 0;
 #ifdef CAN_USE_ARMV7_INSTRUCTIONS
-  answer |= 1u << ARMv7;
-#endif  // def CAN_USE_ARMV7_INSTRUCTIONS
-#ifdef CAN_USE_VFP_INSTRUCTIONS
-  answer |= 1u << VFP3 | 1u << ARMv7;
-#endif  // def CAN_USE_VFP_INSTRUCTIONS
-
-#ifdef __arm__
-  // If the compiler is allowed to use VFP then we can use VFP too in our code
-  // generation even when generating snapshots. ARMv7 and hardware floating
-  // point support implies VFPv3, see ARM DDI 0406B, page A1-6.
-#if defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(__VFP_FP__) \
-    && !defined(__SOFTFP__)
-  answer |= 1u << VFP3 | 1u << ARMv7;
-#endif  // defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(__VFP_FP__)
-        // && !defined(__SOFTFP__)
-#endif  // def __arm__
+  if (FLAG_enable_armv7) answer |= 1u << ARMv7;
+#endif  // CAN_USE_ARMV7_INSTRUCTIONS
+#ifdef CAN_USE_VFP3_INSTRUCTIONS
+  if (FLAG_enable_vfp3) answer |= 1u << VFP3 | 1u << ARMv7;
+#endif  // CAN_USE_VFP3_INSTRUCTIONS
+#ifdef CAN_USE_VFP32DREGS
+  if (FLAG_enable_32dregs) answer |= 1u << VFP32DREGS;
+#endif  // CAN_USE_VFP32DREGS
+#ifdef CAN_USE_NEON
+  if (FLAG_enable_neon) answer |= 1u << NEON;
+#endif  // CAN_USE_NEON
+  if ((answer & (1u << ARMv7)) && FLAG_enable_unaligned_accesses) {
+    answer |= 1u << UNALIGNED_ACCESSES;
+  }
 
   return answer;
 }
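
The returned value is a bitmask indexed by the CpuFeature enum. A minimal
consumption sketch (illustration only, not part of the patch):

  unsigned features = CpuFeaturesImpliedByCompiler();
  // VFP3 support implies ARMv7 support (see the OR of both bits above).
  bool vfp3 = (features & (1u << VFP3)) != 0;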
 
 
-void CpuFeatures::Probe() {
-  unsigned standard_features = (OS::CpuFeaturesImpliedByPlatform() |
-                                CpuFeaturesImpliedByCompiler());
-  ASSERT(supported_ == 0 || supported_ == standard_features);
-#ifdef DEBUG
-  initialized_ = true;
-#endif
+void CpuFeatures::ProbeImpl(bool cross_compile) {
+  supported_ |= CpuFeaturesImpliedByCompiler();
+  cache_line_size_ = 64;
 
-  // Get the features implied by the OS and the compiler settings. This is the
-  // minimal set of features which is also alowed for generated code in the
-  // snapshot.
-  supported_ |= standard_features;
-
-  if (Serializer::enabled()) {
-    // No probing for features if we might serialize (generate snapshot).
-    return;
-  }
+  // Only use statically determined features for cross compile (snapshot).
+  if (cross_compile) return;
 
 #ifndef __arm__
-  // For the simulator=arm build, use VFP when FLAG_enable_vfp3 is
-  // enabled. VFPv3 implies ARMv7, see ARM DDI 0406B, page A1-6.
-  if (FLAG_enable_vfp3) {
-    supported_ |= 1u << VFP3 | 1u << ARMv7;
-  }
-  // For the simulator=arm build, use ARMv7 when FLAG_enable_armv7 is enabled
+  // For the simulator build, use whatever the flags specify.
   if (FLAG_enable_armv7) {
     supported_ |= 1u << ARMv7;
+    if (FLAG_enable_vfp3) supported_ |= 1u << VFP3;
+    if (FLAG_enable_neon) supported_ |= 1u << NEON | 1u << VFP32DREGS;
+    if (FLAG_enable_sudiv) supported_ |= 1u << SUDIV;
+    if (FLAG_enable_movw_movt) supported_ |= 1u << MOVW_MOVT_IMMEDIATE_LOADS;
+    if (FLAG_enable_32dregs) supported_ |= 1u << VFP32DREGS;
   }
-#else  // def __arm__
-  // Probe for additional features not already known to be available.
-  if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
+  if (FLAG_enable_mls) supported_ |= 1u << MLS;
+  if (FLAG_enable_unaligned_accesses) supported_ |= 1u << UNALIGNED_ACCESSES;
+
+#else  // __arm__
+  // Probe for additional features at runtime.
+  base::CPU cpu;
+  if (FLAG_enable_vfp3 && cpu.has_vfp3()) {
     // This implementation also sets the VFP flags if runtime
     // detection of VFP returns true. VFPv3 implies ARMv7, see ARM DDI
     // 0406B, page A1-6.
     supported_ |= 1u << VFP3 | 1u << ARMv7;
-    found_by_runtime_probing_ |= 1u << VFP3 | 1u << ARMv7;
   }
 
-  if (!IsSupported(ARMv7) && OS::ArmCpuHasFeature(ARMv7)) {
-    supported_ |= 1u << ARMv7;
-    found_by_runtime_probing_ |= 1u << ARMv7;
+  if (FLAG_enable_neon && cpu.has_neon()) supported_ |= 1u << NEON;
+  if (FLAG_enable_sudiv && cpu.has_idiva()) supported_ |= 1u << SUDIV;
+  if (FLAG_enable_mls && cpu.has_thumb2()) supported_ |= 1u << MLS;
+
+  if (cpu.architecture() >= 7) {
+    if (FLAG_enable_armv7) supported_ |= 1u << ARMv7;
+    if (FLAG_enable_unaligned_accesses) supported_ |= 1u << UNALIGNED_ACCESSES;
+    // Use movw/movt for QUALCOMM ARMv7 cores.
+    if (FLAG_enable_movw_movt && cpu.implementer() == base::CPU::QUALCOMM) {
+      supported_ |= 1u << MOVW_MOVT_IMMEDIATE_LOADS;
+    }
   }
+
+  // ARM Cortex-A9 and Cortex-A5 have 32-byte cache lines.
+  if (cpu.implementer() == base::CPU::ARM &&
+      (cpu.part() == base::CPU::ARM_CORTEX_A5 ||
+       cpu.part() == base::CPU::ARM_CORTEX_A9)) {
+    cache_line_size_ = 32;
+  }
+
+  if (FLAG_enable_32dregs && cpu.has_vfp3_d32()) supported_ |= 1u << VFP32DREGS;
 #endif
+
+  DCHECK(!IsSupported(VFP3) || IsSupported(ARMv7));
+}
+
+
+void CpuFeatures::PrintTarget() {
+  const char* arm_arch = NULL;
+  const char* arm_target_type = "";
+  const char* arm_no_probe = "";
+  const char* arm_fpu = "";
+  const char* arm_thumb = "";
+  const char* arm_float_abi = NULL;
+
+#if !defined __arm__
+  arm_target_type = " simulator";
+#endif
+
+#if defined ARM_TEST_NO_FEATURE_PROBE
+  arm_no_probe = " noprobe";
+#endif
+
+#if defined CAN_USE_ARMV7_INSTRUCTIONS
+  arm_arch = "arm v7";
+#else
+  arm_arch = "arm v6";
+#endif
+
+#if defined CAN_USE_NEON
+  arm_fpu = " neon";
+#elif defined CAN_USE_VFP3_INSTRUCTIONS
+#  if defined CAN_USE_VFP32DREGS
+  arm_fpu = " vfp3";
+#  else
+  arm_fpu = " vfp3-d16";
+#  endif
+#else
+  arm_fpu = " vfp2";
+#endif
+
+#ifdef __arm__
+  arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
+#elif USE_EABI_HARDFLOAT
+  arm_float_abi = "hard";
+#else
+  arm_float_abi = "softfp";
+#endif
+
+#if defined __arm__ && ((defined __thumb__) || (defined __thumb2__))
+  arm_thumb = " thumb";
+#endif
+
+  printf("target%s%s %s%s%s %s\n",
+         arm_target_type, arm_no_probe, arm_arch, arm_fpu, arm_thumb,
+         arm_float_abi);
+}
+
+
+void CpuFeatures::PrintFeatures() {
+  printf(
+    "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d "
+    "MOVW_MOVT_IMMEDIATE_LOADS=%d",
+    CpuFeatures::IsSupported(ARMv7),
+    CpuFeatures::IsSupported(VFP3),
+    CpuFeatures::IsSupported(VFP32DREGS),
+    CpuFeatures::IsSupported(NEON),
+    CpuFeatures::IsSupported(SUDIV),
+    CpuFeatures::IsSupported(UNALIGNED_ACCESSES),
+    CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS));
+#ifdef __arm__
+  bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
+#elif USE_EABI_HARDFLOAT
+  bool eabi_hardfloat = true;
+#else
+  bool eabi_hardfloat = false;
+#endif
+  printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
+}
+
+
+// -----------------------------------------------------------------------------
+// Implementation of DwVfpRegister
+
+const char* DwVfpRegister::AllocationIndexToString(int index) {
+  DCHECK(index >= 0 && index < NumAllocatableRegisters());
+  DCHECK(kScratchDoubleReg.code() - kDoubleRegZero.code() ==
+         kNumReservedRegisters - 1);
+  if (index >= kDoubleRegZero.code()) index += kNumReservedRegisters;
+  return VFPRegisters::Name(index, true);
 }
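
A remapping example, assuming the usual reservations in this file
(kDoubleRegZero == d14, kScratchDoubleReg == d15, kNumReservedRegisters == 2)
and 32 D registers available (a sketch, not part of the patch):

  // Indices 0..13 name d0..d13; the two reserved registers are skipped,
  // so allocation index 14 resolves to "d16".
  const char* name = DwVfpRegister::AllocationIndexToString(14);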
 
 
@@ -132,10 +223,16 @@
 
 
 bool RelocInfo::IsCodedSpecially() {
-  // The deserializer needs to know whether a pointer is specially coded.  Being
-  // specially coded on ARM means that it is a movw/movt instruction.  We don't
-  // generate those yet.
-  return false;
+  // The deserializer needs to know whether a pointer is specially coded.  Being
+  // specially coded on ARM means that it is a movw/movt instruction, or is an
+  // out of line constant pool entry.  These only occur if
+  // FLAG_enable_ool_constant_pool is true.
+  return FLAG_enable_ool_constant_pool;
+}
+
+
+bool RelocInfo::IsInConstantPool() {
+  return Assembler::is_constant_pool_load(pc_);
 }
 
 
@@ -148,7 +245,7 @@
   }
 
   // Indicate that code has changed.
-  CPU::FlushICache(pc_, instruction_count * Assembler::kInstrSize);
+  CpuFeatures::FlushICache(pc_, instruction_count * Assembler::kInstrSize);
 }
 
 
@@ -165,31 +262,37 @@
 // See assembler-arm-inl.h for inlined constructors
 
 Operand::Operand(Handle<Object> handle) {
+  AllowDeferredHandleDereference using_raw_address;
   rm_ = no_reg;
   // Verify all Objects referred by code are NOT in new space.
   Object* obj = *handle;
-  ASSERT(!HEAP->InNewSpace(obj));
   if (obj->IsHeapObject()) {
+    DCHECK(!HeapObject::cast(obj)->GetHeap()->InNewSpace(obj));
     imm32_ = reinterpret_cast<intptr_t>(handle.location());
     rmode_ = RelocInfo::EMBEDDED_OBJECT;
   } else {
     // no relocation needed
-    imm32_ =  reinterpret_cast<intptr_t>(obj);
-    rmode_ = RelocInfo::NONE;
+    imm32_ = reinterpret_cast<intptr_t>(obj);
+    rmode_ = RelocInfo::NONE32;
   }
 }
 
 
 Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
-  ASSERT(is_uint5(shift_imm));
-  ASSERT(shift_op != ROR || shift_imm != 0);  // use RRX if you mean it
+  DCHECK(is_uint5(shift_imm));
+
   rm_ = rm;
   rs_ = no_reg;
   shift_op_ = shift_op;
   shift_imm_ = shift_imm & 31;
-  if (shift_op == RRX) {
+
+  if ((shift_op == ROR) && (shift_imm == 0)) {
+    // ROR #0 is functionally equivalent to LSL #0 and this allows us to
+    // encode RRX as ROR #0 (see below).
+    shift_op = LSL;
+  } else if (shift_op == RRX) {
     // encoded as ROR with shift_imm == 0
-    ASSERT(shift_imm == 0);
+    DCHECK(shift_imm == 0);
     shift_op_ = ROR;
     shift_imm_ = 0;
   }
@@ -197,7 +300,7 @@
 
 
 Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
-  ASSERT(shift_op != RRX);
+  DCHECK(shift_op != RRX);
   rm_ = rm;
   rs_ = no_reg;
   shift_op_ = shift_op;
@@ -212,6 +315,7 @@
   am_ = am;
 }
 
+
 MemOperand::MemOperand(Register rn, Register rm, AddrMode am) {
   rn_ = rn;
   rm_ = rm;
@@ -223,7 +327,7 @@
 
 MemOperand::MemOperand(Register rn, Register rm,
                        ShiftOp shift_op, int shift_imm, AddrMode am) {
-  ASSERT(is_uint5(shift_imm));
+  DCHECK(is_uint5(shift_imm));
   rn_ = rn;
   rm_ = rm;
   shift_op_ = shift_op;
@@ -232,13 +336,69 @@
 }
 
 
+NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align) {
+  DCHECK((am == Offset) || (am == PostIndex));
+  rn_ = rn;
+  rm_ = (am == Offset) ? pc : sp;
+  SetAlignment(align);
+}
+
+
+NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align) {
+  rn_ = rn;
+  rm_ = rm;
+  SetAlignment(align);
+}
+
+
+void NeonMemOperand::SetAlignment(int align) {
+  switch (align) {
+    case 0:
+      align_ = 0;
+      break;
+    case 64:
+      align_ = 1;
+      break;
+    case 128:
+      align_ = 2;
+      break;
+    case 256:
+      align_ = 3;
+      break;
+    default:
+      UNREACHABLE();
+      align_ = 0;
+      break;
+  }
+}
+
+
+NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
+  base_ = base;
+  switch (registers_count) {
+    case 1:
+      type_ = nlt_1;
+      break;
+    case 2:
+      type_ = nlt_2;
+      break;
+    case 3:
+      type_ = nlt_3;
+      break;
+    case 4:
+      type_ = nlt_4;
+      break;
+    default:
+      UNREACHABLE();
+      type_ = nlt_1;
+      break;
+  }
+}
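
A usage sketch combining the two NEON operand helpers (hypothetical
registers, not part of the patch):

  // vld1.8 {d0, d1}, [r0:128]!  -- two D registers, 128-bit aligned base,
  // post-incremented by the transfer size.
  NeonMemOperand src(r0, PostIndex, 128);  // align_ == 2
  NeonListOperand dst(d0, 2);              // type_ == nlt_2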
+
+
 // -----------------------------------------------------------------------------
 // Specific instructions, constants, and masks.
 
-// add(sp, sp, 4) instruction (aka Pop())
-const Instr kPopInstruction =
-    al | PostIndex | 4 | LeaveCC | I | kRegister_sp_Code * B16 |
-        kRegister_sp_Code * B12;
 // str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
 // register r is not encoded.
 const Instr kPushRegPattern =
@@ -247,11 +407,21 @@
 // register r is not encoded.
 const Instr kPopRegPattern =
     al | B26 | L | 4 | PostIndex | kRegister_sp_Code * B16;
-// mov lr, pc
-const Instr kMovLrPc = al | MOV | kRegister_pc_Code | kRegister_lr_Code * B12;
 // ldr rd, [pc, #offset]
-const Instr kLdrPCMask = kCondMask | 15 * B24 | 7 * B20 | 15 * B16;
-const Instr kLdrPCPattern = al | 5 * B24 | L | kRegister_pc_Code * B16;
+const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
+const Instr kLdrPCImmedPattern = 5 * B24 | L | kRegister_pc_Code * B16;
+// ldr rd, [pp, #offset]
+const Instr kLdrPpImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
+const Instr kLdrPpImmedPattern = 5 * B24 | L | kRegister_r8_Code * B16;
+// ldr rd, [pp, rn]
+const Instr kLdrPpRegMask = 15 * B24 | 7 * B20 | 15 * B16;
+const Instr kLdrPpRegPattern = 7 * B24 | L | kRegister_r8_Code * B16;
+// vldr dd, [pc, #offset]
+const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
+const Instr kVldrDPCPattern = 13 * B24 | L | kRegister_pc_Code * B16 | 11 * B8;
+// vldr dd, [pp, #offset]
+const Instr kVldrDPpMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
+const Instr kVldrDPpPattern = 13 * B24 | L | kRegister_r8_Code * B16 | 11 * B8;
 // blxcc rm
 const Instr kBlxRegMask =
     15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
@@ -263,9 +433,13 @@
 const Instr kMovMvnFlip = B22;
 const Instr kMovLeaveCCMask = 0xdff * B16;
 const Instr kMovLeaveCCPattern = 0x1a0 * B16;
-const Instr kMovwMask = 0xff * B20;
 const Instr kMovwPattern = 0x30 * B20;
+const Instr kMovtPattern = 0x34 * B20;
 const Instr kMovwLeaveCCFlip = 0x5 * B21;
+const Instr kMovImmedMask = 0x7f * B21;
+const Instr kMovImmedPattern = 0x1d * B21;
+const Instr kOrrImmedMask = 0x7f * B21;
+const Instr kOrrImmedPattern = 0x1c * B21;
 const Instr kCmpCmnMask = 0xdd * B20 | 0xf * B12;
 const Instr kCmpCmnPattern = 0x15 * B20;
 const Instr kCmpCmnFlip = B21;
@@ -282,86 +456,50 @@
 const Instr kStrRegFpNegOffsetPattern =
     al | B26 | NegOffset | kRegister_fp_Code * B16;
 const Instr kLdrStrInstrTypeMask = 0xffff0000;
-const Instr kLdrStrInstrArgumentMask = 0x0000ffff;
-const Instr kLdrStrOffsetMask = 0x00000fff;
 
 
-// Spare buffer.
-static const int kMinimalBufferSize = 4*KB;
-
-
-Assembler::Assembler(Isolate* arg_isolate, void* buffer, int buffer_size)
-    : AssemblerBase(arg_isolate),
-      positions_recorder_(this),
-      emit_debug_code_(FLAG_debug_code) {
-  if (buffer == NULL) {
-    // Do our own buffer management.
-    if (buffer_size <= kMinimalBufferSize) {
-      buffer_size = kMinimalBufferSize;
-
-      if (isolate()->assembler_spare_buffer() != NULL) {
-        buffer = isolate()->assembler_spare_buffer();
-        isolate()->set_assembler_spare_buffer(NULL);
-      }
-    }
-    if (buffer == NULL) {
-      buffer_ = NewArray<byte>(buffer_size);
-    } else {
-      buffer_ = static_cast<byte*>(buffer);
-    }
-    buffer_size_ = buffer_size;
-    own_buffer_ = true;
-
-  } else {
-    // Use externally provided buffer instead.
-    ASSERT(buffer_size > 0);
-    buffer_ = static_cast<byte*>(buffer);
-    buffer_size_ = buffer_size;
-    own_buffer_ = false;
-  }
-
-  // Set up buffer pointers.
-  ASSERT(buffer_ != NULL);
-  pc_ = buffer_;
-  reloc_info_writer.Reposition(buffer_ + buffer_size, pc_);
-  num_pending_reloc_info_ = 0;
+Assembler::Assembler(Isolate* isolate, void* buffer, int buffer_size)
+    : AssemblerBase(isolate, buffer, buffer_size),
+      recorded_ast_id_(TypeFeedbackId::None()),
+      constant_pool_builder_(),
+      positions_recorder_(this) {
+  reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
+  num_pending_32_bit_reloc_info_ = 0;
+  num_pending_64_bit_reloc_info_ = 0;
   next_buffer_check_ = 0;
   const_pool_blocked_nesting_ = 0;
   no_const_pool_before_ = 0;
-  first_const_pool_use_ = -1;
+  first_const_pool_32_use_ = -1;
+  first_const_pool_64_use_ = -1;
   last_bound_pos_ = 0;
+  constant_pool_available_ = !FLAG_enable_ool_constant_pool;
   ClearRecordedAstId();
 }
 
 
 Assembler::~Assembler() {
-  ASSERT(const_pool_blocked_nesting_ == 0);
-  if (own_buffer_) {
-    if (isolate()->assembler_spare_buffer() == NULL &&
-        buffer_size_ == kMinimalBufferSize) {
-      isolate()->set_assembler_spare_buffer(buffer_);
-    } else {
-      DeleteArray(buffer_);
-    }
-  }
+  DCHECK(const_pool_blocked_nesting_ == 0);
 }
 
 
 void Assembler::GetCode(CodeDesc* desc) {
-  // Emit constant pool if necessary.
-  CheckConstPool(true, false);
-  ASSERT(num_pending_reloc_info_ == 0);
-
+  if (!FLAG_enable_ool_constant_pool) {
+    // Emit constant pool if necessary.
+    CheckConstPool(true, false);
+    DCHECK(num_pending_32_bit_reloc_info_ == 0);
+    DCHECK(num_pending_64_bit_reloc_info_ == 0);
+  }
   // Set up code descriptor.
   desc->buffer = buffer_;
   desc->buffer_size = buffer_size_;
   desc->instr_size = pc_offset();
   desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
+  desc->origin = this;
 }
 
 
 void Assembler::Align(int m) {
-  ASSERT(m >= 4 && IsPowerOf2(m));
+  DCHECK(m >= 4 && base::bits::IsPowerOfTwo32(m));
   while ((pc_offset() & (m - 1)) != 0) {
     nop();
   }
@@ -385,7 +523,7 @@
 
 
 int Assembler::GetBranchOffset(Instr instr) {
-  ASSERT(IsBranch(instr));
+  DCHECK(IsBranch(instr));
   // Take the jump offset in the lower 24 bits, sign extend it and multiply it
   // with 4 to get the offset in bytes.
   return ((instr & kImm24Mask) << 8) >> 6;
@@ -397,19 +535,33 @@
 }
 
 
+bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
+  return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
+}
+
+
 int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
-  ASSERT(IsLdrRegisterImmediate(instr));
+  DCHECK(IsLdrRegisterImmediate(instr));
   bool positive = (instr & B23) == B23;
   int offset = instr & kOff12Mask;  // Zero extended offset.
   return positive ? offset : -offset;
 }
 
 
+int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
+  DCHECK(IsVldrDRegisterImmediate(instr));
+  bool positive = (instr & B23) == B23;
+  int offset = instr & kOff8Mask;  // Zero extended offset.
+  offset <<= 2;
+  return positive ? offset : -offset;
+}
+
+
 Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
-  ASSERT(IsLdrRegisterImmediate(instr));
+  DCHECK(IsLdrRegisterImmediate(instr));
   bool positive = offset >= 0;
   if (!positive) offset = -offset;
-  ASSERT(is_uint12(offset));
+  DCHECK(is_uint12(offset));
   // Set bit indicating whether the offset should be added.
   instr = (instr & ~B23) | (positive ? B23 : 0);
   // Set the actual offset.
@@ -417,16 +569,29 @@
 }
 
 
+Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
+  DCHECK(IsVldrDRegisterImmediate(instr));
+  DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
+  bool positive = offset >= 0;
+  if (!positive) offset = -offset;
+  DCHECK(is_uint10(offset));
+  // Set bit indicating whether the offset should be added.
+  instr = (instr & ~B23) | (positive ? B23 : 0);
+  // Set the actual offset. Its bottom 2 bits are zero.
+  return (instr & ~kOff8Mask) | (offset >> 2);
+}
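
For example (sketch): an offset of -24 clears the U bit (B23) and stores
24 >> 2 == 6 in the low eight bits; GetVldrDRegisterImmediateOffset reverses
this, yielding -24 again.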
+
+
 bool Assembler::IsStrRegisterImmediate(Instr instr) {
   return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
 }
 
 
 Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
-  ASSERT(IsStrRegisterImmediate(instr));
+  DCHECK(IsStrRegisterImmediate(instr));
   bool positive = offset >= 0;
   if (!positive) offset = -offset;
-  ASSERT(is_uint12(offset));
+  DCHECK(is_uint12(offset));
   // Set bit indicating whether the offset should be added.
   instr = (instr & ~B23) | (positive ? B23 : 0);
   // Set the actual offset.
@@ -440,9 +605,9 @@
 
 
 Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
-  ASSERT(IsAddRegisterImmediate(instr));
-  ASSERT(offset >= 0);
-  ASSERT(is_uint12(offset));
+  DCHECK(IsAddRegisterImmediate(instr));
+  DCHECK(offset >= 0);
+  DCHECK(is_uint12(offset));
   // Set the offset.
   return (instr & ~kOff12Mask) | offset;
 }
@@ -469,6 +634,24 @@
 }
 
 
+Instr Assembler::GetConsantPoolLoadPattern() {
+  if (FLAG_enable_ool_constant_pool) {
+    return kLdrPpImmedPattern;
+  } else {
+    return kLdrPCImmedPattern;
+  }
+}
+
+
+Instr Assembler::GetConsantPoolLoadMask() {
+  if (FLAG_enable_ool_constant_pool) {
+    return kLdrPpImmedMask;
+  } else {
+    return kLdrPCImmedMask;
+  }
+}
+
+
 bool Assembler::IsPush(Instr instr) {
   return ((instr & ~kRdMask) == kPushRegPattern);
 }
@@ -502,7 +685,52 @@
 bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
   // Check the instruction is indeed a
   // ldr<cond> <Rd>, [pc +/- offset_12].
-  return (instr & (kLdrPCMask & ~kCondMask)) == 0x051f0000;
+  return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
+}
+
+
+bool Assembler::IsLdrPpImmediateOffset(Instr instr) {
+  // Check the instruction is indeed a
+  // ldr<cond> <Rd>, [pp +/- offset_12].
+  return (instr & kLdrPpImmedMask) == kLdrPpImmedPattern;
+}
+
+
+bool Assembler::IsLdrPpRegOffset(Instr instr) {
+  // Check the instruction is indeed a
+  // ldr<cond> <Rd>, [pp, +/- <Rm>].
+  return (instr & kLdrPpRegMask) == kLdrPpRegPattern;
+}
+
+
+Instr Assembler::GetLdrPpRegOffsetPattern() { return kLdrPpRegPattern; }
+
+
+bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
+  // Check the instruction is indeed a
+  // vldr<cond> <Dd>, [pc +/- offset_10].
+  return (instr & kVldrDPCMask) == kVldrDPCPattern;
+}
+
+
+bool Assembler::IsVldrDPpImmediateOffset(Instr instr) {
+  // Check the instruction is indeed a
+  // vldr<cond> <Dd>, [pp +/- offset_10].
+  return (instr & kVldrDPpMask) == kVldrDPpPattern;
+}
+
+
+bool Assembler::IsBlxReg(Instr instr) {
+  // Check the instruction is indeed a
+  // blxcc <Rm>
+  return (instr & kBlxRegMask) == kBlxRegPattern;
+}
+
+
+bool Assembler::IsBlxIp(Instr instr) {
+  // Check the instruction is indeed a
+  // blx ip
+  return instr == kBlxIp;
 }
 
 
@@ -525,16 +753,17 @@
 
 
 Register Assembler::GetCmpImmediateRegister(Instr instr) {
-  ASSERT(IsCmpImmediate(instr));
+  DCHECK(IsCmpImmediate(instr));
   return GetRn(instr);
 }
 
 
 int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
-  ASSERT(IsCmpImmediate(instr));
+  DCHECK(IsCmpImmediate(instr));
   return instr & kOff12Mask;
 }
 
+
 // Labels refer to positions in the (to be) generated code.
 // There are bound, linked, and unused labels.
 //
@@ -544,19 +773,22 @@
 // Linked labels refer to unknown positions in the code
 // to be generated; pos() is the position of the last
 // instruction using the label.
+//
+// The linked labels form a link chain by making the branch offset
+// in the instruction stream point to the previous branch
+// instruction using the same label.
+//
+// The link chain is terminated by a branch offset pointing to the
+// same position.
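
Concretely, visiting every use of a label means walking the chain until the
stored offset points at itself (a sketch; it mirrors the termination test in
Assembler::next below):

  for (int pos = L->pos();;) {
    int link = target_at(pos);  // position of the previous use of L
    if (link == pos) break;     // first entry points to itself: end of chain
    pos = link;
  }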
 
 
-// The link chain is terminated by a negative code position (must be aligned)
-const int kEndOfChain = -4;
-
-
-int Assembler::target_at(int pos)  {
+int Assembler::target_at(int pos) {
   Instr instr = instr_at(pos);
-  if ((instr & ~kImm24Mask) == 0) {
-    // Emitted label constant, not part of a branch.
-    return instr - (Code::kHeaderSize - kHeapObjectTag);
+  if (is_uint24(instr)) {
+    // Emitted link to a label, not part of a branch.
+    return instr;
   }
-  ASSERT((instr & 7*B25) == 5*B25);  // b, bl, or blx imm24
+  DCHECK((instr & 7*B25) == 5*B25);  // b, bl, or blx imm24
   int imm26 = ((instr & kImm24Mask) << 8) >> 6;
   if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
       ((instr & B24) != 0)) {
@@ -569,25 +801,86 @@
 
 void Assembler::target_at_put(int pos, int target_pos) {
   Instr instr = instr_at(pos);
-  if ((instr & ~kImm24Mask) == 0) {
-    ASSERT(target_pos == kEndOfChain || target_pos >= 0);
-    // Emitted label constant, not part of a branch.
-    // Make label relative to Code* of generated Code object.
-    instr_at_put(pos, target_pos + (Code::kHeaderSize - kHeapObjectTag));
+  if (is_uint24(instr)) {
+    DCHECK(target_pos == pos || target_pos >= 0);
+    // Emitted link to a label, not part of a branch.
+    // Load the position of the label relative to the generated code object
+    // pointer in a register.
+
+    // Here are the instructions we need to emit:
+    //   For ARMv7: target24 => target16_1:target16_0
+    //      movw dst, #target16_0
+    //      movt dst, #target16_1
+    //   For ARMv6: target24 => target8_2:target8_1:target8_0
+    //      mov dst, #target8_0
+    //      orr dst, dst, #target8_1 << 8
+    //      orr dst, dst, #target8_2 << 16
+
+    // We extract the destination register from the emitted nop instruction.
+    Register dst = Register::from_code(
+        Instruction::RmValue(instr_at(pos + kInstrSize)));
+    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
+    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
+    DCHECK(is_uint24(target24));
+    if (is_uint8(target24)) {
+      // If the target fits in a byte then only patch with a mov
+      // instruction.
+      CodePatcher patcher(reinterpret_cast<byte*>(buffer_ + pos),
+                          1,
+                          CodePatcher::DONT_FLUSH);
+      patcher.masm()->mov(dst, Operand(target24));
+    } else {
+      uint16_t target16_0 = target24 & kImm16Mask;
+      uint16_t target16_1 = target24 >> 16;
+      if (CpuFeatures::IsSupported(ARMv7)) {
+        // Patch with movw/movt.
+        if (target16_1 == 0) {
+          CodePatcher patcher(reinterpret_cast<byte*>(buffer_ + pos),
+                              1,
+                              CodePatcher::DONT_FLUSH);
+          patcher.masm()->movw(dst, target16_0);
+        } else {
+          CodePatcher patcher(reinterpret_cast<byte*>(buffer_ + pos),
+                              2,
+                              CodePatcher::DONT_FLUSH);
+          patcher.masm()->movw(dst, target16_0);
+          patcher.masm()->movt(dst, target16_1);
+        }
+      } else {
+        // Patch with a sequence of mov/orr/orr instructions.
+        uint8_t target8_0 = target16_0 & kImm8Mask;
+        uint8_t target8_1 = target16_0 >> 8;
+        uint8_t target8_2 = target16_1 & kImm8Mask;
+        if (target8_2 == 0) {
+          CodePatcher patcher(reinterpret_cast<byte*>(buffer_ + pos),
+                              2,
+                              CodePatcher::DONT_FLUSH);
+          patcher.masm()->mov(dst, Operand(target8_0));
+          patcher.masm()->orr(dst, dst, Operand(target8_1 << 8));
+        } else {
+          CodePatcher patcher(reinterpret_cast<byte*>(buffer_ + pos),
+                              3,
+                              CodePatcher::DONT_FLUSH);
+          patcher.masm()->mov(dst, Operand(target8_0));
+          patcher.masm()->orr(dst, dst, Operand(target8_1 << 8));
+          patcher.masm()->orr(dst, dst, Operand(target8_2 << 16));
+        }
+      }
+    }
     return;
   }
   int imm26 = target_pos - (pos + kPcLoadDelta);
-  ASSERT((instr & 7*B25) == 5*B25);  // b, bl, or blx imm24
+  DCHECK((instr & 7*B25) == 5*B25);  // b, bl, or blx imm24
   if (Instruction::ConditionField(instr) == kSpecialCondition) {
     // blx uses bit 24 to encode bit 2 of imm26
-    ASSERT((imm26 & 1) == 0);
+    DCHECK((imm26 & 1) == 0);
     instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1)*B24;
   } else {
-    ASSERT((imm26 & 3) == 0);
+    DCHECK((imm26 & 3) == 0);
     instr &= ~kImm24Mask;
   }
   int imm24 = imm26 >> 2;
-  ASSERT(is_int24(imm24));
+  DCHECK(is_int24(imm24));
   instr_at_put(pos, instr | (imm24 & kImm24Mask));
 }
 
@@ -606,7 +899,7 @@
       if ((instr & ~kImm24Mask) == 0) {
         PrintF("value\n");
       } else {
-        ASSERT((instr & 7*B25) == 5*B25);  // b, bl, or blx
+        DCHECK((instr & 7*B25) == 5*B25);  // b, bl, or blx
         Condition cond = Instruction::ConditionField(instr);
         const char* b;
         const char* c;
@@ -651,7 +944,7 @@
 
 
 void Assembler::bind_to(Label* L, int pos) {
-  ASSERT(0 <= pos && pos <= pc_offset());  // must have a valid binding position
+  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
   while (L->is_linked()) {
     int fixup_pos = L->pos();
     next(L);  // call next before overwriting link with target at fixup_pos
@@ -666,51 +959,26 @@
 }
 
 
-void Assembler::link_to(Label* L, Label* appendix) {
-  if (appendix->is_linked()) {
-    if (L->is_linked()) {
-      // Append appendix to L's list.
-      int fixup_pos;
-      int link = L->pos();
-      do {
-        fixup_pos = link;
-        link = target_at(fixup_pos);
-      } while (link > 0);
-      ASSERT(link == kEndOfChain);
-      target_at_put(fixup_pos, appendix->pos());
-    } else {
-      // L is empty, simply use appendix.
-      *L = *appendix;
-    }
-  }
-  appendix->Unuse();  // appendix should not be used anymore
-}
-
-
 void Assembler::bind(Label* L) {
-  ASSERT(!L->is_bound());  // label can only be bound once
+  DCHECK(!L->is_bound());  // label can only be bound once
   bind_to(L, pc_offset());
 }
 
 
 void Assembler::next(Label* L) {
-  ASSERT(L->is_linked());
+  DCHECK(L->is_linked());
   int link = target_at(L->pos());
-  if (link == kEndOfChain) {
+  if (link == L->pos()) {
+    // Branch target points to the same instruction. This is the end of the
+    // link chain.
     L->Unuse();
   } else {
-    ASSERT(link >= 0);
+    DCHECK(link >= 0);
     L->link_to(link);
   }
 }
 
 
-static Instr EncodeMovwImmediate(uint32_t immediate) {
-  ASSERT(immediate < 0x10000);
-  return ((immediate & 0xf000) << 4) | (immediate & 0xfff);
-}
-
-
 // Low-level code emission routines depending on the addressing mode.
 // If this returns true then you have to use the rotate_imm and immed_8
 // that it returns, because it may have already changed the instruction
@@ -739,14 +1007,14 @@
         if (CpuFeatures::IsSupported(ARMv7)) {
           if (imm32 < 0x10000) {
             *instr ^= kMovwLeaveCCFlip;
-            *instr |= EncodeMovwImmediate(imm32);
+            *instr |= Assembler::EncodeMovwImmediate(imm32);
             *rotate_imm = *immed_8 = 0;  // Not used for movw.
             return true;
           }
         }
       }
     } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
-      if (fits_shifter(-imm32, rotate_imm, immed_8, NULL)) {
+      if (fits_shifter(-static_cast<int>(imm32), rotate_imm, immed_8, NULL)) {
         *instr ^= kCmpCmnFlip;
         return true;
       }
@@ -754,7 +1022,7 @@
       Instr alu_insn = (*instr & kALUMask);
       if (alu_insn == ADD ||
           alu_insn == SUB) {
-        if (fits_shifter(-imm32, rotate_imm, immed_8, NULL)) {
+        if (fits_shifter(-static_cast<int>(imm32), rotate_imm, immed_8, NULL)) {
           *instr ^= kAddSubFlip;
           return true;
         }
@@ -775,50 +1043,121 @@
 // if they can be encoded in the ARM's 12 bits of immediate-offset instruction
 // space.  There is no guarantee that the relocated location can be similarly
 // encoded.
-bool Operand::must_use_constant_pool() const {
+bool Operand::must_output_reloc_info(const Assembler* assembler) const {
   if (rmode_ == RelocInfo::EXTERNAL_REFERENCE) {
-#ifdef DEBUG
-    if (!Serializer::enabled()) {
-      Serializer::TooLateToEnableNow();
-    }
-#endif  // def DEBUG
-    return Serializer::enabled();
-  } else if (rmode_ == RelocInfo::NONE) {
+    if (assembler != NULL && assembler->predictable_code_size()) return true;
+    return assembler->serializer_enabled();
+  } else if (RelocInfo::IsNone(rmode_)) {
     return false;
   }
   return true;
 }
 
 
-bool Operand::is_single_instruction(Instr instr) const {
-  if (rm_.is_valid()) return true;
+static bool use_mov_immediate_load(const Operand& x,
+                                   const Assembler* assembler) {
+  if (assembler != NULL && !assembler->is_constant_pool_available()) {
+    return true;
+  } else if (CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS) &&
+             (assembler == NULL || !assembler->predictable_code_size())) {
+    // Prefer movw / movt to constant pool if it is more efficient on the CPU.
+    return true;
+  } else if (x.must_output_reloc_info(assembler)) {
+    // Prefer constant pool if data is likely to be patched.
+    return false;
+  } else {
+    // Otherwise, use immediate load if movw / movt is available.
+    return CpuFeatures::IsSupported(ARMv7);
+  }
+}
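
The branches above amount to the following priority order, assuming a
non-NULL assembler (summary, not part of the patch):

  // 1. constant pool unavailable             -> immediate load (forced)
  // 2. movw/movt fast on this CPU, code size
  //    need not be predictable               -> immediate load (preferred)
  // 3. value needs reloc info (patchable)    -> constant pool
  // 4. otherwise                             -> immediate load iff ARMv7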
+
+
+int Operand::instructions_required(const Assembler* assembler,
+                                   Instr instr) const {
+  if (rm_.is_valid()) return 1;
   uint32_t dummy1, dummy2;
-  if (must_use_constant_pool() ||
+  if (must_output_reloc_info(assembler) ||
       !fits_shifter(imm32_, &dummy1, &dummy2, &instr)) {
     // The immediate operand cannot be encoded as a shifter operand, or use of
-    // constant pool is required. For a mov instruction not setting the
-    // condition code additional instruction conventions can be used.
-    if ((instr & ~kCondMask) == 13*B21) {  // mov, S not set
-      if (must_use_constant_pool() ||
-          !CpuFeatures::IsSupported(ARMv7)) {
-        // mov instruction will be an ldr from constant pool (one instruction).
-        return true;
-      } else {
-        // mov instruction will be a mov or movw followed by movt (two
-        // instructions).
-        return false;
-      }
+    // constant pool is required.  First account for the instructions required
+    // for the constant pool or immediate load.
+    int instructions;
+    if (use_mov_immediate_load(*this, assembler)) {
+      // A movw / movt or mov / orr immediate load.
+      instructions = CpuFeatures::IsSupported(ARMv7) ? 2 : 4;
+    } else if (assembler != NULL && assembler->use_extended_constant_pool()) {
+      // An extended constant pool load.
+      instructions = CpuFeatures::IsSupported(ARMv7) ? 3 : 5;
     } else {
-      // If this is not a mov or mvn instruction there will always an additional
-      // instructions - either mov or ldr. The mov might actually be two
-      // instructions mov or movw followed by movt so including the actual
-      // instruction two or three instructions will be generated.
-      return false;
+      // A small constant pool load.
+      instructions = 1;
     }
+
+    if ((instr & ~kCondMask) != 13 * B21) {  // not a mov, S not set
+      // For a mov or mvn instruction which doesn't set the condition
+      // code, the constant pool or immediate load is enough, otherwise we need
+      // to account for the actual instruction being requested.
+      instructions += 1;
+    }
+    return instructions;
   } else {
     // No use of constant pool and the immediate operand can be encoded as a
     // shifter operand.
-    return true;
+    return 1;
+  }
+}
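
A few worked counts, assuming an ARMv7 target where movw/movt loads are
preferred and no reloc info is needed (sketch):

  //   mov r0, #0x12345678     -> movw + movt                   = 2
  //   add r0, r1, #0x12345678 -> movw + movt into ip, then add = 3
  //   cmp r0, #0xff           -> encodable shifter operand     = 1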
+
+
+void Assembler::move_32_bit_immediate(Register rd,
+                                      const Operand& x,
+                                      Condition cond) {
+  RelocInfo rinfo(pc_, x.rmode_, x.imm32_, NULL);
+  uint32_t imm32 = static_cast<uint32_t>(x.imm32_);
+  if (x.must_output_reloc_info(this)) {
+    RecordRelocInfo(rinfo);
+  }
+
+  if (use_mov_immediate_load(x, this)) {
+    Register target = rd.code() == pc.code() ? ip : rd;
+    if (CpuFeatures::IsSupported(ARMv7)) {
+      if (!FLAG_enable_ool_constant_pool && x.must_output_reloc_info(this)) {
+        // Make sure the movw/movt doesn't get separated.
+        BlockConstPoolFor(2);
+      }
+      movw(target, imm32 & 0xffff, cond);
+      movt(target, imm32 >> 16, cond);
+    } else {
+      DCHECK(FLAG_enable_ool_constant_pool);
+      mov(target, Operand(imm32 & kImm8Mask), LeaveCC, cond);
+      orr(target, target, Operand(imm32 & (kImm8Mask << 8)), LeaveCC, cond);
+      orr(target, target, Operand(imm32 & (kImm8Mask << 16)), LeaveCC, cond);
+      orr(target, target, Operand(imm32 & (kImm8Mask << 24)), LeaveCC, cond);
+    }
+    if (target.code() != rd.code()) {
+      mov(rd, target, LeaveCC, cond);
+    }
+  } else {
+    DCHECK(is_constant_pool_available());
+    ConstantPoolArray::LayoutSection section = ConstantPoolAddEntry(rinfo);
+    if (section == ConstantPoolArray::EXTENDED_SECTION) {
+      DCHECK(FLAG_enable_ool_constant_pool);
+      Register target = rd.code() == pc.code() ? ip : rd;
+      // Emit instructions to load constant pool offset.
+      if (CpuFeatures::IsSupported(ARMv7)) {
+        movw(target, 0, cond);
+        movt(target, 0, cond);
+      } else {
+        mov(target, Operand(0), LeaveCC, cond);
+        orr(target, target, Operand(0), LeaveCC, cond);
+        orr(target, target, Operand(0), LeaveCC, cond);
+        orr(target, target, Operand(0), LeaveCC, cond);
+      }
+      // Load from constant pool at offset.
+      ldr(rd, MemOperand(pp, target), cond);
+    } else {
+      DCHECK(section == ConstantPoolArray::SMALL_SECTION);
+      ldr(rd, MemOperand(FLAG_enable_ool_constant_pool ? pp : pc, 0), cond);
+    }
   }
 }
 
@@ -828,12 +1167,12 @@
                          Register rd,
                          const Operand& x) {
   CheckBuffer();
-  ASSERT((instr & ~(kCondMask | kOpCodeMask | S)) == 0);
+  DCHECK((instr & ~(kCondMask | kOpCodeMask | S)) == 0);
   if (!x.rm_.is_valid()) {
     // Immediate.
     uint32_t rotate_imm;
     uint32_t immed_8;
-    if (x.must_use_constant_pool() ||
+    if (x.must_output_reloc_info(this) ||
         !fits_shifter(x.imm32_, &rotate_imm, &immed_8, &instr)) {
       // The immediate operand cannot be encoded as a shifter operand, so load
       // it first to register ip and change the original instruction to use ip.
@@ -842,25 +1181,9 @@
       CHECK(!rn.is(ip));  // rn should never be ip, or will be trashed
       Condition cond = Instruction::ConditionField(instr);
       if ((instr & ~kCondMask) == 13*B21) {  // mov, S not set
-        if (x.must_use_constant_pool() ||
-            !CpuFeatures::IsSupported(ARMv7)) {
-          RecordRelocInfo(x.rmode_, x.imm32_);
-          ldr(rd, MemOperand(pc, 0), cond);
-        } else {
-          // Will probably use movw, will certainly not use constant pool.
-          mov(rd, Operand(x.imm32_ & 0xffff), LeaveCC, cond);
-          movt(rd, static_cast<uint32_t>(x.imm32_) >> 16, cond);
-        }
+        move_32_bit_immediate(rd, x, cond);
       } else {
-        // If this is not a mov or mvn instruction we may still be able to avoid
-        // a constant pool entry by using mvn or movw.
-        if (!x.must_use_constant_pool() &&
-            (instr & kMovMvnMask) != kMovMvnPattern) {
-          mov(ip, x, LeaveCC, cond);
-        } else {
-          RecordRelocInfo(x.rmode_, x.imm32_);
-          ldr(ip, MemOperand(pc, 0), cond);
-        }
+        mov(ip, x, LeaveCC, cond);
         addrmod1(instr, rn, rd, Operand(ip));
       }
       return;
@@ -871,7 +1194,7 @@
     instr |= x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
   } else {
     // Register shift.
-    ASSERT(!rn.is(pc) && !rd.is(pc) && !x.rm_.is(pc) && !x.rs_.is(pc));
+    DCHECK(!rn.is(pc) && !rd.is(pc) && !x.rm_.is(pc) && !x.rs_.is(pc));
     instr |= x.rs_.code()*B8 | x.shift_op_ | B4 | x.rm_.code();
   }
   emit(instr | rn.code()*B16 | rd.code()*B12);
@@ -883,7 +1206,7 @@
 
 
 void Assembler::addrmod2(Instr instr, Register rd, const MemOperand& x) {
-  ASSERT((instr & ~(kCondMask | B | L)) == B26);
+  DCHECK((instr & ~(kCondMask | B | L)) == B26);
   int am = x.am_;
   if (!x.rm_.is_valid()) {
     // Immediate offset.
@@ -895,28 +1218,28 @@
     if (!is_uint12(offset_12)) {
       // Immediate offset cannot be encoded, load it first to register ip
       // rn (and rd in a load) should never be ip, or will be trashed.
-      ASSERT(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
+      DCHECK(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
       mov(ip, Operand(x.offset_), LeaveCC, Instruction::ConditionField(instr));
       addrmod2(instr, rd, MemOperand(x.rn_, ip, x.am_));
       return;
     }
-    ASSERT(offset_12 >= 0);  // no masking needed
+    DCHECK(offset_12 >= 0);  // no masking needed
     instr |= offset_12;
   } else {
     // Register offset (shift_imm_ and shift_op_ are 0) or scaled
     // register offset; the constructors make sure that both shift_imm_
     // and shift_op_ are initialized.
-    ASSERT(!x.rm_.is(pc));
+    DCHECK(!x.rm_.is(pc));
     instr |= B25 | x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
   }
-  ASSERT((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
+  DCHECK((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
   emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
 }
 
 
 void Assembler::addrmod3(Instr instr, Register rd, const MemOperand& x) {
-  ASSERT((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
-  ASSERT(x.rn_.is_valid());
+  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
+  DCHECK(x.rn_.is_valid());
   int am = x.am_;
   if (!x.rm_.is_valid()) {
     // Immediate offset.
@@ -928,60 +1251,60 @@
     if (!is_uint8(offset_8)) {
       // Immediate offset cannot be encoded, load it first to register ip
       // rn (and rd in a load) should never be ip, or will be trashed.
-      ASSERT(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
+      DCHECK(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
       mov(ip, Operand(x.offset_), LeaveCC, Instruction::ConditionField(instr));
       addrmod3(instr, rd, MemOperand(x.rn_, ip, x.am_));
       return;
     }
-    ASSERT(offset_8 >= 0);  // no masking needed
+    DCHECK(offset_8 >= 0);  // no masking needed
     instr |= B | (offset_8 >> 4)*B8 | (offset_8 & 0xf);
   } else if (x.shift_imm_ != 0) {
     // Scaled register offset not supported, load index first
     // rn (and rd in a load) should never be ip, or will be trashed.
-    ASSERT(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
+    DCHECK(!x.rn_.is(ip) && ((instr & L) == L || !rd.is(ip)));
     mov(ip, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
         Instruction::ConditionField(instr));
     addrmod3(instr, rd, MemOperand(x.rn_, ip, x.am_));
     return;
   } else {
     // Register offset.
-    ASSERT((am & (P|W)) == P || !x.rm_.is(pc));  // no pc index with writeback
+    DCHECK((am & (P|W)) == P || !x.rm_.is(pc));  // no pc index with writeback
     instr |= x.rm_.code();
   }
-  ASSERT((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
+  DCHECK((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
   emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
 }
 
 
 void Assembler::addrmod4(Instr instr, Register rn, RegList rl) {
-  ASSERT((instr & ~(kCondMask | P | U | W | L)) == B27);
-  ASSERT(rl != 0);
-  ASSERT(!rn.is(pc));
+  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
+  DCHECK(rl != 0);
+  DCHECK(!rn.is(pc));
   emit(instr | rn.code()*B16 | rl);
 }
 
 
 void Assembler::addrmod5(Instr instr, CRegister crd, const MemOperand& x) {
   // Unindexed addressing is not encoded by this function.
-  ASSERT_EQ((B27 | B26),
+  DCHECK_EQ((B27 | B26),
             (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
-  ASSERT(x.rn_.is_valid() && !x.rm_.is_valid());
+  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
   int am = x.am_;
   int offset_8 = x.offset_;
-  ASSERT((offset_8 & 3) == 0);  // offset must be an aligned word offset
+  DCHECK((offset_8 & 3) == 0);  // offset must be an aligned word offset
   offset_8 >>= 2;
   if (offset_8 < 0) {
     offset_8 = -offset_8;
     am ^= U;
   }
-  ASSERT(is_uint8(offset_8));  // unsigned word offset must fit in a byte
-  ASSERT((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
+  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
+  DCHECK((am & (P|W)) == P || !x.rn_.is(pc));  // no pc base with writeback
 
   // Post-indexed addressing requires W == 1; different than in addrmod2/3.
   if ((am & P) == 0)
     am |= W;
 
-  ASSERT(offset_8 >= 0);  // no masking needed
+  DCHECK(offset_8 >= 0);  // no masking needed
   emit(instr | am | x.rn_.code()*B16 | crd.code()*B12 | offset_8);
 }
 
@@ -992,9 +1315,11 @@
     target_pos = L->pos();
   } else {
     if (L->is_linked()) {
-      target_pos = L->pos();  // L's link
+      // Point to previous instruction that uses the link.
+      target_pos = L->pos();
     } else {
-      target_pos = kEndOfChain;
+      // First entry of the link chain points to itself.
+      target_pos = pc_offset();
     }
     L->link_to(pc_offset());
   }
@@ -1006,27 +1331,11 @@
 }
 
 
-void Assembler::label_at_put(Label* L, int at_offset) {
-  int target_pos;
-  if (L->is_bound()) {
-    target_pos = L->pos();
-  } else {
-    if (L->is_linked()) {
-      target_pos = L->pos();  // L's link
-    } else {
-      target_pos = kEndOfChain;
-    }
-    L->link_to(at_offset);
-    instr_at_put(at_offset, target_pos + (Code::kHeaderSize - kHeapObjectTag));
-  }
-}
-
-
 // Branch instructions.
 void Assembler::b(int branch_offset, Condition cond) {
-  ASSERT((branch_offset & 3) == 0);
+  DCHECK((branch_offset & 3) == 0);
   int imm24 = branch_offset >> 2;
-  ASSERT(is_int24(imm24));
+  DCHECK(is_int24(imm24));
   emit(cond | B27 | B25 | (imm24 & kImm24Mask));
 
   if (cond == al) {
@@ -1038,33 +1347,33 @@
 
 void Assembler::bl(int branch_offset, Condition cond) {
   positions_recorder()->WriteRecordedPositions();
-  ASSERT((branch_offset & 3) == 0);
+  DCHECK((branch_offset & 3) == 0);
   int imm24 = branch_offset >> 2;
-  ASSERT(is_int24(imm24));
+  DCHECK(is_int24(imm24));
   emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
 }
 
 
 void Assembler::blx(int branch_offset) {  // v5 and above
   positions_recorder()->WriteRecordedPositions();
-  ASSERT((branch_offset & 1) == 0);
+  DCHECK((branch_offset & 1) == 0);
   int h = ((branch_offset & 2) >> 1)*B24;
   int imm24 = branch_offset >> 2;
-  ASSERT(is_int24(imm24));
+  DCHECK(is_int24(imm24));
   emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
 }
 
 
 void Assembler::blx(Register target, Condition cond) {  // v5 and above
   positions_recorder()->WriteRecordedPositions();
-  ASSERT(!target.is(pc));
+  DCHECK(!target.is(pc));
   emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BLX | target.code());
 }
 
 
 void Assembler::bx(Register target, Condition cond) {  // v5 and above, plus v4t
   positions_recorder()->WriteRecordedPositions();
-  ASSERT(!target.is(pc));  // use of pc is actually allowed, but discouraged
+  DCHECK(!target.is(pc));  // use of pc is actually allowed, but discouraged
   emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BX | target.code());
 }
 
@@ -1136,7 +1445,7 @@
 
 void Assembler::cmp_raw_immediate(
     Register src, int raw_immediate, Condition cond) {
-  ASSERT(is_uint12(raw_immediate));
+  DCHECK(is_uint12(raw_immediate));
   emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
 }
 
@@ -1159,18 +1468,58 @@
   // Don't allow nop instructions in the form mov rn, rn to be generated using
   // the mov instruction. They must be generated using nop(int/NopMarkerTypes)
   // or MarkCode(int/NopMarkerTypes) pseudo instructions.
-  ASSERT(!(src.is_reg() && src.rm().is(dst) && s == LeaveCC && cond == al));
+  DCHECK(!(src.is_reg() && src.rm().is(dst) && s == LeaveCC && cond == al));
   addrmod1(cond | MOV | s, r0, dst, src);
 }
 
 
+void Assembler::mov_label_offset(Register dst, Label* label) {
+  if (label->is_bound()) {
+    mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
+  } else {
+    // Emit the link to the label in the code stream followed by extra nop
+    // instructions.
+    // If the label is not linked, then start a new link chain by linking it to
+    // itself, emitting pc_offset().
+    int link = label->is_linked() ? label->pos() : pc_offset();
+    label->link_to(pc_offset());
+
+    // When the label is bound, these instructions will be patched with a
+    // sequence of movw/movt or mov/orr/orr instructions. They will load the
+    // destination register with the position of the label from the beginning
+    // of the code.
+    //
+    // The link will be extracted from the first instruction and the destination
+    // register from the second.
+    //   For ARMv7:
+    //      link
+    //      mov dst, dst
+    //   For ARMv6:
+    //      link
+    //      mov dst, dst
+    //      mov dst, dst
+    //
+    // When the label gets bound: target_at extracts the link and target_at_put
+    // patches the instructions.
+    DCHECK(is_uint24(link));
+    BlockConstPoolScope block_const_pool(this);
+    emit(link);
+    nop(dst.code());
+    if (!CpuFeatures::IsSupported(ARMv7)) {
+      nop(dst.code());
+    }
+  }
+}
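
A minimal usage sketch (macro-assembler style; names hypothetical):

  Label start;
  __ mov_label_offset(r4, &start);  // patched once 'start' is bound
  // ... other code ...
  __ bind(&start);  // r4 ends up holding start's position relative to the
                    // code object pointer (Code::kHeaderSize - kHeapObjectTag)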
+
+
 void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
-  ASSERT(immediate < 0x10000);
-  mov(reg, Operand(immediate), LeaveCC, cond);
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  emit(cond | 0x30*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
 }
 
 
 void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
   emit(cond | 0x34*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
 }
 
@@ -1189,15 +1538,42 @@
 // Multiply instructions.
 void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
                     SBit s, Condition cond) {
-  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
+  DCHECK(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
   emit(cond | A | s | dst.code()*B16 | srcA.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
 
 
+void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
+                    Condition cond) {
+  DCHECK(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
+  DCHECK(IsEnabled(MLS));
+  emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
+       src2.code()*B8 | B7 | B4 | src1.code());
+}
+
+
+void Assembler::sdiv(Register dst, Register src1, Register src2,
+                     Condition cond) {
+  DCHECK(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(IsEnabled(SUDIV));
+  emit(cond | B26 | B25 | B24 | B20 | dst.code()*B16 | 0xf * B12 |
+       src2.code()*B8 | B4 | src1.code());
+}
+
+
+void Assembler::udiv(Register dst, Register src1, Register src2,
+                     Condition cond) {
+  DCHECK(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(IsEnabled(SUDIV));
+  emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xf * B12 |
+       src2.code() * B8 | B4 | src1.code());
+}
+
+
 void Assembler::mul(Register dst, Register src1, Register src2,
                     SBit s, Condition cond) {
-  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
   // dst goes in bits 16-19 for this instruction!
   emit(cond | s | dst.code()*B16 | src2.code()*B8 | B7 | B4 | src1.code());
 }
@@ -1209,8 +1585,8 @@
                       Register src2,
                       SBit s,
                       Condition cond) {
-  ASSERT(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
-  ASSERT(!dstL.is(dstH));
+  DCHECK(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(!dstL.is(dstH));
   emit(cond | B23 | B22 | A | s | dstH.code()*B16 | dstL.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
@@ -1222,8 +1598,8 @@
                       Register src2,
                       SBit s,
                       Condition cond) {
-  ASSERT(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
-  ASSERT(!dstL.is(dstH));
+  DCHECK(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(!dstL.is(dstH));
   emit(cond | B23 | B22 | s | dstH.code()*B16 | dstL.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
@@ -1235,8 +1611,8 @@
                       Register src2,
                       SBit s,
                       Condition cond) {
-  ASSERT(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
-  ASSERT(!dstL.is(dstH));
+  DCHECK(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(!dstL.is(dstH));
   emit(cond | B23 | A | s | dstH.code()*B16 | dstL.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
@@ -1248,8 +1624,8 @@
                       Register src2,
                       SBit s,
                       Condition cond) {
-  ASSERT(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
-  ASSERT(!dstL.is(dstH));
+  DCHECK(!dstL.is(pc) && !dstH.is(pc) && !src1.is(pc) && !src2.is(pc));
+  DCHECK(!dstL.is(dstH));
   emit(cond | B23 | s | dstH.code()*B16 | dstL.code()*B12 |
        src2.code()*B8 | B7 | B4 | src1.code());
 }
@@ -1258,7 +1634,7 @@
 // Miscellaneous arithmetic instructions.
 void Assembler::clz(Register dst, Register src, Condition cond) {
   // v5 and above.
-  ASSERT(!dst.is(pc) && !src.is(pc));
+  DCHECK(!dst.is(pc) && !src.is(pc));
   emit(cond | B24 | B22 | B21 | 15*B16 | dst.code()*B12 |
        15*B8 | CLZ | src.code());
 }
@@ -1272,11 +1648,11 @@
                      const Operand& src,
                      Condition cond) {
   // v6 and above.
-  ASSERT(CpuFeatures::IsSupported(ARMv7));
-  ASSERT(!dst.is(pc) && !src.rm_.is(pc));
-  ASSERT((satpos >= 0) && (satpos <= 31));
-  ASSERT((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
-  ASSERT(src.rs_.is(no_reg));
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  DCHECK(!dst.is(pc) && !src.rm_.is(pc));
+  DCHECK((satpos >= 0) && (satpos <= 31));
+  DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));
+  DCHECK(src.rs_.is(no_reg));
 
   int sh = 0;
   if (src.shift_op_ == ASR) {
@@ -1300,10 +1676,10 @@
                      int width,
                      Condition cond) {
   // v7 and above.
-  ASSERT(CpuFeatures::IsSupported(ARMv7));
-  ASSERT(!dst.is(pc) && !src.is(pc));
-  ASSERT((lsb >= 0) && (lsb <= 31));
-  ASSERT((width >= 1) && (width <= (32 - lsb)));
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  DCHECK(!dst.is(pc) && !src.is(pc));
+  DCHECK((lsb >= 0) && (lsb <= 31));
+  DCHECK((width >= 1) && (width <= (32 - lsb)));
   emit(cond | 0xf*B23 | B22 | B21 | (width - 1)*B16 | dst.code()*B12 |
        lsb*B7 | B6 | B4 | src.code());
 }
@@ -1320,10 +1696,10 @@
                      int width,
                      Condition cond) {
   // v7 and above.
-  ASSERT(CpuFeatures::IsSupported(ARMv7));
-  ASSERT(!dst.is(pc) && !src.is(pc));
-  ASSERT((lsb >= 0) && (lsb <= 31));
-  ASSERT((width >= 1) && (width <= (32 - lsb)));
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  DCHECK(!dst.is(pc) && !src.is(pc));
+  DCHECK((lsb >= 0) && (lsb <= 31));
+  DCHECK((width >= 1) && (width <= (32 - lsb)));
   emit(cond | 0xf*B23 | B21 | (width - 1)*B16 | dst.code()*B12 |
        lsb*B7 | B6 | B4 | src.code());
 }
@@ -1335,10 +1711,10 @@
 //   bfc dst, #lsb, #width
 void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
   // v7 and above.
-  ASSERT(CpuFeatures::IsSupported(ARMv7));
-  ASSERT(!dst.is(pc));
-  ASSERT((lsb >= 0) && (lsb <= 31));
-  ASSERT((width >= 1) && (width <= (32 - lsb)));
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  DCHECK(!dst.is(pc));
+  DCHECK((lsb >= 0) && (lsb <= 31));
+  DCHECK((width >= 1) && (width <= (32 - lsb)));
   int msb = lsb + width - 1;
   emit(cond | 0x1f*B22 | msb*B16 | dst.code()*B12 | lsb*B7 | B4 | 0xf);
 }
@@ -1354,42 +1730,148 @@
                     int width,
                     Condition cond) {
   // v7 and above.
-  ASSERT(CpuFeatures::IsSupported(ARMv7));
-  ASSERT(!dst.is(pc) && !src.is(pc));
-  ASSERT((lsb >= 0) && (lsb <= 31));
-  ASSERT((width >= 1) && (width <= (32 - lsb)));
+  DCHECK(CpuFeatures::IsSupported(ARMv7));
+  DCHECK(!dst.is(pc) && !src.is(pc));
+  DCHECK((lsb >= 0) && (lsb <= 31));
+  DCHECK((width >= 1) && (width <= (32 - lsb)));
   int msb = lsb + width - 1;
   emit(cond | 0x1f*B22 | msb*B16 | dst.code()*B12 | lsb*B7 | B4 |
        src.code());
 }
 
 
+void Assembler::pkhbt(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
+  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
+  // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
+  DCHECK(!dst.is(pc));
+  DCHECK(!src1.is(pc));
+  DCHECK(!src2.rm().is(pc));
+  DCHECK(!src2.rm().is(no_reg));
+  DCHECK(src2.rs().is(no_reg));
+  DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
+  DCHECK(src2.shift_op() == LSL);
+  emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
+       src2.shift_imm_*B7 | B4 | src2.rm().code());
+}
+
+
+void Assembler::pkhtb(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
+  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
+  // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
+  DCHECK(!dst.is(pc));
+  DCHECK(!src1.is(pc));
+  DCHECK(!src2.rm().is(pc));
+  DCHECK(!src2.rm().is(no_reg));
+  DCHECK(src2.rs().is(no_reg));
+  DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
+  DCHECK(src2.shift_op() == ASR);
+  int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
+  emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
+       asr*B7 | B6 | B4 | src2.rm().code());
+}
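+// For example, pkhbt(r0, r1, Operand(r2, LSL, 8)) packs r1[15:0] into
+// r0[15:0] and (r2 << 8)[31:16] into r0[31:16]; pkhtb mirrors this, keeping
+// r1[31:16] and taking the bottom halfword of the shifted r2.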
+
+
+void Assembler::uxtb(Register dst,
+                     const Operand& src,
+                     Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.274.
+  // cond(31-28) | 01101110(27-20) | 1111(19-16) |
+  // Rd(15-12) | rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  DCHECK(!dst.is(pc));
+  DCHECK(!src.rm().is(pc));
+  DCHECK(!src.rm().is(no_reg));
+  DCHECK(src.rs().is(no_reg));
+  DCHECK((src.shift_imm_ == 0) ||
+         (src.shift_imm_ == 8) ||
+         (src.shift_imm_ == 16) ||
+         (src.shift_imm_ == 24));
+  // Operand maps ROR #0 to LSL #0.
+  DCHECK((src.shift_op() == ROR) ||
+         ((src.shift_op() == LSL) && (src.shift_imm_ == 0)));
+  emit(cond | 0x6E*B20 | 0xF*B16 | dst.code()*B12 |
+       ((src.shift_imm_ >> 1) & 0xC)*B8 | 7*B4 | src.rm().code());
+}
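+// The rotate(11-10) field relies on shift_imm_ being a multiple of 8:
+// (shift_imm_ >> 1) & 0xC yields {0, 4, 8, 12}, which *B8 places in
+// bits 11-10, i.e. rotate = shift_imm_ / 8 (ROR #0, #8, #16 or #24).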
+
+
+void Assembler::uxtab(Register dst,
+                      Register src1,
+                      const Operand& src2,
+                      Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.271.
+  // cond(31-28) | 01101110(27-20) | Rn(19-16) |
+  // Rd(15-12) | rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  DCHECK(!dst.is(pc));
+  DCHECK(!src1.is(pc));
+  DCHECK(!src2.rm().is(pc));
+  DCHECK(!src2.rm().is(no_reg));
+  DCHECK(src2.rs().is(no_reg));
+  DCHECK((src2.shift_imm_ == 0) ||
+         (src2.shift_imm_ == 8) ||
+         (src2.shift_imm_ == 16) ||
+         (src2.shift_imm_ == 24));
+  // Operand maps ROR #0 to LSL #0.
+  DCHECK((src2.shift_op() == ROR) ||
+         ((src2.shift_op() == LSL) && (src2.shift_imm_ == 0)));
+  emit(cond | 0x6E*B20 | src1.code()*B16 | dst.code()*B12 |
+       ((src2.shift_imm_ >> 1) & 0xC)*B8 | 7*B4 | src2.rm().code());
+}
+
+
+void Assembler::uxtb16(Register dst,
+                       const Operand& src,
+                       Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.275.
+  // cond(31-28) | 01101100(27-20) | 1111(19-16) |
+  // Rd(15-12) | rotate(11-10) | 00(9-8) | 0111(7-4) | Rm(3-0)
+  DCHECK(!dst.is(pc));
+  DCHECK(!src.rm().is(pc));
+  DCHECK(!src.rm().is(no_reg));
+  DCHECK(src.rs().is(no_reg));
+  DCHECK((src.shift_imm_ == 0) ||
+         (src.shift_imm_ == 8) ||
+         (src.shift_imm_ == 16) ||
+         (src.shift_imm_ == 24));
+  // Operand maps ROR #0 to LSL #0.
+  DCHECK((src.shift_op() == ROR) ||
+         ((src.shift_op() == LSL) && (src.shift_imm_ == 0)));
+  emit(cond | 0x6C*B20 | 0xF*B16 | dst.code()*B12 |
+       ((src.shift_imm_ >> 1) & 0xC)*B8 | 7*B4 | src.rm().code());
+}
+
+
 // Status register access instructions.
 void Assembler::mrs(Register dst, SRegister s, Condition cond) {
-  ASSERT(!dst.is(pc));
+  DCHECK(!dst.is(pc));
   emit(cond | B24 | s | 15*B16 | dst.code()*B12);
 }
 
 
 void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
                     Condition cond) {
-  ASSERT(fields >= B16 && fields < B20);  // at least one field set
+  DCHECK(fields >= B16 && fields < B20);  // at least one field set
   Instr instr;
   if (!src.rm_.is_valid()) {
     // Immediate.
     uint32_t rotate_imm;
     uint32_t immed_8;
-    if (src.must_use_constant_pool() ||
+    if (src.must_output_reloc_info(this) ||
         !fits_shifter(src.imm32_, &rotate_imm, &immed_8, NULL)) {
       // Immediate operand cannot be encoded, load it first to register ip.
-      RecordRelocInfo(src.rmode_, src.imm32_);
-      ldr(ip, MemOperand(pc, 0), cond);
+      move_32_bit_immediate(ip, src);
       msr(fields, Operand(ip), cond);
       return;
     }
     instr = I | rotate_imm*B8 | immed_8;
   } else {
-    ASSERT(!src.rs_.is_valid() && src.shift_imm_ == 0);  // only rm allowed
+    DCHECK(!src.rs_.is_valid() && src.shift_imm_ == 0);  // only rm allowed
     instr = src.rm_.code();
   }
   emit(cond | instr | B24 | B21 | fields | 15*B12);
@@ -1442,32 +1924,52 @@
 
 void Assembler::ldrd(Register dst1, Register dst2,
                      const MemOperand& src, Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(ARMv7));
-  ASSERT(src.rm().is(no_reg));
-  ASSERT(!dst1.is(lr));  // r14.
-  ASSERT_EQ(0, dst1.code() % 2);
-  ASSERT_EQ(dst1.code() + 1, dst2.code());
+  DCHECK(IsEnabled(ARMv7));
+  DCHECK(src.rm().is(no_reg));
+  DCHECK(!dst1.is(lr));  // r14.
+  DCHECK_EQ(0, dst1.code() % 2);
+  DCHECK_EQ(dst1.code() + 1, dst2.code());
   addrmod3(cond | B7 | B6 | B4, dst1, src);
 }
 
 
 void Assembler::strd(Register src1, Register src2,
                      const MemOperand& dst, Condition cond) {
-  ASSERT(dst.rm().is(no_reg));
-  ASSERT(!src1.is(lr));  // r14.
-  ASSERT_EQ(0, src1.code() % 2);
-  ASSERT_EQ(src1.code() + 1, src2.code());
-  ASSERT(CpuFeatures::IsEnabled(ARMv7));
+  DCHECK(dst.rm().is(no_reg));
+  DCHECK(!src1.is(lr));  // r14.
+  DCHECK_EQ(0, src1.code() % 2);
+  DCHECK_EQ(src1.code() + 1, src2.code());
+  DCHECK(IsEnabled(ARMv7));
   addrmod3(cond | B7 | B6 | B5 | B4, src1, dst);
 }
 
+
+// Preload instructions.
+void Assembler::pld(const MemOperand& address) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
+  // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
+  // 1111(15-12) | imm5(11-7) | type(6-5) | 0(4) | Rm(3-0)
+  DCHECK(address.rm().is(no_reg));
+  DCHECK(address.am() == Offset);
+  int U = B23;
+  int offset = address.offset();
+  if (offset < 0) {
+    offset = -offset;
+    U = 0;
+  }
+  DCHECK(offset < 4096);
+  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 | address.rn().code()*B16 |
+       0xf*B12 | offset);
+}
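+// For example, pld(MemOperand(r1, 64)) emits PLD [r1, #+64]; a negative
+// offset clears the U bit instead, and the magnitude must stay below 4096.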
+
+
 // Load/Store multiple instructions.
 void Assembler::ldm(BlockAddrMode am,
                     Register base,
                     RegList dst,
                     Condition cond) {
   // ABI stack constraint: ldmxx base, {..sp..}  base != sp  is not restartable.
-  ASSERT(base.is(sp) || (dst & sp.bit()) == 0);
+  DCHECK(base.is(sp) || (dst & sp.bit()) == 0);
 
   addrmod4(cond | B27 | am | L, base, dst);
 
@@ -1496,7 +1998,7 @@
 // enabling/disabling and a counter feature. See simulator-arm.h .
 void Assembler::stop(const char* msg, Condition cond, int32_t code) {
 #ifndef __arm__
-  ASSERT(code >= kDefaultStopCode);
+  DCHECK(code >= kDefaultStopCode);
   {
     // The Simulator will handle the stop instruction and get the message
     // address. It expects to find the address just after the svc instruction.
@@ -1509,7 +2011,6 @@
     emit(reinterpret_cast<Instr>(msg));
   }
 #else  // def __arm__
-#ifdef CAN_USE_ARMV5_INSTRUCTIONS
   if (cond != al) {
     Label skip;
     b(&skip, NegateCondition(cond));
@@ -1518,21 +2019,18 @@
   } else {
     bkpt(0);
   }
-#else  // ndef CAN_USE_ARMV5_INSTRUCTIONS
-  svc(0x9f0001, cond);
-#endif  // ndef CAN_USE_ARMV5_INSTRUCTIONS
 #endif  // def __arm__
 }
 
 
 void Assembler::bkpt(uint32_t imm16) {  // v5 and above
-  ASSERT(is_uint16(imm16));
+  DCHECK(is_uint16(imm16));
   emit(al | B24 | B21 | (imm16 >> 4)*B8 | BKPT | (imm16 & 0xf));
 }
 
 
 void Assembler::svc(uint32_t imm24, Condition cond) {
-  ASSERT(is_uint24(imm24));
+  DCHECK(is_uint24(imm24));
   emit(cond | 15*B24 | imm24);
 }
 
@@ -1545,7 +2043,7 @@
                     CRegister crm,
                     int opcode_2,
                     Condition cond) {
-  ASSERT(is_uint4(opcode_1) && is_uint3(opcode_2));
+  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
   emit(cond | B27 | B26 | B25 | (opcode_1 & 15)*B20 | crn.code()*B16 |
        crd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | crm.code());
 }
@@ -1568,7 +2066,7 @@
                     CRegister crm,
                     int opcode_2,
                     Condition cond) {
-  ASSERT(is_uint3(opcode_1) && is_uint3(opcode_2));
+  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
   emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | crn.code()*B16 |
        rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
 }
@@ -1591,7 +2089,7 @@
                     CRegister crm,
                     int opcode_2,
                     Condition cond) {
-  ASSERT(is_uint3(opcode_1) && is_uint3(opcode_2));
+  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
   emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | L | crn.code()*B16 |
        rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
 }
@@ -1623,7 +2121,7 @@
                     LFlag l,
                     Condition cond) {
   // Unindexed addressing.
-  ASSERT(is_uint8(option));
+  DCHECK(is_uint8(option));
   emit(cond | B27 | B26 | U | l | L | rn.code()*B16 | crd.code()*B12 |
        coproc*B8 | (option & 255));
 }
@@ -1653,30 +2151,31 @@
                      int offset,
                      const Condition cond) {
   // Ddst = MEM(Rbase + offset).
-  // Instruction details available in ARM DDI 0406A, A8-628.
-  // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
-  // Vdst(15-12) | 1011(11-8) | offset
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  // Instruction details available in ARM DDI 0406C.b, A8-924.
+  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
+  // Vd(15-12) | 1011(11-8) | offset
   int u = 1;
   if (offset < 0) {
     offset = -offset;
     u = 0;
   }
+  int vd, d;
+  dst.split_code(&vd, &d);
 
-  ASSERT(offset >= 0);
+  DCHECK(offset >= 0);
   if ((offset % 4) == 0 && (offset / 4) < 256) {
-    emit(cond | u*B23 | 0xD1*B20 | base.code()*B16 | dst.code()*B12 |
+    emit(cond | 0xD*B24 | u*B23 | d*B22 | B20 | base.code()*B16 | vd*B12 |
          0xB*B8 | ((offset / 4) & 255));
   } else {
     // Larger offsets must be handled by computing the correct address
     // in the ip register.
-    ASSERT(!base.is(ip));
+    DCHECK(!base.is(ip));
     if (u == 1) {
       add(ip, base, Operand(offset));
     } else {
       sub(ip, base, Operand(offset));
     }
-    emit(cond | 0xD1*B20 | ip.code()*B16 | dst.code()*B12 | 0xB*B8);
+    emit(cond | 0xD*B24 | d*B22 | B20 | ip.code()*B16 | vd*B12 | 0xB*B8);
   }
 }
 
@@ -1684,9 +2183,14 @@
 void Assembler::vldr(const DwVfpRegister dst,
                      const MemOperand& operand,
                      const Condition cond) {
-  ASSERT(!operand.rm().is_valid());
-  ASSERT(operand.am_ == Offset);
-  vldr(dst, operand.rn(), operand.offset(), cond);
+  DCHECK(operand.am_ == Offset);
+  if (operand.rm().is_valid()) {
+    add(ip, operand.rn(),
+        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
+    vldr(dst, ip, 0, cond);
+  } else {
+    vldr(dst, operand.rn(), operand.offset(), cond);
+  }
 }
 
 
@@ -1698,7 +2202,6 @@
   // Instruction details available in ARM DDI 0406A, A8-628.
   // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
   // Vdst(15-12) | 1010(11-8) | offset
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   int u = 1;
   if (offset < 0) {
     offset = -offset;
@@ -1706,7 +2209,7 @@
   }
   int sd, d;
   dst.split_code(&sd, &d);
-  ASSERT(offset >= 0);
+  DCHECK(offset >= 0);
 
   if ((offset % 4) == 0 && (offset / 4) < 256) {
     emit(cond | u*B23 | d*B22 | 0xD1*B20 | base.code()*B16 | sd*B12 |
@@ -1714,7 +2217,7 @@
   } else {
     // Larger offsets must be handled by computing the correct address
     // in the ip register.
-    ASSERT(!base.is(ip));
+    DCHECK(!base.is(ip));
     if (u == 1) {
       add(ip, base, Operand(offset));
     } else {
@@ -1728,9 +2231,14 @@
 void Assembler::vldr(const SwVfpRegister dst,
                      const MemOperand& operand,
                      const Condition cond) {
-  ASSERT(!operand.rm().is_valid());
-  ASSERT(operand.am_ == Offset);
-  vldr(dst, operand.rn(), operand.offset(), cond);
+  DCHECK(operand.am_ == Offset);
+  if (operand.rm().is_valid()) {
+    add(ip, operand.rn(),
+        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
+    vldr(dst, ip, 0, cond);
+  } else {
+    vldr(dst, operand.rn(), operand.offset(), cond);
+  }
 }
 
 
@@ -1739,29 +2247,31 @@
                      int offset,
                      const Condition cond) {
   // MEM(Rbase + offset) = Dsrc.
-  // Instruction details available in ARM DDI 0406A, A8-786.
-  // cond(31-28) | 1101(27-24)| U000(23-20) | | Rbase(19-16) |
-  // Vsrc(15-12) | 1011(11-8) | (offset/4)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  // Instruction details available in ARM DDI 0406C.b, A8-1082.
+  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
+  // Vd(15-12) | 1011(11-8) | (offset/4)
   int u = 1;
   if (offset < 0) {
     offset = -offset;
     u = 0;
   }
-  ASSERT(offset >= 0);
+  DCHECK(offset >= 0);
+  int vd, d;
+  src.split_code(&vd, &d);
+
   if ((offset % 4) == 0 && (offset / 4) < 256) {
-    emit(cond | u*B23 | 0xD0*B20 | base.code()*B16 | src.code()*B12 |
-         0xB*B8 | ((offset / 4) & 255));
+    emit(cond | 0xD*B24 | u*B23 | d*B22 | base.code()*B16 | vd*B12 | 0xB*B8 |
+         ((offset / 4) & 255));
   } else {
     // Larger offsets must be handled by computing the correct address
     // in the ip register.
-    ASSERT(!base.is(ip));
+    DCHECK(!base.is(ip));
     if (u == 1) {
       add(ip, base, Operand(offset));
     } else {
       sub(ip, base, Operand(offset));
     }
-    emit(cond | 0xD0*B20 | ip.code()*B16 | src.code()*B12 | 0xB*B8);
+    emit(cond | 0xD*B24 | d*B22 | ip.code()*B16 | vd*B12 | 0xB*B8);
   }
 }
 
@@ -1769,9 +2279,14 @@
 void Assembler::vstr(const DwVfpRegister src,
                      const MemOperand& operand,
                      const Condition cond) {
-  ASSERT(!operand.rm().is_valid());
-  ASSERT(operand.am_ == Offset);
-  vstr(src, operand.rn(), operand.offset(), cond);
+  DCHECK(operand.am_ == Offset);
+  if (operand.rm().is_valid()) {
+    add(ip, operand.rn(),
+        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
+    vstr(src, ip, 0, cond);
+  } else {
+    vstr(src, operand.rn(), operand.offset(), cond);
+  }
 }
 
 
@@ -1783,7 +2298,6 @@
   // Instruction details available in ARM DDI 0406A, A8-786.
   // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
   // Vdst(15-12) | 1010(11-8) | (offset/4)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   int u = 1;
   if (offset < 0) {
     offset = -offset;
@@ -1791,14 +2305,14 @@
   }
   int sd, d;
   src.split_code(&sd, &d);
-  ASSERT(offset >= 0);
+  DCHECK(offset >= 0);
   if ((offset % 4) == 0 && (offset / 4) < 256) {
     emit(cond | u*B23 | d*B22 | 0xD0*B20 | base.code()*B16 | sd*B12 |
          0xA*B8 | ((offset / 4) & 255));
   } else {
     // Larger offsets must be handled by computing the correct address
     // in the ip register.
-    ASSERT(!base.is(ip));
+    DCHECK(!base.is(ip));
     if (u == 1) {
       add(ip, base, Operand(offset));
     } else {
@@ -1812,9 +2326,14 @@
 void Assembler::vstr(const SwVfpRegister src,
                      const MemOperand& operand,
                      const Condition cond) {
-  ASSERT(!operand.rm().is_valid());
-  ASSERT(operand.am_ == Offset);
-  vldr(src, operand.rn(), operand.offset(), cond);
+  DCHECK(operand.am_ == Offset);
+  if (operand.rm().is_valid()) {
+    add(ip, operand.rn(),
+        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
+    vstr(src, ip, 0, cond);
+  } else {
+    vstr(src, operand.rn(), operand.offset(), cond);
+  }
 }
 
 
@@ -1823,17 +2342,17 @@
                       DwVfpRegister first,
                       DwVfpRegister last,
                       Condition cond) {
-  // Instruction details available in ARM DDI 0406A, A8-626.
+  // Instruction details available in ARM DDI 0406C.b, A8-922.
   // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
-  // first(15-12) | 1010(11-8) | (count * 2)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT_LE(first.code(), last.code());
-  ASSERT(am == ia || am == ia_w || am == db_w);
-  ASSERT(!base.is(pc));
+  // first(15-12) | 1011(11-8) | (count * 2)
+  DCHECK_LE(first.code(), last.code());
+  DCHECK(am == ia || am == ia_w || am == db_w);
+  DCHECK(!base.is(pc));
 
   int sd, d;
   first.split_code(&sd, &d);
   int count = last.code() - first.code() + 1;
+  DCHECK(count <= 16);
   emit(cond | B27 | B26 | am | d*B22 | B20 | base.code()*B16 | sd*B12 |
        0xB*B8 | count*2);
 }
@@ -1844,17 +2363,17 @@
                       DwVfpRegister first,
                       DwVfpRegister last,
                       Condition cond) {
-  // Instruction details available in ARM DDI 0406A, A8-784.
+  // Instruction details available in ARM DDI 0406C.b, A8-1080.
   // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
   // first(15-12) | 1011(11-8) | (count * 2)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT_LE(first.code(), last.code());
-  ASSERT(am == ia || am == ia_w || am == db_w);
-  ASSERT(!base.is(pc));
+  DCHECK_LE(first.code(), last.code());
+  DCHECK(am == ia || am == ia_w || am == db_w);
+  DCHECK(!base.is(pc));
 
   int sd, d;
   first.split_code(&sd, &d);
   int count = last.code() - first.code() + 1;
+  DCHECK(count <= 16);
   emit(cond | B27 | B26 | am | d*B22 | base.code()*B16 | sd*B12 |
        0xB*B8 | count*2);
 }
@@ -1867,10 +2386,9 @@
   // Instruction details available in ARM DDI 0406A, A8-626.
   // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
   // first(15-12) | 1010(11-8) | (count/2)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT_LE(first.code(), last.code());
-  ASSERT(am == ia || am == ia_w || am == db_w);
-  ASSERT(!base.is(pc));
+  DCHECK_LE(first.code(), last.code());
+  DCHECK(am == ia || am == ia_w || am == db_w);
+  DCHECK(!base.is(pc));
 
   int sd, d;
   first.split_code(&sd, &d);
@@ -1888,10 +2406,9 @@
   // Instruction details available in ARM DDI 0406A, A8-784.
   // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
   // first(15-12) | 1011(11-8) | (count/2)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT_LE(first.code(), last.code());
-  ASSERT(am == ia || am == ia_w || am == db_w);
-  ASSERT(!base.is(pc));
+  DCHECK_LE(first.code(), last.code());
+  DCHECK(am == ia || am == ia_w || am == db_w);
+  DCHECK(!base.is(pc));
 
   int sd, d;
   first.split_code(&sd, &d);
@@ -1900,6 +2417,7 @@
        0xA*B8 | count);
 }
 
+
 static void DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
   uint64_t i;
   memcpy(&i, &d, 8);
@@ -1908,10 +2426,11 @@
   *hi = i >> 32;
 }
 
+
 // Only works for little endian floating point formats.
 // We don't support VFP on the mixed endian floating point platform.
 static bool FitsVMOVDoubleImmediate(double d, uint32_t *encoding) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  DCHECK(CpuFeatures::IsSupported(VFP3));
 
   // VMOV can accept an immediate of the form:
   //
@@ -1961,36 +2480,79 @@
 
 void Assembler::vmov(const DwVfpRegister dst,
                      double imm,
-                     const Condition cond) {
-  // Dd = immediate
-  // Instruction details available in ARM DDI 0406B, A8-640.
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-
+                     const Register scratch) {
   uint32_t enc;
-  if (FitsVMOVDoubleImmediate(imm, &enc)) {
+  if (CpuFeatures::IsSupported(VFP3) && FitsVMOVDoubleImmediate(imm, &enc)) {
     // The double can be encoded in the instruction.
-    emit(cond | 0xE*B24 | 0xB*B20 | dst.code()*B12 | 0xB*B8 | enc);
+    //
+    // Dd = immediate
+    // Instruction details available in ARM DDI 0406C.b, A8-936.
+    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
+    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
+    int vd, d;
+    dst.split_code(&vd, &d);
+    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
+  } else if (FLAG_enable_vldr_imm && is_constant_pool_available()) {
+    // TODO(jfb) Temporarily turned off until we have constant blinding or
+    //           some equivalent mitigation: an attacker can otherwise control
+    //           generated data which also happens to be executable, a Very Bad
+    //           Thing indeed.
+    //           Blinding gets tricky because we don't have xor, we probably
+    //           need to add/subtract without losing precision, which requires a
+    //           cookie value that Lithium is probably better positioned to
+    //           choose.
+    //           We could also add a few peepholes here like detecting 0.0 and
+    //           -0.0 and doing a vmov from the sequestered d14, forcing denorms
+    //           to zero (we set flush-to-zero), and normalizing NaN values.
+    //           We could also detect redundant values.
+    //           The code could also randomize the order of values, though
+    //           that's tricky because vldr has a limited reach. Furthermore
+    //           it breaks load locality.
+    RelocInfo rinfo(pc_, imm);
+    ConstantPoolArray::LayoutSection section = ConstantPoolAddEntry(rinfo);
+    if (section == ConstantPoolArray::EXTENDED_SECTION) {
+      DCHECK(FLAG_enable_ool_constant_pool);
+      // Emit instructions to load constant pool offset.
+      movw(ip, 0);
+      movt(ip, 0);
+      // Load from constant pool at offset.
+      vldr(dst, MemOperand(pp, ip));
+    } else {
+      DCHECK(section == ConstantPoolArray::SMALL_SECTION);
+      vldr(dst, MemOperand(FLAG_enable_ool_constant_pool ? pp : pc, 0));
+    }
   } else {
-    // Synthesise the double from ARM immediates. This could be implemented
-    // using vldr from a constant pool.
+    // Synthesise the double from ARM immediates.
     uint32_t lo, hi;
     DoubleAsTwoUInt32(imm, &lo, &hi);
 
-    if (lo == hi) {
-      // If the lo and hi parts of the double are equal, the literal is easier
-      // to create. This is the case with 0.0.
-      mov(ip, Operand(lo));
-      vmov(dst, ip, ip);
-    } else {
-      // Move the low part of the double into the lower of the corresponsing S
-      // registers of D register dst.
-      mov(ip, Operand(lo));
-      vmov(dst.low(), ip, cond);
+    if (scratch.is(no_reg)) {
+      if (dst.code() < 16) {
+        const LowDwVfpRegister loc = LowDwVfpRegister::from_code(dst.code());
+        // Move the low part of the double into the lower of the corresponding
+        // registers of D register dst.
+        mov(ip, Operand(lo));
+        vmov(loc.low(), ip);
 
-      // Move the high part of the double into the higher of the corresponsing S
-      // registers of D register dst.
-      mov(ip, Operand(hi));
-      vmov(dst.high(), ip, cond);
+        // Move the high part of the double into the higher of the
+        // corresponding S registers of D register dst.
+        mov(ip, Operand(hi));
+        vmov(loc.high(), ip);
+      } else {
+        // D16-D31 does not have S registers, so move the low and high parts
+        // directly to the D register using vmov.32.
+        // Note: This may be slower, so we only do this when we have to.
+        mov(ip, Operand(lo));
+        vmov(dst, VmovIndexLo, ip);
+        mov(ip, Operand(hi));
+        vmov(dst, VmovIndexHi, ip);
+      }
+    } else {
+      // Move the low and high parts of the double to a D register in one
+      // instruction.
+      mov(ip, Operand(lo));
+      mov(scratch, Operand(hi));
+      vmov(dst, ip, scratch);
     }
   }
 }
@@ -2001,7 +2563,6 @@
                      const Condition cond) {
   // Sd = Sm
   // Instruction details available in ARM DDI 0406B, A8-642.
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   int sd, d, sm, m;
   dst.split_code(&sd, &d);
   src.split_code(&sm, &m);
@@ -2013,10 +2574,47 @@
                      const DwVfpRegister src,
                      const Condition cond) {
   // Dd = Dm
-  // Instruction details available in ARM DDI 0406B, A8-642.
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | 0xB*B20 |
-       dst.code()*B12 | 0x5*B9 | B8 | B6 | src.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-938.
+  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
+  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vm, m;
+  src.split_code(&vm, &m);
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B6 | m*B5 |
+       vm);
+}
+
+
+void Assembler::vmov(const DwVfpRegister dst,
+                     const VmovIndex index,
+                     const Register src,
+                     const Condition cond) {
+  // Dd[index] = Rt
+  // Instruction details available in ARM DDI 0406C.b, A8-940.
+  // cond(31-28) | 1110(27-24) | 0(23) | opc1=0index(22-21) | 0(20) |
+  // Vd(19-16) | Rt(15-12) | 1011(11-8) | D(7) | opc2=00(6-5) | 1(4) | 0000(3-0)
+  DCHECK(index.index == 0 || index.index == 1);
+  int vd, d;
+  dst.split_code(&vd, &d);
+  emit(cond | 0xE*B24 | index.index*B21 | vd*B16 | src.code()*B12 | 0xB*B8 |
+       d*B7 | B4);
+}
+
+
+void Assembler::vmov(const Register dst,
+                     const VmovIndex index,
+                     const DwVfpRegister src,
+                     const Condition cond) {
+  // Rt = Dn[index]
+  // Instruction details available in ARM DDI 0406C.b, A8.8.342.
+  // cond(31-28) | 1110(27-24) | U=0(23) | opc1=0index(22-21) | 1(20) |
+  // Vn(19-16) | Rt(15-12) | 1011(11-8) | N(7) | opc2=00(6-5) | 1(4) | 0000(3-0)
+  DCHECK(index.index == 0 || index.index == 1);
+  int vn, n;
+  src.split_code(&vn, &n);
+  emit(cond | 0xE*B24 | index.index*B21 | B20 | vn*B16 | dst.code()*B12 |
+       0xB*B8 | n*B7 | B4);
 }
 
 
@@ -2025,13 +2623,14 @@
                      const Register src2,
                      const Condition cond) {
   // Dm = <Rt,Rt2>.
-  // Instruction details available in ARM DDI 0406A, A8-646.
+  // Instruction details available in ARM DDI 0406C.b, A8-948.
   // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
   // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT(!src1.is(pc) && !src2.is(pc));
+  DCHECK(!src1.is(pc) && !src2.is(pc));
+  int vm, m;
+  dst.split_code(&vm, &m);
   emit(cond | 0xC*B24 | B22 | src2.code()*B16 |
-       src1.code()*B12 | 0xB*B8 | B4 | dst.code());
+       src1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
 }
 
 
@@ -2040,13 +2639,14 @@
                      const DwVfpRegister src,
                      const Condition cond) {
   // <Rt,Rt2> = Dm.
-  // Instruction details available in ARM DDI 0406A, A8-646.
+  // Instruction details available in ARM DDI 0406C.b, A8-948.
   // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
   // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT(!dst1.is(pc) && !dst2.is(pc));
+  DCHECK(!dst1.is(pc) && !dst2.is(pc));
+  int vm, m;
+  src.split_code(&vm, &m);
   emit(cond | 0xC*B24 | B22 | B20 | dst2.code()*B16 |
-       dst1.code()*B12 | 0xB*B8 | B4 | src.code());
+       dst1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
 }
 
 
@@ -2057,8 +2657,7 @@
   // Instruction details available in ARM DDI 0406A, A8-642.
   // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
   // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT(!src.is(pc));
+  DCHECK(!src.is(pc));
   int sn, n;
   dst.split_code(&sn, &n);
   emit(cond | 0xE*B24 | sn*B16 | src.code()*B12 | 0xA*B8 | n*B7 | B4);
@@ -2072,8 +2671,7 @@
   // Instruction details available in ARM DDI 0406A, A8-642.
   // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
   // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT(!dst.is(pc));
+  DCHECK(!dst.is(pc));
   int sn, n;
   src.split_code(&sn, &n);
   emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
@@ -2134,7 +2732,7 @@
                          int reg_code,
                          int* vm,
                          int* m) {
-  ASSERT((reg_code >= 0) && (reg_code <= 31));
+  DCHECK((reg_code >= 0) && (reg_code <= 31));
   if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
     // 32 bit type.
     *m  = reg_code & 0x1;
@@ -2154,7 +2752,7 @@
                         const int src_code,
                         VFPConversionMode mode,
                         const Condition cond) {
-  ASSERT(src_type != dst_type);
+  DCHECK(src_type != dst_type);
   int D, Vd, M, Vm;
   SplitRegCode(src_type, src_code, &Vm, &M);
   SplitRegCode(dst_type, dst_code, &Vd, &D);
@@ -2164,7 +2762,7 @@
     // Instruction details available in ARM DDI 0406B, A8.6.295.
     // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
     // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
-    ASSERT(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));
+    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));
 
     int sz, opc2, op;
 
@@ -2173,7 +2771,7 @@
       sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
       op = mode;
     } else {
-      ASSERT(IsIntegerVFPType(src_type));
+      DCHECK(IsIntegerVFPType(src_type));
       opc2 = 0x0;
       sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
       op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
@@ -2197,7 +2795,6 @@
                              const SwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
 }
 
@@ -2206,7 +2803,6 @@
                              const SwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
 }
 
@@ -2215,7 +2811,6 @@
                              const SwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
 }
 
@@ -2224,7 +2819,6 @@
                              const DwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
 }
 
@@ -2233,7 +2827,6 @@
                              const DwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
 }
 
@@ -2242,7 +2835,6 @@
                              const SwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
 }
 
@@ -2251,24 +2843,56 @@
                              const DwVfpRegister src,
                              VFPConversionMode mode,
                              const Condition cond) {
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
 }
 
 
+void Assembler::vcvt_f64_s32(const DwVfpRegister dst,
+                             int fraction_bits,
+                             const Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8-874.
+  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
+  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
+  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
+  DCHECK(CpuFeatures::IsSupported(VFP3));
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int imm5 = 32 - fraction_bits;
+  int i = imm5 & 1;
+  int imm4 = (imm5 >> 1) & 0xf;
+  emit(cond | 0xE*B24 | B23 | d*B22 | 0x3*B20 | B19 | 0x2*B16 |
+       vd*B12 | 0x5*B9 | B8 | B7 | B6 | i*B5 | imm4);
+}
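+// For example, fraction_bits = 16 gives imm5 = 32 - 16 = 16, so i = 0 and
+// imm4 = 8; fraction_bits = 1 gives imm5 = 31, so i = 1 and imm4 = 0xF.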
+
+
 void Assembler::vneg(const DwVfpRegister dst,
                      const DwVfpRegister src,
                      const Condition cond) {
-  emit(cond | 0xE*B24 | 0xB*B20 | B16 | dst.code()*B12 |
-       0x5*B9 | B8 | B6 | src.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-968.
+  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
+  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vm, m;
+  src.split_code(&vm, &m);
+
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | B6 |
+       m*B5 | vm);
 }
 
 
 void Assembler::vabs(const DwVfpRegister dst,
                      const DwVfpRegister src,
                      const Condition cond) {
-  emit(cond | 0xE*B24 | 0xB*B20 | dst.code()*B12 |
-       0x5*B9 | B8 | 0x3*B6 | src.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-524.
+  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
+  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vm, m;
+  src.split_code(&vm, &m);
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B7 | B6 |
+       m*B5 | vm);
 }
 
 
@@ -2278,12 +2902,17 @@
                      const Condition cond) {
   // Dd = vadd(Dn, Dm) double precision floating point addition.
   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
-  // Instruction details available in ARM DDI 0406A, A8-536.
-  // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 |
-       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-830.
+  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
+       n*B7 | m*B5 | vm);
 }
 
 
@@ -2293,12 +2922,17 @@
                      const Condition cond) {
   // Dd = vsub(Dn, Dm) double precision floating point subtraction.
   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
-  // Instruction details available in ARM DDI 0406A, A8-784.
-  // cond(31-28) | 11100(27-23)| D=?(22) | 11(21-20) | Vn(19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 1(6) | M=?(5) | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | 0x3*B20 | src1.code()*B16 |
-       dst.code()*B12 | 0x5*B9 | B8 | B6 | src2.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-1086.
+  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
+       n*B7 | B6 | m*B5 | vm);
 }
 
 
@@ -2308,12 +2942,53 @@
                      const Condition cond) {
   // Dd = vmul(Dn, Dm) double precision floating point multiplication.
   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
-  // Instruction details available in ARM DDI 0406A, A8-784.
-  // cond(31-28) | 11100(27-23)| D=?(22) | 10(21-20) | Vn(19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=0 | 0(6) | M=?(5) | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | 0x2*B20 | src1.code()*B16 |
-       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-960.
+  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | 0x2*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
+       n*B7 | m*B5 | vm);
+}
+
+
+void Assembler::vmla(const DwVfpRegister dst,
+                     const DwVfpRegister src1,
+                     const DwVfpRegister src2,
+                     const Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8-932.
+  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
+       vm);
+}
+
+
+void Assembler::vmls(const DwVfpRegister dst,
+                     const DwVfpRegister src1,
+                     const DwVfpRegister src2,
+                     const Condition cond) {
+  // Instruction details available in ARM DDI 0406C.b, A8-932.
+  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | B6 |
+       m*B5 | vm);
 }
 
 
@@ -2323,12 +2998,17 @@
                      const Condition cond) {
   // Dd = vdiv(Dn, Dm) double precision floating point division.
   // Dd = D:Vd; Dm=M:Vm; Dn=N:Vm.
-  // Instruction details available in ARM DDI 0406A, A8-584.
-  // cond(31-28) | 11101(27-23)| D=?(22) | 00(21-20) | Vn(19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | N(7)=? | 0(6) | M=?(5) | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | B23 | src1.code()*B16 |
-       dst.code()*B12 | 0x5*B9 | B8 | src2.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-882.
+  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vn, n;
+  src1.split_code(&vn, &n);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1D*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
+       vm);
 }
 
 
@@ -2336,26 +3016,29 @@
                      const DwVfpRegister src2,
                      const Condition cond) {
   // vcmp(Dd, Dm) double precision floating point comparison.
-  // Instruction details available in ARM DDI 0406A, A8-570.
-  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0100 (19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | E(7)=0 | 1(6) | M(5)=? | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 |B23 | 0x3*B20 | B18 |
-       src1.code()*B12 | 0x5*B9 | B8 | B6 | src2.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-864.
+  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  src1.split_code(&vd, &d);
+  int vm, m;
+  src2.split_code(&vm, &m);
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x4*B16 | vd*B12 | 0x5*B9 | B8 | B6 |
+       m*B5 | vm);
 }
 
 
 void Assembler::vcmp(const DwVfpRegister src1,
                      const double src2,
                      const Condition cond) {
-  // vcmp(Dd, Dm) double precision floating point comparison.
-  // Instruction details available in ARM DDI 0406A, A8-570.
-  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0101 (19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | E(7)=0 | 1(6) | M(5)=? | 0(4) | 0000(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  ASSERT(src2 == 0.0);
-  emit(cond | 0xE*B24 |B23 | 0x3*B20 | B18 | B16 |
-       src1.code()*B12 | 0x5*B9 | B8 | B6);
+  // vcmp(Dd, #0.0) double precision floating point comparison.
+  // Instruction details available in ARM DDI 0406C.b, A8-864.
+  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
+  DCHECK(src2 == 0.0);
+  int vd, d;
+  src1.split_code(&vd, &d);
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x5*B16 | vd*B12 | 0x5*B9 | B8 | B6);
 }
 
 
@@ -2363,7 +3046,6 @@
   // Instruction details available in ARM DDI 0406A, A8-652.
   // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
   // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(cond | 0xE*B24 | 0xE*B20 |  B16 |
        dst.code()*B12 | 0xA*B8 | B4);
 }
@@ -2373,7 +3055,6 @@
   // Instruction details available in ARM DDI 0406A, A8-652.
   // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
   // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
   emit(cond | 0xE*B24 | 0xF*B20 |  B16 |
        dst.code()*B12 | 0xA*B8 | B4);
 }
@@ -2382,29 +3063,143 @@
 void Assembler::vsqrt(const DwVfpRegister dst,
                       const DwVfpRegister src,
                       const Condition cond) {
-  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0001 (19-16) |
-  // Vd(15-12) | 101(11-9) | sz(8)=1 | 11 (7-6) | M(5)=? | 0(4) | Vm(3-0)
-  ASSERT(CpuFeatures::IsEnabled(VFP3));
-  emit(cond | 0xE*B24 | B23 | 0x3*B20 | B16 |
-       dst.code()*B12 | 0x5*B9 | B8 | 3*B6 | src.code());
+  // Instruction details available in ARM DDI 0406C.b, A8-1058.
+  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
+  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vm, m;
+  src.split_code(&vm, &m);
+  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 |
+       m*B5 | vm);
+}
+
+
+// Support for NEON.
+
+void Assembler::vld1(NeonSize size,
+                     const NeonListOperand& dst,
+                     const NeonMemOperand& src) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
+  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
+  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
+  DCHECK(CpuFeatures::IsSupported(NEON));
+  int vd, d;
+  dst.base().split_code(&vd, &d);
+  emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 |
+       dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code());
+}
+
+
+void Assembler::vst1(NeonSize size,
+                     const NeonListOperand& src,
+                     const NeonMemOperand& dst) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
+  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
+  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
+  DCHECK(CpuFeatures::IsSupported(NEON));
+  int vd, d;
+  src.base().split_code(&vd, &d);
+  emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 |
+       size*B6 | dst.align()*B4 | dst.rm().code());
+}
+
+
+void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
+  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
+  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
+  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
+  DCHECK(CpuFeatures::IsSupported(NEON));
+  int vd, d;
+  dst.split_code(&vd, &d);
+  int vm, m;
+  src.split_code(&vm, &m);
+  emit(0xFU*B28 | B25 | (dt & NeonDataTypeUMask) | B23 | d*B22 |
+        (dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm);
 }
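+// For example, vmovl(NeonU8, q0, d2) zero-extends the eight bytes in d2 to
+// eight halfwords in q0 (NeonS8 would sign-extend).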
 
 
 // Pseudo instructions.
 void Assembler::nop(int type) {
-  // This is mov rx, rx.
-  ASSERT(0 <= type && type <= 14);  // mov pc, pc is not a nop.
+  // ARMv6{K/T2} and v7 have an actual NOP instruction, but it serializes
+  // part of the CPU's pipeline and still has to be issued. Older ARM chips
+  // simply used MOV Rx, Rx as a NOP, and that performs better even on newer
+  // CPUs. We therefore use MOV Rx, Rx even on newer CPUs, with Rx encoding
+  // a type.
+  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
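+  // For example, nop(0) emits 0xE1A00000 (mov r0, r0) and nop(2) emits
+  // 0xE1A02002 (mov r2, r2), so the type can be read back from the register.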
   emit(al | 13*B21 | type*B12 | type);
 }
 
 
+bool Assembler::IsMovT(Instr instr) {
+  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask out condition.
+             ((kNumRegisters-1)*B12) |            // Mask out register.
+             EncodeMovwImmediate(0xFFFF));        // Mask out immediate value.
+  return instr == kMovtPattern;
+}
+
+
+bool Assembler::IsMovW(Instr instr) {
+  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask out condition.
+             ((kNumRegisters-1)*B12) |            // Mask out destination.
+             EncodeMovwImmediate(0xFFFF));        // Mask out immediate value.
+  return instr == kMovwPattern;
+}
+
+
+Instr Assembler::GetMovTPattern() { return kMovtPattern; }
+
+
+Instr Assembler::GetMovWPattern() { return kMovwPattern; }
+
+
+Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
+  DCHECK(immediate < 0x10000);
+  return ((immediate & 0xf000) << 4) | (immediate & 0xfff);
+}
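+// For example, EncodeMovwImmediate(0xABCD) returns 0xA0BCD: imm4 = 0xA goes
+// to bits 19-16 and imm12 = 0xBCD to bits 11-0, as in the movw encoding.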
+
+
+Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
+  instruction &= ~EncodeMovwImmediate(0xffff);
+  return instruction | EncodeMovwImmediate(immediate);
+}
+
+
+int Assembler::DecodeShiftImm(Instr instr) {
+  int rotate = Instruction::RotateValue(instr) * 2;
+  int immed8 = Instruction::Immed8Value(instr);
+  return (immed8 >> rotate) | (immed8 << (32 - rotate));
+}
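+// For example, a rotate field of 12 with immed8 = 0xFF decodes to 0xFF
+// rotated right by 24 bits, i.e. 0x0000FF00.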
+
+
+Instr Assembler::PatchShiftImm(Instr instr, int immed) {
+  uint32_t rotate_imm = 0;
+  uint32_t immed_8 = 0;
+  bool immed_fits = fits_shifter(immed, &rotate_imm, &immed_8, NULL);
+  DCHECK(immed_fits);
+  USE(immed_fits);
+  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
+}
+
+
 bool Assembler::IsNop(Instr instr, int type) {
+  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
   // Check for mov rx, rx where x = type.
-  ASSERT(0 <= type && type <= 14);  // mov pc, pc is not a nop.
   return instr == (al | 13*B21 | type*B12 | type);
 }
 
 
+bool Assembler::IsMovImmed(Instr instr) {
+  return (instr & kMovImmedMask) == kMovImmedPattern;
+}
+
+
+bool Assembler::IsOrrImmed(Instr instr) {
+  return (instr & kOrrImmedMask) == kOrrImmedPattern;
+}
+
+
+// static
 bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
   uint32_t dummy1;
   uint32_t dummy2;
@@ -2412,6 +3207,11 @@
 }
 
 
+bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
+  return is_uint12(abs(imm32));
+}
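+// Addressing mode 2 has a 12-bit offset field, so magnitudes up to 4095
+// fit; the sign is encoded separately in the U bit.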
+
+
 // Debugging.
 void Assembler::RecordJSReturn() {
   positions_recorder()->WriteRecordedPositions();
@@ -2435,14 +3235,19 @@
 }
 
 
+void Assembler::RecordConstPool(int size) {
+  // We only need this for debugger support, to correctly compute offsets in the
+  // code.
+  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
+}
+
+
 void Assembler::GrowBuffer() {
   if (!own_buffer_) FATAL("external code buffer is too small");
 
   // Compute new buffer size.
   CodeDesc desc;  // the new buffer
-  if (buffer_size_ < 4*KB) {
-    desc.buffer_size = 4*KB;
-  } else if (buffer_size_ < 1*MB) {
+  if (buffer_size_ < 1 * MB) {
     desc.buffer_size = 2*buffer_size_;
   } else {
     desc.buffer_size = buffer_size_ + 1*MB;
@@ -2458,9 +3263,9 @@
   // Copy the data.
   int pc_delta = desc.buffer - buffer_;
   int rc_delta = (desc.buffer + desc.buffer_size) - (buffer_ + buffer_size_);
-  memmove(desc.buffer, buffer_, desc.instr_size);
-  memmove(reloc_info_writer.pos() + rc_delta,
-          reloc_info_writer.pos(), desc.reloc_size);
+  MemMove(desc.buffer, buffer_, desc.instr_size);
+  MemMove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
+          desc.reloc_size);
 
   // Switch buffers.
   DeleteArray(buffer_);
@@ -2475,14 +3280,20 @@
   // to relocate any emitted relocation entries.
 
   // Relocate pending relocation entries.
-  for (int i = 0; i < num_pending_reloc_info_; i++) {
-    RelocInfo& rinfo = pending_reloc_info_[i];
-    ASSERT(rinfo.rmode() != RelocInfo::COMMENT &&
+  for (int i = 0; i < num_pending_32_bit_reloc_info_; i++) {
+    RelocInfo& rinfo = pending_32_bit_reloc_info_[i];
+    DCHECK(rinfo.rmode() != RelocInfo::COMMENT &&
            rinfo.rmode() != RelocInfo::POSITION);
     if (rinfo.rmode() != RelocInfo::JS_RETURN) {
       rinfo.set_pc(rinfo.pc() + pc_delta);
     }
   }
+  for (int i = 0; i < num_pending_64_bit_reloc_info_; i++) {
+    RelocInfo& rinfo = pending_64_bit_reloc_info_[i];
+    DCHECK(rinfo.rmode() == RelocInfo::NONE64);
+    rinfo.set_pc(rinfo.pc() + pc_delta);
+  }
+  constant_pool_builder_.Relocate(pc_delta);
 }
 
 
@@ -2490,7 +3301,8 @@
   // No relocation info should be pending while using db. db is used
   // to write pure data with no pointers and the constant pool should
   // be emitted before using db.
-  ASSERT(num_pending_reloc_info_ == 0);
+  DCHECK(num_pending_32_bit_reloc_info_ == 0);
+  DCHECK(num_pending_64_bit_reloc_info_ == 0);
   CheckBuffer();
   *reinterpret_cast<uint8_t*>(pc_) = data;
   pc_ += sizeof(uint8_t);
@@ -2501,48 +3313,41 @@
   // No relocation info should be pending while using dd. dd is used
   // to write pure data with no pointers and the constant pool should
   // be emitted before using dd.
-  ASSERT(num_pending_reloc_info_ == 0);
+  DCHECK(num_pending_32_bit_reloc_info_ == 0);
+  DCHECK(num_pending_64_bit_reloc_info_ == 0);
   CheckBuffer();
   *reinterpret_cast<uint32_t*>(pc_) = data;
   pc_ += sizeof(uint32_t);
 }
 
 
+void Assembler::emit_code_stub_address(Code* stub) {
+  CheckBuffer();
+  *reinterpret_cast<uint32_t*>(pc_) =
+      reinterpret_cast<uint32_t>(stub->instruction_start());
+  pc_ += sizeof(uint32_t);
+}
+
+
 void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
-  // We do not try to reuse pool constants.
   RelocInfo rinfo(pc_, rmode, data, NULL);
-  if (rmode >= RelocInfo::JS_RETURN && rmode <= RelocInfo::DEBUG_BREAK_SLOT) {
-    // Adjust code for new modes.
-    ASSERT(RelocInfo::IsDebugBreakSlot(rmode)
-           || RelocInfo::IsJSReturn(rmode)
-           || RelocInfo::IsComment(rmode)
-           || RelocInfo::IsPosition(rmode));
-    // These modes do not need an entry in the constant pool.
-  } else {
-    ASSERT(num_pending_reloc_info_ < kMaxNumPendingRelocInfo);
-    if (num_pending_reloc_info_ == 0) {
-      first_const_pool_use_ = pc_offset();
-    }
-    pending_reloc_info_[num_pending_reloc_info_++] = rinfo;
-    // Make sure the constant pool is not emitted in place of the next
-    // instruction for which we just recorded relocation info.
-    BlockConstPoolFor(1);
-  }
-  if (rinfo.rmode() != RelocInfo::NONE) {
+  RecordRelocInfo(rinfo);
+}
+
+
+void Assembler::RecordRelocInfo(const RelocInfo& rinfo) {
+  if (!RelocInfo::IsNone(rinfo.rmode())) {
     // Don't record external references unless the heap will be serialized.
-    if (rmode == RelocInfo::EXTERNAL_REFERENCE) {
-#ifdef DEBUG
-      if (!Serializer::enabled()) {
-        Serializer::TooLateToEnableNow();
-      }
-#endif
-      if (!Serializer::enabled() && !emit_debug_code()) {
-        return;
-      }
+    if (rinfo.rmode() == RelocInfo::EXTERNAL_REFERENCE &&
+        !serializer_enabled() && !emit_debug_code()) {
+      return;
     }
-    ASSERT(buffer_space() >= kMaxRelocSize);  // too late to grow buffer here
-    if (rmode == RelocInfo::CODE_TARGET_WITH_ID) {
-      RelocInfo reloc_info_with_ast_id(pc_, rmode, RecordedAstId(), NULL);
+    DCHECK(buffer_space() >= kMaxRelocSize);  // too late to grow buffer here
+    if (rinfo.rmode() == RelocInfo::CODE_TARGET_WITH_ID) {
+      RelocInfo reloc_info_with_ast_id(rinfo.pc(),
+                                       rinfo.rmode(),
+                                       RecordedAstId().ToInt(),
+                                       NULL);
       ClearRecordedAstId();
       reloc_info_writer.Write(&reloc_info_with_ast_id);
     } else {
@@ -2552,13 +3357,51 @@
 }
 
 
+ConstantPoolArray::LayoutSection Assembler::ConstantPoolAddEntry(
+    const RelocInfo& rinfo) {
+  if (FLAG_enable_ool_constant_pool) {
+    return constant_pool_builder_.AddEntry(this, rinfo);
+  } else {
+    if (rinfo.rmode() == RelocInfo::NONE64) {
+      DCHECK(num_pending_64_bit_reloc_info_ < kMaxNumPending64RelocInfo);
+      if (num_pending_64_bit_reloc_info_ == 0) {
+        first_const_pool_64_use_ = pc_offset();
+      }
+      pending_64_bit_reloc_info_[num_pending_64_bit_reloc_info_++] = rinfo;
+    } else {
+      DCHECK(num_pending_32_bit_reloc_info_ < kMaxNumPending32RelocInfo);
+      if (num_pending_32_bit_reloc_info_ == 0) {
+        first_const_pool_32_use_ = pc_offset();
+      }
+      pending_32_bit_reloc_info_[num_pending_32_bit_reloc_info_++] = rinfo;
+    }
+    // Make sure the constant pool is not emitted in place of the next
+    // instruction for which we just recorded relocation info.
+    BlockConstPoolFor(1);
+    return ConstantPoolArray::SMALL_SECTION;
+  }
+}
+
+
 void Assembler::BlockConstPoolFor(int instructions) {
+  if (FLAG_enable_ool_constant_pool) {
+    // Should be a no-op if using an out-of-line constant pool.
+    DCHECK(num_pending_32_bit_reloc_info_ == 0);
+    DCHECK(num_pending_64_bit_reloc_info_ == 0);
+    return;
+  }
+
   int pc_limit = pc_offset() + instructions * kInstrSize;
   if (no_const_pool_before_ < pc_limit) {
-    // If there are some pending entries, the constant pool cannot be blocked
-    // further than first_const_pool_use_ + kMaxDistToPool
-    ASSERT((num_pending_reloc_info_ == 0) ||
-           (pc_limit < (first_const_pool_use_ + kMaxDistToPool)));
+    // Max pool start (if we need a jump and an alignment).
+#ifdef DEBUG
+    int start = pc_limit + kInstrSize + 2 * kPointerSize;
+    DCHECK((num_pending_32_bit_reloc_info_ == 0) ||
+           (start - first_const_pool_32_use_ +
+            num_pending_64_bit_reloc_info_ * kDoubleSize < kMaxDistToIntPool));
+    DCHECK((num_pending_64_bit_reloc_info_ == 0) ||
+           (start - first_const_pool_64_use_ < kMaxDistToFPPool));
+#endif
     no_const_pool_before_ = pc_limit;
   }
 
@@ -2569,46 +3412,86 @@
 
 
 void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
+  if (FLAG_enable_ool_constant_pool) {
+    // Should be a no-op if using an out-of-line constant pool.
+    DCHECK(num_pending_32_bit_reloc_info_ == 0);
+    DCHECK(num_pending_64_bit_reloc_info_ == 0);
+    return;
+  }
+
   // Short sequences of instructions must not be broken up by constant pool
   // emission; such sequences are protected by calls to BlockConstPoolFor and
   // BlockConstPoolScope.
   if (is_const_pool_blocked()) {
     // Something is wrong if emission is forced and blocked at the same time.
-    ASSERT(!force_emit);
+    DCHECK(!force_emit);
     return;
   }
 
   // There is nothing to do if there are no pending constant pool entries.
-  if (num_pending_reloc_info_ == 0)  {
+  if ((num_pending_32_bit_reloc_info_ == 0) &&
+      (num_pending_64_bit_reloc_info_ == 0)) {
     // Calculate the offset of the next check.
     next_buffer_check_ = pc_offset() + kCheckPoolInterval;
     return;
   }
 
-  // We emit a constant pool when:
-  //  * requested to do so by parameter force_emit (e.g. after each function).
-  //  * the distance to the first instruction accessing the constant pool is
-  //    kAvgDistToPool or more.
-  //  * no jump is required and the distance to the first instruction accessing
-  //    the constant pool is at least kMaxDistToPool / 2.
-  ASSERT(first_const_pool_use_ >= 0);
-  int dist = pc_offset() - first_const_pool_use_;
-  if (!force_emit && dist < kAvgDistToPool &&
-      (require_jump || (dist < (kMaxDistToPool / 2)))) {
-    return;
-  }
-
   // Check that the code buffer is large enough before emitting the constant
   // pool (include the jump over the pool and the constant pool marker and
   // the gap to the relocation information).
   int jump_instr = require_jump ? kInstrSize : 0;
-  int needed_space = jump_instr + kInstrSize +
-                     num_pending_reloc_info_ * kInstrSize + kGap;
+  int size_up_to_marker = jump_instr + kInstrSize;
+  int size_after_marker = num_pending_32_bit_reloc_info_ * kPointerSize;
+  bool has_fp_values = (num_pending_64_bit_reloc_info_ > 0);
+  bool require_64_bit_align = false;
+  if (has_fp_values) {
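+    // 64-bit entries must be 8-byte aligned; if the address just past the
+    // marker is misaligned, reserve one padding word.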
+    require_64_bit_align =
+        ((reinterpret_cast<uintptr_t>(pc_) + size_up_to_marker) & 0x7) != 0;
+    if (require_64_bit_align) {
+      size_after_marker += kInstrSize;
+    }
+    size_after_marker += num_pending_64_bit_reloc_info_ * kDoubleSize;
+  }
+
+  int size = size_up_to_marker + size_after_marker;
+
+  // We emit a constant pool when:
+  //  * requested to do so by parameter force_emit (e.g. after each function).
+  //  * the distance from the first instruction accessing the constant pool to
+  //    any of the constant pool entries will exceed its limit the next
+  //    time the pool is checked. This is overly restrictive, but we don't
+  //    emit constant pool entries in order, so it's conservatively correct.
+  //  * the instruction doesn't require a jump after itself to jump over the
+  //    constant pool, and we're getting close to running out of range.
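+  // (The int and FP limits differ because ldr's 12-bit byte offset reaches
+  // about 4KB, while vldr's 8-bit word-scaled offset reaches only about 1KB.)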
+  if (!force_emit) {
+    DCHECK((first_const_pool_32_use_ >= 0) || (first_const_pool_64_use_ >= 0));
+    bool need_emit = false;
+    if (has_fp_values) {
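+      // 64-bit entries are emitted ahead of the 32-bit ones, so the distance
+      // to the farthest FP entry excludes the 32-bit portion of the pool.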
+      int dist64 = pc_offset() +
+                   size -
+                   num_pending_32_bit_reloc_info_ * kPointerSize -
+                   first_const_pool_64_use_;
+      if ((dist64 >= kMaxDistToFPPool - kCheckPoolInterval) ||
+          (!require_jump && (dist64 >= kMaxDistToFPPool / 2))) {
+        need_emit = true;
+      }
+    }
+    int dist32 = pc_offset() + size - first_const_pool_32_use_;
+    if ((dist32 >= kMaxDistToIntPool - kCheckPoolInterval) ||
+        (!require_jump && (dist32 >= kMaxDistToIntPool / 2))) {
+      need_emit = true;
+    }
+    if (!need_emit) return;
+  }
+
+  int needed_space = size + kGap;
   while (buffer_space() <= needed_space) GrowBuffer();
 
   {
     // Block recursive calls to CheckConstPool.
     BlockConstPoolScope block_const_pool(this);
+    RecordComment("[ Constant Pool");
+    RecordConstPool(size);
 
     // Emit jump over constant pool if necessary.
     Label after_pool;
@@ -2616,37 +3499,109 @@
       b(&after_pool);
     }
 
-    RecordComment("[ Constant Pool");
+    // Put down the constant pool marker, an "undefined instruction" encoding.
+    // The encoded data size tells the disassembler how many words to skip.
+    emit(kConstantPoolMarker |
+         EncodeConstantPoolLength(size_after_marker / kPointerSize));
 
-    // Put down constant pool marker "Undefined instruction" as specified by
-    // A5.6 (ARMv7) Instruction set encoding.
-    emit(kConstantPoolMarker | num_pending_reloc_info_);
-
-    // Emit constant pool entries.
-    for (int i = 0; i < num_pending_reloc_info_; i++) {
-      RelocInfo& rinfo = pending_reloc_info_[i];
-      ASSERT(rinfo.rmode() != RelocInfo::COMMENT &&
-             rinfo.rmode() != RelocInfo::POSITION &&
-             rinfo.rmode() != RelocInfo::STATEMENT_POSITION);
-
-      Instr instr = instr_at(rinfo.pc());
-      // Instruction to patch must be 'ldr rd, [pc, #offset]' with offset == 0.
-      ASSERT(IsLdrPcImmediateOffset(instr) &&
-             GetLdrRegisterImmediateOffset(instr) == 0);
-
-      int delta = pc_ - rinfo.pc() - kPcLoadDelta;
-      // 0 is the smallest delta:
-      //   ldr rd, [pc, #0]
-      //   constant pool marker
-      //   data
-      ASSERT(is_uint12(delta));
-
-      instr_at_put(rinfo.pc(), SetLdrRegisterImmediateOffset(instr, delta));
-      emit(rinfo.data());
+    if (require_64_bit_align) {
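+      // Pad with an extra marker word so the 64-bit entries below start
+      // 8-byte aligned.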
+      emit(kConstantPoolMarker);
     }
 
-    num_pending_reloc_info_ = 0;
-    first_const_pool_use_ = -1;
+    // Emit 64-bit constant pool entries first: their load range is smaller
+    // than that of 32-bit entries.
+    for (int i = 0; i < num_pending_64_bit_reloc_info_; i++) {
+      RelocInfo& rinfo = pending_64_bit_reloc_info_[i];
+
+      DCHECK(!(reinterpret_cast<uintptr_t>(pc_) & 0x7));  // 64-bit aligned.
+
+      Instr instr = instr_at(rinfo.pc());
+      // Instruction to patch must be 'vldr rd, [pc, #offset]' with offset == 0.
+      DCHECK((IsVldrDPcImmediateOffset(instr) &&
+              GetVldrDRegisterImmediateOffset(instr) == 0));
+
+      int delta = pc_ - rinfo.pc() - kPcLoadDelta;
+      DCHECK(is_uint10(delta));
+
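+      // Scan earlier 64-bit entries for the same bit pattern; on a match,
+      // point this vldr at the existing slot rather than emit a duplicate.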
+      bool found = false;
+      uint64_t value = rinfo.raw_data64();
+      for (int j = 0; j < i; j++) {
+        RelocInfo& rinfo2 = pending_64_bit_reloc_info_[j];
+        if (value == rinfo2.raw_data64()) {
+          found = true;
+          DCHECK(rinfo2.rmode() == RelocInfo::NONE64);
+          Instr instr2 = instr_at(rinfo2.pc());
+          DCHECK(IsVldrDPcImmediateOffset(instr2));
+          delta = GetVldrDRegisterImmediateOffset(instr2);
+          delta += rinfo2.pc() - rinfo.pc();
+          break;
+        }
+      }
+
+      instr_at_put(rinfo.pc(), SetVldrDRegisterImmediateOffset(instr, delta));
+
+      if (!found) {
+        uint64_t uint_data = rinfo.raw_data64();
+        emit(uint_data & 0xFFFFFFFF);
+        emit(uint_data >> 32);
+      }
+    }
+
+    // Emit 32-bit constant pool entries.
+    for (int i = 0; i < num_pending_32_bit_reloc_info_; i++) {
+      RelocInfo& rinfo = pending_32_bit_reloc_info_[i];
+      DCHECK(rinfo.rmode() != RelocInfo::COMMENT &&
+             rinfo.rmode() != RelocInfo::POSITION &&
+             rinfo.rmode() != RelocInfo::STATEMENT_POSITION &&
+             rinfo.rmode() != RelocInfo::CONST_POOL &&
+             rinfo.rmode() != RelocInfo::NONE64);
+
+      Instr instr = instr_at(rinfo.pc());
+
+      // 64-bit loads shouldn't get here.
+      DCHECK(!IsVldrDPcImmediateOffset(instr));
+
+      if (IsLdrPcImmediateOffset(instr) &&
+          GetLdrRegisterImmediateOffset(instr) == 0) {
+        int delta = pc_ - rinfo.pc() - kPcLoadDelta;
+        DCHECK(is_uint12(delta));
+        // 0 is the smallest delta:
+        //   ldr rd, [pc, #0]
+        //   constant pool marker
+        //   data
+
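+        // Share a pool slot with an earlier identical entry, but only when
+        // not serializing and the relocation mode permits it.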
+        bool found = false;
+        if (!serializer_enabled() && rinfo.rmode() >= RelocInfo::CELL) {
+          for (int j = 0; j < i; j++) {
+            RelocInfo& rinfo2 = pending_32_bit_reloc_info_[j];
+
+            if ((rinfo2.data() == rinfo.data()) &&
+                (rinfo2.rmode() == rinfo.rmode())) {
+              Instr instr2 = instr_at(rinfo2.pc());
+              if (IsLdrPcImmediateOffset(instr2)) {
+                delta = GetLdrRegisterImmediateOffset(instr2);
+                delta += rinfo2.pc() - rinfo.pc();
+                found = true;
+                break;
+              }
+            }
+          }
+        }
+
+        instr_at_put(rinfo.pc(), SetLdrRegisterImmediateOffset(instr, delta));
+
+        if (!found) {
+          emit(rinfo.data());
+        }
+      } else {
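+        // The constant was materialized with a movw/movt pair, so it needs
+        // no pool slot.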
+        DCHECK(IsMovW(instr));
+      }
+    }
+
+    num_pending_32_bit_reloc_info_ = 0;
+    num_pending_64_bit_reloc_info_ = 0;
+    first_const_pool_32_use_ = -1;
+    first_const_pool_64_use_ = -1;
 
     RecordComment("]");
 
@@ -2661,6 +3616,229 @@
 }
 
 
+Handle<ConstantPoolArray> Assembler::NewConstantPool(Isolate* isolate) {
+  if (!FLAG_enable_ool_constant_pool) {
+    return isolate->factory()->empty_constant_pool_array();
+  }
+  return constant_pool_builder_.New(isolate);
+}
+
+
+void Assembler::PopulateConstantPool(ConstantPoolArray* constant_pool) {
+  constant_pool_builder_.Populate(this, constant_pool);
+}
+
+
+ConstantPoolBuilder::ConstantPoolBuilder()
+    : entries_(), current_section_(ConstantPoolArray::SMALL_SECTION) {}
+
+
+bool ConstantPoolBuilder::IsEmpty() {
+  return entries_.empty();
+}
+
+
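+// Map a relocation mode to the pool element type it occupies: raw 64-bit or
+// 32-bit data, a code pointer, or a GC-visited heap pointer.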
+ConstantPoolArray::Type ConstantPoolBuilder::GetConstantPoolType(
+    RelocInfo::Mode rmode) {
+  if (rmode == RelocInfo::NONE64) {
+    return ConstantPoolArray::INT64;
+  } else if (!RelocInfo::IsGCRelocMode(rmode)) {
+    return ConstantPoolArray::INT32;
+  } else if (RelocInfo::IsCodeTarget(rmode)) {
+    return ConstantPoolArray::CODE_PTR;
+  } else {
+    DCHECK(RelocInfo::IsGCRelocMode(rmode) && !RelocInfo::IsCodeTarget(rmode));
+    return ConstantPoolArray::HEAP_PTR;
+  }
+}
+
+
+ConstantPoolArray::LayoutSection ConstantPoolBuilder::AddEntry(
+    Assembler* assm, const RelocInfo& rinfo) {
+  RelocInfo::Mode rmode = rinfo.rmode();
+  DCHECK(rmode != RelocInfo::COMMENT &&
+         rmode != RelocInfo::POSITION &&
+         rmode != RelocInfo::STATEMENT_POSITION &&
+         rmode != RelocInfo::CONST_POOL);
+
+  // Try to merge entries which won't be patched.
+  int merged_index = -1;
+  ConstantPoolArray::LayoutSection entry_section = current_section_;
+  if (RelocInfo::IsNone(rmode) ||
+      (!assm->serializer_enabled() && (rmode >= RelocInfo::CELL))) {
+    size_t i;
+    std::vector<ConstantPoolEntry>::const_iterator it;
+    for (it = entries_.begin(), i = 0; it != entries_.end(); it++, i++) {
+      if (RelocInfo::IsEqual(rinfo, it->rinfo_)) {
+        // Merge with found entry.
+        merged_index = i;
+        entry_section = entries_[i].section_;
+        break;
+      }
+    }
+  }
+  DCHECK(entry_section <= current_section_);
+  entries_.push_back(ConstantPoolEntry(rinfo, entry_section, merged_index));
+
+  if (merged_index == -1) {
+    // Not merged, so update the appropriate count.
+    number_of_entries_[entry_section].increment(GetConstantPoolType(rmode));
+  }
+
+  // Check if we still have room for another entry in the small section,
+  // given ARM's ldr and vldr immediate offset ranges.
+  if (current_section_ == ConstantPoolArray::SMALL_SECTION &&
+      !(is_uint12(ConstantPoolArray::SizeFor(*small_entries())) &&
+        is_uint10(ConstantPoolArray::MaxInt64Offset(
+            small_entries()->count_of(ConstantPoolArray::INT64))))) {
+    current_section_ = ConstantPoolArray::EXTENDED_SECTION;
+  }
+  return entry_section;
+}
+
+
+void ConstantPoolBuilder::Relocate(int pc_delta) {
+  for (std::vector<ConstantPoolEntry>::iterator entry = entries_.begin();
+       entry != entries_.end(); entry++) {
+    DCHECK(entry->rinfo_.rmode() != RelocInfo::JS_RETURN);
+    entry->rinfo_.set_pc(entry->rinfo_.pc() + pc_delta);
+  }
+}
+
+
+Handle<ConstantPoolArray> ConstantPoolBuilder::New(Isolate* isolate) {
+  if (IsEmpty()) {
+    return isolate->factory()->empty_constant_pool_array();
+  } else if (extended_entries()->is_empty()) {
+    return isolate->factory()->NewConstantPoolArray(*small_entries());
+  } else {
+    DCHECK(current_section_ == ConstantPoolArray::EXTENDED_SECTION);
+    return isolate->factory()->NewExtendedConstantPoolArray(
+        *small_entries(), *extended_entries());
+  }
+}
+
+
+void ConstantPoolBuilder::Populate(Assembler* assm,
+                                   ConstantPoolArray* constant_pool) {
+  DCHECK_EQ(extended_entries()->is_empty(),
+            !constant_pool->is_extended_layout());
+  DCHECK(small_entries()->equals(ConstantPoolArray::NumberOfEntries(
+      constant_pool, ConstantPoolArray::SMALL_SECTION)));
+  if (constant_pool->is_extended_layout()) {
+    DCHECK(extended_entries()->equals(ConstantPoolArray::NumberOfEntries(
+        constant_pool, ConstantPoolArray::EXTENDED_SECTION)));
+  }
+
+  // Compute the starting offset of each (section, type) group of entries.
+  int offsets[ConstantPoolArray::NUMBER_OF_LAYOUT_SECTIONS]
+             [ConstantPoolArray::NUMBER_OF_TYPES];
+  for (int section = 0; section <= constant_pool->final_section(); section++) {
+    int section_start = (section == ConstantPoolArray::EXTENDED_SECTION)
+                            ? small_entries()->total_count()
+                            : 0;
+    for (int i = 0; i < ConstantPoolArray::NUMBER_OF_TYPES; i++) {
+      ConstantPoolArray::Type type = static_cast<ConstantPoolArray::Type>(i);
+      if (number_of_entries_[section].count_of(type) != 0) {
+        offsets[section][type] = constant_pool->OffsetOfElementAt(
+            number_of_entries_[section].base_of(type) + section_start);
+      }
+    }
+  }
+
+  for (std::vector<ConstantPoolEntry>::iterator entry = entries_.begin();
+       entry != entries_.end(); entry++) {
+    RelocInfo rinfo = entry->rinfo_;
+    RelocInfo::Mode rmode = entry->rinfo_.rmode();
+    ConstantPoolArray::Type type = GetConstantPoolType(rmode);
+
+    // Update constant pool if necessary and get the entry's offset.
+    int offset;
+    if (entry->merged_index_ == -1) {
+      offset = offsets[entry->section_][type];
+      offsets[entry->section_][type] += ConstantPoolArray::entry_size(type);
+      if (type == ConstantPoolArray::INT64) {
+        constant_pool->set_at_offset(offset, rinfo.data64());
+      } else if (type == ConstantPoolArray::INT32) {
+        constant_pool->set_at_offset(offset,
+                                     static_cast<int32_t>(rinfo.data()));
+      } else if (type == ConstantPoolArray::CODE_PTR) {
+        constant_pool->set_at_offset(offset,
+                                     reinterpret_cast<Address>(rinfo.data()));
+      } else {
+        DCHECK(type == ConstantPoolArray::HEAP_PTR);
+        constant_pool->set_at_offset(offset,
+                                     reinterpret_cast<Object*>(rinfo.data()));
+      }
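+      // The pool is addressed through a tagged ConstantPoolArray pointer, so
+      // bias the stored offset to compensate for the heap object tag.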
+      offset -= kHeapObjectTag;
+      entry->merged_index_ = offset;  // Stash offset for merged entries.
+    } else {
+      DCHECK(entry->merged_index_ < (entry - entries_.begin()));
+      offset = entries_[entry->merged_index_].merged_index_;
+    }
+
+    // Patch vldr/ldr instruction with correct offset.
+    Instr instr = assm->instr_at(rinfo.pc());
+    if (entry->section_ == ConstantPoolArray::EXTENDED_SECTION) {
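+      // Extended-section offsets can exceed the ldr/vldr immediate range, so
+      // the full 32-bit offset is materialized in a register instead.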
+      if (CpuFeatures::IsSupported(ARMv7)) {
+        // Instructions to patch must be 'movw rd, #0' and 'movt rd, #0'.
+        Instr next_instr = assm->instr_at(rinfo.pc() + Assembler::kInstrSize);
+        DCHECK((Assembler::IsMovW(instr) &&
+                Instruction::ImmedMovwMovtValue(instr) == 0));
+        DCHECK((Assembler::IsMovT(next_instr) &&
+                Instruction::ImmedMovwMovtValue(next_instr) == 0));
+        assm->instr_at_put(
+            rinfo.pc(), Assembler::PatchMovwImmediate(instr, offset & 0xffff));
+        assm->instr_at_put(
+            rinfo.pc() + Assembler::kInstrSize,
+            Assembler::PatchMovwImmediate(next_instr, offset >> 16));
+      } else {
+        // Instructions to patch must be 'mov rd, #0' and 'orr rd, rd, #0'.
+        Instr instr_2 = assm->instr_at(rinfo.pc() + Assembler::kInstrSize);
+        Instr instr_3 = assm->instr_at(rinfo.pc() + 2 * Assembler::kInstrSize);
+        Instr instr_4 = assm->instr_at(rinfo.pc() + 3 * Assembler::kInstrSize);
+        DCHECK((Assembler::IsMovImmed(instr) &&
+                Instruction::Immed8Value(instr) == 0));
+        DCHECK((Assembler::IsOrrImmed(instr_2) &&
+                Instruction::Immed8Value(instr_2) == 0) &&
+               Assembler::GetRn(instr_2).is(Assembler::GetRd(instr_2)));
+        DCHECK((Assembler::IsOrrImmed(instr_3) &&
+                Instruction::Immed8Value(instr_3) == 0) &&
+               Assembler::GetRn(instr_3).is(Assembler::GetRd(instr_3)));
+        DCHECK((Assembler::IsOrrImmed(instr_4) &&
+                Instruction::Immed8Value(instr_4) == 0) &&
+               Assembler::GetRn(instr_4).is(Assembler::GetRd(instr_4)));
+        assm->instr_at_put(
+            rinfo.pc(), Assembler::PatchShiftImm(instr, (offset & kImm8Mask)));
+        assm->instr_at_put(
+            rinfo.pc() + Assembler::kInstrSize,
+            Assembler::PatchShiftImm(instr_2, (offset & (kImm8Mask << 8))));
+        assm->instr_at_put(
+            rinfo.pc() + 2 * Assembler::kInstrSize,
+            Assembler::PatchShiftImm(instr_3, (offset & (kImm8Mask << 16))));
+        assm->instr_at_put(
+            rinfo.pc() + 3 * Assembler::kInstrSize,
+            Assembler::PatchShiftImm(instr_4, (offset & (kImm8Mask << 24))));
+      }
+    } else if (type == ConstantPoolArray::INT64) {
+      // Instruction to patch must be 'vldr rd, [pp, #0]'.
+      DCHECK((Assembler::IsVldrDPpImmediateOffset(instr) &&
+              Assembler::GetVldrDRegisterImmediateOffset(instr) == 0));
+      DCHECK(is_uint10(offset));
+      assm->instr_at_put(rinfo.pc(), Assembler::SetVldrDRegisterImmediateOffset(
+                                         instr, offset));
+    } else {
+      // Instruction to patch must be 'ldr rd, [pp, #0]'.
+      DCHECK((Assembler::IsLdrPpImmediateOffset(instr) &&
+              Assembler::GetLdrRegisterImmediateOffset(instr) == 0));
+      DCHECK(is_uint12(offset));
+      assm->instr_at_put(
+          rinfo.pc(), Assembler::SetLdrRegisterImmediateOffset(instr, offset));
+    }
+  }
+}
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_TARGET_ARCH_ARM