Upgrade to 3.29

Update V8 to 3.29.88.17 and adjust the makefiles so that it builds on
all the relevant platforms.

Bug: 17370214

Change-Id: Ia3407c157fd8d72a93e23d8318ccaf6ecf77fa4e
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc
index ea61910..52cf72b 100644
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -1,37 +1,14 @@
 // Copyright 2012 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
-#include "v8.h"
+#include "src/v8.h"
 
-#if defined(V8_TARGET_ARCH_IA32)
+#if V8_TARGET_ARCH_IA32
 
-#include "codegen.h"
-#include "heap.h"
-#include "macro-assembler.h"
+#include "src/codegen.h"
+#include "src/heap/heap.h"
+#include "src/macro-assembler.h"
 
 namespace v8 {
 namespace internal {
@@ -42,14 +19,14 @@
 
 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const {
   masm->EnterFrame(StackFrame::INTERNAL);
-  ASSERT(!masm->has_frame());
+  DCHECK(!masm->has_frame());
   masm->set_has_frame(true);
 }
 
 
 void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
   masm->LeaveFrame(StackFrame::INTERNAL);
-  ASSERT(masm->has_frame());
+  DCHECK(masm->has_frame());
   masm->set_has_frame(false);
 }
 
@@ -57,47 +34,39 @@
 #define __ masm.
 
 
-UnaryMathFunction CreateTranscendentalFunction(TranscendentalCache::Type type) {
+UnaryMathFunction CreateExpFunction() {
+  if (!FLAG_fast_math) return &std::exp;
   size_t actual_size;
-  // Allocate buffer in executable space.
-  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
-                                                 &actual_size,
-                                                 true));
-  if (buffer == NULL) {
-    // Fallback to library function if function cannot be created.
-    switch (type) {
-      case TranscendentalCache::SIN: return &sin;
-      case TranscendentalCache::COS: return &cos;
-      case TranscendentalCache::TAN: return &tan;
-      case TranscendentalCache::LOG: return &log;
-      default: UNIMPLEMENTED();
-    }
-  }
+  byte* buffer =
+      static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &std::exp;
+  ExternalReference::InitializeMathExpData();
 
   MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
   // esp[1 * kPointerSize]: raw double input
   // esp[0 * kPointerSize]: return address
-  // Move double input into registers.
+  {
+    XMMRegister input = xmm1;
+    XMMRegister result = xmm2;
+    __ movsd(input, Operand(esp, 1 * kPointerSize));
+    __ push(eax);
+    __ push(ebx);
 
-  __ push(ebx);
-  __ push(edx);
-  __ push(edi);
-  __ fld_d(Operand(esp, 4 * kPointerSize));
-  __ mov(ebx, Operand(esp, 4 * kPointerSize));
-  __ mov(edx, Operand(esp, 5 * kPointerSize));
-  TranscendentalCacheStub::GenerateOperation(&masm, type);
-  // The return value is expected to be on ST(0) of the FPU stack.
-  __ pop(edi);
-  __ pop(edx);
-  __ pop(ebx);
-  __ Ret();
+    MathExpGenerator::EmitMathExp(&masm, input, result, xmm0, eax, ebx);
+
+    __ pop(ebx);
+    __ pop(eax);
+    __ movsd(Operand(esp, 1 * kPointerSize), result);
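+    // Doubles are returned in ST(0) under the ia32 calling convention, so
+    // spill the SSE result and reload it onto the x87 stack.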
+    __ fld_d(Operand(esp, 1 * kPointerSize));
+    __ Ret();
+  }
 
   CodeDesc desc;
   masm.GetCode(&desc);
-  ASSERT(desc.reloc_size == 0);
+  DCHECK(!RelocInfo::RequiresRelocation(desc));
 
-  CPU::FlushICache(buffer, actual_size);
-  OS::ProtectCode(buffer, actual_size);
+  CpuFeatures::FlushICache(buffer, actual_size);
+  base::OS::ProtectCode(buffer, actual_size);
   return FUNCTION_CAST<UnaryMathFunction>(buffer);
 }
 
@@ -105,21 +74,17 @@
 UnaryMathFunction CreateSqrtFunction() {
   size_t actual_size;
   // Allocate buffer in executable space.
-  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
-                                                 &actual_size,
-                                                 true));
-  // If SSE2 is not available, we can use libc's implementation to ensure
-  // consistency since code by fullcodegen's calls into runtime in that case.
-  if (buffer == NULL || !CpuFeatures::IsSupported(SSE2)) return &sqrt;
+  byte* buffer =
+      static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return &std::sqrt;
   MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
   // esp[1 * kPointerSize]: raw double input
   // esp[0 * kPointerSize]: return address
   // Move double input into registers.
   {
-    CpuFeatures::Scope use_sse2(SSE2);
-    __ movdbl(xmm0, Operand(esp, 1 * kPointerSize));
+    __ movsd(xmm0, Operand(esp, 1 * kPointerSize));
     __ sqrtsd(xmm0, xmm0);
-    __ movdbl(Operand(esp, 1 * kPointerSize), xmm0);
+    __ movsd(Operand(esp, 1 * kPointerSize), xmm0);
     // Load result into floating point register as return value.
     __ fld_d(Operand(esp, 1 * kPointerSize));
     __ Ret();
@@ -127,29 +92,110 @@
 
   CodeDesc desc;
   masm.GetCode(&desc);
-  ASSERT(desc.reloc_size == 0);
+  DCHECK(!RelocInfo::RequiresRelocation(desc));
 
-  CPU::FlushICache(buffer, actual_size);
-  OS::ProtectCode(buffer, actual_size);
+  CpuFeatures::FlushICache(buffer, actual_size);
+  base::OS::ProtectCode(buffer, actual_size);
   return FUNCTION_CAST<UnaryMathFunction>(buffer);
 }
 
 
-static void MemCopyWrapper(void* dest, const void* src, size_t size) {
-  memcpy(dest, src, size);
+// Helper functions for CreateMemMoveFunction.
+#undef __
+#define __ ACCESS_MASM(masm)
+
+enum Direction { FORWARD, BACKWARD };
+enum Alignment { MOVE_ALIGNED, MOVE_UNALIGNED };
+
+// Expects registers:
+// esi - source, aligned if alignment == MOVE_ALIGNED
+// edi - destination, always aligned
+// ecx - count (copy size in bytes)
+// edx - loop count (number of 64-byte chunks)
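+// Direction and alignment are fixed at code-generation time; a separate copy
+// of the loop is emitted for each combination used below.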
+void MemMoveEmitMainLoop(MacroAssembler* masm,
+                         Label* move_last_15,
+                         Direction direction,
+                         Alignment alignment) {
+  Register src = esi;
+  Register dst = edi;
+  Register count = ecx;
+  Register loop_count = edx;
+  Label loop, move_last_31, move_last_63;
+  __ cmp(loop_count, 0);
+  __ j(equal, &move_last_63);
+  __ bind(&loop);
+  // Main loop. Copy in 64-byte chunks.
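+  // movdq picks movdqa when the source is known to be aligned, movdqu otherwise.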
+  if (direction == BACKWARD) __ sub(src, Immediate(0x40));
+  __ movdq(alignment == MOVE_ALIGNED, xmm0, Operand(src, 0x00));
+  __ movdq(alignment == MOVE_ALIGNED, xmm1, Operand(src, 0x10));
+  __ movdq(alignment == MOVE_ALIGNED, xmm2, Operand(src, 0x20));
+  __ movdq(alignment == MOVE_ALIGNED, xmm3, Operand(src, 0x30));
+  if (direction == FORWARD) __ add(src, Immediate(0x40));
+  if (direction == BACKWARD) __ sub(dst, Immediate(0x40));
+  __ movdqa(Operand(dst, 0x00), xmm0);
+  __ movdqa(Operand(dst, 0x10), xmm1);
+  __ movdqa(Operand(dst, 0x20), xmm2);
+  __ movdqa(Operand(dst, 0x30), xmm3);
+  if (direction == FORWARD) __ add(dst, Immediate(0x40));
+  __ dec(loop_count);
+  __ j(not_zero, &loop);
+  // At most 63 bytes left to copy.
+  __ bind(&move_last_63);
+  __ test(count, Immediate(0x20));
+  __ j(zero, &move_last_31);
+  if (direction == BACKWARD) __ sub(src, Immediate(0x20));
+  __ movdq(alignment == MOVE_ALIGNED, xmm0, Operand(src, 0x00));
+  __ movdq(alignment == MOVE_ALIGNED, xmm1, Operand(src, 0x10));
+  if (direction == FORWARD) __ add(src, Immediate(0x20));
+  if (direction == BACKWARD) __ sub(dst, Immediate(0x20));
+  __ movdqa(Operand(dst, 0x00), xmm0);
+  __ movdqa(Operand(dst, 0x10), xmm1);
+  if (direction == FORWARD) __ add(dst, Immediate(0x20));
+  // At most 31 bytes left to copy.
+  __ bind(&move_last_31);
+  __ test(count, Immediate(0x10));
+  __ j(zero, move_last_15);
+  if (direction == BACKWARD) __ sub(src, Immediate(0x10));
+  __ movdq(alignment == MOVE_ALIGNED, xmm0, Operand(src, 0));
+  if (direction == FORWARD) __ add(src, Immediate(0x10));
+  if (direction == BACKWARD) __ sub(dst, Immediate(0x10));
+  __ movdqa(Operand(dst, 0), xmm0);
+  if (direction == FORWARD) __ add(dst, Immediate(0x10));
 }
 
 
-OS::MemCopyFunction CreateMemCopyFunction() {
+void MemMoveEmitPopAndReturn(MacroAssembler* masm) {
+  __ pop(esi);
+  __ pop(edi);
+  __ ret(0);
+}
+
+
+#undef __
+#define __ masm.
+
+
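+// Computes the absolute address of a bound label inside the fixed buffer.
+// This lets the jump tables emitted with dd() below hold real code addresses,
+// which is safe only because the buffer is never moved.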
+class LabelConverter {
+ public:
+  explicit LabelConverter(byte* buffer) : buffer_(buffer) {}
+  int32_t address(Label* l) const {
+    return reinterpret_cast<int32_t>(buffer_) + l->pos();
+  }
+ private:
+  byte* buffer_;
+};
+
+
+MemMoveFunction CreateMemMoveFunction() {
   size_t actual_size;
   // Allocate buffer in executable space.
-  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
-                                                 &actual_size,
-                                                 true));
-  if (buffer == NULL) return &MemCopyWrapper;
+  byte* buffer =
+      static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true));
+  if (buffer == NULL) return NULL;
   MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
+  LabelConverter conv(buffer);
 
-  // Generated code is put into a fixed, unmovable, buffer, and not into
+  // Generated code is put into a fixed, unmovable buffer, and not into
   // the V8 heap. We can't, and don't, refer to any relocatable addresses
   // (e.g. the JavaScript nan-object).
 
@@ -165,31 +211,53 @@
   const int kSourceOffset = 2 * kPointerSize;
   const int kSizeOffset = 3 * kPointerSize;
 
+  // When copying up to this many bytes, use special "small" handlers.
+  const size_t kSmallCopySize = 8;
+  // When copying up to this many bytes, use special "medium" handlers.
+  const size_t kMediumCopySize = 63;
+  // When the non-overlapping region of src and dst is smaller than this,
+  // use a more careful (slightly slower) implementation.
+  const size_t kMinMoveDistance = 16;
+  // Note that these values are dictated by the implementation below;
+  // do not just change them and hope things will work!
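+  // Dispatch: counts of up to 8 bytes use the small handlers, 9..63 the
+  // medium handlers, and anything larger the main forward/backward loops.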
+
   int stack_offset = 0;  // Update if we change the stack height.
 
-  if (FLAG_debug_code) {
-    __ cmp(Operand(esp, kSizeOffset + stack_offset),
-           Immediate(OS::kMinComplexMemCopy));
-    Label ok;
-    __ j(greater_equal, &ok);
-    __ int3();
-    __ bind(&ok);
-  }
-  if (CpuFeatures::IsSupported(SSE2)) {
-    CpuFeatures::Scope enable(SSE2);
-    __ push(edi);
-    __ push(esi);
-    stack_offset += 2 * kPointerSize;
-    Register dst = edi;
-    Register src = esi;
-    Register count = ecx;
-    __ mov(dst, Operand(esp, stack_offset + kDestinationOffset));
-    __ mov(src, Operand(esp, stack_offset + kSourceOffset));
-    __ mov(count, Operand(esp, stack_offset + kSizeOffset));
+  Label backward, backward_much_overlap;
+  Label forward_much_overlap, small_size, medium_size, pop_and_return;
+  __ push(edi);
+  __ push(esi);
+  stack_offset += 2 * kPointerSize;
+  Register dst = edi;
+  Register src = esi;
+  Register count = ecx;
+  Register loop_count = edx;
+  __ mov(dst, Operand(esp, stack_offset + kDestinationOffset));
+  __ mov(src, Operand(esp, stack_offset + kSourceOffset));
+  __ mov(count, Operand(esp, stack_offset + kSizeOffset));
 
+  __ cmp(dst, src);
+  __ j(equal, &pop_and_return);
 
+  __ prefetch(Operand(src, 0), 1);
+  __ cmp(count, kSmallCopySize);
+  __ j(below_equal, &small_size);
+  __ cmp(count, kMediumCopySize);
+  __ j(below_equal, &medium_size);
+  __ cmp(dst, src);
+  __ j(above, &backward);
+
+  {
+    // |dst| is a lower address than |src|. Copy front-to-back.
+    Label unaligned_source, move_last_15, skip_last_move;
+    __ mov(eax, src);
+    __ sub(eax, dst);
+    __ cmp(eax, kMinMoveDistance);
+    __ j(below, &forward_much_overlap);
+    // Copy first 16 bytes.
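+    // These 16 bytes deliberately overlap the region that the alignment
+    // adjustment below skips, so no byte is left uncopied.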
     __ movdqu(xmm0, Operand(src, 0));
     __ movdqu(Operand(dst, 0), xmm0);
+    // Determine distance to alignment: 16 - (dst & 0xF).
     __ mov(edx, dst);
     __ and_(edx, 0xF);
     __ neg(edx);
@@ -197,153 +265,254 @@
     __ add(dst, edx);
     __ add(src, edx);
     __ sub(count, edx);
-
-    // edi is now aligned. Check if esi is also aligned.
-    Label unaligned_source;
-    __ test(src, Immediate(0x0F));
+    // dst is now aligned. Main copy loop.
+    __ mov(loop_count, count);
+    __ shr(loop_count, 6);
+    // Check if src is also aligned.
+    __ test(src, Immediate(0xF));
     __ j(not_zero, &unaligned_source);
-    {
-      // Copy loop for aligned source and destination.
-      __ mov(edx, count);
-      Register loop_count = ecx;
-      Register count = edx;
-      __ shr(loop_count, 5);
-      {
-        // Main copy loop.
-        Label loop;
-        __ bind(&loop);
-        __ prefetch(Operand(src, 0x20), 1);
-        __ movdqa(xmm0, Operand(src, 0x00));
-        __ movdqa(xmm1, Operand(src, 0x10));
-        __ add(src, Immediate(0x20));
+    // Copy loop for aligned source and destination.
+    MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_ALIGNED);
+    // At most 15 bytes to copy. Copy 16 bytes at end of string.
+    __ bind(&move_last_15);
+    __ and_(count, 0xF);
+    __ j(zero, &skip_last_move, Label::kNear);
+    __ movdqu(xmm0, Operand(src, count, times_1, -0x10));
+    __ movdqu(Operand(dst, count, times_1, -0x10), xmm0);
+    __ bind(&skip_last_move);
+    MemMoveEmitPopAndReturn(&masm);
 
-        __ movdqa(Operand(dst, 0x00), xmm0);
-        __ movdqa(Operand(dst, 0x10), xmm1);
-        __ add(dst, Immediate(0x20));
+    // Copy loop for unaligned source and aligned destination.
+    __ bind(&unaligned_source);
+    MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, MOVE_UNALIGNED);
+    __ jmp(&move_last_15);
 
-        __ dec(loop_count);
-        __ j(not_zero, &loop);
-      }
+    // Less than kMinMoveDistance offset between dst and src.
+    Label loop_until_aligned, last_15_much_overlap;
+    __ bind(&loop_until_aligned);
+    __ mov_b(eax, Operand(src, 0));
+    __ inc(src);
+    __ mov_b(Operand(dst, 0), eax);
+    __ inc(dst);
+    __ dec(count);
+    __ bind(&forward_much_overlap);  // Entry point into this block.
+    __ test(dst, Immediate(0xF));
+    __ j(not_zero, &loop_until_aligned);
+    // dst is now aligned, src can't be. Main copy loop.
+    __ mov(loop_count, count);
+    __ shr(loop_count, 6);
+    MemMoveEmitMainLoop(&masm, &last_15_much_overlap,
+                        FORWARD, MOVE_UNALIGNED);
+    __ bind(&last_15_much_overlap);
+    __ and_(count, 0xF);
+    __ j(zero, &pop_and_return);
+    __ cmp(count, kSmallCopySize);
+    __ j(below_equal, &small_size);
+    __ jmp(&medium_size);
+  }
 
-      // At most 31 bytes to copy.
-      Label move_less_16;
-      __ test(count, Immediate(0x10));
-      __ j(zero, &move_less_16);
-      __ movdqa(xmm0, Operand(src, 0));
-      __ add(src, Immediate(0x10));
-      __ movdqa(Operand(dst, 0), xmm0);
-      __ add(dst, Immediate(0x10));
-      __ bind(&move_less_16);
+  {
+    // |dst| is a higher address than |src|. Copy backwards.
+    Label unaligned_source, move_first_15, skip_last_move;
+    __ bind(&backward);
+    // |dst| and |src| always point to the end of what's left to copy.
+    __ add(dst, count);
+    __ add(src, count);
+    __ mov(eax, dst);
+    __ sub(eax, src);
+    __ cmp(eax, kMinMoveDistance);
+    __ j(below, &backward_much_overlap);
+    // Copy last 16 bytes.
+    __ movdqu(xmm0, Operand(src, -0x10));
+    __ movdqu(Operand(dst, -0x10), xmm0);
+    // Determine distance to alignment: dst & 0xF.
+    __ mov(edx, dst);
+    __ and_(edx, 0xF);
+    __ sub(dst, edx);
+    __ sub(src, edx);
+    __ sub(count, edx);
+    // dst is now aligned. Main copy loop.
+    __ mov(loop_count, count);
+    __ shr(loop_count, 6);
+    // Check if src is also aligned.
+    __ test(src, Immediate(0xF));
+    __ j(not_zero, &unaligned_source);
+    // Copy loop for aligned source and destination.
+    MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_ALIGNED);
+    // At most 15 bytes to copy. Copy 16 bytes at beginning of string.
+    __ bind(&move_first_15);
+    __ and_(count, 0xF);
+    __ j(zero, &skip_last_move, Label::kNear);
+    __ sub(src, count);
+    __ sub(dst, count);
+    __ movdqu(xmm0, Operand(src, 0));
+    __ movdqu(Operand(dst, 0), xmm0);
+    __ bind(&skip_last_move);
+    MemMoveEmitPopAndReturn(&masm);
 
-      // At most 15 bytes to copy. Copy 16 bytes at end of string.
-      __ and_(count, 0xF);
-      __ movdqu(xmm0, Operand(src, count, times_1, -0x10));
-      __ movdqu(Operand(dst, count, times_1, -0x10), xmm0);
+    // Copy loop for unaligned source and aligned destination.
+    __ bind(&unaligned_source);
+    MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, MOVE_UNALIGNED);
+    __ jmp(&move_first_15);
 
-      __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
-      __ pop(esi);
-      __ pop(edi);
-      __ ret(0);
+    // Less than kMinMoveDistance offset between dst and src.
+    Label loop_until_aligned, first_15_much_overlap;
+    __ bind(&loop_until_aligned);
+    __ dec(src);
+    __ dec(dst);
+    __ mov_b(eax, Operand(src, 0));
+    __ mov_b(Operand(dst, 0), eax);
+    __ dec(count);
+    __ bind(&backward_much_overlap);  // Entry point into this block.
+    __ test(dst, Immediate(0xF));
+    __ j(not_zero, &loop_until_aligned);
+    // dst is now aligned, src can't be. Main copy loop.
+    __ mov(loop_count, count);
+    __ shr(loop_count, 6);
+    MemMoveEmitMainLoop(&masm, &first_15_much_overlap,
+                        BACKWARD, MOVE_UNALIGNED);
+    __ bind(&first_15_much_overlap);
+    __ and_(count, 0xF);
+    __ j(zero, &pop_and_return);
+    // Small/medium handlers expect dst/src to point to the beginning.
+    __ sub(dst, count);
+    __ sub(src, count);
+    __ cmp(count, kSmallCopySize);
+    __ j(below_equal, &small_size);
+    __ jmp(&medium_size);
+  }
+  {
+    // Special handlers for 9 <= copy_size < 64. No assumptions about
+    // alignment or move distance, so all reads must be unaligned and
+    // must happen before any writes.
+    Label medium_handlers, f9_16, f17_32, f33_48, f49_63;
+
+    __ bind(&f9_16);
+    __ movsd(xmm0, Operand(src, 0));
+    __ movsd(xmm1, Operand(src, count, times_1, -8));
+    __ movsd(Operand(dst, 0), xmm0);
+    __ movsd(Operand(dst, count, times_1, -8), xmm1);
+    MemMoveEmitPopAndReturn(&masm);
+
+    __ bind(&f17_32);
+    __ movdqu(xmm0, Operand(src, 0));
+    __ movdqu(xmm1, Operand(src, count, times_1, -0x10));
+    __ movdqu(Operand(dst, 0x00), xmm0);
+    __ movdqu(Operand(dst, count, times_1, -0x10), xmm1);
+    MemMoveEmitPopAndReturn(&masm);
+
+    __ bind(&f33_48);
+    __ movdqu(xmm0, Operand(src, 0x00));
+    __ movdqu(xmm1, Operand(src, 0x10));
+    __ movdqu(xmm2, Operand(src, count, times_1, -0x10));
+    __ movdqu(Operand(dst, 0x00), xmm0);
+    __ movdqu(Operand(dst, 0x10), xmm1);
+    __ movdqu(Operand(dst, count, times_1, -0x10), xmm2);
+    MemMoveEmitPopAndReturn(&masm);
+
+    __ bind(&f49_63);
+    __ movdqu(xmm0, Operand(src, 0x00));
+    __ movdqu(xmm1, Operand(src, 0x10));
+    __ movdqu(xmm2, Operand(src, 0x20));
+    __ movdqu(xmm3, Operand(src, count, times_1, -0x10));
+    __ movdqu(Operand(dst, 0x00), xmm0);
+    __ movdqu(Operand(dst, 0x10), xmm1);
+    __ movdqu(Operand(dst, 0x20), xmm2);
+    __ movdqu(Operand(dst, count, times_1, -0x10), xmm3);
+    MemMoveEmitPopAndReturn(&masm);
+
+    __ bind(&medium_handlers);
+    __ dd(conv.address(&f9_16));
+    __ dd(conv.address(&f17_32));
+    __ dd(conv.address(&f33_48));
+    __ dd(conv.address(&f49_63));
+
+    __ bind(&medium_size);  // Entry point into this block.
+    __ mov(eax, count);
+    __ dec(eax);
+    __ shr(eax, 4);
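+    // (count - 1) >> 4 maps 9..16 -> 0, 17..32 -> 1, 33..48 -> 2, 49..63 -> 3.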
+    if (FLAG_debug_code) {
+      Label ok;
+      __ cmp(eax, 3);
+      __ j(below_equal, &ok);
+      __ int3();
+      __ bind(&ok);
     }
-    __ Align(16);
-    {
-      // Copy loop for unaligned source and aligned destination.
-      // If source is not aligned, we can't read it as efficiently.
-      __ bind(&unaligned_source);
-      __ mov(edx, ecx);
-      Register loop_count = ecx;
-      Register count = edx;
-      __ shr(loop_count, 5);
-      {
-        // Main copy loop
-        Label loop;
-        __ bind(&loop);
-        __ prefetch(Operand(src, 0x20), 1);
-        __ movdqu(xmm0, Operand(src, 0x00));
-        __ movdqu(xmm1, Operand(src, 0x10));
-        __ add(src, Immediate(0x20));
+    __ mov(eax, Operand(eax, times_4, conv.address(&medium_handlers)));
+    __ jmp(eax);
+  }
+  {
+    // Specialized copiers for copy_size <= 8 bytes.
+    Label small_handlers, f0, f1, f2, f3, f4, f5_8;
+    __ bind(&f0);
+    MemMoveEmitPopAndReturn(&masm);
 
-        __ movdqa(Operand(dst, 0x00), xmm0);
-        __ movdqa(Operand(dst, 0x10), xmm1);
-        __ add(dst, Immediate(0x20));
+    __ bind(&f1);
+    __ mov_b(eax, Operand(src, 0));
+    __ mov_b(Operand(dst, 0), eax);
+    MemMoveEmitPopAndReturn(&masm);
 
-        __ dec(loop_count);
-        __ j(not_zero, &loop);
-      }
+    __ bind(&f2);
+    __ mov_w(eax, Operand(src, 0));
+    __ mov_w(Operand(dst, 0), eax);
+    MemMoveEmitPopAndReturn(&masm);
 
-      // At most 31 bytes to copy.
-      Label move_less_16;
-      __ test(count, Immediate(0x10));
-      __ j(zero, &move_less_16);
-      __ movdqu(xmm0, Operand(src, 0));
-      __ add(src, Immediate(0x10));
-      __ movdqa(Operand(dst, 0), xmm0);
-      __ add(dst, Immediate(0x10));
-      __ bind(&move_less_16);
+    __ bind(&f3);
+    __ mov_w(eax, Operand(src, 0));
+    __ mov_b(edx, Operand(src, 2));
+    __ mov_w(Operand(dst, 0), eax);
+    __ mov_b(Operand(dst, 2), edx);
+    MemMoveEmitPopAndReturn(&masm);
 
-      // At most 15 bytes to copy. Copy 16 bytes at end of string.
-      __ and_(count, 0x0F);
-      __ movdqu(xmm0, Operand(src, count, times_1, -0x10));
-      __ movdqu(Operand(dst, count, times_1, -0x10), xmm0);
-
-      __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
-      __ pop(esi);
-      __ pop(edi);
-      __ ret(0);
-    }
-
-  } else {
-    // SSE2 not supported. Unlikely to happen in practice.
-    __ push(edi);
-    __ push(esi);
-    stack_offset += 2 * kPointerSize;
-    __ cld();
-    Register dst = edi;
-    Register src = esi;
-    Register count = ecx;
-    __ mov(dst, Operand(esp, stack_offset + kDestinationOffset));
-    __ mov(src, Operand(esp, stack_offset + kSourceOffset));
-    __ mov(count, Operand(esp, stack_offset + kSizeOffset));
-
-    // Copy the first word.
+    __ bind(&f4);
     __ mov(eax, Operand(src, 0));
     __ mov(Operand(dst, 0), eax);
+    MemMoveEmitPopAndReturn(&masm);
 
-    // Increment src,dstso that dst is aligned.
-    __ mov(edx, dst);
-    __ and_(edx, 0x03);
-    __ neg(edx);
-    __ add(edx, Immediate(4));  // edx = 4 - (dst & 3)
-    __ add(dst, edx);
-    __ add(src, edx);
-    __ sub(count, edx);
-    // edi is now aligned, ecx holds number of remaning bytes to copy.
+    __ bind(&f5_8);
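+    // The first and last 4-byte loads overlap for counts 5..7; both loads
+    // happen before either store, so overlapping src/dst is still safe.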
+    __ mov(eax, Operand(src, 0));
+    __ mov(edx, Operand(src, count, times_1, -4));
+    __ mov(Operand(dst, 0), eax);
+    __ mov(Operand(dst, count, times_1, -4), edx);
+    MemMoveEmitPopAndReturn(&masm);
 
-    __ mov(edx, count);
-    count = edx;
-    __ shr(ecx, 2);  // Make word count instead of byte count.
-    __ rep_movs();
+    __ bind(&small_handlers);
+    __ dd(conv.address(&f0));
+    __ dd(conv.address(&f1));
+    __ dd(conv.address(&f2));
+    __ dd(conv.address(&f3));
+    __ dd(conv.address(&f4));
+    __ dd(conv.address(&f5_8));
+    __ dd(conv.address(&f5_8));
+    __ dd(conv.address(&f5_8));
+    __ dd(conv.address(&f5_8));
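+    // Indexed directly by count (0..8); counts 5 through 8 share f5_8.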
 
-    // At most 3 bytes left to copy. Copy 4 bytes at end of string.
-    __ and_(count, 3);
-    __ mov(eax, Operand(src, count, times_1, -4));
-    __ mov(Operand(dst, count, times_1, -4), eax);
-
-    __ mov(eax, Operand(esp, stack_offset + kDestinationOffset));
-    __ pop(esi);
-    __ pop(edi);
-    __ ret(0);
+    __ bind(&small_size);  // Entry point into this block.
+    if (FLAG_debug_code) {
+      Label ok;
+      __ cmp(count, 8);
+      __ j(below_equal, &ok);
+      __ int3();
+      __ bind(&ok);
+    }
+    __ mov(eax, Operand(count, times_4, conv.address(&small_handlers)));
+    __ jmp(eax);
   }
 
+  __ bind(&pop_and_return);
+  MemMoveEmitPopAndReturn(&masm);
+
   CodeDesc desc;
   masm.GetCode(&desc);
-  ASSERT(desc.reloc_size == 0);
-
-  CPU::FlushICache(buffer, actual_size);
-  OS::ProtectCode(buffer, actual_size);
-  return FUNCTION_CAST<OS::MemCopyFunction>(buffer);
+  DCHECK(!RelocInfo::RequiresRelocation(desc));
+  CpuFeatures::FlushICache(buffer, actual_size);
+  base::OS::ProtectCode(buffer, actual_size);
+  // TODO(jkummerow): It would be nice to register this code creation event
+  // with the PROFILE / GDBJIT system.
+  return FUNCTION_CAST<MemMoveFunction>(buffer);
 }
 
+
 #undef __
 
 // -------------------------------------------------------------------------
@@ -351,38 +520,56 @@
 
 #define __ ACCESS_MASM(masm)
 
-void ElementsTransitionGenerator::GenerateSmiOnlyToObject(
-    MacroAssembler* masm) {
-  // ----------- S t a t e -------------
-  //  -- eax    : value
-  //  -- ebx    : target map
-  //  -- ecx    : key
-  //  -- edx    : receiver
-  //  -- esp[0] : return address
-  // -----------------------------------
+
+void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
+    MacroAssembler* masm,
+    Register receiver,
+    Register key,
+    Register value,
+    Register target_map,
+    AllocationSiteMode mode,
+    Label* allocation_memento_found) {
+  Register scratch = edi;
+  DCHECK(!AreAliased(receiver, key, value, target_map, scratch));
+
+  if (mode == TRACK_ALLOCATION_SITE) {
+    DCHECK(allocation_memento_found != NULL);
+    __ JumpIfJSArrayHasAllocationMemento(
+        receiver, scratch, allocation_memento_found);
+  }
+
   // Set transitioned map.
-  __ mov(FieldOperand(edx, HeapObject::kMapOffset), ebx);
-  __ RecordWriteField(edx,
+  __ mov(FieldOperand(receiver, HeapObject::kMapOffset), target_map);
+  __ RecordWriteField(receiver,
                       HeapObject::kMapOffset,
-                      ebx,
-                      edi,
+                      target_map,
+                      scratch,
                       kDontSaveFPRegs,
                       EMIT_REMEMBERED_SET,
                       OMIT_SMI_CHECK);
 }
 
 
-void ElementsTransitionGenerator::GenerateSmiOnlyToDouble(
-    MacroAssembler* masm, Label* fail) {
-  // ----------- S t a t e -------------
-  //  -- eax    : value
-  //  -- ebx    : target map
-  //  -- ecx    : key
-  //  -- edx    : receiver
-  //  -- esp[0] : return address
-  // -----------------------------------
+void ElementsTransitionGenerator::GenerateSmiToDouble(
+    MacroAssembler* masm,
+    Register receiver,
+    Register key,
+    Register value,
+    Register target_map,
+    AllocationSiteMode mode,
+    Label* fail) {
+  // Return address is on the stack.
+  DCHECK(receiver.is(edx));
+  DCHECK(key.is(ecx));
+  DCHECK(value.is(eax));
+  DCHECK(target_map.is(ebx));
+
   Label loop, entry, convert_hole, gc_required, only_change_map;
 
+  if (mode == TRACK_ALLOCATION_SITE) {
+    __ JumpIfJSArrayHasAllocationMemento(edx, edi, fail);
+  }
+
   // Check for empty arrays, which only require a map transition and no changes
   // to the backing store.
   __ mov(edi, FieldOperand(edx, JSObject::kElementsOffset));
@@ -397,8 +584,10 @@
   // Allocate new FixedDoubleArray.
   // edx: receiver
   // edi: length of source FixedArray (smi-tagged)
-  __ lea(esi, Operand(edi, times_4, FixedDoubleArray::kHeaderSize));
-  __ AllocateInNewSpace(esi, eax, ebx, no_reg, &gc_required, TAG_OBJECT);
+  AllocationFlags flags =
+      static_cast<AllocationFlags>(TAG_OBJECT | DOUBLE_ALIGNMENT);
+  __ Allocate(FixedDoubleArray::kHeaderSize, times_8, edi,
+              REGISTER_VALUE_IS_SMI, eax, ebx, no_reg, &gc_required, flags);
 
   // eax: destination FixedDoubleArray
   // edi: number of elements
@@ -424,11 +613,8 @@
   ExternalReference canonical_the_hole_nan_reference =
       ExternalReference::address_of_the_hole_nan();
   XMMRegister the_hole_nan = xmm1;
-  if (CpuFeatures::IsSupported(SSE2)) {
-    CpuFeatures::Scope use_sse2(SSE2);
-    __ movdbl(the_hole_nan,
-              Operand::StaticVariable(canonical_the_hole_nan_reference));
-  }
+  __ movsd(the_hole_nan,
+           Operand::StaticVariable(canonical_the_hole_nan_reference));
   __ jmp(&entry);
 
   // Call into runtime if GC is required.
@@ -449,17 +635,9 @@
 
   // Normal smi, convert it to double and store.
   __ SmiUntag(ebx);
-  if (CpuFeatures::IsSupported(SSE2)) {
-    CpuFeatures::Scope fscope(SSE2);
-    __ cvtsi2sd(xmm0, ebx);
-    __ movdbl(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize),
-              xmm0);
-  } else {
-    __ push(ebx);
-    __ fild_s(Operand(esp, 0));
-    __ pop(ebx);
-    __ fstp_d(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize));
-  }
+  __ Cvtsi2sd(xmm0, ebx);
+  __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize),
+           xmm0);
   __ jmp(&entry);
 
   // Found hole, store hole_nan_as_double instead.
@@ -467,17 +645,11 @@
 
   if (FLAG_debug_code) {
     __ cmp(ebx, masm->isolate()->factory()->the_hole_value());
-    __ Assert(equal, "object found in smi-only array");
+    __ Assert(equal, kObjectFoundInSmiOnlyArray);
   }
 
-  if (CpuFeatures::IsSupported(SSE2)) {
-    CpuFeatures::Scope use_sse2(SSE2);
-    __ movdbl(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize),
-              the_hole_nan);
-  } else {
-    __ fld_d(Operand::StaticVariable(canonical_the_hole_nan_reference));
-    __ fstp_d(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize));
-  }
+  __ movsd(FieldOperand(eax, edi, times_4, FixedDoubleArray::kHeaderSize),
+           the_hole_nan);
 
   __ bind(&entry);
   __ sub(edi, Immediate(Smi::FromInt(1)));
@@ -505,16 +677,25 @@
 
 
 void ElementsTransitionGenerator::GenerateDoubleToObject(
-    MacroAssembler* masm, Label* fail) {
-  // ----------- S t a t e -------------
-  //  -- eax    : value
-  //  -- ebx    : target map
-  //  -- ecx    : key
-  //  -- edx    : receiver
-  //  -- esp[0] : return address
-  // -----------------------------------
+    MacroAssembler* masm,
+    Register receiver,
+    Register key,
+    Register value,
+    Register target_map,
+    AllocationSiteMode mode,
+    Label* fail) {
+  // Return address is on the stack.
+  DCHECK(receiver.is(edx));
+  DCHECK(key.is(ecx));
+  DCHECK(value.is(eax));
+  DCHECK(target_map.is(ebx));
+
   Label loop, entry, convert_hole, gc_required, only_change_map, success;
 
+  if (mode == TRACK_ALLOCATION_SITE) {
+    __ JumpIfJSArrayHasAllocationMemento(edx, edi, fail);
+  }
+
   // Check for empty arrays, which only require a map transition and no changes
   // to the backing store.
   __ mov(edi, FieldOperand(edx, JSObject::kElementsOffset));
@@ -530,7 +711,7 @@
   // Allocate new FixedArray.
   // ebx: length of source FixedDoubleArray (smi-tagged)
   __ lea(edi, Operand(ebx, times_2, FixedArray::kHeaderSize));
-  __ AllocateInNewSpace(edi, eax, esi, no_reg, &gc_required, TAG_OBJECT);
+  __ Allocate(edi, eax, esi, no_reg, &gc_required, TAG_OBJECT);
 
   // eax: destination FixedArray
   // ebx: number of elements
@@ -575,17 +756,9 @@
   // Non-hole double, copy value into a heap number.
   __ AllocateHeapNumber(edx, esi, no_reg, &gc_required);
   // edx: new heap number
-  if (CpuFeatures::IsSupported(SSE2)) {
-    CpuFeatures::Scope fscope(SSE2);
-    __ movdbl(xmm0,
-              FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize));
-    __ movdbl(FieldOperand(edx, HeapNumber::kValueOffset), xmm0);
-  } else {
-    __ mov(esi, FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize));
-    __ mov(FieldOperand(edx, HeapNumber::kValueOffset), esi);
-    __ mov(esi, FieldOperand(edi, ebx, times_4, offset));
-    __ mov(FieldOperand(edx, HeapNumber::kValueOffset + kPointerSize), esi);
-  }
+  __ movsd(xmm0,
+           FieldOperand(edi, ebx, times_4, FixedDoubleArray::kHeaderSize));
+  __ movsd(FieldOperand(edx, HeapNumber::kValueOffset), xmm0);
   __ mov(FieldOperand(eax, ebx, times_2, FixedArray::kHeaderSize), edx);
   __ mov(esi, ebx);
   __ RecordWriteArray(eax,
@@ -689,37 +862,37 @@
   __ j(zero, &seq_string, Label::kNear);
 
   // Handle external strings.
-  Label ascii_external, done;
+  Label one_byte_external, done;
   if (FLAG_debug_code) {
     // Assert that we do not have a cons or slice (indirect strings) here.
     // Sequential strings have already been ruled out.
     __ test(result, Immediate(kIsIndirectStringMask));
-    __ Assert(zero, "external string expected, but not found");
+    __ Assert(zero, kExternalStringExpectedButNotFound);
   }
   // Rule out short external strings.
-  STATIC_CHECK(kShortExternalStringTag != 0);
+  STATIC_ASSERT(kShortExternalStringTag != 0);
   __ test_b(result, kShortExternalStringMask);
   __ j(not_zero, call_runtime);
   // Check encoding.
   STATIC_ASSERT(kTwoByteStringTag == 0);
   __ test_b(result, kStringEncodingMask);
   __ mov(result, FieldOperand(string, ExternalString::kResourceDataOffset));
-  __ j(not_equal, &ascii_external, Label::kNear);
+  __ j(not_equal, &one_byte_external, Label::kNear);
   // Two-byte string.
   __ movzx_w(result, Operand(result, index, times_2, 0));
   __ jmp(&done, Label::kNear);
-  __ bind(&ascii_external);
-  // Ascii string.
+  __ bind(&one_byte_external);
+  // One-byte string.
   __ movzx_b(result, Operand(result, index, times_1, 0));
   __ jmp(&done, Label::kNear);
 
-  // Dispatch on the encoding: ASCII or two-byte.
-  Label ascii;
+  // Dispatch on the encoding: one-byte or two-byte.
+  Label one_byte;
   __ bind(&seq_string);
-  STATIC_ASSERT((kStringEncodingMask & kAsciiStringTag) != 0);
+  STATIC_ASSERT((kStringEncodingMask & kOneByteStringTag) != 0);
   STATIC_ASSERT((kStringEncodingMask & kTwoByteStringTag) == 0);
   __ test(result, Immediate(kStringEncodingMask));
-  __ j(not_zero, &ascii, Label::kNear);
+  __ j(not_zero, &one_byte, Label::kNear);
 
   // Two-byte string.
   // Load the two-byte character code into the result register.
@@ -729,18 +902,132 @@
                                   SeqTwoByteString::kHeaderSize));
   __ jmp(&done, Label::kNear);
 
-  // Ascii string.
+  // One-byte string.
   // Load the byte into the result register.
-  __ bind(&ascii);
+  __ bind(&one_byte);
   __ movzx_b(result, FieldOperand(string,
                                   index,
                                   times_1,
-                                  SeqAsciiString::kHeaderSize));
+                                  SeqOneByteString::kHeaderSize));
+  __ bind(&done);
+}
+
+
+static Operand ExpConstant(int index) {
+  return Operand::StaticVariable(ExternalReference::math_exp_constants(index));
+}
+
+
+void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
+                                   XMMRegister input,
+                                   XMMRegister result,
+                                   XMMRegister double_scratch,
+                                   Register temp1,
+                                   Register temp2) {
+  DCHECK(!input.is(double_scratch));
+  DCHECK(!input.is(result));
+  DCHECK(!result.is(double_scratch));
+  DCHECK(!temp1.is(temp2));
+  DCHECK(ExternalReference::math_exp_constants(0).address() != NULL);
+  DCHECK(!masm->serializer_enabled());  // External references not serializable.
+
+  Label done;
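+  // ExpConstant(0) and ExpConstant(1) appear to be the underflow and overflow
+  // bounds; the main path combines a 2048-entry table of precomputed values
+  // (math_exp_log_table, indexed by the low 11 bits of temp2) with a short
+  // polynomial correction.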
+
+  __ movsd(double_scratch, ExpConstant(0));
+  __ xorpd(result, result);
+  __ ucomisd(double_scratch, input);
+  __ j(above_equal, &done);
+  __ ucomisd(input, ExpConstant(1));
+  __ movsd(result, ExpConstant(2));
+  __ j(above_equal, &done);
+  __ movsd(double_scratch, ExpConstant(3));
+  __ movsd(result, ExpConstant(4));
+  __ mulsd(double_scratch, input);
+  __ addsd(double_scratch, result);
+  __ movd(temp2, double_scratch);
+  __ subsd(double_scratch, result);
+  __ movsd(result, ExpConstant(6));
+  __ mulsd(double_scratch, ExpConstant(5));
+  __ subsd(double_scratch, input);
+  __ subsd(result, double_scratch);
+  __ movsd(input, double_scratch);
+  __ mulsd(input, double_scratch);
+  __ mulsd(result, input);
+  __ mov(temp1, temp2);
+  __ mulsd(result, ExpConstant(7));
+  __ subsd(result, double_scratch);
+  __ add(temp1, Immediate(0x1ff800));
+  __ addsd(result, ExpConstant(8));
+  __ and_(temp2, Immediate(0x7ff));
+  __ shr(temp1, 11);
+  __ shl(temp1, 20);
+  __ movd(input, temp1);
+  __ pshufd(input, input, static_cast<uint8_t>(0xe1));  // Order: 11 10 00 01
+  __ movsd(double_scratch, Operand::StaticArray(
+      temp2, times_8, ExternalReference::math_exp_log_table()));
+  __ orps(input, double_scratch);
+  __ mulsd(result, input);
   __ bind(&done);
 }
 
 #undef __
 
+
+CodeAgingHelper::CodeAgingHelper() {
+  DCHECK(young_sequence_.length() == kNoCodeAgeSequenceLength);
+  CodePatcher patcher(young_sequence_.start(), young_sequence_.length());
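+  // The "young" sequence is the standard frame-setup prologue emitted below;
+  // aging replaces it with a call instruction (see the kCallOpcode check in
+  // IsOld and the patching in PatchPlatformCodeAge).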
+  patcher.masm()->push(ebp);
+  patcher.masm()->mov(ebp, esp);
+  patcher.masm()->push(esi);
+  patcher.masm()->push(edi);
+}
+
+
+#ifdef DEBUG
+bool CodeAgingHelper::IsOld(byte* candidate) const {
+  return *candidate == kCallOpcode;
+}
+#endif
+
+
+bool Code::IsYoungSequence(Isolate* isolate, byte* sequence) {
+  bool result = isolate->code_aging_helper()->IsYoung(sequence);
+  DCHECK(result || isolate->code_aging_helper()->IsOld(sequence));
+  return result;
+}
+
+
+void Code::GetCodeAgeAndParity(Isolate* isolate, byte* sequence, Age* age,
+                               MarkingParity* parity) {
+  if (IsYoungSequence(isolate, sequence)) {
+    *age = kNoAgeCodeAge;
+    *parity = NO_MARKING_PARITY;
+  } else {
+    sequence++;  // Skip the kCallOpcode byte.
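+    // The rel32 displacement is relative to the end of the call instruction,
+    // which kCallTargetAddressOffset accounts for.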
+    Address target_address = sequence + *reinterpret_cast<int*>(sequence) +
+        Assembler::kCallTargetAddressOffset;
+    Code* stub = GetCodeFromTargetAddress(target_address);
+    GetCodeAgeAndParity(stub, age, parity);
+  }
+}
+
+
+void Code::PatchPlatformCodeAge(Isolate* isolate,
+                                byte* sequence,
+                                Code::Age age,
+                                MarkingParity parity) {
+  uint32_t young_length = isolate->code_aging_helper()->young_sequence_length();
+  if (age == kNoAgeCodeAge) {
+    isolate->code_aging_helper()->CopyYoungSequenceTo(sequence);
+    CpuFeatures::FlushICache(sequence, young_length);
+  } else {
+    Code* stub = GetCodeAgeStub(isolate, age, parity);
+    CodePatcher patcher(sequence, young_length);
+    patcher.masm()->call(stub->instruction_start(), RelocInfo::NONE32);
+  }
+}
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_TARGET_ARCH_IA32