Upgrade V8 to version 4.9.385.28

https://chromium.googlesource.com/v8/v8/+/4.9.385.28

FPIIM-449

Change-Id: I4b2e74289d4bf3667f2f3dc8aa2e541f63e26eb4
diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc
index 0480f9d..be406fb 100644
--- a/src/compiler/x64/code-generator-x64.cc
+++ b/src/compiler/x64/code-generator-x64.cc
@@ -4,11 +4,11 @@
 
 #include "src/compiler/code-generator.h"
 
+#include "src/ast/scopes.h"
 #include "src/compiler/code-generator-impl.h"
 #include "src/compiler/gap-resolver.h"
 #include "src/compiler/node-matchers.h"
-#include "src/compiler/node-properties-inl.h"
-#include "src/scopes.h"
+#include "src/compiler/osr.h"
 #include "src/x64/assembler-x64.h"
 #include "src/x64/macro-assembler-x64.h"
 
@@ -19,33 +19,44 @@
 #define __ masm()->
 
 
+#define kScratchDoubleReg xmm0
+
+
 // Adds X64 specific methods for decoding operands.
 class X64OperandConverter : public InstructionOperandConverter {
  public:
   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
       : InstructionOperandConverter(gen, instr) {}
 
-  Immediate InputImmediate(int index) {
+  Immediate InputImmediate(size_t index) {
     return ToImmediate(instr_->InputAt(index));
   }
 
-  Operand InputOperand(int index) { return ToOperand(instr_->InputAt(index)); }
+  Operand InputOperand(size_t index, int extra = 0) {
+    return ToOperand(instr_->InputAt(index), extra);
+  }
 
   Operand OutputOperand() { return ToOperand(instr_->Output()); }
 
   Immediate ToImmediate(InstructionOperand* operand) {
-    return Immediate(ToConstant(operand).ToInt32());
+    Constant constant = ToConstant(operand);
+    if (constant.type() == Constant::kFloat64) {
+      DCHECK_EQ(0, bit_cast<int64_t>(constant.ToFloat64()));
+      return Immediate(0);
+    }
+    return Immediate(constant.ToInt32());
   }
 
   Operand ToOperand(InstructionOperand* op, int extra = 0) {
     DCHECK(op->IsStackSlot() || op->IsDoubleStackSlot());
-    // The linkage computes where all spill slots are located.
-    FrameOffset offset = linkage()->GetFrameOffset(op->index(), frame(), extra);
-    return Operand(offset.from_stack_pointer() ? rsp : rbp, offset.offset());
+    FrameOffset offset = frame_access_state()->GetFrameOffset(
+        AllocatedOperand::cast(op)->index());
+    return Operand(offset.from_stack_pointer() ? rsp : rbp,
+                   offset.offset() + extra);
   }
 
-  static int NextOffset(int* offset) {
-    int i = *offset;
+  static size_t NextOffset(size_t* offset) {
+    size_t i = *offset;
     (*offset)++;
     return i;
   }
@@ -60,7 +71,7 @@
     return static_cast<ScaleFactor>(scale);
   }
 
-  Operand MemoryOperand(int* offset) {
+  Operand MemoryOperand(size_t* offset) {
     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
     switch (mode) {
       case kMode_MR: {
@@ -125,7 +136,7 @@
     return Operand(no_reg, 0);
   }
 
-  Operand MemoryOperand(int first_input = 0) {
+  Operand MemoryOperand(size_t first_input = 0) {
     return MemoryOperand(&first_input);
   }
 };
@@ -133,44 +144,44 @@
 
 namespace {
 
-bool HasImmediateInput(Instruction* instr, int index) {
+bool HasImmediateInput(Instruction* instr, size_t index) {
   return instr->InputAt(index)->IsImmediate();
 }
 
 
-class OutOfLineLoadZero FINAL : public OutOfLineCode {
+class OutOfLineLoadZero final : public OutOfLineCode {
  public:
   OutOfLineLoadZero(CodeGenerator* gen, Register result)
       : OutOfLineCode(gen), result_(result) {}
 
-  void Generate() FINAL { __ xorl(result_, result_); }
+  void Generate() final { __ xorl(result_, result_); }
 
  private:
   Register const result_;
 };
 
 
-class OutOfLineLoadNaN FINAL : public OutOfLineCode {
+class OutOfLineLoadNaN final : public OutOfLineCode {
  public:
   OutOfLineLoadNaN(CodeGenerator* gen, XMMRegister result)
       : OutOfLineCode(gen), result_(result) {}
 
-  void Generate() FINAL { __ pcmpeqd(result_, result_); }
+  void Generate() final { __ Pcmpeqd(result_, result_); }
 
  private:
   XMMRegister const result_;
 };
 
 
-class OutOfLineTruncateDoubleToI FINAL : public OutOfLineCode {
+class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
  public:
   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                              XMMRegister input)
       : OutOfLineCode(gen), result_(result), input_(input) {}
 
-  void Generate() FINAL {
+  void Generate() final {
     __ subp(rsp, Immediate(kDoubleSize));
-    __ movsd(MemOperand(rsp, 0), input_);
+    __ Movsd(MemOperand(rsp, 0), input_);
     __ SlowTruncateToI(result_, rsp, 0);
     __ addp(rsp, Immediate(kDoubleSize));
   }
@@ -180,6 +191,46 @@
   XMMRegister const input_;
 };
 
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode)
+      : OutOfLineCode(gen),
+        object_(object),
+        operand_(operand),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    if (mode_ > RecordWriteMode::kValueIsMap) {
+      __ CheckPageFlag(value_, scratch0_,
+                       MemoryChunk::kPointersToHereAreInterestingMask, zero,
+                       exit());
+    }
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    RecordWriteStub stub(isolate(), object_, scratch0_, scratch1_,
+                         EMIT_REMEMBERED_SET, save_fp_mode);
+    __ leap(scratch1_, operand_);
+    __ CallStub(&stub);
+  }
+
+ private:
+  Register const object_;
+  Operand const operand_;
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+};
+
 }  // namespace
 
 
@@ -249,7 +300,19 @@
   } while (0)
 
 
-#define ASSEMBLE_DOUBLE_BINOP(asm_instr)                                \
+#define ASSEMBLE_MOVX(asm_instr)                            \
+  do {                                                      \
+    if (instr->addressing_mode() != kMode_None) {           \
+      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
+    } else if (instr->InputAt(0)->IsRegister()) {           \
+      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
+    } else {                                                \
+      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
+    }                                                       \
+  } while (0)
+
+
+#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
   do {                                                                  \
     if (instr->InputAt(1)->IsDoubleRegister()) {                        \
       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
@@ -259,7 +322,17 @@
   } while (0)
 
 
-#define ASSEMBLE_AVX_DOUBLE_BINOP(asm_instr)                           \
+#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
+  do {                                                                  \
+    if (instr->InputAt(0)->IsDoubleRegister()) {                        \
+      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
+    } else {                                                            \
+      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
+    }                                                                   \
+  } while (0)
+
+
+#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
   do {                                                                 \
     CpuFeatureScope avx_scope(masm(), AVX);                            \
     if (instr->InputAt(1)->IsDoubleRegister()) {                       \
@@ -288,7 +361,7 @@
       auto length = i.InputInt32(3);                                         \
       DCHECK_LE(index2, length);                                             \
       __ cmpq(index1, Immediate(length - index2));                           \
-      class OutOfLineLoadFloat FINAL : public OutOfLineCode {                \
+      class OutOfLineLoadFloat final : public OutOfLineCode {                \
        public:                                                               \
         OutOfLineLoadFloat(CodeGenerator* gen, XMMRegister result,           \
                            Register buffer, Register index1, int32_t index2, \
@@ -300,9 +373,9 @@
               index2_(index2),                                               \
               length_(length) {}                                             \
                                                                              \
-        void Generate() FINAL {                                              \
+        void Generate() final {                                              \
           __ leal(kScratchRegister, Operand(index1_, index2_));              \
-          __ pcmpeqd(result_, result_);                                      \
+          __ Pcmpeqd(result_, result_);                                      \
           __ cmpl(kScratchRegister, Immediate(length_));                     \
           __ j(above_equal, exit());                                         \
           __ asm_instr(result_,                                              \
@@ -341,7 +414,7 @@
       auto length = i.InputInt32(3);                                           \
       DCHECK_LE(index2, length);                                               \
       __ cmpq(index1, Immediate(length - index2));                             \
-      class OutOfLineLoadInteger FINAL : public OutOfLineCode {                \
+      class OutOfLineLoadInteger final : public OutOfLineCode {                \
        public:                                                                 \
         OutOfLineLoadInteger(CodeGenerator* gen, Register result,              \
                              Register buffer, Register index1, int32_t index2, \
@@ -353,7 +426,7 @@
               index2_(index2),                                                 \
               length_(length) {}                                               \
                                                                                \
-        void Generate() FINAL {                                                \
+        void Generate() final {                                                \
           Label oob;                                                           \
           __ leal(kScratchRegister, Operand(index1_, index2_));                \
           __ cmpl(kScratchRegister, Immediate(length_));                       \
@@ -399,7 +472,7 @@
       auto length = i.InputInt32(3);                                         \
       DCHECK_LE(index2, length);                                             \
       __ cmpq(index1, Immediate(length - index2));                           \
-      class OutOfLineStoreFloat FINAL : public OutOfLineCode {               \
+      class OutOfLineStoreFloat final : public OutOfLineCode {               \
        public:                                                               \
         OutOfLineStoreFloat(CodeGenerator* gen, Register buffer,             \
                             Register index1, int32_t index2, int32_t length, \
@@ -411,7 +484,7 @@
               length_(length),                                               \
               value_(value) {}                                               \
                                                                              \
-        void Generate() FINAL {                                              \
+        void Generate() final {                                              \
           __ leal(kScratchRegister, Operand(index1_, index2_));              \
           __ cmpl(kScratchRegister, Immediate(length_));                     \
           __ j(above_equal, exit());                                         \
@@ -452,7 +525,7 @@
       auto length = i.InputInt32(3);                                           \
       DCHECK_LE(index2, length);                                               \
       __ cmpq(index1, Immediate(length - index2));                             \
-      class OutOfLineStoreInteger FINAL : public OutOfLineCode {               \
+      class OutOfLineStoreInteger final : public OutOfLineCode {               \
        public:                                                                 \
         OutOfLineStoreInteger(CodeGenerator* gen, Register buffer,             \
                               Register index1, int32_t index2, int32_t length, \
@@ -464,7 +537,7 @@
               length_(length),                                                 \
               value_(value) {}                                                 \
                                                                                \
-        void Generate() FINAL {                                                \
+        void Generate() final {                                                \
           __ leal(kScratchRegister, Operand(index1_, index2_));                \
           __ cmpl(kScratchRegister, Immediate(length_));                       \
           __ j(above_equal, exit());                                           \
@@ -500,6 +573,28 @@
   } while (false)
 
 
+void CodeGenerator::AssembleDeconstructActivationRecord(int stack_param_delta) {
+  int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
+  if (sp_slot_delta > 0) {
+    __ addq(rsp, Immediate(sp_slot_delta * kPointerSize));
+  }
+  frame_access_state()->SetFrameAccessToDefault();
+}
+
+
+void CodeGenerator::AssemblePrepareTailCall(int stack_param_delta) {
+  int sp_slot_delta = TailCallFrameStackSlotDelta(stack_param_delta);
+  if (sp_slot_delta < 0) {
+    __ subq(rsp, Immediate(-sp_slot_delta * kPointerSize));
+    frame_access_state()->IncreaseSPDelta(-sp_slot_delta);
+  }
+  if (frame()->needs_frame()) {
+    __ movq(rbp, MemOperand(rbp, 0));
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+
 // Assembles an instruction after register allocation, producing machine code.
 void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
   X64OperandConverter i(this, instr);
@@ -512,10 +607,25 @@
         __ Call(code, RelocInfo::CODE_TARGET);
       } else {
         Register reg = i.InputRegister(0);
-        int entry = Code::kHeaderSize - kHeapObjectTag;
-        __ Call(Operand(reg, entry));
+        __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
+        __ call(reg);
       }
-      AddSafepointAndDeopt(instr);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObject: {
+      int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
+      AssembleDeconstructActivationRecord(stack_param_delta);
+      if (HasImmediateInput(instr, 0)) {
+        Handle<Code> code = Handle<Code>::cast(i.InputHeapObject(0));
+        __ jmp(code, RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
+        __ jmp(reg);
+      }
+      frame_access_state()->ClearSPDelta();
       break;
     }
     case kArchCallJSFunction: {
@@ -527,31 +637,109 @@
         __ Assert(equal, kWrongFunctionContext);
       }
       __ Call(FieldOperand(func, JSFunction::kCodeEntryOffset));
-      AddSafepointAndDeopt(instr);
+      frame_access_state()->ClearSPDelta();
+      RecordCallPosition(instr);
+      break;
+    }
+    case kArchTailCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ cmpp(rsi, FieldOperand(func, JSFunction::kContextOffset));
+        __ Assert(equal, kWrongFunctionContext);
+      }
+      int stack_param_delta = i.InputInt32(instr->InputCount() - 1);
+      AssembleDeconstructActivationRecord(stack_param_delta);
+      __ jmp(FieldOperand(func, JSFunction::kCodeEntryOffset));
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchLazyBailout: {
+      EnsureSpaceForLazyDeopt();
+      RecordCallPosition(instr);
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters);
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall(i.InputInt32(instr->InputCount() - 1));
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      if (HasImmediateInput(instr, 0)) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      frame_access_state()->ClearSPDelta();
       break;
     }
     case kArchJmp:
       AssembleArchJump(i.InputRpo(0));
       break;
+    case kArchLookupSwitch:
+      AssembleArchLookupSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
     case kArchNop:
+    case kArchThrowTerminator:
       // don't emit code for nops.
       break;
+    case kArchDeoptimize: {
+      int deopt_state_id =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      Deoptimizer::BailoutType bailout_type =
+          Deoptimizer::BailoutType(MiscField::decode(instr->opcode()));
+      AssembleDeoptimizerCall(deopt_state_id, bailout_type);
+      break;
+    }
     case kArchRet:
       AssembleReturn();
       break;
     case kArchStackPointer:
       __ movq(i.OutputRegister(), rsp);
       break;
+    case kArchFramePointer:
+      __ movq(i.OutputRegister(), rbp);
+      break;
     case kArchTruncateDoubleToI: {
       auto result = i.OutputRegister();
       auto input = i.InputDoubleRegister(0);
       auto ool = new (zone()) OutOfLineTruncateDoubleToI(this, result, input);
-      __ cvttsd2siq(result, input);
+      __ Cvttsd2siq(result, input);
       __ cmpq(result, Immediate(1));
       __ j(overflow, ool->entry());
       __ bind(ool->exit());
       break;
     }
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      Register value = i.InputRegister(index);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      auto ool = new (zone()) OutOfLineRecordWrite(this, object, operand, value,
+                                                   scratch0, scratch1, mode);
+      __ movp(operand, value);
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask,
+                       not_zero, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
     case kX64Add32:
       ASSEMBLE_BINOP(addl);
       break;
@@ -666,27 +854,123 @@
     case kX64Ror:
       ASSEMBLE_SHIFT(rorq, 6);
       break;
+    case kX64Lzcnt:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Lzcnt32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Tzcnt:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Tzcnt32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Popcnt:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Popcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Popcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Popcnt32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Popcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Popcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEFloat32Cmp:
+      ASSEMBLE_SSE_BINOP(Ucomiss);
+      break;
+    case kSSEFloat32Add:
+      ASSEMBLE_SSE_BINOP(addss);
+      break;
+    case kSSEFloat32Sub:
+      ASSEMBLE_SSE_BINOP(subss);
+      break;
+    case kSSEFloat32Mul:
+      ASSEMBLE_SSE_BINOP(mulss);
+      break;
+    case kSSEFloat32Div:
+      ASSEMBLE_SSE_BINOP(divss);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kSSEFloat32Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psrlq(kScratchDoubleReg, 33);
+      __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kSSEFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 31);
+      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kSSEFloat32Sqrt:
+      ASSEMBLE_SSE_UNOP(sqrtss);
+      break;
+    case kSSEFloat32Max:
+      ASSEMBLE_SSE_BINOP(maxss);
+      break;
+    case kSSEFloat32Min:
+      ASSEMBLE_SSE_BINOP(minss);
+      break;
+    case kSSEFloat32ToFloat64:
+      ASSEMBLE_SSE_UNOP(Cvtss2sd);
+      break;
+    case kSSEFloat32Round: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+      break;
+    }
     case kSSEFloat64Cmp:
-      ASSEMBLE_DOUBLE_BINOP(ucomisd);
+      ASSEMBLE_SSE_BINOP(Ucomisd);
       break;
     case kSSEFloat64Add:
-      ASSEMBLE_DOUBLE_BINOP(addsd);
+      ASSEMBLE_SSE_BINOP(addsd);
       break;
     case kSSEFloat64Sub:
-      ASSEMBLE_DOUBLE_BINOP(subsd);
+      ASSEMBLE_SSE_BINOP(subsd);
       break;
     case kSSEFloat64Mul:
-      ASSEMBLE_DOUBLE_BINOP(mulsd);
+      ASSEMBLE_SSE_BINOP(mulsd);
       break;
     case kSSEFloat64Div:
-      ASSEMBLE_DOUBLE_BINOP(divsd);
+      ASSEMBLE_SSE_BINOP(divsd);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
       break;
     case kSSEFloat64Mod: {
       __ subq(rsp, Immediate(kDoubleSize));
       // Move values to st(0) and st(1).
-      __ movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
+      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
       __ fld_d(Operand(rsp, 0));
-      __ movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
+      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
       __ fld_d(Operand(rsp, 0));
       // Loop while fprem isn't done.
       Label mod_loop;
@@ -709,107 +993,411 @@
       // Move output to stack and clean up.
       __ fstp(1);
       __ fstp_d(Operand(rsp, 0));
-      __ movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
+      __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
       __ addq(rsp, Immediate(kDoubleSize));
       break;
     }
+    case kSSEFloat64Max:
+      ASSEMBLE_SSE_BINOP(maxsd);
+      break;
+    case kSSEFloat64Min:
+      ASSEMBLE_SSE_BINOP(minsd);
+      break;
+    case kSSEFloat64Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psrlq(kScratchDoubleReg, 1);
+      __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kSSEFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psllq(kScratchDoubleReg, 63);
+      __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
+      break;
+    }
     case kSSEFloat64Sqrt:
-      if (instr->InputAt(0)->IsDoubleRegister()) {
-        __ sqrtsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
-      } else {
-        __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
-      }
+      ASSEMBLE_SSE_UNOP(sqrtsd);
       break;
-    case kSSEFloat64Floor: {
+    case kSSEFloat64Round: {
       CpuFeatureScope sse_scope(masm(), SSE4_1);
-      __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
-                 v8::internal::Assembler::kRoundDown);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
       break;
     }
-    case kSSEFloat64Ceil: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
-      __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
-                 v8::internal::Assembler::kRoundUp);
-      break;
-    }
-    case kSSEFloat64RoundTruncate: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
-      __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
-                 v8::internal::Assembler::kRoundToZero);
-      break;
-    }
-    case kSSECvtss2sd:
-      if (instr->InputAt(0)->IsDoubleRegister()) {
-        __ cvtss2sd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
-      } else {
-        __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
-      }
-      break;
-    case kSSECvtsd2ss:
-      if (instr->InputAt(0)->IsDoubleRegister()) {
-        __ cvtsd2ss(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
-      } else {
-        __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
-      }
+    case kSSEFloat64ToFloat32:
+      ASSEMBLE_SSE_UNOP(Cvtsd2ss);
       break;
     case kSSEFloat64ToInt32:
       if (instr->InputAt(0)->IsDoubleRegister()) {
-        __ cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
+        __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
       } else {
-        __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
+        __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
       }
       break;
     case kSSEFloat64ToUint32: {
       if (instr->InputAt(0)->IsDoubleRegister()) {
-        __ cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+        __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
       } else {
-        __ cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
+        __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
       }
       __ AssertZeroExtended(i.OutputRegister());
       break;
     }
+    case kSSEFloat32ToInt64:
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+        Label done;
+        Label fail;
+        __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
+        if (instr->InputAt(0)->IsDoubleRegister()) {
+          __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
+        } else {
+          __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
+        }
+        // If the input is NaN, then the conversion fails.
+        __ j(parity_even, &fail);
+        // If the input is INT64_MIN, then the conversion succeeds.
+        __ j(equal, &done);
+        __ cmpq(i.OutputRegister(0), Immediate(1));
+        // If the conversion results in INT64_MIN, but the input was not
+        // INT64_MIN, then the conversion fails.
+        __ j(no_overflow, &done);
+        __ bind(&fail);
+        __ Set(i.OutputRegister(1), 0);
+        __ bind(&done);
+      }
+      break;
+    case kSSEFloat64ToInt64:
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
+      }
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+        Label done;
+        Label fail;
+        __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
+        if (instr->InputAt(0)->IsDoubleRegister()) {
+          __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
+        } else {
+          __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
+        }
+        // If the input is NaN, then the conversion fails.
+        __ j(parity_even, &fail);
+        // If the input is INT64_MIN, then the conversion succeeds.
+        __ j(equal, &done);
+        __ cmpq(i.OutputRegister(0), Immediate(1));
+        // If the conversion results in INT64_MIN, but the input was not
+        // INT64_MIN, then the conversion fails.
+        __ j(no_overflow, &done);
+        __ bind(&fail);
+        __ Set(i.OutputRegister(1), 0);
+        __ bind(&done);
+      }
+      break;
+    case kSSEFloat32ToUint64: {
+      Label done;
+      Label success;
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 0);
+      }
+      // There does not exist a Float32ToUint64 instruction, so we have to use
+      // the Float32ToInt64 instruction.
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      // Check if the result of the Float32ToInt64 conversion is positive, we
+      // are already done.
+      __ testq(i.OutputRegister(), i.OutputRegister());
+      __ j(positive, &success);
+      // The result of the first conversion was negative, which means that the
+      // input value was not within the positive int64 range. We subtract 2^64
+      // and convert it again to see if it is within the uint64 range.
+      __ Move(kScratchDoubleReg, -9223372036854775808.0f);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ addss(kScratchDoubleReg, i.InputDoubleRegister(0));
+      } else {
+        __ addss(kScratchDoubleReg, i.InputOperand(0));
+      }
+      __ Cvttss2siq(i.OutputRegister(), kScratchDoubleReg);
+      __ testq(i.OutputRegister(), i.OutputRegister());
+      // The only possible negative value here is 0x80000000000000000, which is
+      // used on x64 to indicate an integer overflow.
+      __ j(negative, &done);
+      // The input value is within uint64 range and the second conversion worked
+      // successfully, but we still have to undo the subtraction we did
+      // earlier.
+      __ Set(kScratchRegister, 0x8000000000000000);
+      __ orq(i.OutputRegister(), kScratchRegister);
+      __ bind(&success);
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+      }
+      __ bind(&done);
+      break;
+    }
+    case kSSEFloat64ToUint64: {
+      Label done;
+      Label success;
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 0);
+      }
+      // There does not exist a Float64ToUint64 instruction, so we have to use
+      // the Float64ToInt64 instruction.
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      // Check if the result of the Float64ToInt64 conversion is positive, we
+      // are already done.
+      __ testq(i.OutputRegister(), i.OutputRegister());
+      __ j(positive, &success);
+      // The result of the first conversion was negative, which means that the
+      // input value was not within the positive int64 range. We subtract 2^64
+      // and convert it again to see if it is within the uint64 range.
+      __ Move(kScratchDoubleReg, -9223372036854775808.0);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ addsd(kScratchDoubleReg, i.InputDoubleRegister(0));
+      } else {
+        __ addsd(kScratchDoubleReg, i.InputOperand(0));
+      }
+      __ Cvttsd2siq(i.OutputRegister(), kScratchDoubleReg);
+      __ testq(i.OutputRegister(), i.OutputRegister());
+      // The only possible negative value here is 0x80000000000000000, which is
+      // used on x64 to indicate an integer overflow.
+      __ j(negative, &done);
+      // The input value is within uint64 range and the second conversion worked
+      // successfully, but we still have to undo the subtraction we did
+      // earlier.
+      __ Set(kScratchRegister, 0x8000000000000000);
+      __ orq(i.OutputRegister(), kScratchRegister);
+      __ bind(&success);
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+      }
+      __ bind(&done);
+      break;
+    }
     case kSSEInt32ToFloat64:
       if (instr->InputAt(0)->IsRegister()) {
-        __ cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
       } else {
-        __ cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
       }
       break;
+    case kSSEInt64ToFloat32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEInt64ToFloat64:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEUint64ToFloat32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ movq(kScratchRegister, i.InputRegister(0));
+      } else {
+        __ movq(kScratchRegister, i.InputOperand(0));
+      }
+      __ Cvtqui2ss(i.OutputDoubleRegister(), kScratchRegister,
+                   i.TempRegister(0));
+      break;
+    case kSSEUint64ToFloat64:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ movq(kScratchRegister, i.InputRegister(0));
+      } else {
+        __ movq(kScratchRegister, i.InputOperand(0));
+      }
+      __ Cvtqui2sd(i.OutputDoubleRegister(), kScratchRegister,
+                   i.TempRegister(0));
+      break;
     case kSSEUint32ToFloat64:
       if (instr->InputAt(0)->IsRegister()) {
         __ movl(kScratchRegister, i.InputRegister(0));
       } else {
         __ movl(kScratchRegister, i.InputOperand(0));
       }
-      __ cvtqsi2sd(i.OutputDoubleRegister(), kScratchRegister);
+      __ Cvtqsi2sd(i.OutputDoubleRegister(), kScratchRegister);
       break;
+    case kSSEFloat64ExtractLowWord32:
+      if (instr->InputAt(0)->IsDoubleStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kSSEFloat64ExtractHighWord32:
+      if (instr->InputAt(0)->IsDoubleStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
+      } else {
+        __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
+      }
+      break;
+    case kSSEFloat64InsertLowWord32:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
+      } else {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
+      }
+      break;
+    case kSSEFloat64InsertHighWord32:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
+      } else {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
+      }
+      break;
+    case kSSEFloat64LoadLowWord32:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kAVXFloat32Cmp: {
+      CpuFeatureScope avx_scope(masm(), AVX);
+      if (instr->InputAt(1)->IsDoubleRegister()) {
+        __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      break;
+    }
+    case kAVXFloat32Add:
+      ASSEMBLE_AVX_BINOP(vaddss);
+      break;
+    case kAVXFloat32Sub:
+      ASSEMBLE_AVX_BINOP(vsubss);
+      break;
+    case kAVXFloat32Mul:
+      ASSEMBLE_AVX_BINOP(vmulss);
+      break;
+    case kAVXFloat32Div:
+      ASSEMBLE_AVX_BINOP(vdivss);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kAVXFloat32Max:
+      ASSEMBLE_AVX_BINOP(vmaxss);
+      break;
+    case kAVXFloat32Min:
+      ASSEMBLE_AVX_BINOP(vminss);
+      break;
+    case kAVXFloat64Cmp: {
+      CpuFeatureScope avx_scope(masm(), AVX);
+      if (instr->InputAt(1)->IsDoubleRegister()) {
+        __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      break;
+    }
     case kAVXFloat64Add:
-      ASSEMBLE_AVX_DOUBLE_BINOP(vaddsd);
+      ASSEMBLE_AVX_BINOP(vaddsd);
       break;
     case kAVXFloat64Sub:
-      ASSEMBLE_AVX_DOUBLE_BINOP(vsubsd);
+      ASSEMBLE_AVX_BINOP(vsubsd);
       break;
     case kAVXFloat64Mul:
-      ASSEMBLE_AVX_DOUBLE_BINOP(vmulsd);
+      ASSEMBLE_AVX_BINOP(vmulsd);
       break;
     case kAVXFloat64Div:
-      ASSEMBLE_AVX_DOUBLE_BINOP(vdivsd);
+      ASSEMBLE_AVX_BINOP(vdivsd);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
       break;
-    case kX64Movsxbl:
-      if (instr->addressing_mode() != kMode_None) {
-        __ movsxbl(i.OutputRegister(), i.MemoryOperand());
-      } else if (instr->InputAt(0)->IsRegister()) {
-        __ movsxbl(i.OutputRegister(), i.InputRegister(0));
+    case kAVXFloat64Max:
+      ASSEMBLE_AVX_BINOP(vmaxsd);
+      break;
+    case kAVXFloat64Min:
+      ASSEMBLE_AVX_BINOP(vminsd);
+      break;
+    case kAVXFloat32Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(masm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputDoubleRegister(0));
       } else {
-        __ movsxbl(i.OutputRegister(), i.InputOperand(0));
+        __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputOperand(0));
       }
+      break;
+    }
+    case kAVXFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(masm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputDoubleRegister(0));
+      } else {
+        __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputOperand(0));
+      }
+      break;
+    }
+    case kAVXFloat64Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(masm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputDoubleRegister(0));
+      } else {
+        __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputOperand(0));
+      }
+      break;
+    }
+    case kAVXFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(masm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
+      if (instr->InputAt(0)->IsDoubleRegister()) {
+        __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputDoubleRegister(0));
+      } else {
+        __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
+                  i.InputOperand(0));
+      }
+      break;
+    }
+    case kX64Movsxbl:
+      ASSEMBLE_MOVX(movsxbl);
       __ AssertZeroExtended(i.OutputRegister());
       break;
     case kX64Movzxbl:
-      __ movzxbl(i.OutputRegister(), i.MemoryOperand());
+      ASSEMBLE_MOVX(movzxbl);
+      __ AssertZeroExtended(i.OutputRegister());
       break;
     case kX64Movb: {
-      int index = 0;
+      size_t index = 0;
       Operand operand = i.MemoryOperand(&index);
       if (HasImmediateInput(instr, index)) {
         __ movb(operand, Immediate(i.InputInt8(index)));
@@ -819,21 +1407,15 @@
       break;
     }
     case kX64Movsxwl:
-      if (instr->addressing_mode() != kMode_None) {
-        __ movsxwl(i.OutputRegister(), i.MemoryOperand());
-      } else if (instr->InputAt(0)->IsRegister()) {
-        __ movsxwl(i.OutputRegister(), i.InputRegister(0));
-      } else {
-        __ movsxwl(i.OutputRegister(), i.InputOperand(0));
-      }
+      ASSEMBLE_MOVX(movsxwl);
       __ AssertZeroExtended(i.OutputRegister());
       break;
     case kX64Movzxwl:
-      __ movzxwl(i.OutputRegister(), i.MemoryOperand());
+      ASSEMBLE_MOVX(movzxwl);
       __ AssertZeroExtended(i.OutputRegister());
       break;
     case kX64Movw: {
-      int index = 0;
+      size_t index = 0;
       Operand operand = i.MemoryOperand(&index);
       if (HasImmediateInput(instr, index)) {
         __ movw(operand, Immediate(i.InputInt16(index)));
@@ -855,7 +1437,7 @@
         }
         __ AssertZeroExtended(i.OutputRegister());
       } else {
-        int index = 0;
+        size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
         if (HasImmediateInput(instr, index)) {
           __ movl(operand, i.InputImmediate(index));
@@ -864,19 +1446,14 @@
         }
       }
       break;
-    case kX64Movsxlq: {
-      if (instr->InputAt(0)->IsRegister()) {
-        __ movsxlq(i.OutputRegister(), i.InputRegister(0));
-      } else {
-        __ movsxlq(i.OutputRegister(), i.InputOperand(0));
-      }
+    case kX64Movsxlq:
+      ASSEMBLE_MOVX(movsxlq);
       break;
-    }
     case kX64Movq:
       if (instr->HasOutput()) {
         __ movq(i.OutputRegister(), i.MemoryOperand());
       } else {
-        int index = 0;
+        size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
         if (HasImmediateInput(instr, index)) {
           __ movq(operand, i.InputImmediate(index));
@@ -889,18 +1466,46 @@
       if (instr->HasOutput()) {
         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
       } else {
-        int index = 0;
+        size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
         __ movss(operand, i.InputDoubleRegister(index));
       }
       break;
     case kX64Movsd:
       if (instr->HasOutput()) {
-        __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
+        __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
       } else {
-        int index = 0;
+        size_t index = 0;
         Operand operand = i.MemoryOperand(&index);
-        __ movsd(operand, i.InputDoubleRegister(index));
+        __ Movsd(operand, i.InputDoubleRegister(index));
+      }
+      break;
+    case kX64BitcastFI:
+      if (instr->InputAt(0)->IsDoubleStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kX64BitcastDL:
+      if (instr->InputAt(0)->IsDoubleStackSlot()) {
+        __ movq(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kX64BitcastIF:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64BitcastLD:
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
       }
       break;
     case kX64Lea32: {
@@ -949,24 +1554,29 @@
     case kX64Push:
       if (HasImmediateInput(instr, 0)) {
         __ pushq(i.InputImmediate(0));
+        frame_access_state()->IncreaseSPDelta(1);
       } else {
         if (instr->InputAt(0)->IsRegister()) {
           __ pushq(i.InputRegister(0));
+          frame_access_state()->IncreaseSPDelta(1);
+        } else if (instr->InputAt(0)->IsDoubleRegister()) {
+          // TODO(titzer): use another machine instruction?
+          __ subq(rsp, Immediate(kDoubleSize));
+          frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
+          __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
         } else {
           __ pushq(i.InputOperand(0));
+          frame_access_state()->IncreaseSPDelta(1);
         }
       }
       break;
-    case kX64StoreWriteBarrier: {
-      Register object = i.InputRegister(0);
-      Register index = i.InputRegister(1);
-      Register value = i.InputRegister(2);
-      __ movsxlq(index, index);
-      __ movq(Operand(object, index, times_1, 0), value);
-      __ leaq(index, Operand(object, index, times_1, 0));
-      SaveFPRegsMode mode =
-          frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
-      __ RecordWrite(object, index, value, mode);
+    case kX64Poke: {
+      int const slot = MiscField::decode(instr->opcode());
+      if (HasImmediateInput(instr, 0)) {
+        __ movq(Operand(rsp, slot * kPointerSize), i.InputImmediate(0));
+      } else {
+        __ movq(Operand(rsp, slot * kPointerSize), i.InputRegister(0));
+      }
       break;
     }
     case kCheckedLoadInt8:
@@ -984,11 +1594,14 @@
     case kCheckedLoadWord32:
       ASSEMBLE_CHECKED_LOAD_INTEGER(movl);
       break;
+    case kCheckedLoadWord64:
+      ASSEMBLE_CHECKED_LOAD_INTEGER(movq);
+      break;
     case kCheckedLoadFloat32:
-      ASSEMBLE_CHECKED_LOAD_FLOAT(movss);
+      ASSEMBLE_CHECKED_LOAD_FLOAT(Movss);
       break;
     case kCheckedLoadFloat64:
-      ASSEMBLE_CHECKED_LOAD_FLOAT(movsd);
+      ASSEMBLE_CHECKED_LOAD_FLOAT(Movsd);
       break;
     case kCheckedStoreWord8:
       ASSEMBLE_CHECKED_STORE_INTEGER(movb);
@@ -999,14 +1612,20 @@
     case kCheckedStoreWord32:
       ASSEMBLE_CHECKED_STORE_INTEGER(movl);
       break;
+    case kCheckedStoreWord64:
+      ASSEMBLE_CHECKED_STORE_INTEGER(movq);
+      break;
     case kCheckedStoreFloat32:
-      ASSEMBLE_CHECKED_STORE_FLOAT(movss);
+      ASSEMBLE_CHECKED_STORE_FLOAT(Movss);
       break;
     case kCheckedStoreFloat64:
-      ASSEMBLE_CHECKED_STORE_FLOAT(movsd);
+      ASSEMBLE_CHECKED_STORE_FLOAT(Movsd);
+      break;
+    case kX64StackCheck:
+      __ CompareRoot(rsp, Heap::kStackLimitRootIndex);
       break;
   }
-}
+}  // NOLINT(readability/fn_size)
 
 
 // Assembles branches after this instruction.
@@ -1041,27 +1660,15 @@
     case kSignedGreaterThan:
       __ j(greater, tlabel);
       break;
-    case kUnorderedLessThan:
-      __ j(parity_even, flabel, flabel_distance);
-    // Fall through.
     case kUnsignedLessThan:
       __ j(below, tlabel);
       break;
-    case kUnorderedGreaterThanOrEqual:
-      __ j(parity_even, tlabel);
-    // Fall through.
     case kUnsignedGreaterThanOrEqual:
       __ j(above_equal, tlabel);
       break;
-    case kUnorderedLessThanOrEqual:
-      __ j(parity_even, flabel, flabel_distance);
-    // Fall through.
     case kUnsignedLessThanOrEqual:
       __ j(below_equal, tlabel);
       break;
-    case kUnorderedGreaterThan:
-      __ j(parity_even, tlabel);
-    // Fall through.
     case kUnsignedGreaterThan:
       __ j(above, tlabel);
       break;
@@ -1071,12 +1678,15 @@
     case kNotOverflow:
       __ j(no_overflow, tlabel);
       break;
+    default:
+      UNREACHABLE();
+      break;
   }
   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
 }
 
 
-void CodeGenerator::AssembleArchJump(BasicBlock::RpoNumber target) {
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
 }
 
@@ -1090,8 +1700,8 @@
   // Materialize a full 64-bit 1 or 0 value. The result register is always the
   // last output of the instruction.
   Label check;
-  DCHECK_NE(0, static_cast<int>(instr->OutputCount()));
-  Register reg = i.OutputRegister(static_cast<int>(instr->OutputCount() - 1));
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
   Condition cc = no_condition;
   switch (condition) {
     case kUnorderedEqual:
@@ -1122,35 +1732,15 @@
     case kSignedGreaterThan:
       cc = greater;
       break;
-    case kUnorderedLessThan:
-      __ j(parity_odd, &check, Label::kNear);
-      __ movl(reg, Immediate(0));
-      __ jmp(&done, Label::kNear);
-    // Fall through.
     case kUnsignedLessThan:
       cc = below;
       break;
-    case kUnorderedGreaterThanOrEqual:
-      __ j(parity_odd, &check, Label::kNear);
-      __ movl(reg, Immediate(1));
-      __ jmp(&done, Label::kNear);
-    // Fall through.
     case kUnsignedGreaterThanOrEqual:
       cc = above_equal;
       break;
-    case kUnorderedLessThanOrEqual:
-      __ j(parity_odd, &check, Label::kNear);
-      __ movl(reg, Immediate(0));
-      __ jmp(&done, Label::kNear);
-    // Fall through.
     case kUnsignedLessThanOrEqual:
       cc = below_equal;
       break;
-    case kUnorderedGreaterThan:
-      __ j(parity_odd, &check, Label::kNear);
-      __ movl(reg, Immediate(1));
-      __ jmp(&done, Label::kNear);
-    // Fall through.
     case kUnsignedGreaterThan:
       cc = above;
       break;
@@ -1160,6 +1750,9 @@
     case kNotOverflow:
       cc = no_overflow;
       break;
+    default:
+      UNREACHABLE();
+      break;
   }
   __ bind(&check);
   __ setcc(cc, reg);
@@ -1168,84 +1761,166 @@
 }
 
 
-void CodeGenerator::AssembleDeoptimizerCall(int deoptimization_id) {
+void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    __ cmpl(input, Immediate(i.InputInt32(index + 0)));
+    __ j(equal, GetLabel(i.InputRpo(index + 1)));
+  }
+  AssembleArchJump(i.InputRpo(1));
+}
+
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
+  Label** cases = zone()->NewArray<Label*>(case_count);
+  for (int32_t index = 0; index < case_count; ++index) {
+    cases[index] = GetLabel(i.InputRpo(index + 2));
+  }
+  Label* const table = AddJumpTable(cases, case_count);
+  __ cmpl(input, Immediate(case_count));
+  __ j(above_equal, GetLabel(i.InputRpo(1)));
+  __ leaq(kScratchRegister, Operand(table));
+  __ jmp(Operand(kScratchRegister, input, times_8, 0));
+}
+
+
+void CodeGenerator::AssembleDeoptimizerCall(
+    int deoptimization_id, Deoptimizer::BailoutType bailout_type) {
   Address deopt_entry = Deoptimizer::GetDeoptimizationEntry(
-      isolate(), deoptimization_id, Deoptimizer::LAZY);
+      isolate(), deoptimization_id, bailout_type);
   __ call(deopt_entry, RelocInfo::RUNTIME_ENTRY);
 }
 
 
+namespace {
+
+static const int kQuadWordSize = 16;
+
+}  // namespace
+
+
 void CodeGenerator::AssemblePrologue() {
   CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
-  int stack_slots = frame()->GetSpillSlotCount();
-  if (descriptor->kind() == CallDescriptor::kCallAddress) {
+  if (descriptor->IsCFunctionCall()) {
     __ pushq(rbp);
     __ movq(rbp, rsp);
-    const RegList saves = descriptor->CalleeSavedRegisters();
-    if (saves != 0) {  // Save callee-saved registers.
-      int register_save_area_size = 0;
-      for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
-        if (!((1 << i) & saves)) continue;
-        __ pushq(Register::from_code(i));
-        register_save_area_size += kPointerSize;
-      }
-      frame()->SetRegisterSaveAreaSize(register_save_area_size);
-    }
   } else if (descriptor->IsJSFunctionCall()) {
-    CompilationInfo* info = this->info();
-    __ Prologue(info->IsCodePreAgingActive());
-    frame()->SetRegisterSaveAreaSize(
-        StandardFrameConstants::kFixedFrameSizeFromFp);
-  } else {
+    __ Prologue(this->info()->GeneratePreagedPrologue());
+  } else if (frame()->needs_frame()) {
     __ StubPrologue();
-    frame()->SetRegisterSaveAreaSize(
-        StandardFrameConstants::kFixedFrameSizeFromFp);
+  } else {
+    frame()->SetElidedFrameSizeInSlots(kPCOnStackSize / kPointerSize);
   }
-  if (stack_slots > 0) {
-    __ subq(rsp, Immediate(stack_slots * kPointerSize));
+  frame_access_state()->SetFrameAccessToDefault();
+
+  int stack_shrink_slots = frame()->GetSpillSlotCount();
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    // TODO(titzer): cannot address target function == local #-1
+    __ movq(rdi, Operand(rbp, JavaScriptFrameConstants::kFunctionOffset));
+    stack_shrink_slots -=
+        static_cast<int>(OsrHelper(info()).UnoptimizedFrameSlots());
+  }
+
+  const RegList saves_fp = descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {
+    stack_shrink_slots += frame()->AlignSavedCalleeRegisterSlots();
+  }
+  if (stack_shrink_slots > 0) {
+    __ subq(rsp, Immediate(stack_shrink_slots * kPointerSize));
+  }
+
+  if (saves_fp != 0) {  // Save callee-saved XMM registers.
+    const uint32_t saves_fp_count = base::bits::CountPopulation32(saves_fp);
+    const int stack_size = saves_fp_count * kQuadWordSize;
+    // Adjust the stack pointer.
+    __ subp(rsp, Immediate(stack_size));
+    // Store the registers on the stack.
+    int slot_idx = 0;
+    for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) {
+      if (!((1 << i) & saves_fp)) continue;
+      __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
+                XMMRegister::from_code(i));
+      slot_idx++;
+    }
+    frame()->AllocateSavedCalleeRegisterSlots(saves_fp_count *
+                                              (kQuadWordSize / kPointerSize));
+  }
+
+  const RegList saves = descriptor->CalleeSavedRegisters();
+  if (saves != 0) {  // Save callee-saved registers.
+    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+      if (!((1 << i) & saves)) continue;
+      __ pushq(Register::from_code(i));
+      frame()->AllocateSavedCalleeRegisterSlots(1);
+    }
   }
 }
 
 
 void CodeGenerator::AssembleReturn() {
   CallDescriptor* descriptor = linkage()->GetIncomingDescriptor();
-  if (descriptor->kind() == CallDescriptor::kCallAddress) {
-    if (frame()->GetRegisterSaveAreaSize() > 0) {
-      // Remove this frame's spill slots first.
-      int stack_slots = frame()->GetSpillSlotCount();
-      if (stack_slots > 0) {
-        __ addq(rsp, Immediate(stack_slots * kPointerSize));
-      }
-      const RegList saves = descriptor->CalleeSavedRegisters();
-      // Restore registers.
-      if (saves != 0) {
-        for (int i = 0; i < Register::kNumRegisters; i++) {
-          if (!((1 << i) & saves)) continue;
-          __ popq(Register::from_code(i));
-        }
-      }
-      __ popq(rbp);  // Pop caller's frame pointer.
-      __ ret(0);
-    } else {
-      // No saved registers.
-      __ movq(rsp, rbp);  // Move stack pointer back to frame pointer.
-      __ popq(rbp);       // Pop caller's frame pointer.
-      __ ret(0);
+
+  // Restore registers.
+  const RegList saves = descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    for (int i = 0; i < Register::kNumRegisters; i++) {
+      if (!((1 << i) & saves)) continue;
+      __ popq(Register::from_code(i));
     }
-  } else {
+  }
+  const RegList saves_fp = descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {
+    const uint32_t saves_fp_count = base::bits::CountPopulation32(saves_fp);
+    const int stack_size = saves_fp_count * kQuadWordSize;
+    // Load the registers from the stack.
+    int slot_idx = 0;
+    for (int i = 0; i < XMMRegister::kMaxNumRegisters; i++) {
+      if (!((1 << i) & saves_fp)) continue;
+      __ movdqu(XMMRegister::from_code(i),
+                Operand(rsp, kQuadWordSize * slot_idx));
+      slot_idx++;
+    }
+    // Adjust the stack pointer.
+    __ addp(rsp, Immediate(stack_size));
+  }
+
+  if (descriptor->IsCFunctionCall()) {
     __ movq(rsp, rbp);  // Move stack pointer back to frame pointer.
     __ popq(rbp);       // Pop caller's frame pointer.
-    int pop_count = descriptor->IsJSFunctionCall()
-                        ? static_cast<int>(descriptor->JSParameterCount())
-                        : 0;
-    __ ret(pop_count * kPointerSize);
+  } else if (frame()->needs_frame()) {
+    // Canonicalize JSFunction return sites for now.
+    if (return_label_.is_bound()) {
+      __ jmp(&return_label_);
+      return;
+    } else {
+      __ bind(&return_label_);
+      __ movq(rsp, rbp);  // Move stack pointer back to frame pointer.
+      __ popq(rbp);       // Pop caller's frame pointer.
+    }
   }
+  size_t pop_size = descriptor->StackParameterCount() * kPointerSize;
+  // Might need rcx for scratch if pop_size is too big.
+  DCHECK_EQ(0u, descriptor->CalleeSavedRegisters() & rcx.bit());
+  __ Ret(static_cast<int>(pop_size), rcx);
 }
 
 
 void CodeGenerator::AssembleMove(InstructionOperand* source,
                                  InstructionOperand* destination) {
-  X64OperandConverter g(this, NULL);
+  X64OperandConverter g(this, nullptr);
   // Dispatch on the source and destination operand kinds.  Not all
   // combinations are possible.
   if (source->IsRegister()) {
@@ -1295,9 +1970,19 @@
         case Constant::kExternalReference:
           __ Move(dst, src.ToExternalReference());
           break;
-        case Constant::kHeapObject:
-          __ Move(dst, src.ToHeapObject());
+        case Constant::kHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          Heap::RootListIndex index;
+          int offset;
+          if (IsMaterializableFromFrame(src_object, &offset)) {
+            __ movp(dst, Operand(rbp, offset));
+          } else if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ Move(dst, src_object);
+          }
           break;
+        }
         case Constant::kRpoNumber:
           UNREACHABLE();  // TODO(dcarney): load of labels on x64.
           break;
@@ -1330,23 +2015,23 @@
     XMMRegister src = g.ToDoubleRegister(source);
     if (destination->IsDoubleRegister()) {
       XMMRegister dst = g.ToDoubleRegister(destination);
-      __ movsd(dst, src);
+      __ Movapd(dst, src);
     } else {
       DCHECK(destination->IsDoubleStackSlot());
       Operand dst = g.ToOperand(destination);
-      __ movsd(dst, src);
+      __ Movsd(dst, src);
     }
   } else if (source->IsDoubleStackSlot()) {
     DCHECK(destination->IsDoubleRegister() || destination->IsDoubleStackSlot());
     Operand src = g.ToOperand(source);
     if (destination->IsDoubleRegister()) {
       XMMRegister dst = g.ToDoubleRegister(destination);
-      __ movsd(dst, src);
+      __ Movsd(dst, src);
     } else {
       // We rely on having xmm0 available as a fixed scratch register.
       Operand dst = g.ToOperand(destination);
-      __ movsd(xmm0, src);
-      __ movsd(dst, xmm0);
+      __ Movsd(xmm0, src);
+      __ Movsd(dst, xmm0);
     }
   } else {
     UNREACHABLE();
@@ -1356,16 +2041,25 @@
 
 void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                  InstructionOperand* destination) {
-  X64OperandConverter g(this, NULL);
+  X64OperandConverter g(this, nullptr);
   // Dispatch on the source and destination operand kinds.  Not all
   // combinations are possible.
   if (source->IsRegister() && destination->IsRegister()) {
     // Register-register.
-    __ xchgq(g.ToRegister(source), g.ToRegister(destination));
+    Register src = g.ToRegister(source);
+    Register dst = g.ToRegister(destination);
+    __ movq(kScratchRegister, src);
+    __ movq(src, dst);
+    __ movq(dst, kScratchRegister);
   } else if (source->IsRegister() && destination->IsStackSlot()) {
     Register src = g.ToRegister(source);
+    __ pushq(src);
+    frame_access_state()->IncreaseSPDelta(1);
     Operand dst = g.ToOperand(destination);
-    __ xchgq(src, dst);
+    __ movq(src, dst);
+    frame_access_state()->IncreaseSPDelta(-1);
+    dst = g.ToOperand(destination);
+    __ popq(dst);
   } else if ((source->IsStackSlot() && destination->IsStackSlot()) ||
              (source->IsDoubleStackSlot() &&
               destination->IsDoubleStackSlot())) {
@@ -1374,24 +2068,29 @@
     Operand src = g.ToOperand(source);
     Operand dst = g.ToOperand(destination);
     __ movq(tmp, dst);
-    __ xchgq(tmp, src);
-    __ movq(dst, tmp);
+    __ pushq(src);
+    frame_access_state()->IncreaseSPDelta(1);
+    src = g.ToOperand(source);
+    __ movq(src, tmp);
+    frame_access_state()->IncreaseSPDelta(-1);
+    dst = g.ToOperand(destination);
+    __ popq(dst);
   } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
     // XMM register-register swap. We rely on having xmm0
     // available as a fixed scratch register.
     XMMRegister src = g.ToDoubleRegister(source);
     XMMRegister dst = g.ToDoubleRegister(destination);
-    __ movsd(xmm0, src);
-    __ movsd(src, dst);
-    __ movsd(dst, xmm0);
+    __ Movapd(xmm0, src);
+    __ Movapd(src, dst);
+    __ Movapd(dst, xmm0);
   } else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
     // XMM register-memory swap.  We rely on having xmm0
     // available as a fixed scratch register.
     XMMRegister src = g.ToDoubleRegister(source);
     Operand dst = g.ToOperand(destination);
-    __ movsd(xmm0, src);
-    __ movsd(src, dst);
-    __ movsd(dst, xmm0);
+    __ Movsd(xmm0, src);
+    __ Movsd(src, dst);
+    __ Movsd(dst, xmm0);
   } else {
     // No other combinations are possible.
     UNREACHABLE();
@@ -1399,25 +2098,33 @@
 }
 
 
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  for (size_t index = 0; index < target_count; ++index) {
+    __ dq(targets[index]);
+  }
+}
+
+
 void CodeGenerator::AddNopForSmiCodeInlining() { __ nop(); }
 
 
 void CodeGenerator::EnsureSpaceForLazyDeopt() {
-  int space_needed = Deoptimizer::patch_size();
-  if (!info()->IsStub()) {
-    // Ensure that we have enough space after the previous lazy-bailout
-    // instruction for patching the code here.
-    int current_pc = masm()->pc_offset();
-    if (current_pc < last_lazy_deopt_pc_ + space_needed) {
-      int padding_size = last_lazy_deopt_pc_ + space_needed - current_pc;
-      __ Nop(padding_size);
-    }
+  if (!info()->ShouldEnsureSpaceForLazyDeopt()) {
+    return;
   }
-  MarkLazyDeoptSite();
+
+  int space_needed = Deoptimizer::patch_size();
+  // Ensure that we have enough space after the previous lazy-bailout
+  // instruction for patching the code here.
+  int current_pc = masm()->pc_offset();
+  if (current_pc < last_lazy_deopt_pc_ + space_needed) {
+    int padding_size = last_lazy_deopt_pc_ + space_needed - current_pc;
+    __ Nop(padding_size);
+  }
 }
 
 #undef __
 
-}  // namespace internal
 }  // namespace compiler
+}  // namespace internal
 }  // namespace v8