Upgrade V8 to 8.8.278.14

Bug: 162604069
Bug: 167389063
Test: gts-tradefed run gts-dev --module GtsGmscoreHostTestCases
      --test com.google.android.gts.devicepolicy.DeviceOwnerTest#testProxyPacProxyTest
Test: m -j proxy_resolver_v8_unittest && adb sync && adb shell \
      /data/nativetest/proxy_resolver_v8_unittest/proxy_resolver_v8_unittest

Merged-In: Ifb09923b9d7f6d8990fb062d7dc0294edf2c098e
Change-Id: Ifb09923b9d7f6d8990fb062d7dc0294edf2c098e
(cherry picked from commit 9580a23bc5b8874a0979001d3595d027cbb68128)
diff --git a/src/compiler/backend/DIR_METADATA b/src/compiler/backend/DIR_METADATA
new file mode 100644
index 0000000..fc01866
--- /dev/null
+++ b/src/compiler/backend/DIR_METADATA
@@ -0,0 +1,11 @@
+# Metadata information for this directory.
+#
+# For more information on DIR_METADATA files, see:
+#   https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/README.md
+#
+# For the schema of this file, see Metadata message:
+#   https://source.chromium.org/chromium/infra/infra/+/master:go/src/infra/tools/dirmd/proto/dir_metadata.proto
+
+monorail {
+  component: "Blink>JavaScript>Compiler"
+}
\ No newline at end of file
diff --git a/src/compiler/backend/OWNERS b/src/compiler/backend/OWNERS
new file mode 100644
index 0000000..d55672b
--- /dev/null
+++ b/src/compiler/backend/OWNERS
@@ -0,0 +1,8 @@
+bbudge@chromium.org
+gdeepti@chromium.org
+zhin@chromium.org
+
+# Plus src/compiler owners.
+
+per-file register-allocator*=thibaudm@chromium.org
+per-file spill-placer*=thibaudm@chromium.org
diff --git a/src/compiler/backend/arm/code-generator-arm.cc b/src/compiler/backend/arm/code-generator-arm.cc
new file mode 100644
index 0000000..9267cb1
--- /dev/null
+++ b/src/compiler/backend/arm/code-generator-arm.cc
@@ -0,0 +1,4161 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/backend/instruction-codes.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/numbers/double.h"
+#include "src/utils/boxed-float.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// Adds Arm-specific methods to convert InstructionOperands.
+class ArmOperandConverter final : public InstructionOperandConverter {
+ public:
+  ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  SBit OutputSBit() const {
+    switch (instr_->flags_mode()) {
+      case kFlags_branch:
+      case kFlags_branch_and_poison:
+      case kFlags_deoptimize:
+      case kFlags_deoptimize_and_poison:
+      case kFlags_set:
+      case kFlags_trap:
+        return SetCC;
+      case kFlags_none:
+        return LeaveCC;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputImmediate(size_t index) const {
+    return ToImmediate(instr_->InputAt(index));
+  }
+
+  Operand InputOperand2(size_t first_index) {
+    const size_t index = first_index;
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+      case kMode_Offset_RI:
+      case kMode_Offset_RR:
+      case kMode_Root:
+        break;
+      case kMode_Operand2_I:
+        return InputImmediate(index + 0);
+      case kMode_Operand2_R:
+        return Operand(InputRegister(index + 0));
+      case kMode_Operand2_R_ASR_I:
+        return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
+      case kMode_Operand2_R_ASR_R:
+        return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
+      case kMode_Operand2_R_LSL_I:
+        return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
+      case kMode_Operand2_R_LSL_R:
+        return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
+      case kMode_Operand2_R_LSR_I:
+        return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
+      case kMode_Operand2_R_LSR_R:
+        return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
+      case kMode_Operand2_R_ROR_I:
+        return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
+      case kMode_Operand2_R_ROR_R:
+        return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand InputOffset(size_t* first_index) {
+    const size_t index = *first_index;
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+      case kMode_Operand2_I:
+      case kMode_Operand2_R:
+      case kMode_Operand2_R_ASR_I:
+      case kMode_Operand2_R_ASR_R:
+      case kMode_Operand2_R_LSL_R:
+      case kMode_Operand2_R_LSR_I:
+      case kMode_Operand2_R_LSR_R:
+      case kMode_Operand2_R_ROR_I:
+      case kMode_Operand2_R_ROR_R:
+        break;
+      case kMode_Operand2_R_LSL_I:
+        *first_index += 3;
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
+                          LSL, InputInt32(index + 2));
+      case kMode_Offset_RI:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_Offset_RR:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
+      case kMode_Root:
+        *first_index += 1;
+        return MemOperand(kRootRegister, InputInt32(index));
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand InputOffset(size_t first_index = 0) {
+    return InputOffset(&first_index);
+  }
+
+  Operand ToImmediate(InstructionOperand* operand) const {
+    Constant constant = ToConstant(operand);
+    switch (constant.type()) {
+      case Constant::kInt32:
+        if (RelocInfo::IsWasmReference(constant.rmode())) {
+          return Operand(constant.ToInt32(), constant.rmode());
+        } else {
+          return Operand(constant.ToInt32());
+        }
+      case Constant::kFloat32:
+        return Operand::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Operand::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kExternalReference:
+        return Operand(constant.ToExternalReference());
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kInt64:
+      case Constant::kCompressedHeapObject:
+      case Constant::kHeapObject:
+      // TODO(dcarney): loading RPO constants on arm.
+      case Constant::kRpoNumber:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand ToMemOperand(InstructionOperand* op) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand SlotToMemOperand(int slot) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+
+  NeonMemOperand NeonInputOperand(size_t first_index) {
+    const size_t index = first_index;
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_Operand2_R:
+        return NeonMemOperand(InputRegister(index + 0));
+      default:
+        break;
+    }
+    UNREACHABLE();
+  }
+};
+
+namespace {
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
+                       Register value, RecordWriteMode mode,
+                       StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(offset),
+        value_(value),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore lr if the frame was elided.
+      __ Push(lr);
+      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
+    }
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      __ Pop(lr);
+      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
+    }
+  }
+
+ private:
+  Register const object_;
+  Operand const offset_;
+  Register const value_;
+  RecordWriteMode const mode_;
+  StubCallMode stub_mode_;
+  bool must_save_lr_;
+  UnwindingInfoWriter* const unwinding_info_writer_;
+  Zone* zone_;
+};
+
+template <typename T>
+class OutOfLineFloatMin final : public OutOfLineCode {
+ public:
+  OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
+      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
+
+  void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
+
+ private:
+  T const result_;
+  T const left_;
+  T const right_;
+};
+using OutOfLineFloat32Min = OutOfLineFloatMin<SwVfpRegister>;
+using OutOfLineFloat64Min = OutOfLineFloatMin<DwVfpRegister>;
+
+template <typename T>
+class OutOfLineFloatMax final : public OutOfLineCode {
+ public:
+  OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
+      : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
+
+  void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
+
+ private:
+  T const result_;
+  T const left_;
+  T const right_;
+};
+using OutOfLineFloat32Max = OutOfLineFloatMax<SwVfpRegister>;
+using OutOfLineFloat64Max = OutOfLineFloatMax<DwVfpRegister>;
+
+Condition FlagsConditionToCondition(FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kSignedLessThan:
+      return lt;
+    case kSignedGreaterThanOrEqual:
+      return ge;
+    case kSignedLessThanOrEqual:
+      return le;
+    case kSignedGreaterThan:
+      return gt;
+    case kUnsignedLessThan:
+      return lo;
+    case kUnsignedGreaterThanOrEqual:
+      return hs;
+    case kUnsignedLessThanOrEqual:
+      return ls;
+    case kUnsignedGreaterThan:
+      return hi;
+    case kFloatLessThanOrUnordered:
+      return lt;
+    case kFloatGreaterThanOrEqual:
+      return ge;
+    case kFloatLessThanOrEqual:
+      return ls;
+    case kFloatGreaterThanOrUnordered:
+      return hi;
+    case kFloatLessThan:
+      return lo;
+    case kFloatGreaterThanOrEqualOrUnordered:
+      return hs;
+    case kFloatLessThanOrEqualOrUnordered:
+      return le;
+    case kFloatGreaterThan:
+      return gt;
+    case kOverflow:
+      return vs;
+    case kNotOverflow:
+      return vc;
+    case kPositiveOrZero:
+      return pl;
+    case kNegative:
+      return mi;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode,
+                                   ArmOperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister));
+  }
+}
+
+void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
+                                   InstructionCode opcode,
+                                   ArmOperandConverter const& i,
+                                   Register address) {
+  DCHECK_EQ(kMemoryAccessPoisoned,
+            static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
+  switch (AddressingModeField::decode(opcode)) {
+    case kMode_Offset_RI:
+      codegen->tasm()->mov(address, i.InputImmediate(1));
+      codegen->tasm()->add(address, address, i.InputRegister(0));
+      break;
+    case kMode_Offset_RR:
+      codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1));
+      break;
+    default:
+      UNREACHABLE();
+  }
+  codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister));
+}
+
+}  // namespace
+
+#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr)                       \
+  do {                                                                \
+    __ asm_instr(i.OutputRegister(),                                  \
+                 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
+    __ dmb(ISH);                                                      \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr)                      \
+  do {                                                                \
+    __ dmb(ISH);                                                      \
+    __ asm_instr(i.InputRegister(2),                                  \
+                 MemOperand(i.InputRegister(0), i.InputRegister(1))); \
+    __ dmb(ISH);                                                      \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr)             \
+  do {                                                                        \
+    Label exchange;                                                           \
+    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));        \
+    __ dmb(ISH);                                                              \
+    __ bind(&exchange);                                                       \
+    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
+    __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
+    __ teq(i.TempRegister(0), Operand(0));                                    \
+    __ b(ne, &exchange);                                                      \
+    __ dmb(ISH);                                                              \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr,     \
+                                                 cmp_reg)                     \
+  do {                                                                        \
+    Label compareExchange;                                                    \
+    Label exit;                                                               \
+    __ dmb(ISH);                                                              \
+    __ bind(&compareExchange);                                                \
+    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                    \
+    __ teq(cmp_reg, Operand(i.OutputRegister(0)));                            \
+    __ b(ne, &exit);                                                          \
+    __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
+    __ teq(i.TempRegister(0), Operand(0));                                    \
+    __ b(ne, &compareExchange);                                               \
+    __ bind(&exit);                                                           \
+    __ dmb(ISH);                                                              \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr)            \
+  do {                                                                       \
+    Label binop;                                                             \
+    __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));       \
+    __ dmb(ISH);                                                             \
+    __ bind(&binop);                                                         \
+    __ load_instr(i.OutputRegister(0), i.TempRegister(1));                   \
+    __ bin_instr(i.TempRegister(0), i.OutputRegister(0),                     \
+                 Operand(i.InputRegister(2)));                               \
+    __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
+    __ teq(i.TempRegister(2), Operand(0));                                   \
+    __ b(ne, &binop);                                                        \
+    __ dmb(ISH);                                                             \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2)                  \
+  do {                                                                 \
+    Label binop;                                                       \
+    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
+    __ dmb(ISH);                                                       \
+    __ bind(&binop);                                                   \
+    __ ldrexd(r2, r3, i.TempRegister(0));                              \
+    __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \
+    __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1)));     \
+    DCHECK_EQ(LeaveCC, i.OutputSBit());                                \
+    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
+              i.TempRegister(0));                                      \
+    __ teq(i.TempRegister(3), Operand(0));                             \
+    __ b(ne, &binop);                                                  \
+    __ dmb(ISH);                                                       \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr)                           \
+  do {                                                                 \
+    Label binop;                                                       \
+    __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
+    __ dmb(ISH);                                                       \
+    __ bind(&binop);                                                   \
+    __ ldrexd(r2, r3, i.TempRegister(0));                              \
+    __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0)));      \
+    __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1)));      \
+    __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
+              i.TempRegister(0));                                      \
+    __ teq(i.TempRegister(3), Operand(0));                             \
+    __ b(ne, &binop);                                                  \
+    __ dmb(ISH);                                                       \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                           \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 2);                                             \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
+                            i.InputDoubleRegister(1));                         \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2);    \
+    /* Move the result to the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                            \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 1);                                             \
+    __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
+    /* Move the result to the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+    DCHECK_EQ(LeaveCC, i.OutputSBit());                                        \
+  } while (0)
+
+#define ASSEMBLE_NEON_NARROWING_OP(dt, sdt)           \
+  do {                                                \
+    Simd128Register dst = i.OutputSimd128Register(),  \
+                    src0 = i.InputSimd128Register(0), \
+                    src1 = i.InputSimd128Register(1); \
+    if (dst == src0 && dst == src1) {                 \
+      __ vqmovn(dt, sdt, dst.low(), src0);            \
+      __ vmov(dst.high(), dst.low());                 \
+    } else if (dst == src0) {                         \
+      __ vqmovn(dt, sdt, dst.low(), src0);            \
+      __ vqmovn(dt, sdt, dst.high(), src1);           \
+    } else {                                          \
+      __ vqmovn(dt, sdt, dst.high(), src1);           \
+      __ vqmovn(dt, sdt, dst.low(), src0);            \
+    }                                                 \
+  } while (0)
+
+#define ASSEMBLE_NEON_PAIRWISE_OP(op, size)               \
+  do {                                                    \
+    Simd128Register dst = i.OutputSimd128Register(),      \
+                    src0 = i.InputSimd128Register(0),     \
+                    src1 = i.InputSimd128Register(1);     \
+    if (dst == src0) {                                    \
+      __ op(size, dst.low(), src0.low(), src0.high());    \
+      if (dst == src1) {                                  \
+        __ vmov(dst.high(), dst.low());                   \
+      } else {                                            \
+        __ op(size, dst.high(), src1.low(), src1.high()); \
+      }                                                   \
+    } else {                                              \
+      __ op(size, dst.high(), src1.low(), src1.high());   \
+      __ op(size, dst.low(), src0.low(), src0.high());    \
+    }                                                     \
+  } while (0)
+
+#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op)                                   \
+  do {                                                                        \
+    __ op(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low(),   \
+          i.InputSimd128Register(1).low());                                   \
+    __ op(i.OutputSimd128Register().high(), i.InputSimd128Register(0).high(), \
+          i.InputSimd128Register(1).high());                                  \
+  } while (0)
+
+// If the shift value is an immediate, we can call asm_imm, taking the shift
+// value modulo 2^width. Otherwise, emit code to perform the modulus operation,
+// and call vshl.
+#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, sz, dt) \
+  do {                                                   \
+    QwNeonRegister dst = i.OutputSimd128Register();      \
+    QwNeonRegister src = i.InputSimd128Register(0);      \
+    if (instr->InputAt(1)->IsImmediate()) {              \
+      __ asm_imm(dt, dst, src, i.InputInt##width(1));    \
+    } else {                                             \
+      QwNeonRegister tmp = i.TempSimd128Register(0);     \
+      Register shift = i.TempRegister(1);                \
+      constexpr int mask = (1 << width) - 1;             \
+      __ and_(shift, i.InputRegister(1), Operand(mask)); \
+      __ vdup(sz, tmp, shift);                           \
+      __ vshl(dt, dst, src, tmp);                        \
+    }                                                    \
+  } while (0)
+
+// If the shift value is an immediate, we can call asm_imm, taking the shift
+// value modulo 2^width. Otherwise, emit code to perform the modulus operation,
+// and call vshl, passing in the negative shift value (treated as a right
+// shift).
+#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, sz, dt) \
+  do {                                                    \
+    QwNeonRegister dst = i.OutputSimd128Register();       \
+    QwNeonRegister src = i.InputSimd128Register(0);       \
+    if (instr->InputAt(1)->IsImmediate()) {               \
+      __ asm_imm(dt, dst, src, i.InputInt##width(1));     \
+    } else {                                              \
+      QwNeonRegister tmp = i.TempSimd128Register(0);      \
+      Register shift = i.TempRegister(1);                 \
+      constexpr int mask = (1 << width) - 1;              \
+      __ and_(shift, i.InputRegister(1), Operand(mask));  \
+      __ vdup(sz, tmp, shift);                            \
+      __ vneg(sz, tmp, tmp);                              \
+      __ vshl(dt, dst, src, tmp);                         \
+    }                                                     \
+  } while (0)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ LeaveFrame(StackFrame::MANUAL);
+  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ ldm(ia, fp, lr.bit() | fp.bit());
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ cmp(scratch1,
+         Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ b(ne, &done);
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ ldr(caller_args_count_reg,
+         MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
+void FlushPendingPushRegisters(TurboAssembler* tasm,
+                               FrameAccessState* frame_access_state,
+                               ZoneVector<Register>* pending_pushes) {
+  switch (pending_pushes->size()) {
+    case 0:
+      break;
+    case 1:
+      tasm->push((*pending_pushes)[0]);
+      break;
+    case 2:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
+      break;
+    case 3:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
+                 (*pending_pushes)[2]);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  frame_access_state->IncreaseSPDelta(pending_pushes->size());
+  pending_pushes->clear();
+}
+
+void AdjustStackPointerForTailCall(
+    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
+    ZoneVector<Register>* pending_pushes = nullptr,
+    bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->add(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+#if DEBUG
+bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
+                                   const Instruction* instr, Register low,
+                                   Register high) {
+  DCHECK_GE(instr->OutputCount() + instr->TempCount(), 2);
+  if (instr->OutputCount() == 2) {
+    return (converter->OutputRegister(0) == low &&
+            converter->OutputRegister(1) == high);
+  }
+  if (instr->OutputCount() == 1) {
+    return (converter->OutputRegister(0) == low &&
+            converter->TempRegister(instr->TempCount() - 1) == high) ||
+           (converter->OutputRegister(0) == high &&
+            converter->TempRegister(instr->TempCount() - 1) == low);
+  }
+  DCHECK_EQ(instr->OutputCount(), 0);
+  return (converter->TempRegister(instr->TempCount() - 2) == low &&
+          converter->TempRegister(instr->TempCount() - 1) == high);
+}
+#endif
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
+
+  if (!pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot)) {
+    ArmOperandConverter g(this, instr);
+    ZoneVector<Register> pending_pushes(zone());
+    for (auto move : pushes) {
+      LocationOperand destination_location(
+          LocationOperand::cast(move->destination()));
+      InstructionOperand source(move->source());
+      AdjustStackPointerForTailCall(
+          tasm(), frame_access_state(),
+          destination_location.index() - pending_pushes.size(),
+          &pending_pushes);
+      // Pushes of non-register data types are not supported.
+      DCHECK(source.IsRegister());
+      LocationOperand source_location(LocationOperand::cast(source));
+      pending_pushes.push_back(source_location.GetRegister());
+      // TODO(arm): We can push more than 3 registers at once. Add support in
+      // the macro-assembler for pushing a list of registers.
+      if (pending_pushes.size() == 3) {
+        FlushPendingPushRegisters(tasm(), frame_access_state(),
+                                  &pending_pushes);
+      }
+      move->Eliminate();
+    }
+    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
+  }
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, nullptr, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.Acquire();
+  __ ComputeCodeStartAddress(scratch);
+  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+//    1. read from memory the word that contains that bit, which can be found in
+//       the flags in the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.Acquire();
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
+  __ ldr(scratch,
+         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+  __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, ne);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.Acquire();
+
+  // Set a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  __ ComputeCodeStartAddress(scratch);
+  __ cmp(kJavaScriptCallCodeStartRegister, scratch);
+  __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq);
+  __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne);
+  __ csdb();
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ and_(sp, sp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  ArmOperandConverter i(this, instr);
+
+  __ MaybeCheckConstPool();
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ CallCodeObject(reg);
+      }
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        __ Call(i.InputRegister(0));
+      }
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ JumpCodeObject(reg);
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        __ Jump(i.InputRegister(0));
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      __ Jump(reg);
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        UseScratchRegisterScope temps(tasm());
+        Register scratch = temps.Acquire();
+        // Check the function's context matches the context argument.
+        __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ cmp(cp, scratch);
+        __ Assert(eq, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
+      __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(r2);
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters);
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        // Put the return address in a stack slot.
+        __ str(pc, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, for certain architectures
+      // (e.g. ARM), there may be stricter alignment requirements, causing the
+      // old SP to be saved on the stack. In those cases, we cannot calculate
+      // the SP delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == r1);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ stop();
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+      break;
+    case kArchThrowTerminator:
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchNop:
+      // don't emit code for nops.
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ b(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchFramePointer:
+      __ mov(i.OutputRegister(), fp);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ mov(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison to consider the size difference of an optimized frame versus
+      // the contained unoptimized frames.
+
+      Register lhs_register = sp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = i.TempRegister(0);
+        __ sub(lhs_register, sp, Operand(offset));
+      }
+
+      constexpr size_t kValueIndex = 0;
+      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
+      __ cmp(lhs_register, i.InputRegister(kValueIndex));
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode());
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      Register value = i.InputRegister(2);
+
+      AddressingMode addressing_mode =
+          AddressingModeField::decode(instr->opcode());
+      Operand offset(0);
+      if (addressing_mode == kMode_Offset_RI) {
+        int32_t immediate = i.InputInt32(1);
+        offset = Operand(immediate);
+        __ str(value, MemOperand(object, immediate));
+      } else {
+        DCHECK_EQ(kMode_Offset_RR, addressing_mode);
+        Register reg = i.InputRegister(1);
+        offset = Operand(reg);
+        __ str(value, MemOperand(object, reg));
+      }
+      auto ool = zone()->New<OutOfLineRecordWrite>(
+          this, object, offset, value, mode, DetermineStubCallMode(),
+          &unwinding_info_writer_);
+      __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
+                       ne, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base = offset.from_stack_pointer() ? sp : fp;
+      __ add(i.OutputRegister(0), base, Operand(offset.offset()));
+      break;
+    }
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kArmAdd:
+      __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmAnd:
+      __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+              i.OutputSBit());
+      break;
+    case kArmBic:
+      __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmMul:
+      __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.OutputSBit());
+      break;
+    case kArmMla:
+      __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.InputRegister(2), i.OutputSBit());
+      break;
+    case kArmMls: {
+      CpuFeatureScope scope(tasm(), ARMv7);
+      __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.InputRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmSmull:
+      __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+               i.InputRegister(1));
+      break;
+    case kArmSmmul:
+      __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmSmmla:
+      __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.InputRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmUmull:
+      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+               i.InputRegister(1), i.OutputSBit());
+      break;
+    case kArmSdiv: {
+      CpuFeatureScope scope(tasm(), SUDIV);
+      __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmUdiv: {
+      CpuFeatureScope scope(tasm(), SUDIV);
+      __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmMov:
+      __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
+      break;
+    case kArmMvn:
+      __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
+      break;
+    case kArmOrr:
+      __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmEor:
+      __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmSub:
+      __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmRsb:
+      __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
+             i.OutputSBit());
+      break;
+    case kArmBfc: {
+      CpuFeatureScope scope(tasm(), ARMv7);
+      __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmUbfx: {
+      CpuFeatureScope scope(tasm(), ARMv7);
+      __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+              i.InputInt8(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmSbfx: {
+      CpuFeatureScope scope(tasm(), ARMv7);
+      __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+              i.InputInt8(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmSxtb:
+      __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmSxth:
+      __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmSxtab:
+      __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.InputInt32(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmSxtah:
+      __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.InputInt32(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmUxtb:
+      __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmUxth:
+      __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmUxtab:
+      __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.InputInt32(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmUxtah:
+      __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.InputInt32(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmRbit: {
+      CpuFeatureScope scope(tasm(), ARMv7);
+      __ rbit(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmRev:
+      __ rev(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmClz:
+      __ clz(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmCmp:
+      __ cmp(i.InputRegister(0), i.InputOperand2(1));
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmCmn:
+      __ cmn(i.InputRegister(0), i.InputOperand2(1));
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmTst:
+      __ tst(i.InputRegister(0), i.InputOperand2(1));
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmTeq:
+      __ teq(i.InputRegister(0), i.InputOperand2(1));
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmAddPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
+             SBit::SetCC);
+      __ adc(i.OutputRegister(1), i.InputRegister(1),
+             Operand(i.InputRegister(3)));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmSubPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
+             SBit::SetCC);
+      __ sbc(i.OutputRegister(1), i.InputRegister(1),
+             Operand(i.InputRegister(3)));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmMulPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+               i.InputRegister(2));
+      __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
+             i.OutputRegister(1));
+      __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
+             i.OutputRegister(1));
+      break;
+    case kArmLslPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputInt32(2));
+      } else {
+        __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2));
+      }
+      break;
+    }
+    case kArmLsrPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputInt32(2));
+      } else {
+        __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2));
+      }
+      break;
+    }
+    case kArmAsrPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputInt32(2));
+      } else {
+        __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2));
+      }
+      break;
+    }
+    case kArmVcmpF32:
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
+                                 i.InputFloatRegister(1));
+      } else {
+        DCHECK(instr->InputAt(1)->IsImmediate());
+        // 0.0 is the only immediate supported by vcmp instructions.
+        DCHECK_EQ(0.0f, i.InputFloat32(1));
+        __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
+      }
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmVaddF32:
+      __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
+              i.InputFloatRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVsubF32:
+      __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
+              i.InputFloatRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmulF32:
+      __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
+              i.InputFloatRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmlaF32:
+      __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
+              i.InputFloatRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmlsF32:
+      __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
+              i.InputFloatRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVdivF32:
+      __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
+              i.InputFloatRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVsqrtF32:
+      __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      break;
+    case kArmVabsF32:
+      __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      break;
+    case kArmVnegF32:
+      __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      break;
+    case kArmVcmpF64:
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
+                                 i.InputDoubleRegister(1));
+      } else {
+        DCHECK(instr->InputAt(1)->IsImmediate());
+        // 0.0 is the only immediate supported by vcmp instructions.
+        DCHECK_EQ(0.0, i.InputDouble(1));
+        __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
+      }
+      DCHECK_EQ(SetCC, i.OutputSBit());
+      break;
+    case kArmVaddF64:
+      __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVsubF64:
+      __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmulF64:
+      __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmlaF64:
+      __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
+              i.InputDoubleRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmlsF64:
+      __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
+              i.InputDoubleRegister(2));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVdivF64:
+      __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmodF64: {
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      __ PrepareCallCFunction(0, 2);
+      __ MovToFloatParameters(i.InputDoubleRegister(0),
+                              i.InputDoubleRegister(1));
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      // Move the result into the double result register.
+      __ MovFromFloatResult(i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVsqrtF64:
+      __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArmVabsF64:
+      __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArmVnegF64:
+      __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArmVrintmF32: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintm(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
+      break;
+    }
+    case kArmVrintmF64: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kArmVrintpF32: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintp(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
+      break;
+    }
+    case kArmVrintpF64: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kArmVrintzF32: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintz(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
+      break;
+    }
+    case kArmVrintzF64: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kArmVrintaF64: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kArmVrintnF32: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ vrintn(NeonS32, i.OutputSimd128Register(),
+                  i.InputSimd128Register(0));
+      } else {
+        __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
+      }
+      break;
+    }
+    case kArmVrintnF64: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kArmVcvtF32F64: {
+      __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtF64F32: {
+      __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtF32S32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vmov(scratch, i.InputRegister(0));
+      __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtF32U32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vmov(scratch, i.InputRegister(0));
+      __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtF64S32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vmov(scratch, i.InputRegister(0));
+      __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtF64U32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vmov(scratch, i.InputRegister(0));
+      __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtS32F32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
+      __ vmov(i.OutputRegister(), scratch);
+      bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_i32) {
+        // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+        // because INT32_MIN allows easier out-of-bounds detection.
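+        // cmn computes output + 1, which sets the overflow flag (vs) only
+        // when output is INT32_MAX; that saturated value is then replaced
+        // with INT32_MIN below.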
+        __ cmn(i.OutputRegister(), Operand(1));
+        __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtU32F32: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
+      __ vmov(i.OutputRegister(), scratch);
+      bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_u32) {
+        // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+        // because 0 allows easier out-of-bounds detection.
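+        // cmn computes output + 1, which sets the carry flag only when
+        // output is UINT32_MAX; adc then wraps that saturated value to 0.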
+        __ cmn(i.OutputRegister(), Operand(1));
+        __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtS32F64: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
+      __ vmov(i.OutputRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVcvtU32F64: {
+      UseScratchRegisterScope temps(tasm());
+      SwVfpRegister scratch = temps.AcquireS();
+      __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
+      __ vmov(i.OutputRegister(), scratch);
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVmovU32F32:
+      __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovF32U32:
+      __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovLowU32F64:
+      __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovLowF64U32:
+      __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovHighU32F64:
+      __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovHighF64U32:
+      __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovF64U32U32:
+      __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVmovU32U32F64:
+      __ vmov(i.OutputRegister(0), i.OutputRegister(1),
+              i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmLdrb:
+      __ ldrb(i.OutputRegister(), i.InputOffset());
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
+      break;
+    case kArmLdrsb:
+      __ ldrsb(i.OutputRegister(), i.InputOffset());
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
+      break;
+    case kArmStrb:
+      __ strb(i.InputRegister(0), i.InputOffset(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmLdrh:
+      __ ldrh(i.OutputRegister(), i.InputOffset());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
+      break;
+    case kArmLdrsh:
+      __ ldrsh(i.OutputRegister(), i.InputOffset());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
+      break;
+    case kArmStrh:
+      __ strh(i.InputRegister(0), i.InputOffset(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmLdr:
+      __ ldr(i.OutputRegister(), i.InputOffset());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, i);
+      break;
+    case kArmStr:
+      __ str(i.InputRegister(0), i.InputOffset(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVldrF32: {
+      const MemoryAccessMode access_mode =
+          static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+      if (access_mode == kMemoryAccessPoisoned) {
+        UseScratchRegisterScope temps(tasm());
+        Register address = temps.Acquire();
+        ComputePoisonedAddressForLoad(this, opcode, i, address);
+        __ vldr(i.OutputFloatRegister(), address, 0);
+      } else {
+        __ vldr(i.OutputFloatRegister(), i.InputOffset());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVstrF32:
+      __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmVld1F64: {
+      __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
+              i.NeonInputOperand(0));
+      break;
+    }
+    case kArmVst1F64: {
+      __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
+              i.NeonInputOperand(1));
+      break;
+    }
+    case kArmVld1S128: {
+      __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
+              i.NeonInputOperand(0));
+      break;
+    }
+    case kArmVst1S128: {
+      __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
+              i.NeonInputOperand(1));
+      break;
+    }
+    case kArmVldrF64: {
+      const MemoryAccessMode access_mode =
+          static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+      if (access_mode == kMemoryAccessPoisoned) {
+        UseScratchRegisterScope temps(tasm());
+        Register address = temps.Acquire();
+        ComputePoisonedAddressForLoad(this, opcode, i, address);
+        __ vldr(i.OutputDoubleRegister(), address, 0);
+      } else {
+        __ vldr(i.OutputDoubleRegister(), i.InputOffset());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmVstrF64:
+      __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmFloat32Max: {
+      SwVfpRegister result = i.OutputFloatRegister();
+      SwVfpRegister left = i.InputFloatRegister(0);
+      SwVfpRegister right = i.InputFloatRegister(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool = zone()->New<OutOfLineFloat32Max>(this, result, left, right);
+        __ FloatMax(result, left, right, ool->entry());
+        __ bind(ool->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmFloat64Max: {
+      DwVfpRegister result = i.OutputDoubleRegister();
+      DwVfpRegister left = i.InputDoubleRegister(0);
+      DwVfpRegister right = i.InputDoubleRegister(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool = zone()->New<OutOfLineFloat64Max>(this, result, left, right);
+        __ FloatMax(result, left, right, ool->entry());
+        __ bind(ool->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmFloat32Min: {
+      SwVfpRegister result = i.OutputFloatRegister();
+      SwVfpRegister left = i.InputFloatRegister(0);
+      SwVfpRegister right = i.InputFloatRegister(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool = zone()->New<OutOfLineFloat32Min>(this, result, left, right);
+        __ FloatMin(result, left, right, ool->entry());
+        __ bind(ool->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmFloat64Min: {
+      DwVfpRegister result = i.OutputDoubleRegister();
+      DwVfpRegister left = i.InputDoubleRegister(0);
+      DwVfpRegister right = i.InputDoubleRegister(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool = zone()->New<OutOfLineFloat64Min>(this, result, left, right);
+        __ FloatMin(result, left, right, ool->entry());
+        __ bind(ool->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmFloat64SilenceNaN: {
+      DwVfpRegister value = i.InputDoubleRegister(0);
+      DwVfpRegister result = i.OutputDoubleRegister();
+      __ VFPCanonicalizeNaN(result, value);
+      break;
+    }
+    case kArmPush:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            __ vpush(i.InputFloatRegister(0));
+            frame_access_state()->IncreaseSPDelta(1);
+            break;
+          case MachineRepresentation::kFloat64:
+            __ vpush(i.InputDoubleRegister(0));
+            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+                                                  kSystemPointerSize);
+            break;
+          case MachineRepresentation::kSimd128: {
+            __ vpush(i.InputSimd128Register(0));
+            frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                                  kSystemPointerSize);
+            break;
+          }
+          default:
+            UNREACHABLE();
+            break;
+        }
+      } else {
+        __ push(i.InputRegister(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    case kArmPoke: {
+      int const slot = MiscField::decode(instr->opcode());
+      __ str(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize));
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmPeek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          UseScratchRegisterScope temps(tasm());
+          Register scratch = temps.Acquire();
+          __ add(scratch, fp, Operand(offset));
+          __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
+                  NeonMemOperand(scratch));
+        }
+      } else {
+        __ ldr(i.OutputRegister(), MemOperand(fp, offset));
+      }
+      break;
+    }
+    case kArmDmbIsh: {
+      __ dmb(ISH);
+      break;
+    }
+    case kArmDsbIsb: {
+      __ dsb(SY);
+      __ isb(SY);
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      __ and_(i.OutputRegister(0), i.InputRegister(0),
+              Operand(kSpeculationPoisonRegister));
+      break;
+    case kArmF64x2Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      DoubleRegister src = i.InputDoubleRegister(0);
+      __ Move(dst.low(), src);
+      __ Move(dst.high(), src);
+      break;
+    }
+    case kArmF64x2ExtractLane: {
+      __ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmF64x2ReplaceLane: {
+      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputDoubleRegister(2), i.InputInt8(1));
+      break;
+    }
+    case kArmF64x2Abs: {
+      __ vabs(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
+      __ vabs(i.OutputSimd128Register().high(),
+              i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmF64x2Neg: {
+      __ vneg(i.OutputSimd128Register().low(), i.InputSimd128Register(0).low());
+      __ vneg(i.OutputSimd128Register().high(),
+              i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmF64x2Sqrt: {
+      __ vsqrt(i.OutputSimd128Register().low(),
+               i.InputSimd128Register(0).low());
+      __ vsqrt(i.OutputSimd128Register().high(),
+               i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmF64x2Add: {
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vadd);
+      break;
+    }
+    case kArmF64x2Sub: {
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vsub);
+      break;
+    }
+    case kArmF64x2Mul: {
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vmul);
+      break;
+    }
+    case kArmF64x2Div: {
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(vdiv);
+      break;
+    }
+    case kArmF64x2Min: {
+      Simd128Register result = i.OutputSimd128Register();
+      Simd128Register left = i.InputSimd128Register(0);
+      Simd128Register right = i.InputSimd128Register(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool_low = zone()->New<OutOfLineFloat64Min>(
+            this, result.low(), left.low(), right.low());
+        auto ool_high = zone()->New<OutOfLineFloat64Min>(
+            this, result.high(), left.high(), right.high());
+        __ FloatMin(result.low(), left.low(), right.low(), ool_low->entry());
+        __ bind(ool_low->exit());
+        __ FloatMin(result.high(), left.high(), right.high(),
+                    ool_high->entry());
+        __ bind(ool_high->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+    case kArmF64x2Max: {
+      Simd128Register result = i.OutputSimd128Register();
+      Simd128Register left = i.InputSimd128Register(0);
+      Simd128Register right = i.InputSimd128Register(1);
+      if (left == right) {
+        __ Move(result, left);
+      } else {
+        auto ool_low = zone()->New<OutOfLineFloat64Max>(
+            this, result.low(), left.low(), right.low());
+        auto ool_high = zone()->New<OutOfLineFloat64Max>(
+            this, result.high(), left.high(), right.high());
+        __ FloatMax(result.low(), left.low(), right.low(), ool_low->entry());
+        __ bind(ool_low->exit());
+        __ FloatMax(result.high(), left.high(), right.high(),
+                    ool_high->entry());
+        __ bind(ool_high->exit());
+      }
+      DCHECK_EQ(LeaveCC, i.OutputSBit());
+      break;
+    }
+#undef ASSEMBLE_F64X2_ARITHMETIC_BINOP
+    case kArmF64x2Eq: {
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
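+      // Compare each 64-bit half separately and materialize all-ones (eq) or
+      // zero in both 32-bit words of that lane.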
+      __ mov(scratch, Operand(0));
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, eq);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
+
+      __ mov(scratch, Operand(0));
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, eq);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Ne: {
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ mov(scratch, Operand(0));
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(-1), LeaveCC, ne);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
+
+      __ mov(scratch, Operand(0));
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(-1), LeaveCC, ne);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Lt: {
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
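+      // After a VFP compare, mi holds only for an ordered less-than (-1);
+      // cs covers greater, equal, and unordered (NaN), all of which yield 0.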
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(0), LeaveCC, cs);
+      __ mov(scratch, Operand(-1), LeaveCC, mi);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
+
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(0), LeaveCC, cs);
+      __ mov(scratch, Operand(-1), LeaveCC, mi);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Le: {
+      UseScratchRegisterScope temps(tasm());
+      Register scratch = temps.Acquire();
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).low(),
+                               i.InputSimd128Register(1).low());
+      __ mov(scratch, Operand(0), LeaveCC, hi);
+      __ mov(scratch, Operand(-1), LeaveCC, ls);
+      __ vmov(i.OutputSimd128Register().low(), scratch, scratch);
+
+      __ VFPCompareAndSetFlags(i.InputSimd128Register(0).high(),
+                               i.InputSimd128Register(1).high());
+      __ mov(scratch, Operand(0), LeaveCC, hi);
+      __ mov(scratch, Operand(-1), LeaveCC, ls);
+      __ vmov(i.OutputSimd128Register().high(), scratch, scratch);
+      break;
+    }
+    case kArmF64x2Pmin: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_EQ(dst, lhs);
+
+      // Move rhs into dst only when rhs is strictly less than lhs (mi).
+      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+      __ vmov(dst.low(), rhs.low(), mi);
+      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+      __ vmov(dst.high(), rhs.high(), mi);
+      break;
+    }
+    case kArmF64x2Pmax: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_EQ(dst, lhs);
+
+      // Move rhs into dst only when rhs is strictly greater than lhs (gt).
+      __ VFPCompareAndSetFlags(rhs.low(), lhs.low());
+      __ vmov(dst.low(), rhs.low(), gt);
+      __ VFPCompareAndSetFlags(rhs.high(), lhs.high());
+      __ vmov(dst.high(), rhs.high(), gt);
+      break;
+    }
+    case kArmF64x2Ceil: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vrintp(dst.low(), src.low());
+      __ vrintp(dst.high(), src.high());
+      break;
+    }
+    case kArmF64x2Floor: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vrintm(dst.low(), src.low());
+      __ vrintm(dst.high(), src.high());
+      break;
+    }
+    case kArmF64x2Trunc: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vrintz(dst.low(), src.low());
+      __ vrintz(dst.high(), src.high());
+      break;
+    }
+    case kArmF64x2NearestInt: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vrintn(dst.low(), src.low());
+      __ vrintn(dst.high(), src.high());
+      break;
+    }
+    case kArmI64x2SplatI32Pair: {
+      Simd128Register dst = i.OutputSimd128Register();
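+      // Duplicate the low input word into all four 32-bit lanes, then write
+      // the high word into lanes 1 and 3 to form two identical 64-bit lanes.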
+      __ vdup(Neon32, dst, i.InputRegister(0));
+      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 1);
+      __ ReplaceLane(dst, dst, i.InputRegister(1), NeonS32, 3);
+      break;
+    }
+    case kArmI64x2ReplaceLaneI32Pair: {
+      Simd128Register dst = i.OutputSimd128Register();
+      int8_t lane = i.InputInt8(1);
+      __ ReplaceLane(dst, dst, i.InputRegister(2), NeonS32, lane * 2);
+      __ ReplaceLane(dst, dst, i.InputRegister(3), NeonS32, lane * 2 + 1);
+      break;
+    }
+    case kArmI64x2Add: {
+      __ vadd(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI64x2Sub: {
+      __ vsub(Neon64, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI64x2Mul: {
+      QwNeonRegister dst = i.OutputSimd128Register();
+      QwNeonRegister left = i.InputSimd128Register(0);
+      QwNeonRegister right = i.InputSimd128Register(1);
+      QwNeonRegister tmp1 = i.TempSimd128Register(0);
+      QwNeonRegister tmp2 = i.TempSimd128Register(1);
+
+      // This algorithm uses vector operations to perform 64-bit integer
+      // multiplication by splitting each operand into high and low 32-bit
+      // halves. The tricky part is getting the low and high halves into the
+      // correct places inside a NEON register, so that we need as few vmull
+      // and vmlal instructions as possible.
+
+      // Move left and right into temporaries, they will be modified by vtrn.
+      __ vmov(tmp1, left);
+      __ vmov(tmp2, right);
+
+      // This diagram shows how the 64-bit integers fit into NEON registers.
+      //
+      //             [q.high()| q.low()]
+      // left/tmp1:  [ a3, a2 | a1, a0 ]
+      // right/tmp2: [ b3, b2 | b1, b0 ]
+      //
+      // We want to multiply the low 32 bits of left with the high 32 bits of
+      // right for each lane, i.e. a2 * b3, a0 * b1. However, vmull takes two
+      // input d registers and multiplies the corresponding low/high 32 bits
+      // to get a 64-bit integer: a1 * b1, a0 * b0. To make this work we
+      // transpose the vectors, so that the low 32 bits of each 64-bit integer
+      // end up in the same lane, and similarly for the high 32 bits.
+      __ vtrn(Neon32, tmp1.low(), tmp1.high());
+      // tmp1: [ a3, a1 | a2, a0 ]
+      __ vtrn(Neon32, tmp2.low(), tmp2.high());
+      // tmp2: [ b3, b1 | b2, b0 ]
+
+      __ vmull(NeonU32, dst, tmp1.low(), tmp2.high());
+      // dst: [ a2*b3 | a0*b1 ]
+      __ vmlal(NeonU32, dst, tmp1.high(), tmp2.low());
+      // dst: [ a2*b3 + a3*b2 | a0*b1 + a1*b0 ]
+      __ vshl(NeonU64, dst, dst, 32);
+      // dst: [ (a2*b3 + a3*b2) << 32 | (a0*b1 + a1*b0) << 32 ]
+
+      __ vmlal(NeonU32, dst, tmp1.low(), tmp2.low());
+      // dst: [ (a2*b3 + a3*b2)<<32 + (a2*b2) | (a0*b1 + a1*b0)<<32 + (a0*b0) ]
+      break;
+    }
+    case kArmI64x2Neg: {
+      Simd128Register dst = i.OutputSimd128Register();
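+      // vneg has no 64-bit integer variant, so negate by subtracting from
+      // zero instead.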
+      __ vmov(dst, uint64_t{0});
+      __ vsub(Neon64, dst, dst, i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI64x2Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 6, Neon32, NeonS64);
+      break;
+    }
+    case kArmI64x2ShrS: {
+      // Only the least significant byte of each shift-amount lane is used,
+      // so Neon32 works as the size here.
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonS64);
+      break;
+    }
+    case kArmI64x2ShrU: {
+      // Only the least significant byte of each shift-amount lane is used,
+      // so Neon32 works as the size here.
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 6, Neon32, NeonU64);
+      break;
+    }
+    case kArmF32x4Splat: {
+      int src_code = i.InputFloatRegister(0).code();
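+      // Map the S register onto its containing D register and lane, then
+      // duplicate that scalar into all four lanes.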
+      __ vdup(Neon32, i.OutputSimd128Register(),
+              DwVfpRegister::from_code(src_code / 2), src_code % 2);
+      break;
+    }
+    case kArmF32x4ExtractLane: {
+      __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmF32x4ReplaceLane: {
+      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputFloatRegister(2), i.InputInt8(1));
+      break;
+    }
+    case kArmF32x4SConvertI32x4: {
+      __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4UConvertI32x4: {
+      __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Abs: {
+      __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Neg: {
+      __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Sqrt: {
+      QwNeonRegister dst = i.OutputSimd128Register();
+      QwNeonRegister src1 = i.InputSimd128Register(0);
+      DCHECK_EQ(dst, q0);
+      DCHECK_EQ(src1, q0);
+#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
+      __ vsqrt(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0));
+      __ vsqrt(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1));
+      __ vsqrt(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2));
+      __ vsqrt(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3));
+#undef S_FROM_Q
+      break;
+    }
+    case kArmF32x4RecipApprox: {
+      __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4RecipSqrtApprox: {
+      __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Add: {
+      __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4AddHoriz: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // Make sure we don't overwrite source data before it's used.
+      if (dst == src0) {
+        __ vpadd(dst.low(), src0.low(), src0.high());
+        if (dst == src1) {
+          __ vmov(dst.high(), dst.low());
+        } else {
+          __ vpadd(dst.high(), src1.low(), src1.high());
+        }
+      } else {
+        __ vpadd(dst.high(), src1.low(), src1.high());
+        __ vpadd(dst.low(), src0.low(), src0.high());
+      }
+      break;
+    }
+    case kArmF32x4Sub: {
+      __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4Mul: {
+      __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4Div: {
+      QwNeonRegister dst = i.OutputSimd128Register();
+      QwNeonRegister src1 = i.InputSimd128Register(0);
+      QwNeonRegister src2 = i.InputSimd128Register(1);
+      DCHECK_EQ(dst, q0);
+      DCHECK_EQ(src1, q0);
+      DCHECK_EQ(src2, q1);
+#define S_FROM_Q(reg, lane) SwVfpRegister::from_code(reg.code() * 4 + lane)
+      __ vdiv(S_FROM_Q(dst, 0), S_FROM_Q(src1, 0), S_FROM_Q(src2, 0));
+      __ vdiv(S_FROM_Q(dst, 1), S_FROM_Q(src1, 1), S_FROM_Q(src2, 1));
+      __ vdiv(S_FROM_Q(dst, 2), S_FROM_Q(src1, 2), S_FROM_Q(src2, 2));
+      __ vdiv(S_FROM_Q(dst, 3), S_FROM_Q(src1, 3), S_FROM_Q(src2, 3));
+#undef S_FROM_Q
+      break;
+    }
+    case kArmF32x4Min: {
+      __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4Max: {
+      __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4Eq: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmF32x4Ne: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ vmvn(dst, dst);
+      break;
+    }
+    case kArmF32x4Lt: {
+      __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Le: {
+      __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0));
+      break;
+    }
+    case kArmF32x4Pmin: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_NE(dst, lhs);
+      DCHECK_NE(dst, rhs);
+
+      // f32x4.pmin(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+      // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+      __ vcgt(dst, lhs, rhs);
+      __ vbsl(dst, rhs, lhs);
+      break;
+    }
+    case kArmF32x4Pmax: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      DCHECK_NE(dst, lhs);
+      DCHECK_NE(dst, rhs);
+
+      // f32x4.pmax(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+      __ vcgt(dst, rhs, lhs);
+      __ vbsl(dst, rhs, lhs);
+      break;
+    }
+    case kArmI32x4Splat: {
+      __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kArmI32x4ExtractLane: {
+      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmI32x4ReplaceLane: {
+      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputRegister(2), NeonS32, i.InputInt8(1));
+      break;
+    }
+    case kArmI32x4SConvertF32x4: {
+      __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI32x4SConvertI16x8Low: {
+      __ vmovl(NeonS16, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).low());
+      break;
+    }
+    case kArmI32x4SConvertI16x8High: {
+      __ vmovl(NeonS16, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmI32x4Neg: {
+      __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI32x4Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 5, Neon32, NeonS32);
+      break;
+    }
+    case kArmI32x4ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonS32);
+      break;
+    }
+    case kArmI32x4Add: {
+      __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4AddHoriz:
+      ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
+      break;
+    case kArmI32x4Sub: {
+      __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4Mul: {
+      __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4MinS: {
+      __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4MaxS: {
+      __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4Eq: {
+      __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4Ne: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vceq(Neon32, dst, i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      __ vmvn(dst, dst);
+      break;
+    }
+    case kArmI32x4GtS: {
+      __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4GeS: {
+      __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4UConvertF32x4: {
+      __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI32x4UConvertI16x8Low: {
+      __ vmovl(NeonU16, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).low());
+      break;
+    }
+    case kArmI32x4UConvertI16x8High: {
+      __ vmovl(NeonU16, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmI32x4ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 5, Neon32, NeonU32);
+      break;
+    }
+    case kArmI32x4MinU: {
+      __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4MaxU: {
+      __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4GtU: {
+      __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4GeU: {
+      __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI32x4Abs: {
+      __ vabs(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI32x4BitMask: {
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register tmp2 = i.TempSimd128Register(0);
+      Simd128Register mask = i.TempSimd128Register(1);
+
+      __ vshr(NeonS32, tmp2, src, 31);
+      // Set the i-th bit of lane i. ANDing the mask with tmp2 keeps that bit
+      // only for lanes whose sign bit was set; all other lanes become 0.
+      __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
+      __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
+      __ vand(tmp2, mask, tmp2);
+      __ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high());
+      __ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero);
+      __ VmovLow(dst, tmp2.low());
+      break;
+    }
+    case kArmI32x4DotI16x8S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      Simd128Register tmp1 = i.TempSimd128Register(0);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
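+      // Widen and multiply the low and high halves into 32-bit products, then
+      // pairwise-add adjacent products to form the four 32-bit dot products.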
+      __ vmull(NeonS16, tmp1, lhs.low(), rhs.low());
+      __ vmull(NeonS16, scratch, lhs.high(), rhs.high());
+      __ vpadd(Neon32, dst.low(), tmp1.low(), tmp1.high());
+      __ vpadd(Neon32, dst.high(), scratch.low(), scratch.high());
+      break;
+    }
+    case kArmI16x8Splat: {
+      __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kArmI16x8ExtractLaneU: {
+      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU16,
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmI16x8ExtractLaneS: {
+      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmI16x8ReplaceLane: {
+      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputRegister(2), NeonS16, i.InputInt8(1));
+      break;
+    }
+    case kArmI16x8SConvertI8x16Low: {
+      __ vmovl(NeonS8, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).low());
+      break;
+    }
+    case kArmI16x8SConvertI8x16High: {
+      __ vmovl(NeonS8, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmI16x8Neg: {
+      __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI16x8Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 4, Neon16, NeonS16);
+      break;
+    }
+    case kArmI16x8ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonS16);
+      break;
+    }
+    case kArmI16x8SConvertI32x4:
+      ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
+      break;
+    case kArmI16x8Add: {
+      __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8AddSatS: {
+      __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8AddHoriz:
+      ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
+      break;
+    case kArmI16x8Sub: {
+      __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8SubSatS: {
+      __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8Mul: {
+      __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8MinS: {
+      __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8MaxS: {
+      __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8Eq: {
+      __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8Ne: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vceq(Neon16, dst, i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      __ vmvn(dst, dst);
+      break;
+    }
+    case kArmI16x8GtS: {
+      __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8GeS: {
+      __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8UConvertI8x16Low: {
+      __ vmovl(NeonU8, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).low());
+      break;
+    }
+    case kArmI16x8UConvertI8x16High: {
+      __ vmovl(NeonU8, i.OutputSimd128Register(),
+               i.InputSimd128Register(0).high());
+      break;
+    }
+    case kArmI16x8ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 4, Neon16, NeonU16);
+      break;
+    }
+    case kArmI16x8UConvertI32x4:
+      ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
+      break;
+    case kArmI16x8AddSatU: {
+      __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8SubSatU: {
+      __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8MinU: {
+      __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8MaxU: {
+      __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8GtU: {
+      __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8GeU: {
+      __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8RoundingAverageU: {
+      __ vrhadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI16x8Abs: {
+      __ vabs(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI16x8BitMask: {
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register tmp2 = i.TempSimd128Register(0);
+      Simd128Register mask = i.TempSimd128Register(1);
+
+      __ vshr(NeonS16, tmp2, src, 15);
+      // Set the i-th bit of lane i. ANDing the mask with tmp2 keeps that bit
+      // only for lanes whose sign bit was set; all other lanes become 0.
+      __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
+      __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
+      __ vand(tmp2, mask, tmp2);
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
+      __ vmov(NeonU16, dst, tmp2.low(), 0);
+      break;
+    }
+    case kArmI8x16Splat: {
+      __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kArmI8x16ExtractLaneU: {
+      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonU8,
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmI8x16ExtractLaneS: {
+      __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
+                     i.InputInt8(1));
+      break;
+    }
+    case kArmI8x16ReplaceLane: {
+      __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputRegister(2), NeonS8, i.InputInt8(1));
+      break;
+    }
+    case kArmI8x16Neg: {
+      __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI8x16Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(vshl, 3, Neon8, NeonS8);
+      break;
+    }
+    case kArmI8x16ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonS8);
+      break;
+    }
+    case kArmI8x16SConvertI16x8:
+      ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
+      break;
+    case kArmI8x16Add: {
+      __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16AddSatS: {
+      __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16Sub: {
+      __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16SubSatS: {
+      __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16Mul: {
+      __ vmul(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16MinS: {
+      __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16MaxS: {
+      __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16Eq: {
+      __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16Ne: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ vmvn(dst, dst);
+      break;
+    }
+    case kArmI8x16GtS: {
+      __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16GeS: {
+      __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(vshr, 3, Neon8, NeonU8);
+      break;
+    }
+    case kArmI8x16UConvertI16x8:
+      ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
+      break;
+    case kArmI8x16AddSatU: {
+      __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16SubSatU: {
+      __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16MinU: {
+      __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16MaxU: {
+      __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16GtU: {
+      __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16GeU: {
+      __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16RoundingAverageU: {
+      __ vrhadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kArmI8x16Abs: {
+      __ vabs(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmI8x16BitMask: {
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register tmp2 = i.TempSimd128Register(0);
+      Simd128Register mask = i.TempSimd128Register(1);
+
+      __ vshr(NeonS8, tmp2, src, 7);
+      // Set bit (i mod 8) in byte lane i (the pattern repeats per 8-byte
+      // half). ANDing the mask with tmp2 keeps that bit only for lanes whose
+      // sign bit was set; all other lanes become 0.
+      __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
+      __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
+      __ vand(tmp2, mask, tmp2);
+      __ vext(mask, tmp2, tmp2, 8);
+      __ vzip(Neon8, mask, tmp2);
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high());
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
+      __ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low());
+      __ vmov(NeonU16, dst, tmp2.low(), 0);
+      break;
+    }
+    case kArmS128Const: {
+      QwNeonRegister dst = i.OutputSimd128Register();
+      uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
+      uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
+      __ vmov(dst.low(), Double(imm1));
+      __ vmov(dst.high(), Double(imm2));
+      break;
+    }
+    case kArmS128Zero: {
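+      // XORing the register with itself yields zero.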
+      __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+              i.OutputSimd128Register());
+      break;
+    }
+    case kArmS128AllOnes: {
+      __ vmov(i.OutputSimd128Register(), uint64_t{0xffff'ffff'ffff'ffff});
+      break;
+    }
+    case kArmS128Dup: {
+      NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
+      int lanes = kSimd128Size >> size;
+      int index = i.InputInt32(2);
+      DCHECK(index < lanes);
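+      // vdup takes a D-register scalar, so pick the D half of the source that
+      // contains the requested lane and compute the lane's index within it.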
+      int d_lanes = lanes / 2;
+      int src_d_index = index & (d_lanes - 1);
+      int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
+      __ vdup(size, i.OutputSimd128Register(),
+              DwVfpRegister::from_code(src_d_code), src_d_index);
+      break;
+    }
+    case kArmS128And: {
+      __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmS128Or: {
+      __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmS128Xor: {
+      __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmS128Not: {
+      __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS128Select: {
+      Simd128Register dst = i.OutputSimd128Register();
+      DCHECK(dst == i.InputSimd128Register(0));
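+      // dst holds the mask: vbsl selects bits from input 1 where the mask is
+      // set and from input 2 where it is clear.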
+      __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kArmS128AndNot: {
+      __ vbic(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kArmS32x4ZipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
+      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 4, 5]
+      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [0, 4, 1, 5]
+      break;
+    }
+    case kArmS32x4ZipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
+      __ vmov(dst.low(), src1.high());         // dst = [2, 3, 6, 7]
+      __ vtrn(Neon32, dst.low(), dst.high());  // dst = [2, 6, 3, 7]
+      break;
+    }
+    case kArmS32x4UnzipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
+      __ vmov(scratch, src1);
+      __ vuzp(Neon32, dst, scratch);  // dst = [0, 2, 4, 6]
+      break;
+    }
+    case kArmS32x4UnzipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
+      __ vmov(scratch, src1);
+      __ vuzp(Neon32, scratch, dst);  // dst = [1, 3, 5, 7]
+      break;
+    }
+    case kArmS32x4TransposeLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
+      __ vmov(scratch, src1);
+      __ vtrn(Neon32, dst, scratch);  // dst = [0, 4, 2, 6]
+      break;
+    }
+    case kArmS32x4Shuffle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK_NE(dst, src0);
+      DCHECK_NE(dst, src1);
+      // Perform shuffle as a vmov per lane.
+      int dst_code = dst.code() * 4;
+      int src0_code = src0.code() * 4;
+      int src1_code = src1.code() * 4;
+      int32_t shuffle = i.InputInt32(2);
+      for (int i = 0; i < 4; i++) {
+        int lane = shuffle & 0x7;
+        int src_code = src0_code;
+        if (lane >= 4) {
+          src_code = src1_code;
+          lane &= 0x3;
+        }
+        __ VmovExtended(dst_code + i, src_code + lane);
+        shuffle >>= 8;
+      }
+      break;
+    }
+    case kArmS32x4TransposeRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
+      __ vmov(scratch, src1);
+      __ vtrn(Neon32, scratch, dst);  // dst = [1, 5, 3, 7]
+      break;
+    }
+    case kArmS16x8ZipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
+      DCHECK(dst == i.InputSimd128Register(0));
+      __ vmov(dst.high(), src1.low());         // dst = [0, 1, 2, 3, 8, ... 11]
+      __ vzip(Neon16, dst.low(), dst.high());  // dst = [0, 8, 1, 9, ... 11]
+      break;
+    }
+    case kArmS16x8ZipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
+      __ vmov(dst.low(), src1.high());
+      __ vzip(Neon16, dst.low(), dst.high());  // dst = [4, 12, 5, 13, ... 15]
+      break;
+    }
+    case kArmS16x8UnzipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
+      __ vmov(scratch, src1);
+      __ vuzp(Neon16, dst, scratch);  // dst = [0, 2, 4, 6, ... 14]
+      break;
+    }
+    case kArmS16x8UnzipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
+      __ vmov(scratch, src1);
+      __ vuzp(Neon16, scratch, dst);  // dst = [1, 3, 5, 7, ... 15]
+      break;
+    }
+    case kArmS16x8TransposeLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
+      __ vmov(scratch, src1);
+      __ vtrn(Neon16, dst, scratch);  // dst = [0, 8, 2, 10, ... 14]
+      break;
+    }
+    case kArmS16x8TransposeRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
+      __ vmov(scratch, src1);
+      __ vtrn(Neon16, scratch, dst);  // dst = [1, 9, 3, 11, ... 15]
+      break;
+    }
+    case kArmS8x16ZipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
+      __ vmov(dst.high(), src1.low());
+      __ vzip(Neon8, dst.low(), dst.high());  // dst = [0, 16, 1, 17, ... 23]
+      break;
+    }
+    case kArmS8x16ZipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
+      __ vmov(dst.low(), src1.high());
+      __ vzip(Neon8, dst.low(), dst.high());  // dst = [8, 24, 9, 25, ... 31]
+      break;
+    }
+    case kArmS8x16UnzipLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
+      __ vmov(scratch, src1);
+      __ vuzp(Neon8, dst, scratch);  // dst = [0, 2, 4, 6, ... 30]
+      break;
+    }
+    case kArmS8x16UnzipRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
+      __ vmov(scratch, src1);
+      __ vuzp(Neon8, scratch, dst);  // dst = [1, 3, 5, 7, ... 31]
+      break;
+    }
+    case kArmS8x16TransposeLeft: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
+      __ vmov(scratch, src1);
+      __ vtrn(Neon8, dst, scratch);  // dst = [0, 16, 2, 18, ... 30]
+      break;
+    }
+    case kArmS8x16TransposeRight: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      DCHECK(dst == i.InputSimd128Register(0));
+      // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
+      __ vmov(scratch, src1);
+      __ vtrn(Neon8, scratch, dst);  // dst = [1, 17, 3, 19, ... 31]
+      break;
+    }
+    case kArmS8x16Concat: {
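+      // vext extracts 16 bytes starting at the given byte offset from the
+      // concatenation of the two inputs.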
+      __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), i.InputInt4(2));
+      break;
+    }
+    case kArmI8x16Swizzle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      tbl = i.InputSimd128Register(0),
+                      src = i.InputSimd128Register(1);
+      NeonListOperand table(tbl);
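+      // vtbl looks up each index byte of src in the table; out-of-range
+      // indices produce zero, matching the swizzle semantics.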
+      __ vtbl(dst.low(), table, src.low());
+      __ vtbl(dst.high(), table, src.high());
+      break;
+    }
+    case kArmI8x16Shuffle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      DwVfpRegister table_base = src0.low();
+      UseScratchRegisterScope temps(tasm());
+      Simd128Register scratch = temps.AcquireQ();
+      // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
+      // src1. They must be consecutive.
+      int table_size = src0 == src1 ? 2 : 4;
+      DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
+      // The shuffle lane mask is a byte mask, materialize in scratch.
+      int scratch_s_base = scratch.code() * 4;
+      for (int j = 0; j < 4; j++) {
+        uint32_t four_lanes = i.InputUint32(2 + j);
+        DCHECK_EQ(0, four_lanes & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
+        __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
+                Float32::FromBits(four_lanes));
+      }
+      NeonListOperand table(table_base, table_size);
+      if (dst != src0 && dst != src1) {
+        __ vtbl(dst.low(), table, scratch.low());
+        __ vtbl(dst.high(), table, scratch.high());
+      } else {
+        __ vtbl(scratch.low(), table, scratch.low());
+        __ vtbl(scratch.high(), table, scratch.high());
+        __ vmov(dst, scratch);
+      }
+      break;
+    }
+    case kArmS32x2Reverse: {
+      __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS16x4Reverse: {
+      __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS16x2Reverse: {
+      __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS8x8Reverse: {
+      __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS8x4Reverse: {
+      __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmS8x2Reverse: {
+      __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kArmV32x4AnyTrue:
+    case kArmV16x8AnyTrue:
+    case kArmV8x16AnyTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
+      UseScratchRegisterScope temps(tasm());
+      DwVfpRegister scratch = temps.AcquireD();
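+      // Reduce with unsigned pairwise max: the result is non-zero iff any
+      // lane was non-zero.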
+      __ vpmax(NeonU32, scratch, src.low(), src.high());
+      __ vpmax(NeonU32, scratch, scratch, scratch);
+      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
+      __ cmp(i.OutputRegister(), Operand(0));
+      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
+      break;
+    }
+    case kArmV32x4AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
+      UseScratchRegisterScope temps(tasm());
+      DwVfpRegister scratch = temps.AcquireD();
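+      // Reduce with unsigned pairwise min: the result is non-zero iff all
+      // lanes were non-zero.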
+      __ vpmin(NeonU32, scratch, src.low(), src.high());
+      __ vpmin(NeonU32, scratch, scratch, scratch);
+      __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
+      __ cmp(i.OutputRegister(), Operand(0));
+      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
+      break;
+    }
+    case kArmV16x8AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
+      UseScratchRegisterScope temps(tasm());
+      DwVfpRegister scratch = temps.AcquireD();
+      __ vpmin(NeonU16, scratch, src.low(), src.high());
+      __ vpmin(NeonU16, scratch, scratch, scratch);
+      __ vpmin(NeonU16, scratch, scratch, scratch);
+      __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
+      __ cmp(i.OutputRegister(), Operand(0));
+      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
+      break;
+    }
+    case kArmV8x16AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
+      UseScratchRegisterScope temps(tasm());
+      DwVfpRegister scratch = temps.AcquireD();
+      __ vpmin(NeonU8, scratch, src.low(), src.high());
+      __ vpmin(NeonU8, scratch, scratch, scratch);
+      __ vpmin(NeonU8, scratch, scratch, scratch);
+      __ vpmin(NeonU8, scratch, scratch, scratch);
+      __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
+      __ cmp(i.OutputRegister(), Operand(0));
+      __ mov(i.OutputRegister(), Operand(1), LeaveCC, ne);
+      break;
+    }
+    case kArmS128Load8Splat: {
+      __ vld1r(Neon8, NeonListOperand(i.OutputSimd128Register()),
+               i.NeonInputOperand(0));
+      break;
+    }
+    case kArmS128Load16Splat: {
+      __ vld1r(Neon16, NeonListOperand(i.OutputSimd128Register()),
+               i.NeonInputOperand(0));
+      break;
+    }
+    case kArmS128Load32Splat: {
+      __ vld1r(Neon32, NeonListOperand(i.OutputSimd128Register()),
+               i.NeonInputOperand(0));
+      break;
+    }
+    case kArmS128Load64Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ Move(dst.high(), dst.low());
+      break;
+    }
+    case kArmS128Load8x8S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonS8, dst, dst.low());
+      break;
+    }
+    case kArmS128Load8x8U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon8, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonU8, dst, dst.low());
+      break;
+    }
+    case kArmS128Load16x4S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonS16, dst, dst.low());
+      break;
+    }
+    case kArmS128Load16x4U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon16, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonU16, dst, dst.low());
+      break;
+    }
+    case kArmS128Load32x2S: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonS32, dst, dst.low());
+      break;
+    }
+    case kArmS128Load32x2U: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vld1(Neon32, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      __ vmovl(NeonU32, dst, dst.low());
+      break;
+    }
+    case kArmS128Load32Zero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vmov(dst, 0);
+      __ vld1s(Neon32, NeonListOperand(dst.low()), 0, i.NeonInputOperand(0));
+      break;
+    }
+    case kArmS128Load64Zero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vmov(dst.high(), 0);
+      __ vld1(Neon64, NeonListOperand(dst.low()), i.NeonInputOperand(0));
+      break;
+    }
+    case kWord32AtomicLoadInt8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
+      break;
+    case kWord32AtomicLoadUint8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
+      break;
+    case kWord32AtomicLoadInt16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
+      break;
+    case kWord32AtomicLoadUint16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
+      break;
+    case kWord32AtomicLoadWord32:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
+      break;
+    case kWord32AtomicStoreWord8:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(strb);
+      break;
+    case kWord32AtomicStoreWord16:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(strh);
+      break;
+    case kWord32AtomicStoreWord32:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(str);
+      break;
+    case kWord32AtomicExchangeInt8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
+      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
+      break;
+    case kWord32AtomicExchangeInt16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
+      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
+      break;
+    case kWord32AtomicExchangeWord32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
+      break;
+    case kWord32AtomicCompareExchangeInt8:
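+      // Compute the absolute address and zero-extend the expected value
+      // before entering the exclusive compare-exchange loop.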
+      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
+      __ uxtb(i.TempRegister(2), i.InputRegister(2));
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
+                                               i.TempRegister(2));
+      __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicCompareExchangeUint8:
+      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
+      __ uxtb(i.TempRegister(2), i.InputRegister(2));
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
+                                               i.TempRegister(2));
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
+      __ uxth(i.TempRegister(2), i.InputRegister(2));
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
+                                               i.TempRegister(2));
+      __ sxth(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicCompareExchangeUint16:
+      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
+      __ uxth(i.TempRegister(2), i.InputRegister(2));
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
+                                               i.TempRegister(2));
+      break;
+    case kWord32AtomicCompareExchangeWord32:
+      __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
+                                               i.InputRegister(2));
+      break;
+#define ATOMIC_BINOP_CASE(op, inst)                    \
+  case kWord32Atomic##op##Int8:                        \
+    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
+    __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
+    break;                                             \
+  case kWord32Atomic##op##Uint8:                       \
+    ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst);       \
+    break;                                             \
+  case kWord32Atomic##op##Int16:                       \
+    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
+    __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
+    break;                                             \
+  case kWord32Atomic##op##Uint16:                      \
+    ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst);       \
+    break;                                             \
+  case kWord32Atomic##op##Word32:                      \
+    ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst);         \
+    break;
+      ATOMIC_BINOP_CASE(Add, add)
+      ATOMIC_BINOP_CASE(Sub, sub)
+      ATOMIC_BINOP_CASE(And, and_)
+      ATOMIC_BINOP_CASE(Or, orr)
+      ATOMIC_BINOP_CASE(Xor, eor)
+#undef ATOMIC_BINOP_CASE
+    case kArmWord32AtomicPairLoad: {
+      if (instr->OutputCount() == 2) {
+        DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
+        __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
+        __ ldrexd(r0, r1, i.TempRegister(0));
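+        // ldrexd atomically loads the 64-bit pair into r0:r1; the dmb orders
+        // the load before any subsequent memory accesses.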
+        __ dmb(ISH);
+      } else {
+        // A special case of this instruction: even though this is a pair load,
+        // we only need one of the two words. We emit a normal atomic load.
+        DCHECK_EQ(instr->OutputCount(), 1);
+        Register base = i.InputRegister(0);
+        Register offset = i.InputRegister(1);
+        DCHECK(instr->InputAt(2)->IsImmediate());
+        int32_t offset_imm = i.InputInt32(2);
+        if (offset_imm != 0) {
+          Register temp = i.TempRegister(0);
+          __ add(temp, offset, Operand(offset_imm));
+          offset = temp;
+        }
+        __ ldr(i.OutputRegister(), MemOperand(base, offset));
+        __ dmb(ISH);
+      }
+      break;
+    }
+    case kArmWord32AtomicPairStore: {
+      Label store;
+      Register base = i.InputRegister(0);
+      Register offset = i.InputRegister(1);
+      Register value_low = i.InputRegister(2);
+      Register value_high = i.InputRegister(3);
+      Register actual_addr = i.TempRegister(0);
+      // The {ldrexd} instruction needs two temp registers. We do not need its
+      // result, but {strexd} is not guaranteed to succeed unless a matching
+      // {ldrexd} has first marked the address for exclusive access.
+      Register tmp1 = i.TempRegister(1);
+      Register tmp2 = i.TempRegister(2);
+      // Reuse one of the temp registers for the result of {strexd}.
+      Register store_result = tmp1;
+      __ add(actual_addr, base, offset);
+      __ dmb(ISH);
+      __ bind(&store);
+      // Add this {ldrexd} instruction here so that {strexd} below can succeed.
+      // We don't need the result of {ldrexd} itself.
+      __ ldrexd(tmp1, tmp2, actual_addr);
+      __ strexd(store_result, value_low, value_high, actual_addr);
+      __ cmp(store_result, Operand(0));
+      __ b(ne, &store);
+      __ dmb(ISH);
+      break;
+    }
+#define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2)           \
+  case kArmWord32AtomicPair##op: {                            \
+    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
+    ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2);            \
+    break;                                                    \
+  }
+      ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
+      ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
+#undef ATOMIC_ARITH_BINOP_CASE
+#define ATOMIC_LOGIC_BINOP_CASE(op, instr1)                   \
+  case kArmWord32AtomicPair##op: {                            \
+    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
+    ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1);                    \
+    break;                                                    \
+  }
+      ATOMIC_LOGIC_BINOP_CASE(And, and_)
+      ATOMIC_LOGIC_BINOP_CASE(Or, orr)
+      ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
+#undef ATOMIC_LOGIC_BINOP_CASE
+    case kArmWord32AtomicPairExchange: {
+      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
+      Label exchange;
+      __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
+      __ dmb(ISH);
+      __ bind(&exchange);
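+      // Retry the exclusive load/store pair until strexd reports success by
+      // writing 0 into the temp register.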
+      __ ldrexd(r6, r7, i.TempRegister(0));
+      __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
+                i.TempRegister(0));
+      __ teq(i.TempRegister(1), Operand(0));
+      __ b(ne, &exchange);
+      __ dmb(ISH);
+      break;
+    }
+    case kArmWord32AtomicPairCompareExchange: {
+      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
+      __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
+      Label compareExchange;
+      Label exit;
+      __ dmb(ISH);
+      __ bind(&compareExchange);
+      __ ldrexd(r2, r3, i.TempRegister(0));
+      __ teq(i.InputRegister(0), Operand(r2));
+      __ b(ne, &exit);
+      __ teq(i.InputRegister(1), Operand(r3));
+      __ b(ne, &exit);
+      __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
+                i.TempRegister(0));
+      __ teq(i.TempRegister(1), Operand(0));
+      __ b(ne, &compareExchange);
+      __ bind(&exit);
+      __ dmb(ISH);
+      break;
+    }
+#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
+#undef ASSEMBLE_ATOMIC_STORE_INTEGER
+#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_ATOMIC64_ARITH_BINOP
+#undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+#undef ASSEMBLE_NEON_NARROWING_OP
+#undef ASSEMBLE_NEON_PAIRWISE_OP
+#undef ASSEMBLE_SIMD_SHIFT_LEFT
+#undef ASSEMBLE_SIMD_SHIFT_RIGHT
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+// Assembles branches after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  ArmOperandConverter i(this, instr);
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  Condition cc = FlagsConditionToCondition(branch->condition);
+  __ b(cc, tlabel);
+  if (!branch->fallthru) __ b(flabel);  // no fallthru to flabel.
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
+    return;
+  }
+
+  condition = NegateFlagsCondition(condition);
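+  // Clear the poison register (x eor x == 0) whenever the negated condition
+  // holds; the following csdb acts as a speculation barrier.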
+  __ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         Operand(kSpeculationPoisonRegister), SBit::LeaveCC,
+         FlagsConditionToCondition(condition));
+  __ csdb();
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+    void Generate() final {
+      ArmOperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        // We use the context register as the scratch register, because we do
+        // not have a context here.
+        __ PrepareCallCFunction(0, 0);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          __ stop();
+        }
+      }
+    }
+
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Condition cc = FlagsConditionToCondition(condition);
+  __ b(cc, tlabel);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  ArmOperandConverter i(this, instr);
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
+  Condition cc = FlagsConditionToCondition(condition);
+  __ mov(reg, Operand(0));
+  __ mov(reg, Operand(1), LeaveCC, cc);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  ArmOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  ArmOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  size_t const case_count = instr->InputCount() - 2;
+  // This {cmp} might still emit a constant pool entry.
+  __ cmp(input, Operand(case_count));
+  // Emit any pending constant pool entries first, before we block constant
+  // pool emission for the jump table below.
+  __ CheckConstPool(true, true);
+  __ BlockConstPoolFor(case_count + 2);
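+  // Reading pc in Arm mode yields the address of this instruction + 8, so
+  // the add below indexes directly into the branch table that follows.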
+  __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
+  __ b(GetLabel(i.InputRpo(1)));
+  for (size_t index = 0; index < case_count; ++index) {
+    __ b(GetLabel(i.InputRpo(index + 2)));
+  }
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {
+    frame->AlignSavedCalleeRegisterSlots();
+  }
+
+  if (saves_fp != 0) {
+    // Save callee-saved FP registers.
+    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
+    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
+    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
+    DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
+    frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    // Save callee-saved registers.
+    frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves));
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ StubPrologue(StackFrame::C_WASM_ENTRY);
+        // Reserve stack space for saving the c_entry_fp later.
+        __ AllocateStackSpace(kSystemPointerSize);
+      } else {
+        __ Push(lr, fp);
+        __ mov(fp, sp);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      __ StubPrologue(info()->GetOutputStackFrameType());
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ Push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ ldr(kJSFunctionRegister,
+               FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ ldr(kWasmInstanceRegister,
+               FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        __ Push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ AllocateStackSpace(kSystemPointerSize);
+        }
+      }
+    }
+
+    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+
+  if (required_slots > 0) {
+    DCHECK(frame_access_state()->has_frame());
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
+        UseScratchRegisterScope temps(tasm());
+        Register scratch = temps.Acquire();
+        __ ldr(scratch, FieldMemOperand(
+                            kWasmInstanceRegister,
+                            WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ ldr(scratch, MemOperand(scratch));
+        __ add(scratch, scratch, Operand(required_slots * kSystemPointerSize));
+        __ cmp(sp, scratch);
+        __ b(cs, &done);
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly, there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ stop();
+      }
+
+      __ bind(&done);
+    }
+
+    // Skip callee-saved and return slots, which are pushed below.
+    required_slots -= base::bits::CountPopulation(saves);
+    required_slots -= frame()->GetReturnSlotCount();
+    required_slots -= 2 * base::bits::CountPopulation(saves_fp);
+    if (required_slots > 0) {
+      __ AllocateStackSpace(required_slots * kSystemPointerSize);
+    }
+  }
+
+  if (saves_fp != 0) {
+    // Save callee-saved FP registers.
+    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
+    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
+    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
+    DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
+    __ vstm(db_w, sp, DwVfpRegister::from_code(first),
+            DwVfpRegister::from_code(last));
+  }
+
+  if (saves != 0) {
+    // Save callee-saved registers.
+    __ stm(db_w, sp, saves);
+  }
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Create space for returns.
+    __ AllocateStackSpace(returns * kSystemPointerSize);
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Free space of returns.
+    __ add(sp, sp, Operand(returns * kSystemPointerSize));
+  }
+
+  // Restore registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    __ ldm(ia_w, sp, saves);
+  }
+
+  // Restore FP registers.
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {
+    STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
+    uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
+    uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
+    __ vldm(ia_w, sp, DwVfpRegister::from_code(first),
+            DwVfpRegister::from_code(last));
+  }
+
+  unwinding_info_writer_.MarkBlockWillExit();
+
+  // We might need r3 for scratch.
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & r3.bit());
+  ArmOperandConverter g(this, nullptr);
+  const int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} == 0.
+  // Check RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ cmp(g.ToRegister(additional_pop_count), Operand(0));
+      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
+    }
+  }
+
+  Register argc_reg = r3;
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, this is a builtin marked with
+  // kDontAdaptArgumentsSentinel, which pops its JS arguments itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now, unless they have a
+    // variable number of stack slot pops.
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ b(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count.
+      __ ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    __ add(argc_reg, argc_reg, Operand(1));  // Also pop the receiver.
+    if (parameter_count > 1) {
+      __ cmp(argc_reg, Operand(parameter_count));
+      __ mov(argc_reg, Operand(parameter_count), LeaveCC, lt);
+    }
+    __ Drop(argc_reg);
+  } else if (additional_pop_count->IsImmediate()) {
+    DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    __ Drop(parameter_count + additional_count);
+  } else if (parameter_count == 0) {
+    __ Drop(g.ToRegister(additional_pop_count));
+  } else {
+    // {additional_pop_count} is guaranteed to be zero if {parameter_count !=
+    // 0}. Check RawMachineAssembler::PopAndReturn.
+    __ Drop(parameter_count);
+  }
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {
+  __ CheckConstPool(true, false);
+}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  ArmOperandConverter g(this, nullptr);
+  // Helper function to write the given constant to the dst register.
+  auto MoveConstantToRegister = [&](Register dst, Constant src) {
+    if (src.type() == Constant::kHeapObject) {
+      Handle<HeapObject> src_object = src.ToHeapObject();
+      RootIndex index;
+      if (IsMaterializableFromRoot(src_object, &index)) {
+        __ LoadRoot(dst, index);
+      } else {
+        __ Move(dst, src_object);
+      }
+    } else if (src.type() == Constant::kExternalReference) {
+      __ Move(dst, src.ToExternalReference());
+    } else {
+      __ mov(dst, g.ToImmediate(source));
+    }
+  };
+  switch (MoveType::InferMove(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ mov(g.ToRegister(destination), g.ToRegister(source));
+      } else if (source->IsFloatRegister()) {
+        DCHECK(destination->IsFloatRegister());
+        // GapResolver may give us reg codes that don't map to actual
+        // s-registers. Generate code to work around those cases.
+        int src_code = LocationOperand::cast(source)->register_code();
+        int dst_code = LocationOperand::cast(destination)->register_code();
+        __ VmovExtended(dst_code, src_code);
+      } else if (source->IsDoubleRegister()) {
+        __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+      } else {
+        __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      MemOperand dst = g.ToMemOperand(destination);
+      if (source->IsRegister()) {
+        __ str(g.ToRegister(source), dst);
+      } else if (source->IsFloatRegister()) {
+        // GapResolver may give us reg codes that don't map to actual
+        // s-registers. Generate code to work around those cases.
+        int src_code = LocationOperand::cast(source)->register_code();
+        __ VmovExtended(dst, src_code);
+      } else if (source->IsDoubleRegister()) {
+        __ vstr(g.ToDoubleRegister(source), dst);
+      } else {
+        UseScratchRegisterScope temps(tasm());
+        Register temp = temps.Acquire();
+        QwNeonRegister src = g.ToSimd128Register(source);
+        __ add(temp, dst.rn(), Operand(dst.offset()));
+        __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
+      }
+      return;
+    }
+    case MoveType::kStackToRegister: {
+      MemOperand src = g.ToMemOperand(source);
+      if (source->IsStackSlot()) {
+        __ ldr(g.ToRegister(destination), src);
+      } else if (source->IsFloatStackSlot()) {
+        DCHECK(destination->IsFloatRegister());
+        // GapResolver may give us reg codes that don't map to actual
+        // s-registers. Generate code to work around those cases.
+        int dst_code = LocationOperand::cast(destination)->register_code();
+        __ VmovExtended(dst_code, src);
+      } else if (source->IsDoubleStackSlot()) {
+        __ vldr(g.ToDoubleRegister(destination), src);
+      } else {
+        UseScratchRegisterScope temps(tasm());
+        Register temp = temps.Acquire();
+        QwNeonRegister dst = g.ToSimd128Register(destination);
+        __ add(temp, src.rn(), Operand(src.offset()));
+        __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      MemOperand src = g.ToMemOperand(source);
+      MemOperand dst = g.ToMemOperand(destination);
+      UseScratchRegisterScope temps(tasm());
+      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
+        SwVfpRegister temp = temps.AcquireS();
+        __ vldr(temp, src);
+        __ vstr(temp, dst);
+      } else if (source->IsDoubleStackSlot()) {
+        DwVfpRegister temp = temps.AcquireD();
+        __ vldr(temp, src);
+        __ vstr(temp, dst);
+      } else {
+        DCHECK(source->IsSimd128StackSlot());
+        Register temp = temps.Acquire();
+        QwNeonRegister temp_q = temps.AcquireQ();
+        __ add(temp, src.rn(), Operand(src.offset()));
+        __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
+        __ add(temp, dst.rn(), Operand(dst.offset()));
+        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
+      }
+      return;
+    }
+    case MoveType::kConstantToRegister: {
+      Constant src = g.ToConstant(source);
+      if (destination->IsRegister()) {
+        MoveConstantToRegister(g.ToRegister(destination), src);
+      } else if (destination->IsFloatRegister()) {
+        __ vmov(g.ToFloatRegister(destination),
+                Float32::FromBits(src.ToFloat32AsInt()));
+      } else {
+        // TODO(arm): Look into optimizing this further if possible. Supporting
+        // the NEON version of VMOV may help.
+        __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
+      }
+      return;
+    }
+    case MoveType::kConstantToStack: {
+      Constant src = g.ToConstant(source);
+      MemOperand dst = g.ToMemOperand(destination);
+      if (destination->IsStackSlot()) {
+        UseScratchRegisterScope temps(tasm());
+        // Acquire an S register instead of a general-purpose register in case
+        // `vstr` needs one to compute the address of `dst`.
+        SwVfpRegister s_temp = temps.AcquireS();
+        {
+          // TODO(arm): This sequence could be optimized further if necessary by
+          // writing the constant directly into `s_temp`.
+          UseScratchRegisterScope temps(tasm());
+          Register temp = temps.Acquire();
+          MoveConstantToRegister(temp, src);
+          __ vmov(s_temp, temp);
+        }
+        __ vstr(s_temp, dst);
+      } else if (destination->IsFloatStackSlot()) {
+        UseScratchRegisterScope temps(tasm());
+        SwVfpRegister temp = temps.AcquireS();
+        __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
+        __ vstr(temp, dst);
+      } else {
+        DCHECK(destination->IsDoubleStackSlot());
+        UseScratchRegisterScope temps(tasm());
+        DwVfpRegister temp = temps.AcquireD();
+        // TODO(arm): Look into optimizing this further if possible. Supporting
+        // the NEON version of VMOV may help.
+        __ vmov(temp, src.ToFloat64());
+        __ vstr(temp, g.ToMemOperand(destination));
+      }
+      return;
+    }
+  }
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  ArmOperandConverter g(this, nullptr);
+  switch (MoveType::InferSwap(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ Swap(g.ToRegister(source), g.ToRegister(destination));
+      } else if (source->IsFloatRegister()) {
+        DCHECK(destination->IsFloatRegister());
+        // GapResolver may give us reg codes that don't map to actual
+        // s-registers. Generate code to work around those cases.
+        UseScratchRegisterScope temps(tasm());
+        LowDwVfpRegister temp = temps.AcquireLowD();
+        int src_code = LocationOperand::cast(source)->register_code();
+        int dst_code = LocationOperand::cast(destination)->register_code();
+        __ VmovExtended(temp.low().code(), src_code);
+        __ VmovExtended(src_code, dst_code);
+        __ VmovExtended(dst_code, temp.low().code());
+      } else if (source->IsDoubleRegister()) {
+        __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
+      } else {
+        __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      MemOperand dst = g.ToMemOperand(destination);
+      if (source->IsRegister()) {
+        Register src = g.ToRegister(source);
+        UseScratchRegisterScope temps(tasm());
+        SwVfpRegister temp = temps.AcquireS();
+        __ vmov(temp, src);
+        __ ldr(src, dst);
+        __ vstr(temp, dst);
+      } else if (source->IsFloatRegister()) {
+        int src_code = LocationOperand::cast(source)->register_code();
+        UseScratchRegisterScope temps(tasm());
+        LowDwVfpRegister temp = temps.AcquireLowD();
+        __ VmovExtended(temp.low().code(), src_code);
+        __ VmovExtended(src_code, dst);
+        __ vstr(temp.low(), dst);
+      } else if (source->IsDoubleRegister()) {
+        UseScratchRegisterScope temps(tasm());
+        DwVfpRegister temp = temps.AcquireD();
+        DwVfpRegister src = g.ToDoubleRegister(source);
+        __ Move(temp, src);
+        __ vldr(src, dst);
+        __ vstr(temp, dst);
+      } else {
+        QwNeonRegister src = g.ToSimd128Register(source);
+        UseScratchRegisterScope temps(tasm());
+        Register temp = temps.Acquire();
+        QwNeonRegister temp_q = temps.AcquireQ();
+        __ Move(temp_q, src);
+        __ add(temp, dst.rn(), Operand(dst.offset()));
+        __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
+        __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      MemOperand src = g.ToMemOperand(source);
+      MemOperand dst = g.ToMemOperand(destination);
+      if (source->IsStackSlot() || source->IsFloatStackSlot()) {
+        UseScratchRegisterScope temps(tasm());
+        SwVfpRegister temp_0 = temps.AcquireS();
+        SwVfpRegister temp_1 = temps.AcquireS();
+        __ vldr(temp_0, dst);
+        __ vldr(temp_1, src);
+        __ vstr(temp_0, src);
+        __ vstr(temp_1, dst);
+      } else if (source->IsDoubleStackSlot()) {
+        UseScratchRegisterScope temps(tasm());
+        LowDwVfpRegister temp = temps.AcquireLowD();
+        if (temps.CanAcquireD()) {
+          DwVfpRegister temp_0 = temp;
+          DwVfpRegister temp_1 = temps.AcquireD();
+          __ vldr(temp_0, dst);
+          __ vldr(temp_1, src);
+          __ vstr(temp_0, src);
+          __ vstr(temp_1, dst);
+        } else {
+          // We only have a single D register available. However, we can split
+          // it into 2 S registers and swap the slots 32 bits at a time.
+          MemOperand src0 = src;
+          MemOperand dst0 = dst;
+          MemOperand src1(src.rn(), src.offset() + kFloatSize);
+          MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
+          SwVfpRegister temp_0 = temp.low();
+          SwVfpRegister temp_1 = temp.high();
+          __ vldr(temp_0, dst0);
+          __ vldr(temp_1, src0);
+          __ vstr(temp_0, src0);
+          __ vstr(temp_1, dst0);
+          __ vldr(temp_0, dst1);
+          __ vldr(temp_1, src1);
+          __ vstr(temp_0, src1);
+          __ vstr(temp_1, dst1);
+        }
+      } else {
+        DCHECK(source->IsSimd128StackSlot());
+        MemOperand src0 = src;
+        MemOperand dst0 = dst;
+        MemOperand src1(src.rn(), src.offset() + kDoubleSize);
+        MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
+        UseScratchRegisterScope temps(tasm());
+        DwVfpRegister temp_0 = temps.AcquireD();
+        DwVfpRegister temp_1 = temps.AcquireD();
+        __ vldr(temp_0, dst0);
+        __ vldr(temp_1, src0);
+        __ vstr(temp_0, src0);
+        __ vstr(temp_1, dst0);
+        __ vldr(temp_0, dst1);
+        __ vldr(temp_1, src1);
+        __ vstr(temp_0, src1);
+        __ vstr(temp_1, dst1);
+      }
+      return;
+    }
+    default:
+      UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  // On 32-bit ARM we emit the jump tables inline.
+  UNREACHABLE();
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm/instruction-codes-arm.h b/src/compiler/backend/arm/instruction-codes-arm.h
new file mode 100644
index 0000000..f4629ff
--- /dev/null
+++ b/src/compiler/backend/arm/instruction-codes-arm.h
@@ -0,0 +1,369 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_ARM_INSTRUCTION_CODES_ARM_H_
+#define V8_COMPILER_BACKEND_ARM_INSTRUCTION_CODES_ARM_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// ARM-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V) \
+  V(ArmAdd)                        \
+  V(ArmAnd)                        \
+  V(ArmBic)                        \
+  V(ArmClz)                        \
+  V(ArmCmp)                        \
+  V(ArmCmn)                        \
+  V(ArmTst)                        \
+  V(ArmTeq)                        \
+  V(ArmOrr)                        \
+  V(ArmEor)                        \
+  V(ArmSub)                        \
+  V(ArmRsb)                        \
+  V(ArmMul)                        \
+  V(ArmMla)                        \
+  V(ArmMls)                        \
+  V(ArmSmull)                      \
+  V(ArmSmmul)                      \
+  V(ArmSmmla)                      \
+  V(ArmUmull)                      \
+  V(ArmSdiv)                       \
+  V(ArmUdiv)                       \
+  V(ArmMov)                        \
+  V(ArmMvn)                        \
+  V(ArmBfc)                        \
+  V(ArmUbfx)                       \
+  V(ArmSbfx)                       \
+  V(ArmSxtb)                       \
+  V(ArmSxth)                       \
+  V(ArmSxtab)                      \
+  V(ArmSxtah)                      \
+  V(ArmUxtb)                       \
+  V(ArmUxth)                       \
+  V(ArmUxtab)                      \
+  V(ArmRbit)                       \
+  V(ArmRev)                        \
+  V(ArmUxtah)                      \
+  V(ArmAddPair)                    \
+  V(ArmSubPair)                    \
+  V(ArmMulPair)                    \
+  V(ArmLslPair)                    \
+  V(ArmLsrPair)                    \
+  V(ArmAsrPair)                    \
+  V(ArmVcmpF32)                    \
+  V(ArmVaddF32)                    \
+  V(ArmVsubF32)                    \
+  V(ArmVmulF32)                    \
+  V(ArmVmlaF32)                    \
+  V(ArmVmlsF32)                    \
+  V(ArmVdivF32)                    \
+  V(ArmVabsF32)                    \
+  V(ArmVnegF32)                    \
+  V(ArmVsqrtF32)                   \
+  V(ArmVcmpF64)                    \
+  V(ArmVaddF64)                    \
+  V(ArmVsubF64)                    \
+  V(ArmVmulF64)                    \
+  V(ArmVmlaF64)                    \
+  V(ArmVmlsF64)                    \
+  V(ArmVdivF64)                    \
+  V(ArmVmodF64)                    \
+  V(ArmVabsF64)                    \
+  V(ArmVnegF64)                    \
+  V(ArmVsqrtF64)                   \
+  V(ArmVrintmF32)                  \
+  V(ArmVrintmF64)                  \
+  V(ArmVrintpF32)                  \
+  V(ArmVrintpF64)                  \
+  V(ArmVrintzF32)                  \
+  V(ArmVrintzF64)                  \
+  V(ArmVrintaF64)                  \
+  V(ArmVrintnF32)                  \
+  V(ArmVrintnF64)                  \
+  V(ArmVcvtF32F64)                 \
+  V(ArmVcvtF64F32)                 \
+  V(ArmVcvtF32S32)                 \
+  V(ArmVcvtF32U32)                 \
+  V(ArmVcvtF64S32)                 \
+  V(ArmVcvtF64U32)                 \
+  V(ArmVcvtS32F32)                 \
+  V(ArmVcvtU32F32)                 \
+  V(ArmVcvtS32F64)                 \
+  V(ArmVcvtU32F64)                 \
+  V(ArmVmovU32F32)                 \
+  V(ArmVmovF32U32)                 \
+  V(ArmVmovLowU32F64)              \
+  V(ArmVmovLowF64U32)              \
+  V(ArmVmovHighU32F64)             \
+  V(ArmVmovHighF64U32)             \
+  V(ArmVmovF64U32U32)              \
+  V(ArmVmovU32U32F64)              \
+  V(ArmVldrF32)                    \
+  V(ArmVstrF32)                    \
+  V(ArmVldrF64)                    \
+  V(ArmVld1F64)                    \
+  V(ArmVstrF64)                    \
+  V(ArmVst1F64)                    \
+  V(ArmVld1S128)                   \
+  V(ArmVst1S128)                   \
+  V(ArmFloat32Max)                 \
+  V(ArmFloat64Max)                 \
+  V(ArmFloat32Min)                 \
+  V(ArmFloat64Min)                 \
+  V(ArmFloat64SilenceNaN)          \
+  V(ArmLdrb)                       \
+  V(ArmLdrsb)                      \
+  V(ArmStrb)                       \
+  V(ArmLdrh)                       \
+  V(ArmLdrsh)                      \
+  V(ArmStrh)                       \
+  V(ArmLdr)                        \
+  V(ArmStr)                        \
+  V(ArmPush)                       \
+  V(ArmPoke)                       \
+  V(ArmPeek)                       \
+  V(ArmDmbIsh)                     \
+  V(ArmDsbIsb)                     \
+  V(ArmF64x2Splat)                 \
+  V(ArmF64x2ExtractLane)           \
+  V(ArmF64x2ReplaceLane)           \
+  V(ArmF64x2Abs)                   \
+  V(ArmF64x2Neg)                   \
+  V(ArmF64x2Sqrt)                  \
+  V(ArmF64x2Add)                   \
+  V(ArmF64x2Sub)                   \
+  V(ArmF64x2Mul)                   \
+  V(ArmF64x2Div)                   \
+  V(ArmF64x2Min)                   \
+  V(ArmF64x2Max)                   \
+  V(ArmF64x2Eq)                    \
+  V(ArmF64x2Ne)                    \
+  V(ArmF64x2Lt)                    \
+  V(ArmF64x2Le)                    \
+  V(ArmF64x2Pmin)                  \
+  V(ArmF64x2Pmax)                  \
+  V(ArmF64x2Ceil)                  \
+  V(ArmF64x2Floor)                 \
+  V(ArmF64x2Trunc)                 \
+  V(ArmF64x2NearestInt)            \
+  V(ArmF32x4Splat)                 \
+  V(ArmF32x4ExtractLane)           \
+  V(ArmF32x4ReplaceLane)           \
+  V(ArmF32x4SConvertI32x4)         \
+  V(ArmF32x4UConvertI32x4)         \
+  V(ArmF32x4Abs)                   \
+  V(ArmF32x4Neg)                   \
+  V(ArmF32x4Sqrt)                  \
+  V(ArmF32x4RecipApprox)           \
+  V(ArmF32x4RecipSqrtApprox)       \
+  V(ArmF32x4Add)                   \
+  V(ArmF32x4AddHoriz)              \
+  V(ArmF32x4Sub)                   \
+  V(ArmF32x4Mul)                   \
+  V(ArmF32x4Div)                   \
+  V(ArmF32x4Min)                   \
+  V(ArmF32x4Max)                   \
+  V(ArmF32x4Eq)                    \
+  V(ArmF32x4Ne)                    \
+  V(ArmF32x4Lt)                    \
+  V(ArmF32x4Le)                    \
+  V(ArmF32x4Pmin)                  \
+  V(ArmF32x4Pmax)                  \
+  V(ArmI64x2SplatI32Pair)          \
+  V(ArmI64x2ReplaceLaneI32Pair)    \
+  V(ArmI64x2Neg)                   \
+  V(ArmI64x2Shl)                   \
+  V(ArmI64x2ShrS)                  \
+  V(ArmI64x2Add)                   \
+  V(ArmI64x2Sub)                   \
+  V(ArmI64x2Mul)                   \
+  V(ArmI64x2ShrU)                  \
+  V(ArmI32x4Splat)                 \
+  V(ArmI32x4ExtractLane)           \
+  V(ArmI32x4ReplaceLane)           \
+  V(ArmI32x4SConvertF32x4)         \
+  V(ArmI32x4SConvertI16x8Low)      \
+  V(ArmI32x4SConvertI16x8High)     \
+  V(ArmI32x4Neg)                   \
+  V(ArmI32x4Shl)                   \
+  V(ArmI32x4ShrS)                  \
+  V(ArmI32x4Add)                   \
+  V(ArmI32x4AddHoriz)              \
+  V(ArmI32x4Sub)                   \
+  V(ArmI32x4Mul)                   \
+  V(ArmI32x4MinS)                  \
+  V(ArmI32x4MaxS)                  \
+  V(ArmI32x4Eq)                    \
+  V(ArmI32x4Ne)                    \
+  V(ArmI32x4GtS)                   \
+  V(ArmI32x4GeS)                   \
+  V(ArmI32x4UConvertF32x4)         \
+  V(ArmI32x4UConvertI16x8Low)      \
+  V(ArmI32x4UConvertI16x8High)     \
+  V(ArmI32x4ShrU)                  \
+  V(ArmI32x4MinU)                  \
+  V(ArmI32x4MaxU)                  \
+  V(ArmI32x4GtU)                   \
+  V(ArmI32x4GeU)                   \
+  V(ArmI32x4Abs)                   \
+  V(ArmI32x4BitMask)               \
+  V(ArmI32x4DotI16x8S)             \
+  V(ArmI16x8Splat)                 \
+  V(ArmI16x8ExtractLaneS)          \
+  V(ArmI16x8ReplaceLane)           \
+  V(ArmI16x8SConvertI8x16Low)      \
+  V(ArmI16x8SConvertI8x16High)     \
+  V(ArmI16x8Neg)                   \
+  V(ArmI16x8Shl)                   \
+  V(ArmI16x8ShrS)                  \
+  V(ArmI16x8SConvertI32x4)         \
+  V(ArmI16x8Add)                   \
+  V(ArmI16x8AddSatS)               \
+  V(ArmI16x8AddHoriz)              \
+  V(ArmI16x8Sub)                   \
+  V(ArmI16x8SubSatS)               \
+  V(ArmI16x8Mul)                   \
+  V(ArmI16x8MinS)                  \
+  V(ArmI16x8MaxS)                  \
+  V(ArmI16x8Eq)                    \
+  V(ArmI16x8Ne)                    \
+  V(ArmI16x8GtS)                   \
+  V(ArmI16x8GeS)                   \
+  V(ArmI16x8ExtractLaneU)          \
+  V(ArmI16x8UConvertI8x16Low)      \
+  V(ArmI16x8UConvertI8x16High)     \
+  V(ArmI16x8ShrU)                  \
+  V(ArmI16x8UConvertI32x4)         \
+  V(ArmI16x8AddSatU)               \
+  V(ArmI16x8SubSatU)               \
+  V(ArmI16x8MinU)                  \
+  V(ArmI16x8MaxU)                  \
+  V(ArmI16x8GtU)                   \
+  V(ArmI16x8GeU)                   \
+  V(ArmI16x8RoundingAverageU)      \
+  V(ArmI16x8Abs)                   \
+  V(ArmI16x8BitMask)               \
+  V(ArmI8x16Splat)                 \
+  V(ArmI8x16ExtractLaneS)          \
+  V(ArmI8x16ReplaceLane)           \
+  V(ArmI8x16Neg)                   \
+  V(ArmI8x16Shl)                   \
+  V(ArmI8x16ShrS)                  \
+  V(ArmI8x16SConvertI16x8)         \
+  V(ArmI8x16Add)                   \
+  V(ArmI8x16AddSatS)               \
+  V(ArmI8x16Sub)                   \
+  V(ArmI8x16SubSatS)               \
+  V(ArmI8x16Mul)                   \
+  V(ArmI8x16MinS)                  \
+  V(ArmI8x16MaxS)                  \
+  V(ArmI8x16Eq)                    \
+  V(ArmI8x16Ne)                    \
+  V(ArmI8x16GtS)                   \
+  V(ArmI8x16GeS)                   \
+  V(ArmI8x16ExtractLaneU)          \
+  V(ArmI8x16ShrU)                  \
+  V(ArmI8x16UConvertI16x8)         \
+  V(ArmI8x16AddSatU)               \
+  V(ArmI8x16SubSatU)               \
+  V(ArmI8x16MinU)                  \
+  V(ArmI8x16MaxU)                  \
+  V(ArmI8x16GtU)                   \
+  V(ArmI8x16GeU)                   \
+  V(ArmI8x16RoundingAverageU)      \
+  V(ArmI8x16Abs)                   \
+  V(ArmI8x16BitMask)               \
+  V(ArmS128Const)                  \
+  V(ArmS128Zero)                   \
+  V(ArmS128AllOnes)                \
+  V(ArmS128Dup)                    \
+  V(ArmS128And)                    \
+  V(ArmS128Or)                     \
+  V(ArmS128Xor)                    \
+  V(ArmS128Not)                    \
+  V(ArmS128Select)                 \
+  V(ArmS128AndNot)                 \
+  V(ArmS32x4ZipLeft)               \
+  V(ArmS32x4ZipRight)              \
+  V(ArmS32x4UnzipLeft)             \
+  V(ArmS32x4UnzipRight)            \
+  V(ArmS32x4TransposeLeft)         \
+  V(ArmS32x4TransposeRight)        \
+  V(ArmS32x4Shuffle)               \
+  V(ArmS16x8ZipLeft)               \
+  V(ArmS16x8ZipRight)              \
+  V(ArmS16x8UnzipLeft)             \
+  V(ArmS16x8UnzipRight)            \
+  V(ArmS16x8TransposeLeft)         \
+  V(ArmS16x8TransposeRight)        \
+  V(ArmS8x16ZipLeft)               \
+  V(ArmS8x16ZipRight)              \
+  V(ArmS8x16UnzipLeft)             \
+  V(ArmS8x16UnzipRight)            \
+  V(ArmS8x16TransposeLeft)         \
+  V(ArmS8x16TransposeRight)        \
+  V(ArmS8x16Concat)                \
+  V(ArmI8x16Swizzle)               \
+  V(ArmI8x16Shuffle)               \
+  V(ArmS32x2Reverse)               \
+  V(ArmS16x4Reverse)               \
+  V(ArmS16x2Reverse)               \
+  V(ArmS8x8Reverse)                \
+  V(ArmS8x4Reverse)                \
+  V(ArmS8x2Reverse)                \
+  V(ArmV32x4AnyTrue)               \
+  V(ArmV32x4AllTrue)               \
+  V(ArmV16x8AnyTrue)               \
+  V(ArmV16x8AllTrue)               \
+  V(ArmV8x16AnyTrue)               \
+  V(ArmV8x16AllTrue)               \
+  V(ArmS128Load8Splat)             \
+  V(ArmS128Load16Splat)            \
+  V(ArmS128Load32Splat)            \
+  V(ArmS128Load64Splat)            \
+  V(ArmS128Load8x8S)               \
+  V(ArmS128Load8x8U)               \
+  V(ArmS128Load16x4S)              \
+  V(ArmS128Load16x4U)              \
+  V(ArmS128Load32x2S)              \
+  V(ArmS128Load32x2U)              \
+  V(ArmS128Load32Zero)             \
+  V(ArmS128Load64Zero)             \
+  V(ArmWord32AtomicPairLoad)       \
+  V(ArmWord32AtomicPairStore)      \
+  V(ArmWord32AtomicPairAdd)        \
+  V(ArmWord32AtomicPairSub)        \
+  V(ArmWord32AtomicPairAnd)        \
+  V(ArmWord32AtomicPairOr)         \
+  V(ArmWord32AtomicPairXor)        \
+  V(ArmWord32AtomicPairExchange)   \
+  V(ArmWord32AtomicPairCompareExchange)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+#define TARGET_ADDRESSING_MODE_LIST(V)  \
+  V(Offset_RI)        /* [%r0 + K] */   \
+  V(Offset_RR)        /* [%r0 + %r1] */ \
+  V(Operand2_I)       /* K */           \
+  V(Operand2_R)       /* %r0 */         \
+  V(Operand2_R_ASR_I) /* %r0 ASR K */   \
+  V(Operand2_R_LSL_I) /* %r0 LSL K */   \
+  V(Operand2_R_LSR_I) /* %r0 LSR K */   \
+  V(Operand2_R_ROR_I) /* %r0 ROR K */   \
+  V(Operand2_R_ASR_R) /* %r0 ASR %r1 */ \
+  V(Operand2_R_LSL_R) /* %r0 LSL %r1 */ \
+  V(Operand2_R_LSR_R) /* %r0 LSR %r1 */ \
+  V(Operand2_R_ROR_R) /* %r0 ROR %r1 */ \
+  V(Root)             /* [%rr + K] */
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_ARM_INSTRUCTION_CODES_ARM_H_
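For context, TARGET_ARCH_OPCODE_LIST and TARGET_ADDRESSING_MODE_LIST above are X-macro
lists: the shared backend header expands them to define the kArm* enumerators and the
kMode_* addressing modes used throughout the files in this patch. Below is a minimal
sketch of that expansion pattern, with made-up enum/array names; the real definitions
live in src/compiler/backend/instruction-codes.h and are not part of this change.

  // Expand each V(Name) entry into an enumerator kName...
  #define DECLARE_OPCODE(Name) k##Name,
  enum IllustrativeArchOpcode { TARGET_ARCH_OPCODE_LIST(DECLARE_OPCODE) };
  #undef DECLARE_OPCODE

  // ...and into a parallel table of printable opcode names.
  #define DECLARE_NAME(Name) #Name,
  const char* const kIllustrativeOpcodeNames[] = {
      TARGET_ARCH_OPCODE_LIST(DECLARE_NAME)};
  #undef DECLARE_NAME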
diff --git a/src/compiler/backend/arm/instruction-scheduler-arm.cc b/src/compiler/backend/arm/instruction-scheduler-arm.cc
new file mode 100644
index 0000000..70fb1a7
--- /dev/null
+++ b/src/compiler/backend/arm/instruction-scheduler-arm.cc
@@ -0,0 +1,369 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kArmAdd:
+    case kArmAnd:
+    case kArmBic:
+    case kArmClz:
+    case kArmCmp:
+    case kArmCmn:
+    case kArmTst:
+    case kArmTeq:
+    case kArmOrr:
+    case kArmEor:
+    case kArmSub:
+    case kArmRsb:
+    case kArmMul:
+    case kArmMla:
+    case kArmMls:
+    case kArmSmmul:
+    case kArmSmull:
+    case kArmSmmla:
+    case kArmUmull:
+    case kArmSdiv:
+    case kArmUdiv:
+    case kArmMov:
+    case kArmMvn:
+    case kArmBfc:
+    case kArmUbfx:
+    case kArmSbfx:
+    case kArmSxtb:
+    case kArmSxth:
+    case kArmSxtab:
+    case kArmSxtah:
+    case kArmUxtb:
+    case kArmUxth:
+    case kArmUxtab:
+    case kArmUxtah:
+    case kArmRbit:
+    case kArmRev:
+    case kArmAddPair:
+    case kArmSubPair:
+    case kArmMulPair:
+    case kArmLslPair:
+    case kArmLsrPair:
+    case kArmAsrPair:
+    case kArmVcmpF32:
+    case kArmVaddF32:
+    case kArmVsubF32:
+    case kArmVmulF32:
+    case kArmVmlaF32:
+    case kArmVmlsF32:
+    case kArmVdivF32:
+    case kArmVabsF32:
+    case kArmVnegF32:
+    case kArmVsqrtF32:
+    case kArmVcmpF64:
+    case kArmVaddF64:
+    case kArmVsubF64:
+    case kArmVmulF64:
+    case kArmVmlaF64:
+    case kArmVmlsF64:
+    case kArmVdivF64:
+    case kArmVmodF64:
+    case kArmVabsF64:
+    case kArmVnegF64:
+    case kArmVsqrtF64:
+    case kArmVrintmF32:
+    case kArmVrintmF64:
+    case kArmVrintpF32:
+    case kArmVrintpF64:
+    case kArmVrintzF32:
+    case kArmVrintzF64:
+    case kArmVrintaF64:
+    case kArmVrintnF32:
+    case kArmVrintnF64:
+    case kArmVcvtF32F64:
+    case kArmVcvtF64F32:
+    case kArmVcvtF32S32:
+    case kArmVcvtF32U32:
+    case kArmVcvtF64S32:
+    case kArmVcvtF64U32:
+    case kArmVcvtS32F32:
+    case kArmVcvtU32F32:
+    case kArmVcvtS32F64:
+    case kArmVcvtU32F64:
+    case kArmVmovU32F32:
+    case kArmVmovF32U32:
+    case kArmVmovLowU32F64:
+    case kArmVmovLowF64U32:
+    case kArmVmovHighU32F64:
+    case kArmVmovHighF64U32:
+    case kArmVmovF64U32U32:
+    case kArmVmovU32U32F64:
+    case kArmFloat32Max:
+    case kArmFloat64Max:
+    case kArmFloat32Min:
+    case kArmFloat64Min:
+    case kArmFloat64SilenceNaN:
+    case kArmF64x2Splat:
+    case kArmF64x2ExtractLane:
+    case kArmF64x2ReplaceLane:
+    case kArmF64x2Abs:
+    case kArmF64x2Neg:
+    case kArmF64x2Sqrt:
+    case kArmF64x2Add:
+    case kArmF64x2Sub:
+    case kArmF64x2Mul:
+    case kArmF64x2Div:
+    case kArmF64x2Min:
+    case kArmF64x2Max:
+    case kArmF64x2Eq:
+    case kArmF64x2Ne:
+    case kArmF64x2Lt:
+    case kArmF64x2Le:
+    case kArmF64x2Pmin:
+    case kArmF64x2Pmax:
+    case kArmF64x2Ceil:
+    case kArmF64x2Floor:
+    case kArmF64x2Trunc:
+    case kArmF64x2NearestInt:
+    case kArmF32x4Splat:
+    case kArmF32x4ExtractLane:
+    case kArmF32x4ReplaceLane:
+    case kArmF32x4SConvertI32x4:
+    case kArmF32x4UConvertI32x4:
+    case kArmF32x4Abs:
+    case kArmF32x4Neg:
+    case kArmF32x4Sqrt:
+    case kArmF32x4RecipApprox:
+    case kArmF32x4RecipSqrtApprox:
+    case kArmF32x4Add:
+    case kArmF32x4AddHoriz:
+    case kArmF32x4Sub:
+    case kArmF32x4Mul:
+    case kArmF32x4Div:
+    case kArmF32x4Min:
+    case kArmF32x4Max:
+    case kArmF32x4Eq:
+    case kArmF32x4Ne:
+    case kArmF32x4Lt:
+    case kArmF32x4Le:
+    case kArmF32x4Pmin:
+    case kArmF32x4Pmax:
+    case kArmI64x2SplatI32Pair:
+    case kArmI64x2ReplaceLaneI32Pair:
+    case kArmI64x2Neg:
+    case kArmI64x2Shl:
+    case kArmI64x2ShrS:
+    case kArmI64x2Add:
+    case kArmI64x2Sub:
+    case kArmI64x2Mul:
+    case kArmI64x2ShrU:
+    case kArmI32x4Splat:
+    case kArmI32x4ExtractLane:
+    case kArmI32x4ReplaceLane:
+    case kArmI32x4SConvertF32x4:
+    case kArmI32x4SConvertI16x8Low:
+    case kArmI32x4SConvertI16x8High:
+    case kArmI32x4Neg:
+    case kArmI32x4Shl:
+    case kArmI32x4ShrS:
+    case kArmI32x4Add:
+    case kArmI32x4AddHoriz:
+    case kArmI32x4Sub:
+    case kArmI32x4Mul:
+    case kArmI32x4MinS:
+    case kArmI32x4MaxS:
+    case kArmI32x4Eq:
+    case kArmI32x4Ne:
+    case kArmI32x4GtS:
+    case kArmI32x4GeS:
+    case kArmI32x4UConvertF32x4:
+    case kArmI32x4UConvertI16x8Low:
+    case kArmI32x4UConvertI16x8High:
+    case kArmI32x4ShrU:
+    case kArmI32x4MinU:
+    case kArmI32x4MaxU:
+    case kArmI32x4GtU:
+    case kArmI32x4GeU:
+    case kArmI32x4Abs:
+    case kArmI32x4BitMask:
+    case kArmI32x4DotI16x8S:
+    case kArmI16x8Splat:
+    case kArmI16x8ExtractLaneS:
+    case kArmI16x8ReplaceLane:
+    case kArmI16x8SConvertI8x16Low:
+    case kArmI16x8SConvertI8x16High:
+    case kArmI16x8Neg:
+    case kArmI16x8Shl:
+    case kArmI16x8ShrS:
+    case kArmI16x8SConvertI32x4:
+    case kArmI16x8Add:
+    case kArmI16x8AddSatS:
+    case kArmI16x8AddHoriz:
+    case kArmI16x8Sub:
+    case kArmI16x8SubSatS:
+    case kArmI16x8Mul:
+    case kArmI16x8MinS:
+    case kArmI16x8MaxS:
+    case kArmI16x8Eq:
+    case kArmI16x8Ne:
+    case kArmI16x8GtS:
+    case kArmI16x8GeS:
+    case kArmI16x8ExtractLaneU:
+    case kArmI16x8UConvertI8x16Low:
+    case kArmI16x8UConvertI8x16High:
+    case kArmI16x8ShrU:
+    case kArmI16x8UConvertI32x4:
+    case kArmI16x8AddSatU:
+    case kArmI16x8SubSatU:
+    case kArmI16x8MinU:
+    case kArmI16x8MaxU:
+    case kArmI16x8GtU:
+    case kArmI16x8GeU:
+    case kArmI16x8RoundingAverageU:
+    case kArmI16x8Abs:
+    case kArmI16x8BitMask:
+    case kArmI8x16Splat:
+    case kArmI8x16ExtractLaneS:
+    case kArmI8x16ReplaceLane:
+    case kArmI8x16Neg:
+    case kArmI8x16Shl:
+    case kArmI8x16ShrS:
+    case kArmI8x16SConvertI16x8:
+    case kArmI8x16Add:
+    case kArmI8x16AddSatS:
+    case kArmI8x16Sub:
+    case kArmI8x16SubSatS:
+    case kArmI8x16Mul:
+    case kArmI8x16MinS:
+    case kArmI8x16MaxS:
+    case kArmI8x16Eq:
+    case kArmI8x16Ne:
+    case kArmI8x16GtS:
+    case kArmI8x16GeS:
+    case kArmI8x16ExtractLaneU:
+    case kArmI8x16UConvertI16x8:
+    case kArmI8x16AddSatU:
+    case kArmI8x16SubSatU:
+    case kArmI8x16ShrU:
+    case kArmI8x16MinU:
+    case kArmI8x16MaxU:
+    case kArmI8x16GtU:
+    case kArmI8x16GeU:
+    case kArmI8x16RoundingAverageU:
+    case kArmI8x16Abs:
+    case kArmI8x16BitMask:
+    case kArmS128Const:
+    case kArmS128Zero:
+    case kArmS128AllOnes:
+    case kArmS128Dup:
+    case kArmS128And:
+    case kArmS128Or:
+    case kArmS128Xor:
+    case kArmS128Not:
+    case kArmS128Select:
+    case kArmS128AndNot:
+    case kArmS32x4ZipLeft:
+    case kArmS32x4ZipRight:
+    case kArmS32x4UnzipLeft:
+    case kArmS32x4UnzipRight:
+    case kArmS32x4TransposeLeft:
+    case kArmS32x4TransposeRight:
+    case kArmS32x4Shuffle:
+    case kArmS16x8ZipLeft:
+    case kArmS16x8ZipRight:
+    case kArmS16x8UnzipLeft:
+    case kArmS16x8UnzipRight:
+    case kArmS16x8TransposeLeft:
+    case kArmS16x8TransposeRight:
+    case kArmS8x16ZipLeft:
+    case kArmS8x16ZipRight:
+    case kArmS8x16UnzipLeft:
+    case kArmS8x16UnzipRight:
+    case kArmS8x16TransposeLeft:
+    case kArmS8x16TransposeRight:
+    case kArmS8x16Concat:
+    case kArmI8x16Swizzle:
+    case kArmI8x16Shuffle:
+    case kArmS32x2Reverse:
+    case kArmS16x4Reverse:
+    case kArmS16x2Reverse:
+    case kArmS8x8Reverse:
+    case kArmS8x4Reverse:
+    case kArmS8x2Reverse:
+    case kArmV32x4AnyTrue:
+    case kArmV32x4AllTrue:
+    case kArmV16x8AnyTrue:
+    case kArmV16x8AllTrue:
+    case kArmV8x16AnyTrue:
+    case kArmV8x16AllTrue:
+      return kNoOpcodeFlags;
+
+    case kArmVldrF32:
+    case kArmVldrF64:
+    case kArmVld1F64:
+    case kArmVld1S128:
+    case kArmLdrb:
+    case kArmLdrsb:
+    case kArmLdrh:
+    case kArmLdrsh:
+    case kArmLdr:
+    case kArmPeek:
+    case kArmWord32AtomicPairLoad:
+    case kArmS128Load8Splat:
+    case kArmS128Load16Splat:
+    case kArmS128Load32Splat:
+    case kArmS128Load64Splat:
+    case kArmS128Load8x8S:
+    case kArmS128Load8x8U:
+    case kArmS128Load16x4S:
+    case kArmS128Load16x4U:
+    case kArmS128Load32x2S:
+    case kArmS128Load32x2U:
+    case kArmS128Load32Zero:
+    case kArmS128Load64Zero:
+      return kIsLoadOperation;
+
+    case kArmVstrF32:
+    case kArmVstrF64:
+    case kArmVst1F64:
+    case kArmVst1S128:
+    case kArmStrb:
+    case kArmStrh:
+    case kArmStr:
+    case kArmPush:
+    case kArmPoke:
+    case kArmDmbIsh:
+    case kArmDsbIsb:
+    case kArmWord32AtomicPairStore:
+    case kArmWord32AtomicPairAdd:
+    case kArmWord32AtomicPairSub:
+    case kArmWord32AtomicPairAnd:
+    case kArmWord32AtomicPairOr:
+    case kArmWord32AtomicPairXor:
+    case kArmWord32AtomicPairExchange:
+    case kArmWord32AtomicPairCompareExchange:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // TODO(all): Add instruction cost modeling.
+  return 1;
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
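For context: GetTargetInstructionFlags() above sorts every Arm opcode into one of three
buckets: pure computations (kNoOpcodeFlags), loads (kIsLoadOperation), and instructions
with observable side effects (kHasSideEffect), such as stores, pushes, barriers and the
atomic pair operations. The architecture-independent scheduler uses these bits to decide
which instructions may be reordered. A minimal sketch of the kind of rule they enable;
MayReorder is a hypothetical helper, not the actual logic in
src/compiler/backend/instruction-scheduler.cc.

  bool MayReorder(int flags_a, int flags_b) {
    // Anything with side effects (stores, barriers, atomics) acts as a
    // fence: program order is preserved on both sides of it.
    if ((flags_a & kHasSideEffect) || (flags_b & kHasSideEffect)) return false;
    // Loads and pure computations only need to respect data dependencies,
    // which the scheduler tracks separately.
    return true;
  }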
diff --git a/src/compiler/backend/arm/instruction-selector-arm.cc b/src/compiler/backend/arm/instruction-selector-arm.cc
new file mode 100644
index 0000000..248f765
--- /dev/null
+++ b/src/compiler/backend/arm/instruction-selector-arm.cc
@@ -0,0 +1,3088 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/bits.h"
+#include "src/base/enum-set.h"
+#include "src/base/iterator.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Adds Arm-specific methods for generating InstructionOperands.
+class ArmOperandGenerator : public OperandGenerator {
+ public:
+  explicit ArmOperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  bool CanBeImmediate(int32_t value) const {
+    return Assembler::ImmediateFitsAddrMode1Instruction(value);
+  }
+
+  bool CanBeImmediate(uint32_t value) const {
+    return CanBeImmediate(bit_cast<int32_t>(value));
+  }
+
+  bool CanBeImmediate(Node* node, InstructionCode opcode) {
+    Int32Matcher m(node);
+    if (!m.HasResolvedValue()) return false;
+    int32_t value = m.ResolvedValue();
+    switch (ArchOpcodeField::decode(opcode)) {
+      case kArmAnd:
+      case kArmMov:
+      case kArmMvn:
+      case kArmBic:
+        return CanBeImmediate(value) || CanBeImmediate(~value);
+
+      case kArmAdd:
+      case kArmSub:
+      case kArmCmp:
+      case kArmCmn:
+        return CanBeImmediate(value) || CanBeImmediate(-value);
+
+      case kArmTst:
+      case kArmTeq:
+      case kArmOrr:
+      case kArmEor:
+      case kArmRsb:
+        return CanBeImmediate(value);
+
+      case kArmVldrF32:
+      case kArmVstrF32:
+      case kArmVldrF64:
+      case kArmVstrF64:
+        return value >= -1020 && value <= 1020 && (value % 4) == 0;
+
+      case kArmLdrb:
+      case kArmLdrsb:
+      case kArmStrb:
+      case kArmLdr:
+      case kArmStr:
+        return value >= -4095 && value <= 4095;
+
+      case kArmLdrh:
+      case kArmLdrsh:
+      case kArmStrh:
+        return value >= -255 && value <= 255;
+
+      default:
+        break;
+    }
+    return false;
+  }
+};
+
+namespace {
+
+void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  ArmOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  ArmOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
+                       Node* node, int width) {
+  ArmOperandGenerator g(selector);
+  Int32Matcher m(node->InputAt(1));
+  if (m.HasResolvedValue()) {
+    if (m.IsMultipleOf(width)) {
+      selector->EmitIdentity(node);
+    } else {
+      selector->Emit(opcode, g.DefineAsRegister(node),
+                     g.UseRegister(node->InputAt(0)),
+                     g.UseImmediate(node->InputAt(1)));
+    }
+  } else {
+    InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
+    selector->Emit(opcode, g.DefineAsRegister(node),
+                   g.UseUniqueRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
+  }
+}
+
+void VisitRRRShuffle(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  ArmOperandGenerator g(selector);
+  // Swap inputs to save an instruction in the CodeGenerator for High ops.
+  if (opcode == kArmS32x4ZipRight || opcode == kArmS32x4UnzipRight ||
+      opcode == kArmS32x4TransposeRight || opcode == kArmS16x8ZipRight ||
+      opcode == kArmS16x8UnzipRight || opcode == kArmS16x8TransposeRight ||
+      opcode == kArmS8x16ZipRight || opcode == kArmS8x16UnzipRight ||
+      opcode == kArmS8x16TransposeRight) {
+    Node* in0 = node->InputAt(0);
+    Node* in1 = node->InputAt(1);
+    node->ReplaceInput(0, in1);
+    node->ReplaceInput(1, in0);
+  }
+  // Use DefineSameAsFirst for binary ops that clobber their inputs, e.g. the
+  // NEON vzip, vuzp, and vtrn instructions.
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+void VisitRRI(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  ArmOperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm));
+}
+
+void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  ArmOperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+template <IrOpcode::Value kOpcode, int kImmMin, int kImmMax,
+          AddressingMode kImmMode, AddressingMode kRegMode>
+bool TryMatchShift(InstructionSelector* selector,
+                   InstructionCode* opcode_return, Node* node,
+                   InstructionOperand* value_return,
+                   InstructionOperand* shift_return) {
+  ArmOperandGenerator g(selector);
+  if (node->opcode() == kOpcode) {
+    Int32BinopMatcher m(node);
+    *value_return = g.UseRegister(m.left().node());
+    if (m.right().IsInRange(kImmMin, kImmMax)) {
+      *opcode_return |= AddressingModeField::encode(kImmMode);
+      *shift_return = g.UseImmediate(m.right().node());
+    } else {
+      *opcode_return |= AddressingModeField::encode(kRegMode);
+      *shift_return = g.UseRegister(m.right().node());
+    }
+    return true;
+  }
+  return false;
+}
+
+template <IrOpcode::Value kOpcode, int kImmMin, int kImmMax,
+          AddressingMode kImmMode>
+bool TryMatchShiftImmediate(InstructionSelector* selector,
+                            InstructionCode* opcode_return, Node* node,
+                            InstructionOperand* value_return,
+                            InstructionOperand* shift_return) {
+  ArmOperandGenerator g(selector);
+  if (node->opcode() == kOpcode) {
+    Int32BinopMatcher m(node);
+    if (m.right().IsInRange(kImmMin, kImmMax)) {
+      *opcode_return |= AddressingModeField::encode(kImmMode);
+      *value_return = g.UseRegister(m.left().node());
+      *shift_return = g.UseImmediate(m.right().node());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool TryMatchROR(InstructionSelector* selector, InstructionCode* opcode_return,
+                 Node* node, InstructionOperand* value_return,
+                 InstructionOperand* shift_return) {
+  return TryMatchShift<IrOpcode::kWord32Ror, 1, 31, kMode_Operand2_R_ROR_I,
+                       kMode_Operand2_R_ROR_R>(selector, opcode_return, node,
+                                               value_return, shift_return);
+}
+
+bool TryMatchASR(InstructionSelector* selector, InstructionCode* opcode_return,
+                 Node* node, InstructionOperand* value_return,
+                 InstructionOperand* shift_return) {
+  return TryMatchShift<IrOpcode::kWord32Sar, 1, 32, kMode_Operand2_R_ASR_I,
+                       kMode_Operand2_R_ASR_R>(selector, opcode_return, node,
+                                               value_return, shift_return);
+}
+
+bool TryMatchLSL(InstructionSelector* selector, InstructionCode* opcode_return,
+                 Node* node, InstructionOperand* value_return,
+                 InstructionOperand* shift_return) {
+  return TryMatchShift<IrOpcode::kWord32Shl, 0, 31, kMode_Operand2_R_LSL_I,
+                       kMode_Operand2_R_LSL_R>(selector, opcode_return, node,
+                                               value_return, shift_return);
+}
+
+bool TryMatchLSLImmediate(InstructionSelector* selector,
+                          InstructionCode* opcode_return, Node* node,
+                          InstructionOperand* value_return,
+                          InstructionOperand* shift_return) {
+  return TryMatchShiftImmediate<IrOpcode::kWord32Shl, 0, 31,
+                                kMode_Operand2_R_LSL_I>(
+      selector, opcode_return, node, value_return, shift_return);
+}
+
+bool TryMatchLSR(InstructionSelector* selector, InstructionCode* opcode_return,
+                 Node* node, InstructionOperand* value_return,
+                 InstructionOperand* shift_return) {
+  return TryMatchShift<IrOpcode::kWord32Shr, 1, 32, kMode_Operand2_R_LSR_I,
+                       kMode_Operand2_R_LSR_R>(selector, opcode_return, node,
+                                               value_return, shift_return);
+}
+
+bool TryMatchShift(InstructionSelector* selector,
+                   InstructionCode* opcode_return, Node* node,
+                   InstructionOperand* value_return,
+                   InstructionOperand* shift_return) {
+  return (
+      TryMatchASR(selector, opcode_return, node, value_return, shift_return) ||
+      TryMatchLSL(selector, opcode_return, node, value_return, shift_return) ||
+      TryMatchLSR(selector, opcode_return, node, value_return, shift_return) ||
+      TryMatchROR(selector, opcode_return, node, value_return, shift_return));
+}
+
+bool TryMatchImmediateOrShift(InstructionSelector* selector,
+                              InstructionCode* opcode_return, Node* node,
+                              size_t* input_count_return,
+                              InstructionOperand* inputs) {
+  ArmOperandGenerator g(selector);
+  if (g.CanBeImmediate(node, *opcode_return)) {
+    *opcode_return |= AddressingModeField::encode(kMode_Operand2_I);
+    inputs[0] = g.UseImmediate(node);
+    *input_count_return = 1;
+    return true;
+  }
+  if (TryMatchShift(selector, opcode_return, node, &inputs[0], &inputs[1])) {
+    *input_count_return = 2;
+    return true;
+  }
+  return false;
+}
+
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, InstructionCode reverse_opcode,
+                FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  if (m.left().node() == m.right().node()) {
+    // If both inputs refer to the same operand, enforce allocating a register
+    // for both of them to ensure that we don't end up generating code like
+    // this:
+    //
+    //   mov r0, r1, asr #16
+    //   adds r0, r0, r1, asr #16
+    //   bvs label
+    InstructionOperand const input = g.UseRegister(m.left().node());
+    opcode |= AddressingModeField::encode(kMode_Operand2_R);
+    inputs[input_count++] = input;
+    inputs[input_count++] = input;
+  } else if (TryMatchImmediateOrShift(selector, &opcode, m.right().node(),
+                                      &input_count, &inputs[1])) {
+    inputs[0] = g.UseRegister(m.left().node());
+    input_count++;
+  } else if (TryMatchImmediateOrShift(selector, &reverse_opcode,
+                                      m.left().node(), &input_count,
+                                      &inputs[1])) {
+    inputs[0] = g.UseRegister(m.right().node());
+    opcode = reverse_opcode;
+    input_count++;
+  } else {
+    opcode |= AddressingModeField::encode(kMode_Operand2_R);
+    inputs[input_count++] = g.UseRegister(m.left().node());
+    inputs[input_count++] = g.UseRegister(m.right().node());
+  }
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_EQ(1u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+  DCHECK_NE(kMode_None, AddressingModeField::decode(opcode));
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, InstructionCode reverse_opcode) {
+  FlagsContinuation cont;
+  VisitBinop(selector, node, opcode, reverse_opcode, &cont);
+}
+
+void EmitDiv(InstructionSelector* selector, ArchOpcode div_opcode,
+             ArchOpcode f64i32_opcode, ArchOpcode i32f64_opcode,
+             InstructionOperand result_operand, InstructionOperand left_operand,
+             InstructionOperand right_operand) {
+  ArmOperandGenerator g(selector);
+  if (selector->IsSupported(SUDIV)) {
+    selector->Emit(div_opcode, result_operand, left_operand, right_operand);
+    return;
+  }
+  InstructionOperand left_double_operand = g.TempDoubleRegister();
+  InstructionOperand right_double_operand = g.TempDoubleRegister();
+  InstructionOperand result_double_operand = g.TempDoubleRegister();
+  selector->Emit(f64i32_opcode, left_double_operand, left_operand);
+  selector->Emit(f64i32_opcode, right_double_operand, right_operand);
+  selector->Emit(kArmVdivF64, result_double_operand, left_double_operand,
+                 right_double_operand);
+  selector->Emit(i32f64_opcode, result_operand, result_double_operand);
+}
+
+void VisitDiv(InstructionSelector* selector, Node* node, ArchOpcode div_opcode,
+              ArchOpcode f64i32_opcode, ArchOpcode i32f64_opcode) {
+  ArmOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  EmitDiv(selector, div_opcode, f64i32_opcode, i32f64_opcode,
+          g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+          g.UseRegister(m.right().node()));
+}
+
+void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode div_opcode,
+              ArchOpcode f64i32_opcode, ArchOpcode i32f64_opcode) {
+  ArmOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand div_operand = g.TempRegister();
+  InstructionOperand result_operand = g.DefineAsRegister(node);
+  InstructionOperand left_operand = g.UseRegister(m.left().node());
+  InstructionOperand right_operand = g.UseRegister(m.right().node());
+  EmitDiv(selector, div_opcode, f64i32_opcode, i32f64_opcode, div_operand,
+          left_operand, right_operand);
+  if (selector->IsSupported(ARMv7)) {
+    selector->Emit(kArmMls, result_operand, div_operand, right_operand,
+                   left_operand);
+  } else {
+    InstructionOperand mul_operand = g.TempRegister();
+    selector->Emit(kArmMul, mul_operand, div_operand, right_operand);
+    selector->Emit(kArmSub | AddressingModeField::encode(kMode_Operand2_R),
+                   result_operand, left_operand, mul_operand);
+  }
+}
+
+// Adds the base and offset into a register, then changes the addressing
+// mode of opcode_return to use this register. Certain instructions, e.g.
+// vld1 and vst1, when given two registers, will post-increment the offset, i.e.
+// perform the operation at base, then add offset to base. What we intend is to
+// access at (base+offset).
+void EmitAddBeforeS128LoadStore(InstructionSelector* selector,
+                                InstructionCode* opcode_return,
+                                size_t* input_count_return,
+                                InstructionOperand* inputs) {
+  ArmOperandGenerator g(selector);
+  InstructionOperand addr = g.TempRegister();
+  InstructionCode op = kArmAdd;
+  op |= AddressingModeField::encode(kMode_Operand2_R);
+  selector->Emit(op, 1, &addr, 2, inputs);
+  *opcode_return |= AddressingModeField::encode(kMode_Operand2_R);
+  *input_count_return -= 1;
+  inputs[0] = addr;
+}
+
+void EmitLoad(InstructionSelector* selector, InstructionCode opcode,
+              InstructionOperand* output, Node* base, Node* index) {
+  ArmOperandGenerator g(selector);
+  InstructionOperand inputs[3];
+  size_t input_count = 2;
+
+  ExternalReferenceMatcher m(base);
+  if (m.HasResolvedValue() &&
+      selector->CanAddressRelativeToRootsRegister(m.ResolvedValue())) {
+    Int32Matcher int_matcher(index);
+    if (int_matcher.HasResolvedValue()) {
+      ptrdiff_t const delta =
+          int_matcher.ResolvedValue() +
+          TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+              selector->isolate(), m.ResolvedValue());
+      input_count = 1;
+      inputs[0] = g.UseImmediate(static_cast<int32_t>(delta));
+      opcode |= AddressingModeField::encode(kMode_Root);
+      selector->Emit(opcode, 1, output, input_count, inputs);
+      return;
+    }
+  }
+
+  inputs[0] = g.UseRegister(base);
+  if (g.CanBeImmediate(index, opcode)) {
+    inputs[1] = g.UseImmediate(index);
+    opcode |= AddressingModeField::encode(kMode_Offset_RI);
+  } else if ((opcode == kArmLdr) &&
+             TryMatchLSLImmediate(selector, &opcode, index, &inputs[1],
+                                  &inputs[2])) {
+    input_count = 3;
+  } else {
+    inputs[1] = g.UseRegister(index);
+    if (opcode == kArmVld1S128) {
+      EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[0]);
+    } else {
+      opcode |= AddressingModeField::encode(kMode_Offset_RR);
+    }
+  }
+  selector->Emit(opcode, 1, output, input_count, inputs);
+}
+
+void EmitStore(InstructionSelector* selector, InstructionCode opcode,
+               size_t input_count, InstructionOperand* inputs, Node* index) {
+  ArmOperandGenerator g(selector);
+
+  if (g.CanBeImmediate(index, opcode)) {
+    inputs[input_count++] = g.UseImmediate(index);
+    opcode |= AddressingModeField::encode(kMode_Offset_RI);
+  } else if ((opcode == kArmStr) &&
+             TryMatchLSLImmediate(selector, &opcode, index, &inputs[2],
+                                  &inputs[3])) {
+    input_count = 4;
+  } else {
+    inputs[input_count++] = g.UseRegister(index);
+    if (opcode == kArmVst1S128) {
+      // Inputs are value, base, index; we only care about base and index.
+      EmitAddBeforeS128LoadStore(selector, &opcode, &input_count, &inputs[1]);
+    } else {
+      opcode |= AddressingModeField::encode(kMode_Offset_RR);
+    }
+  }
+  selector->Emit(opcode, 0, nullptr, input_count, inputs);
+}
+
+void VisitPairAtomicBinOp(InstructionSelector* selector, Node* node,
+                          ArchOpcode opcode) {
+  ArmOperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegister(value), g.UseUniqueRegister(value_high),
+      g.UseUniqueRegister(base), g.UseUniqueRegister(index)};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[6];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister();
+  temps[temp_count++] = g.TempRegister(r6);
+  temps[temp_count++] = g.TempRegister(r7);
+  temps[temp_count++] = g.TempRegister();
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, r2);
+  } else {
+    temps[temp_count++] = g.TempRegister(r2);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, r3);
+  } else {
+    temps[temp_count++] = g.TempRegister(r3);
+  }
+  selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
+                 temp_count, temps);
+}
+
+}  // namespace
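The anonymous namespace above provides the operand-shaping helpers that the visitor
methods below build on: TryMatchImmediateOrShift() folds an encodable immediate or a
shifted register into ARM's flexible second operand, and VisitBinop() tries both
operand orders (opcode vs. reverse_opcode) so the fold can apply to either input. A
minimal sketch of how a visitor typically calls it, with deliberately simplified
bodies (the real VisitInt32Add/VisitInt32Sub in this file also try to fold multiplies
into mla/mls/smmla first):

  void InstructionSelector::VisitInt32Add(Node* node) {
    // Addition commutes, so the same opcode serves both operand orders.
    VisitBinop(this, node, kArmAdd, kArmAdd);
  }

  void InstructionSelector::VisitInt32Sub(Node* node) {
    // Subtraction does not commute; with the operands swapped it becomes rsb.
    VisitBinop(this, node, kArmSub, kArmRsb);
  }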
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), r1));
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  InstructionCode opcode = kArchNop;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kArmS128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kArmS128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kArmS128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kArmS128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kArmS128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kArmS128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kArmS128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kArmS128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kArmS128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kArmS128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kArmS128Load32Zero;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kArmS128Load64Zero;
+      break;
+    default:
+      UNIMPLEMENTED();
+  }
+
+  ArmOperandGenerator g(this);
+  InstructionOperand output = g.DefineAsRegister(node);
+  InstructionOperand inputs[2];
+  size_t input_count = 2;
+  inputs[0] = g.UseRegister(node->InputAt(0));
+  inputs[1] = g.UseRegister(node->InputAt(1));
+  EmitAddBeforeS128LoadStore(this, &opcode, &input_count, &inputs[0]);
+  Emit(opcode, 1, &output, input_count, inputs);
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  InstructionCode opcode = kArchNop;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kArmVldrF32;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kArmVldrF64;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsUnsigned() ? kArmLdrb : kArmLdrsb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsUnsigned() ? kArmLdrh : kArmLdrsh;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord32:
+      opcode = kArmLdr;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kArmVld1S128;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kWord64:             // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+
+  InstructionOperand output = g.DefineAsRegister(node);
+  EmitLoad(this, opcode, &output, base, index);
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers &&
+      CanBeTaggedOrCompressedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedPointer(rep));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    // OutOfLineRecordWrite uses the index in an 'add' instruction as well as
+    // for the store itself, so we must check compatibility with both.
+    if (g.CanBeImmediate(index, kArmAdd) && g.CanBeImmediate(index, kArmStr)) {
+      inputs[input_count++] = g.UseImmediate(index);
+      addressing_mode = kMode_Offset_RI;
+    } else {
+      inputs[input_count++] = g.UseUniqueRegister(index);
+      addressing_mode = kMode_Offset_RR;
+    }
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, input_count, inputs);
+  } else {
+    InstructionCode opcode = kArchNop;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kArmVstrF32;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kArmVstrF64;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kArmStrb;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kArmStrh;
+        break;
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:         // Fall through.
+      case MachineRepresentation::kWord32:
+        opcode = kArmStr;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kArmVst1S128;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:         // Fall through.
+      case MachineRepresentation::kWord64:             // Fall through.
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    ExternalReferenceMatcher m(base);
+    if (m.HasResolvedValue() &&
+        CanAddressRelativeToRootsRegister(m.ResolvedValue())) {
+      Int32Matcher int_matcher(index);
+      if (int_matcher.HasResolvedValue()) {
+        ptrdiff_t const delta =
+            int_matcher.ResolvedValue() +
+            TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+                isolate(), m.ResolvedValue());
+        int input_count = 2;
+        InstructionOperand inputs[2];
+        inputs[0] = g.UseRegister(value);
+        inputs[1] = g.UseImmediate(static_cast<int32_t>(delta));
+        opcode |= AddressingModeField::encode(kMode_Root);
+        Emit(opcode, 0, nullptr, input_count, inputs);
+        return;
+      }
+    }
+
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseRegister(value);
+    inputs[input_count++] = g.UseRegister(base);
+    EmitStore(this, opcode, input_count, inputs, index);
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitUnalignedLoad(Node* node) {
+  MachineRepresentation load_rep =
+      LoadRepresentationOf(node->op()).representation();
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  InstructionCode opcode = kArmLdr;
+  // Only floating point loads need to be specially handled; integer loads
+  // support unaligned access. We support unaligned FP loads by loading to
+  // integer registers first, then moving to the destination FP register. If
+  // NEON is supported, we use the vld1.8 instruction.
+  switch (load_rep) {
+    case MachineRepresentation::kFloat32: {
+      InstructionOperand temp = g.TempRegister();
+      EmitLoad(this, opcode, &temp, base, index);
+      Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
+      return;
+    }
+    case MachineRepresentation::kFloat64: {
+      // Compute the address of the least-significant byte of the FP value.
+      // We assume that the base node is unlikely to be an encodable immediate
+      // or the result of a shift operation, so only consider the addressing
+      // mode that should be used for the index node.
+      InstructionCode add_opcode = kArmAdd;
+      InstructionOperand inputs[3];
+      inputs[0] = g.UseRegister(base);
+
+      size_t input_count;
+      if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
+                                   &inputs[1])) {
+        // input_count has been set by TryMatchImmediateOrShift(), so
+        // increment it to account for the base register in inputs[0].
+        input_count++;
+      } else {
+        add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
+        inputs[1] = g.UseRegister(index);
+        input_count = 2;  // Base register and index.
+      }
+
+      InstructionOperand addr = g.TempRegister();
+      Emit(add_opcode, 1, &addr, input_count, inputs);
+
+      if (CpuFeatures::IsSupported(NEON)) {
+        // With NEON we can load directly from the calculated address.
+        InstructionCode op = kArmVld1F64;
+        op |= AddressingModeField::encode(kMode_Operand2_R);
+        Emit(op, g.DefineAsRegister(node), addr);
+      } else {
+        // Load both halves and move to an FP register.
+        InstructionOperand fp_lo = g.TempRegister();
+        InstructionOperand fp_hi = g.TempRegister();
+        opcode |= AddressingModeField::encode(kMode_Offset_RI);
+        Emit(opcode, fp_lo, addr, g.TempImmediate(0));
+        Emit(opcode, fp_hi, addr, g.TempImmediate(4));
+        Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
+      }
+      return;
+    }
+    default:
+      // All other cases should support unaligned accesses.
+      UNREACHABLE();
+  }
+}
+
+void InstructionSelector::VisitUnalignedStore(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+
+  UnalignedStoreRepresentation store_rep =
+      UnalignedStoreRepresentationOf(node->op());
+
+  // Only floating point stores need to be specially handled; integer stores
+  // support unaligned access. We support unaligned FP stores by moving the
+  // value to integer registers first, then storing to the destination address.
+  // If NEON is supported, we use the vst1.8 instruction.
+  switch (store_rep) {
+    case MachineRepresentation::kFloat32: {
+      inputs[input_count++] = g.TempRegister();
+      Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));
+      inputs[input_count++] = g.UseRegister(base);
+      EmitStore(this, kArmStr, input_count, inputs, index);
+      return;
+    }
+    case MachineRepresentation::kFloat64: {
+      if (CpuFeatures::IsSupported(NEON)) {
+        InstructionOperand address = g.TempRegister();
+        {
+          // First we have to calculate the actual address.
+          InstructionCode add_opcode = kArmAdd;
+          InstructionOperand inputs[3];
+          inputs[0] = g.UseRegister(base);
+
+          size_t input_count;
+          if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
+                                       &inputs[1])) {
+            // input_count has been set by TryMatchImmediateOrShift(), so
+            // increment it to account for the base register in inputs[0].
+            input_count++;
+          } else {
+            add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
+            inputs[1] = g.UseRegister(index);
+            input_count = 2;  // Base register and index.
+          }
+
+          Emit(add_opcode, 1, &address, input_count, inputs);
+        }
+
+        inputs[input_count++] = g.UseRegister(value);
+        inputs[input_count++] = address;
+        InstructionCode op = kArmVst1F64;
+        op |= AddressingModeField::encode(kMode_Operand2_R);
+        Emit(op, 0, nullptr, input_count, inputs);
+      } else {
+        // Store a 64-bit floating point value using two 32-bit integer stores.
+        // Computing the store address here would require three live temporary
+        // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
+        // storing the least-significant half of the value.
+
+        // First, move the 64-bit FP value into two temporary integer registers.
+        InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
+        inputs[input_count++] = g.UseRegister(value);
+        Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);
+
+        // Store the least-significant half.
+        inputs[0] = fp[0];  // Low 32-bits of FP value.
+        inputs[input_count++] =
+            g.UseRegister(base);  // First store base address.
+        EmitStore(this, kArmStr, input_count, inputs, index);
+
+        // Store the most-significant half.
+        InstructionOperand base4 = g.TempRegister();
+        Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,
+             g.UseRegister(base), g.TempImmediate(4));  // Compute base + 4.
+        inputs[0] = fp[1];  // High 32-bits of FP value.
+        inputs[1] = base4;  // Second store base + 4 address.
+        EmitStore(this, kArmStr, input_count, inputs, index);
+      }
+      return;
+    }
+    default:
+      // All other cases should support unaligned accesses.
+      UNREACHABLE();
+  }
+}
+
+namespace {
+
+void EmitBic(InstructionSelector* selector, Node* node, Node* left,
+             Node* right) {
+  ArmOperandGenerator g(selector);
+  InstructionCode opcode = kArmBic;
+  InstructionOperand value_operand;
+  InstructionOperand shift_operand;
+  if (TryMatchShift(selector, &opcode, right, &value_operand, &shift_operand)) {
+    selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(left),
+                   value_operand, shift_operand);
+    return;
+  }
+  selector->Emit(opcode | AddressingModeField::encode(kMode_Operand2_R),
+                 g.DefineAsRegister(node), g.UseRegister(left),
+                 g.UseRegister(right));
+}
+
+void EmitUbfx(InstructionSelector* selector, Node* node, Node* left,
+              uint32_t lsb, uint32_t width) {
+  DCHECK_LE(lsb, 31u);
+  DCHECK_LE(1u, width);
+  DCHECK_LE(width, 32u - lsb);
+  ArmOperandGenerator g(selector);
+  selector->Emit(kArmUbfx, g.DefineAsRegister(node), g.UseRegister(left),
+                 g.TempImmediate(lsb), g.TempImmediate(width));
+}
+
+}  // namespace
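Several rewrites in VisitWord32And() below hinge on what fits in an ARM Operand2
immediate (an 8-bit value rotated right by an even amount). A small worked example
for the bic rewrite, with AndViaBic as a hypothetical name used only for
illustration: the mask 0xFFFFFF00 is not encodable, but its complement 0xFF is, so
the selector prefers kArmBic with the inverted immediate over materializing the mask
in a register.

  #include <cstdint>

  // "and rT, rX, #0xffffff00" is not encodable, but "bic rT, rX, #0xff"
  // computes the same value, because bic masks with the complement of its
  // immediate.
  uint32_t AndViaBic(uint32_t x) {
    return x & ~UINT32_C(0xFF);  // == x & 0xFFFFFF00
  }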
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Xor() && CanCover(node, m.left().node())) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().Is(-1)) {
+      EmitBic(this, node, m.right().node(), mleft.left().node());
+      return;
+    }
+  }
+  if (m.right().IsWord32Xor() && CanCover(node, m.right().node())) {
+    Int32BinopMatcher mright(m.right().node());
+    if (mright.right().Is(-1)) {
+      EmitBic(this, node, m.left().node(), mright.left().node());
+      return;
+    }
+  }
+  if (m.right().HasResolvedValue()) {
+    uint32_t const value = m.right().ResolvedValue();
+    uint32_t width = base::bits::CountPopulation(value);
+    uint32_t leading_zeros = base::bits::CountLeadingZeros32(value);
+
+    // Try to merge SHR operations on the left hand input into this AND.
+    if (m.left().IsWord32Shr()) {
+      Int32BinopMatcher mshr(m.left().node());
+      if (mshr.right().HasResolvedValue()) {
+        uint32_t const shift = mshr.right().ResolvedValue();
+
+        if (((shift == 8) || (shift == 16) || (shift == 24)) &&
+            (value == 0xFF)) {
+          // Merge SHR into AND by emitting a UXTB instruction with a
+          // bytewise rotation.
+          Emit(kArmUxtb, g.DefineAsRegister(m.node()),
+               g.UseRegister(mshr.left().node()),
+               g.TempImmediate(mshr.right().ResolvedValue()));
+          return;
+        } else if (((shift == 8) || (shift == 16)) && (value == 0xFFFF)) {
+          // Merge SHR into AND by emitting a UXTH instruction with a
+          // bytewise rotation.
+          Emit(kArmUxth, g.DefineAsRegister(m.node()),
+               g.UseRegister(mshr.left().node()),
+               g.TempImmediate(mshr.right().ResolvedValue()));
+          return;
+        } else if (IsSupported(ARMv7) && (width != 0) &&
+                   ((leading_zeros + width) == 32)) {
+          // Merge Shr into And by emitting a UBFX instruction.
+          DCHECK_EQ(0u, base::bits::CountTrailingZeros32(value));
+          if ((1 <= shift) && (shift <= 31)) {
+            // UBFX cannot extract bits past the register size, however since
+            // shifting the original value would have introduced some zeros we
+            // can still use UBFX with a smaller mask and the remaining bits
+            // will be zeros.
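+            // Illustrative example: "(x >> 20) & 0x1FFF" only has 12 usable
+            // result bits, so it becomes ubfx Rd, Rn, #20, #12.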
+            EmitUbfx(this, node, mshr.left().node(), shift,
+                     std::min(width, 32 - shift));
+            return;
+          }
+        }
+      }
+    } else if (value == 0xFFFF) {
+      // Emit UXTH for this AND. We don't bother testing for UXTB, as it's no
+      // better than AND 0xFF for this operation.
+      Emit(kArmUxth, g.DefineAsRegister(m.node()),
+           g.UseRegister(m.left().node()), g.TempImmediate(0));
+      return;
+    }
+    if (g.CanBeImmediate(~value)) {
+      // Emit BIC for this AND by inverting the immediate value first.
+      Emit(kArmBic | AddressingModeField::encode(kMode_Operand2_I),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(~value));
+      return;
+    }
+    if (!g.CanBeImmediate(value) && IsSupported(ARMv7)) {
+      // If value has 9 to 23 contiguous set bits, and has the lsb set, we can
+      // replace this AND with UBFX. Other contiguous bit patterns have already
+      // been handled by BIC or will be handled by AND.
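+      // Illustrative example: "x & 0x7FF" (11 contiguous bits from bit 0) is
+      // selected as ubfx Rd, Rn, #0, #11.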
+      if ((width != 0) && ((leading_zeros + width) == 32) &&
+          (9 <= leading_zeros) && (leading_zeros <= 23)) {
+        DCHECK_EQ(0u, base::bits::CountTrailingZeros32(value));
+        EmitUbfx(this, node, m.left().node(), 0, width);
+        return;
+      }
+
+      width = 32 - width;
+      leading_zeros = base::bits::CountLeadingZeros32(~value);
+      uint32_t lsb = base::bits::CountTrailingZeros32(~value);
+      if ((leading_zeros + width + lsb) == 32) {
+        // This AND can be replaced with BFC.
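+        // Illustrative example: "x & 0xFF0000FF" clears bits 8..23 and is
+        // selected as bfc Rd, #8, #16, operating on the input in place.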
+        Emit(kArmBfc, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+             g.TempImmediate(lsb), g.TempImmediate(width));
+        return;
+      }
+    }
+  }
+  VisitBinop(this, node, kArmAnd, kArmAnd);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  VisitBinop(this, node, kArmOrr, kArmOrr);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    InstructionCode opcode = kArmMvn;
+    InstructionOperand value_operand;
+    InstructionOperand shift_operand;
+    if (TryMatchShift(this, &opcode, m.left().node(), &value_operand,
+                      &shift_operand)) {
+      Emit(opcode, g.DefineAsRegister(node), value_operand, shift_operand);
+      return;
+    }
+    Emit(opcode | AddressingModeField::encode(kMode_Operand2_R),
+         g.DefineAsRegister(node), g.UseRegister(m.left().node()));
+    return;
+  }
+  VisitBinop(this, node, kArmEor, kArmEor);
+}
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  ArmOperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+namespace {
+
+template <typename TryMatchShift>
+void VisitShift(InstructionSelector* selector, Node* node,
+                TryMatchShift try_match_shift, FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  InstructionCode opcode = kArmMov;
+  InstructionOperand inputs[2];
+  size_t input_count = 2;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  CHECK(try_match_shift(selector, &opcode, node, &inputs[0], &inputs[1]));
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_NE(0u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+  DCHECK_NE(kMode_None, AddressingModeField::decode(opcode));
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+template <typename TryMatchShift>
+void VisitShift(InstructionSelector* selector, Node* node,
+                TryMatchShift try_match_shift) {
+  FlagsContinuation cont;
+  VisitShift(selector, node, try_match_shift, &cont);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  VisitShift(this, node, TryMatchLSL);
+}
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (IsSupported(ARMv7) && m.left().IsWord32And() &&
+      m.right().IsInRange(0, 31)) {
+    uint32_t lsb = m.right().ResolvedValue();
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue()) {
+      uint32_t value =
+          static_cast<uint32_t>(mleft.right().ResolvedValue() >> lsb) << lsb;
+      uint32_t width = base::bits::CountPopulation(value);
+      uint32_t msb = base::bits::CountLeadingZeros32(value);
+      if ((width != 0) && (msb + width + lsb == 32)) {
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(value));
+        return EmitUbfx(this, node, mleft.left().node(), lsb, width);
+      }
+    }
+  }
+  VisitShift(this, node, TryMatchLSR);
+}
+
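+// Sign-extension patterns handled below (illustrative summary):
+// "(x << 16) >> 16" is selected as sxth, "(x << 24) >> 24" as sxtb, and on
+// ARMv7 a general "(x << shl) >> sar" with sar >= shl becomes
+//   sbfx Rd, Rn, #(sar - shl), #(32 - sar)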
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (CanCover(m.node(), m.left().node()) && m.left().IsWord32Shl()) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (m.right().HasResolvedValue() && mleft.right().HasResolvedValue()) {
+      uint32_t sar = m.right().ResolvedValue();
+      uint32_t shl = mleft.right().ResolvedValue();
+      if ((sar == shl) && (sar == 16)) {
+        Emit(kArmSxth, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(0));
+        return;
+      } else if ((sar == shl) && (sar == 24)) {
+        Emit(kArmSxtb, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(0));
+        return;
+      } else if (IsSupported(ARMv7) && (sar >= shl)) {
+        Emit(kArmSbfx, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(sar - shl),
+             g.TempImmediate(32 - sar));
+        return;
+      }
+    }
+  }
+  VisitShift(this, node, TryMatchASR);
+}
+
+void InstructionSelector::VisitInt32PairAdd(Node* node) {
+  ArmOperandGenerator g(this);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the output
+    // registers.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    Emit(kArmAddPair, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_R),
+         g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.UseRegister(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairSub(Node* node) {
+  ArmOperandGenerator g(this);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the output
+    // register.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    Emit(kArmSubPair, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    Emit(kArmSub | AddressingModeField::encode(kMode_Operand2_R),
+         g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.UseRegister(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairMul(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                   g.UseUniqueRegister(node->InputAt(1)),
+                                   g.UseUniqueRegister(node->InputAt(2)),
+                                   g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    Emit(kArmMulPair, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    Emit(kArmMul | AddressingModeField::encode(kMode_Operand2_R),
+         g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.UseRegister(node->InputAt(2)));
+  }
+}
+
+namespace {
+// Shared routine for multiple shift operations.
+void VisitWord32PairShift(InstructionSelector* selector, InstructionCode opcode,
+                          Node* node) {
+  ArmOperandGenerator g(selector);
+  // We use g.UseUniqueRegister here to guarantee that there is
+  // no register aliasing of input registers with output registers.
+  Int32Matcher m(node->InputAt(2));
+  InstructionOperand shift_operand;
+  if (m.HasResolvedValue()) {
+    shift_operand = g.UseImmediate(m.node());
+  } else {
+    shift_operand = g.UseUniqueRegister(m.node());
+  }
+
+  InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                 g.UseUniqueRegister(node->InputAt(1)),
+                                 shift_operand};
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+
+  InstructionOperand outputs[2];
+  InstructionOperand temps[1];
+  int32_t output_count = 0;
+  int32_t temp_count = 0;
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsRegister(projection1);
+  } else {
+    temps[temp_count++] = g.TempRegister();
+  }
+
+  selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitWord32PairShl(Node* node) {
+  VisitWord32PairShift(this, kArmLslPair, node);
+}
+
+void InstructionSelector::VisitWord32PairShr(Node* node) {
+  VisitWord32PairShift(this, kArmLsrPair, node);
+}
+
+void InstructionSelector::VisitWord32PairSar(Node* node) {
+  VisitWord32PairShift(this, kArmAsrPair, node);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitShift(this, node, TryMatchROR);
+}
+
+void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) {
+  DCHECK(IsSupported(ARMv7));
+  VisitRR(this, kArmRbit, node);
+}
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  VisitRR(this, kArmRev, node);
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
+
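+// Combines covered multiplies and extends into the add (illustrative):
+// "a + b * c" is selected as mla, "a + (b & 0xFF)" as uxtab, and
+// "a + ((b << 24) >> 24)" as sxtab; the mirrored patterns on the right input
+// are handled the same way.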
+void InstructionSelector::VisitInt32Add(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (CanCover(node, m.left().node())) {
+    switch (m.left().opcode()) {
+      case IrOpcode::kInt32Mul: {
+        Int32BinopMatcher mleft(m.left().node());
+        Emit(kArmMla, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseRegister(mleft.right().node()),
+             g.UseRegister(m.right().node()));
+        return;
+      }
+      case IrOpcode::kInt32MulHigh: {
+        Int32BinopMatcher mleft(m.left().node());
+        Emit(kArmSmmla, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseRegister(mleft.right().node()),
+             g.UseRegister(m.right().node()));
+        return;
+      }
+      case IrOpcode::kWord32And: {
+        Int32BinopMatcher mleft(m.left().node());
+        if (mleft.right().Is(0xFF)) {
+          Emit(kArmUxtab, g.DefineAsRegister(node),
+               g.UseRegister(m.right().node()),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(0));
+          return;
+        } else if (mleft.right().Is(0xFFFF)) {
+          Emit(kArmUxtah, g.DefineAsRegister(node),
+               g.UseRegister(m.right().node()),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(0));
+          return;
+        }
+        break;
+      }
+      case IrOpcode::kWord32Sar: {
+        Int32BinopMatcher mleft(m.left().node());
+        if (CanCover(mleft.node(), mleft.left().node()) &&
+            mleft.left().IsWord32Shl()) {
+          Int32BinopMatcher mleftleft(mleft.left().node());
+          if (mleft.right().Is(24) && mleftleft.right().Is(24)) {
+            Emit(kArmSxtab, g.DefineAsRegister(node),
+                 g.UseRegister(m.right().node()),
+                 g.UseRegister(mleftleft.left().node()), g.TempImmediate(0));
+            return;
+          } else if (mleft.right().Is(16) && mleftleft.right().Is(16)) {
+            Emit(kArmSxtah, g.DefineAsRegister(node),
+                 g.UseRegister(m.right().node()),
+                 g.UseRegister(mleftleft.left().node()), g.TempImmediate(0));
+            return;
+          }
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  if (CanCover(node, m.right().node())) {
+    switch (m.right().opcode()) {
+      case IrOpcode::kInt32Mul: {
+        Int32BinopMatcher mright(m.right().node());
+        Emit(kArmMla, g.DefineAsRegister(node),
+             g.UseRegister(mright.left().node()),
+             g.UseRegister(mright.right().node()),
+             g.UseRegister(m.left().node()));
+        return;
+      }
+      case IrOpcode::kInt32MulHigh: {
+        Int32BinopMatcher mright(m.right().node());
+        Emit(kArmSmmla, g.DefineAsRegister(node),
+             g.UseRegister(mright.left().node()),
+             g.UseRegister(mright.right().node()),
+             g.UseRegister(m.left().node()));
+        return;
+      }
+      case IrOpcode::kWord32And: {
+        Int32BinopMatcher mright(m.right().node());
+        if (mright.right().Is(0xFF)) {
+          Emit(kArmUxtab, g.DefineAsRegister(node),
+               g.UseRegister(m.left().node()),
+               g.UseRegister(mright.left().node()), g.TempImmediate(0));
+          return;
+        } else if (mright.right().Is(0xFFFF)) {
+          Emit(kArmUxtah, g.DefineAsRegister(node),
+               g.UseRegister(m.left().node()),
+               g.UseRegister(mright.left().node()), g.TempImmediate(0));
+          return;
+        }
+        break;
+      }
+      case IrOpcode::kWord32Sar: {
+        Int32BinopMatcher mright(m.right().node());
+        if (CanCover(mright.node(), mright.left().node()) &&
+            mright.left().IsWord32Shl()) {
+          Int32BinopMatcher mrightleft(mright.left().node());
+          if (mright.right().Is(24) && mrightleft.right().Is(24)) {
+            Emit(kArmSxtab, g.DefineAsRegister(node),
+                 g.UseRegister(m.left().node()),
+                 g.UseRegister(mrightleft.left().node()), g.TempImmediate(0));
+            return;
+          } else if (mright.right().Is(16) && mrightleft.right().Is(16)) {
+            Emit(kArmSxtah, g.DefineAsRegister(node),
+                 g.UseRegister(m.left().node()),
+                 g.UseRegister(mrightleft.left().node()), g.TempImmediate(0));
+            return;
+          }
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  VisitBinop(this, node, kArmAdd, kArmAdd);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (IsSupported(ARMv7) && m.right().IsInt32Mul() &&
+      CanCover(node, m.right().node())) {
+    Int32BinopMatcher mright(m.right().node());
+    Emit(kArmMls, g.DefineAsRegister(node), g.UseRegister(mright.left().node()),
+         g.UseRegister(mright.right().node()), g.UseRegister(m.left().node()));
+    return;
+  }
+  VisitBinop(this, node, kArmSub, kArmRsb);
+}
+
+namespace {
+
+void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node,
+                              FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand result_operand = g.DefineAsRegister(node);
+  InstructionOperand temp_operand = g.TempRegister();
+  InstructionOperand outputs[] = {result_operand, temp_operand};
+  InstructionOperand inputs[] = {g.UseRegister(m.left().node()),
+                                 g.UseRegister(m.right().node())};
+  selector->Emit(kArmSmull, 2, outputs, 2, inputs);
+
+  // Compare the high 32 bits of the result against the low 32 bits shifted
+  // arithmetically by 31 (i.e. the sign of the low half): they differ iff
+  // the multiplication overflowed.
+  InstructionOperand shift_31 = g.UseImmediate(31);
+  InstructionCode opcode =
+      kArmCmp | AddressingModeField::encode(kMode_Operand2_R_ASR_I);
+  selector->EmitWithContinuation(opcode, temp_operand, result_operand, shift_31,
+                                 cont);
+}
+
+}  // namespace
+
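+// Strength reduction for multiplications by 2^k + 1 and 2^k - 1
+// (illustrative):
+//   x * 9  ->  add Rd, Rn, Rn, lsl #3
+//   x * 7  ->  rsb Rd, Rn, Rn, lsl #3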
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  ArmOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.right().HasResolvedValue() && m.right().ResolvedValue() > 0) {
+    int32_t value = m.right().ResolvedValue();
+    if (base::bits::IsPowerOfTwo(value - 1)) {
+      Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1)));
+      return;
+    }
+    if (value < kMaxInt && base::bits::IsPowerOfTwo(value + 1)) {
+      Emit(kArmRsb | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1)));
+      return;
+    }
+  }
+  VisitRRR(this, kArmMul, node);
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  ArmOperandGenerator g(this);
+  InstructionOperand outputs[] = {g.TempRegister(), g.DefineAsRegister(node)};
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0)),
+                                 g.UseRegister(node->InputAt(1))};
+  Emit(kArmUmull, arraysize(outputs), outputs, arraysize(inputs), inputs);
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  VisitDiv(this, node, kArmSdiv, kArmVcvtF64S32, kArmVcvtS32F64);
+}
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  VisitDiv(this, node, kArmUdiv, kArmVcvtF64U32, kArmVcvtU32F64);
+}
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  VisitMod(this, node, kArmSdiv, kArmVcvtF64S32, kArmVcvtS32F64);
+}
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  VisitMod(this, node, kArmUdiv, kArmVcvtF64U32, kArmVcvtU32F64);
+}
+
+#define RR_OP_LIST(V)                                \
+  V(Word32Clz, kArmClz)                              \
+  V(ChangeFloat32ToFloat64, kArmVcvtF64F32)          \
+  V(RoundInt32ToFloat32, kArmVcvtF32S32)             \
+  V(RoundUint32ToFloat32, kArmVcvtF32U32)            \
+  V(ChangeInt32ToFloat64, kArmVcvtF64S32)            \
+  V(ChangeUint32ToFloat64, kArmVcvtF64U32)           \
+  V(ChangeFloat64ToInt32, kArmVcvtS32F64)            \
+  V(ChangeFloat64ToUint32, kArmVcvtU32F64)           \
+  V(TruncateFloat64ToUint32, kArmVcvtU32F64)         \
+  V(TruncateFloat64ToFloat32, kArmVcvtF32F64)        \
+  V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
+  V(RoundFloat64ToInt32, kArmVcvtS32F64)             \
+  V(BitcastFloat32ToInt32, kArmVmovU32F32)           \
+  V(BitcastInt32ToFloat32, kArmVmovF32U32)           \
+  V(Float64ExtractLowWord32, kArmVmovLowU32F64)      \
+  V(Float64ExtractHighWord32, kArmVmovHighU32F64)    \
+  V(Float64SilenceNaN, kArmFloat64SilenceNaN)        \
+  V(Float32Abs, kArmVabsF32)                         \
+  V(Float64Abs, kArmVabsF64)                         \
+  V(Float32Neg, kArmVnegF32)                         \
+  V(Float64Neg, kArmVnegF64)                         \
+  V(Float32Sqrt, kArmVsqrtF32)                       \
+  V(Float64Sqrt, kArmVsqrtF64)
+
+#define RR_OP_LIST_V8(V)                  \
+  V(Float32RoundDown, kArmVrintmF32)      \
+  V(Float64RoundDown, kArmVrintmF64)      \
+  V(Float32RoundUp, kArmVrintpF32)        \
+  V(Float64RoundUp, kArmVrintpF64)        \
+  V(Float32RoundTruncate, kArmVrintzF32)  \
+  V(Float64RoundTruncate, kArmVrintzF64)  \
+  V(Float64RoundTiesAway, kArmVrintaF64)  \
+  V(Float32RoundTiesEven, kArmVrintnF32)  \
+  V(Float64RoundTiesEven, kArmVrintnF64)  \
+  V(F64x2Ceil, kArmF64x2Ceil)             \
+  V(F64x2Floor, kArmF64x2Floor)           \
+  V(F64x2Trunc, kArmF64x2Trunc)           \
+  V(F64x2NearestInt, kArmF64x2NearestInt) \
+  V(F32x4Ceil, kArmVrintpF32)             \
+  V(F32x4Floor, kArmVrintmF32)            \
+  V(F32x4Trunc, kArmVrintzF32)            \
+  V(F32x4NearestInt, kArmVrintnF32)
+
+#define RRR_OP_LIST(V)          \
+  V(Int32MulHigh, kArmSmmul)    \
+  V(Float32Mul, kArmVmulF32)    \
+  V(Float64Mul, kArmVmulF64)    \
+  V(Float32Div, kArmVdivF32)    \
+  V(Float64Div, kArmVdivF64)    \
+  V(Float32Max, kArmFloat32Max) \
+  V(Float64Max, kArmFloat64Max) \
+  V(Float32Min, kArmFloat32Min) \
+  V(Float64Min, kArmFloat64Min)
+
+#define RR_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, opcode, node);                      \
+  }
+RR_OP_LIST(RR_VISITOR)
+#undef RR_VISITOR
+#undef RR_OP_LIST
+
+#define RR_VISITOR_V8(Name, opcode)                   \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    DCHECK(CpuFeatures::IsSupported(ARMv8));          \
+    VisitRR(this, opcode, node);                      \
+  }
+RR_OP_LIST_V8(RR_VISITOR_V8)
+#undef RR_VISITOR_V8
+#undef RR_OP_LIST_V8
+
+#define RRR_VISITOR(Name, opcode)                     \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, opcode, node);                     \
+  }
+RRR_OP_LIST(RRR_VISITOR)
+#undef RRR_VISITOR
+#undef RRR_OP_LIST
+
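+// Where one input is a covered multiply, the add is selected as a
+// multiply-accumulate (illustrative): "a + b * c" becomes vmla.f32,
+// accumulating into the register that holds a (hence DefineSameAsFirst);
+// the Float64 and subtract visitors below use vmla.f64 / vmls.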
+void InstructionSelector::VisitFloat32Add(Node* node) {
+  ArmOperandGenerator g(this);
+  Float32BinopMatcher m(node);
+  if (m.left().IsFloat32Mul() && CanCover(node, m.left().node())) {
+    Float32BinopMatcher mleft(m.left().node());
+    Emit(kArmVmlaF32, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+         g.UseRegister(mleft.right().node()));
+    return;
+  }
+  if (m.right().IsFloat32Mul() && CanCover(node, m.right().node())) {
+    Float32BinopMatcher mright(m.right().node());
+    Emit(kArmVmlaF32, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+         g.UseRegister(mright.left().node()),
+         g.UseRegister(mright.right().node()));
+    return;
+  }
+  VisitRRR(this, kArmVaddF32, node);
+}
+
+void InstructionSelector::VisitFloat64Add(Node* node) {
+  ArmOperandGenerator g(this);
+  Float64BinopMatcher m(node);
+  if (m.left().IsFloat64Mul() && CanCover(node, m.left().node())) {
+    Float64BinopMatcher mleft(m.left().node());
+    Emit(kArmVmlaF64, g.DefineSameAsFirst(node),
+         g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+         g.UseRegister(mleft.right().node()));
+    return;
+  }
+  if (m.right().IsFloat64Mul() && CanCover(node, m.right().node())) {
+    Float64BinopMatcher mright(m.right().node());
+    Emit(kArmVmlaF64, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+         g.UseRegister(mright.left().node()),
+         g.UseRegister(mright.right().node()));
+    return;
+  }
+  VisitRRR(this, kArmVaddF64, node);
+}
+
+void InstructionSelector::VisitFloat32Sub(Node* node) {
+  ArmOperandGenerator g(this);
+  Float32BinopMatcher m(node);
+  if (m.right().IsFloat32Mul() && CanCover(node, m.right().node())) {
+    Float32BinopMatcher mright(m.right().node());
+    Emit(kArmVmlsF32, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+         g.UseRegister(mright.left().node()),
+         g.UseRegister(mright.right().node()));
+    return;
+  }
+  VisitRRR(this, kArmVsubF32, node);
+}
+
+void InstructionSelector::VisitFloat64Sub(Node* node) {
+  ArmOperandGenerator g(this);
+  Float64BinopMatcher m(node);
+  if (m.right().IsFloat64Mul() && CanCover(node, m.right().node())) {
+    Float64BinopMatcher mright(m.right().node());
+    Emit(kArmVmlsF64, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+         g.UseRegister(mright.left().node()),
+         g.UseRegister(mright.right().node()));
+    return;
+  }
+  VisitRRR(this, kArmVsubF64, node);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmVmodF64, g.DefineAsFixed(node, d0), g.UseFixed(node->InputAt(0), d0),
+       g.UseFixed(node->InputAt(1), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  ArmOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d0), g.UseFixed(node->InputAt(0), d0),
+       g.UseFixed(node->InputAt(1), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  ArmOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d0), g.UseFixed(node->InputAt(0), d0))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  ArmOperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    for (size_t n = 0; n < arguments->size(); ++n) {
+      PushParameter input = (*arguments)[n];
+      if (input.node) {
+        int slot = static_cast<int>(n);
+        Emit(kArmPoke | MiscField::encode(slot), g.NoOutput(),
+             g.UseRegister(input.node));
+      }
+    }
+  } else {
+    // Push any stack arguments.
+    for (PushParameter input : base::Reversed(*arguments)) {
+      // Skip any alignment holes in pushed nodes.
+      if (input.node == nullptr) continue;
+      Emit(kArmPush, g.NoOutput(), g.UseRegister(input.node));
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  ArmOperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      Emit(kArmPeek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+namespace {
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  Float32BinopMatcher m(node);
+  if (m.right().Is(0.0f)) {
+    VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.left().node()),
+                 g.UseImmediate(m.right().node()), cont);
+  } else if (m.left().Is(0.0f)) {
+    cont->Commute();
+    VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.right().node()),
+                 g.UseImmediate(m.left().node()), cont);
+  } else {
+    VisitCompare(selector, kArmVcmpF32, g.UseRegister(m.left().node()),
+                 g.UseRegister(m.right().node()), cont);
+  }
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  Float64BinopMatcher m(node);
+  if (m.right().Is(0.0)) {
+    VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.left().node()),
+                 g.UseImmediate(m.right().node()), cont);
+  } else if (m.left().Is(0.0)) {
+    cont->Commute();
+    VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.right().node()),
+                 g.UseImmediate(m.left().node()), cont);
+  } else {
+    VisitCompare(selector, kArmVcmpF64, g.UseRegister(m.left().node()),
+                 g.UseRegister(m.right().node()), cont);
+  }
+}
+
+// Check whether we can convert:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>.
+// We only generate conditions <cond'> that are a combination of the N
+// and Z flags. This avoids the need to make this function dependent on
+// the flag-setting operation.
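+// For example (illustrative): for "(a + b) < 0" kSignedLessThan maps to
+// kNegative, so the selector can emit a flag-setting adds (or just cmn when
+// the add result has no other use) and test the N flag instead of emitting
+// the add followed by a separate cmp against zero.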
+bool CanUseFlagSettingBinop(FlagsCondition cond) {
+  switch (cond) {
+    case kEqual:
+    case kNotEqual:
+    case kSignedLessThan:
+    case kSignedGreaterThanOrEqual:
+    case kUnsignedLessThanOrEqual:  // x <= 0 -> x == 0
+    case kUnsignedGreaterThan:      // x > 0 -> x != 0
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Map <cond> to <cond'> so that the following transformation is possible:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>.
+FlagsCondition MapForFlagSettingBinop(FlagsCondition cond) {
+  DCHECK(CanUseFlagSettingBinop(cond));
+  switch (cond) {
+    case kEqual:
+    case kNotEqual:
+      return cond;
+    case kSignedLessThan:
+      return kNegative;
+    case kSignedGreaterThanOrEqual:
+      return kPositiveOrZero;
+    case kUnsignedLessThanOrEqual:  // x <= 0 -> x == 0
+      return kEqual;
+    case kUnsignedGreaterThan:  // x > 0 -> x != 0
+      return kNotEqual;
+    default:
+      UNREACHABLE();
+  }
+}
+
+// Check if we can perform the transformation:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>, and if so,
+// updates {node}, {opcode} and {cont} accordingly.
+void MaybeReplaceCmpZeroWithFlagSettingBinop(InstructionSelector* selector,
+                                             Node** node, Node* binop,
+                                             InstructionCode* opcode,
+                                             FlagsCondition cond,
+                                             FlagsContinuation* cont) {
+  InstructionCode binop_opcode;
+  InstructionCode no_output_opcode;
+  switch (binop->opcode()) {
+    case IrOpcode::kInt32Add:
+      binop_opcode = kArmAdd;
+      no_output_opcode = kArmCmn;
+      break;
+    case IrOpcode::kWord32And:
+      binop_opcode = kArmAnd;
+      no_output_opcode = kArmTst;
+      break;
+    case IrOpcode::kWord32Or:
+      binop_opcode = kArmOrr;
+      no_output_opcode = kArmOrr;
+      break;
+    case IrOpcode::kWord32Xor:
+      binop_opcode = kArmEor;
+      no_output_opcode = kArmTeq;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  if (selector->CanCover(*node, binop)) {
+    // The comparison is the only user of {node}.
+    cont->Overwrite(MapForFlagSettingBinop(cond));
+    *opcode = no_output_opcode;
+    *node = binop;
+  } else if (selector->IsOnlyUserOfNodeInSameBlock(*node, binop)) {
+    // We can also handle the case where the {node} and the comparison are in
+    // the same basic block, and the comparison is the only user of {node} in
+    // this basic block ({node} has users in other basic blocks).
+    cont->Overwrite(MapForFlagSettingBinop(cond));
+    *opcode = binop_opcode;
+    *node = binop;
+  }
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont) {
+  ArmOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  bool has_result = (opcode != kArmCmp) && (opcode != kArmCmn) &&
+                    (opcode != kArmTst) && (opcode != kArmTeq);
+
+  if (TryMatchImmediateOrShift(selector, &opcode, m.right().node(),
+                               &input_count, &inputs[1])) {
+    inputs[0] = g.UseRegister(m.left().node());
+    input_count++;
+  } else if (TryMatchImmediateOrShift(selector, &opcode, m.left().node(),
+                                      &input_count, &inputs[1])) {
+    if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
+    inputs[0] = g.UseRegister(m.right().node());
+    input_count++;
+  } else {
+    opcode |= AddressingModeField::encode(kMode_Operand2_R);
+    inputs[input_count++] = g.UseRegister(m.left().node());
+    inputs[input_count++] = g.UseRegister(m.right().node());
+  }
+
+  if (has_result) {
+    if (cont->IsDeoptimize()) {
+      // If we can deoptimize as a result of the binop, we need to make sure
+      // that the deopt inputs are not overwritten by the binop result. One way
+      // to achieve that is to declare the output register as same-as-first.
+      outputs[output_count++] = g.DefineSameAsFirst(node);
+    } else {
+      outputs[output_count++] = g.DefineAsRegister(node);
+    }
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      FlagsContinuation* cont) {
+  InstructionCode opcode = kArmCmp;
+  Int32BinopMatcher m(node);
+
+  FlagsCondition cond = cont->condition();
+  if (m.right().Is(0) && (m.left().IsInt32Add() || m.left().IsWord32Or() ||
+                          m.left().IsWord32And() || m.left().IsWord32Xor())) {
+    // Emit flag setting instructions for comparisons against zero.
+    if (CanUseFlagSettingBinop(cond)) {
+      Node* binop = m.left().node();
+      MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
+                                              cond, cont);
+    }
+  } else if (m.left().Is(0) &&
+             (m.right().IsInt32Add() || m.right().IsWord32Or() ||
+              m.right().IsWord32And() || m.right().IsWord32Xor())) {
+    // Same as above, but we need to commute the condition before we
+    // continue with the rest of the checks.
+    cond = CommuteFlagsCondition(cond);
+    if (CanUseFlagSettingBinop(cond)) {
+      Node* binop = m.right().node();
+      MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
+                                              cond, cont);
+    }
+  }
+
+  VisitWordCompare(selector, node, opcode, cont);
+}
+
+}  // namespace
+
+// Shared routine for word comparisons against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of
+          // the actual value, or was already defined, which means it is
+          // scheduled *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (!result || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kArmAdd, kArmAdd, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kArmSub, kArmRsb, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                // ARM doesn't set the overflow flag for multiplication, so we
+                // need to test on kNotEqual. Here is the code sequence used:
+                //   smull resultlow, resulthigh, left, right
+                //   cmp resulthigh, Operand(resultlow, ASR, 31)
+                cont->OverwriteAndNegateIfEqual(kNotEqual);
+                return EmitInt32MulWithOverflow(this, node, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Add:
+        return VisitWordCompare(this, value, kArmCmn, cont);
+      case IrOpcode::kInt32Sub:
+        return VisitWordCompare(this, value, kArmCmp, cont);
+      case IrOpcode::kWord32And:
+        return VisitWordCompare(this, value, kArmTst, cont);
+      case IrOpcode::kWord32Or:
+        return VisitBinop(this, value, kArmOrr, kArmOrr, cont);
+      case IrOpcode::kWord32Xor:
+        return VisitWordCompare(this, value, kArmTeq, cont);
+      case IrOpcode::kWord32Sar:
+        return VisitShift(this, value, TryMatchASR, cont);
+      case IrOpcode::kWord32Shl:
+        return VisitShift(this, value, TryMatchLSL, cont);
+      case IrOpcode::kWord32Shr:
+        return VisitShift(this, value, TryMatchLSR, cont);
+      case IrOpcode::kWord32Ror:
+        return VisitShift(this, value, TryMatchROR, cont);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  if (user->opcode() == IrOpcode::kWord32Equal) {
+    return VisitWordCompare(this, user, cont);
+  }
+
+  // Continuation could not be combined with a compare, emit compare against 0.
+  ArmOperandGenerator g(this);
+  InstructionCode const opcode =
+      kArmTst | AddressingModeField::encode(kMode_Operand2_R);
+  InstructionOperand const value_operand = g.UseRegister(value);
+  EmitWithContinuation(opcode, value_operand, value_operand, cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  ArmOperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
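+  // Cost model sketch (illustrative numbers): with case_count == 4 and
+  // value_range == 4, the jump table costs 4 + 4 + 3 * 3 = 17 against
+  // 3 + 2 * 4 + 3 * 4 = 23 for the binary search, so the table is chosen.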
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
+    if (sw.case_count() > 0 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kArmSub | AddressingModeField::encode(kMode_Operand2_I),
+             index_operand, value_operand, g.TempImmediate(sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kArmAdd, kArmAdd, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kArmAdd, kArmAdd, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kArmSub, kArmRsb, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kArmSub, kArmRsb, &cont);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    // ARM doesn't set the overflow flag for multiplication, so we need to test
+    // on kNotEqual. Here is the code sequence used:
+    //   smull resultlow, resulthigh, left, right
+    //   cmp resulthigh, Operand(resultlow, ASR, 31)
+    FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
+    return EmitInt32MulWithOverflow(this, node, &cont);
+  }
+  FlagsContinuation cont;
+  EmitInt32MulWithOverflow(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kFloat64InsertHighWord32 &&
+      CanCover(node, left)) {
+    left = left->InputAt(1);
+    Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), g.UseRegister(right),
+         g.UseRegister(left));
+    return;
+  }
+  Emit(kArmVmovLowF64U32, g.DefineSameAsFirst(node), g.UseRegister(left),
+       g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kFloat64InsertLowWord32 &&
+      CanCover(node, left)) {
+    left = left->InputAt(1);
+    Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), g.UseRegister(left),
+         g.UseRegister(right));
+    return;
+  }
+  Emit(kArmVmovHighF64U32, g.DefineSameAsFirst(node), g.UseRegister(left),
+       g.UseRegister(right));
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmDmbIsh, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode =
+          load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kWord32AtomicLoadInt16
+                                   : kWord32AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicLoadWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  Emit(opcode | AddressingModeField::encode(kMode_Offset_RR),
+       g.DefineAsRegister(node), g.UseRegister(base), g.UseRegister(index));
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicStoreWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 0, nullptr, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseRegister(base);
+  inputs[input_count++] = g.UseRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseRegister(base);
+  inputs[input_count++] = g.UseRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(old_value);
+  inputs[input_count++] = g.UseUniqueRegister(new_value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister(),
+                                g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseRegister(base);
+  inputs[input_count++] = g.UseRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister(),
+                                g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, arraysize(temps), temps);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord32AtomicPairLoad(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  InstructionOperand temps[1];
+  size_t temp_count = 0;
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection0 && projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, r0);
+    outputs[output_count++] = g.DefineAsFixed(projection1, r1);
+    temps[temp_count++] = g.TempRegister();
+  } else if (projection0) {
+    inputs[input_count++] = g.UseImmediate(0);
+    outputs[output_count++] = g.DefineAsRegister(projection0);
+  } else if (projection1) {
+    inputs[input_count++] = g.UseImmediate(4);
+    temps[temp_count++] = g.TempRegister();
+    outputs[output_count++] = g.DefineAsRegister(projection1);
+  } else {
+    // The loaded value is unused, so there is no need to generate code.
+    return;
+  }
+  Emit(kArmWord32AtomicPairLoad, output_count, outputs, input_count, inputs,
+       temp_count, temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairStore(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value_low = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegister(base), g.UseUniqueRegister(index),
+      g.UseFixed(value_low, r2), g.UseFixed(value_high, r3)};
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister(r0),
+                                g.TempRegister(r1)};
+  InstructionCode code =
+      kArmWord32AtomicPairStore | AddressingModeField::encode(addressing_mode);
+  Emit(code, 0, nullptr, arraysize(inputs), inputs, arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAdd(Node* node) {
+  VisitPairAtomicBinOp(this, node, kArmWord32AtomicPairAdd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairSub(Node* node) {
+  VisitPairAtomicBinOp(this, node, kArmWord32AtomicPairSub);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAnd(Node* node) {
+  VisitPairAtomicBinOp(this, node, kArmWord32AtomicPairAnd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairOr(Node* node) {
+  VisitPairAtomicBinOp(this, node, kArmWord32AtomicPairOr);
+}
+
+void InstructionSelector::VisitWord32AtomicPairXor(Node* node) {
+  VisitPairAtomicBinOp(this, node, kArmWord32AtomicPairXor);
+}
+
+void InstructionSelector::VisitWord32AtomicPairExchange(Node* node) {
+  ArmOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[] = {
+      g.UseFixed(value, r0), g.UseFixed(value_high, r1),
+      g.UseUniqueRegister(base), g.UseUniqueRegister(index)};
+  InstructionCode code = kArmWord32AtomicPairExchange |
+                         AddressingModeField::encode(addressing_mode);
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[4];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister();
+  temps[temp_count++] = g.TempRegister();
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, r6);
+  } else {
+    temps[temp_count++] = g.TempRegister(r6);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, r7);
+  } else {
+    temps[temp_count++] = g.TempRegister(r7);
+  }
+  Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
+       temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
+  ArmOperandGenerator g(this);
+  AddressingMode addressing_mode = kMode_Offset_RR;
+  InstructionOperand inputs[] = {g.UseFixed(node->InputAt(2), r4),
+                                 g.UseFixed(node->InputAt(3), r5),
+                                 g.UseFixed(node->InputAt(4), r8),
+                                 g.UseFixed(node->InputAt(5), r9),
+                                 g.UseUniqueRegister(node->InputAt(0)),
+                                 g.UseUniqueRegister(node->InputAt(1))};
+  InstructionCode code = kArmWord32AtomicPairCompareExchange |
+                         AddressingModeField::encode(addressing_mode);
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[4];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister();
+  temps[temp_count++] = g.TempRegister();
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, r2);
+  } else {
+    temps[temp_count++] = g.TempRegister(r2);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, r3);
+  } else {
+    temps[temp_count++] = g.TempRegister(r3);
+  }
+  Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
+       temps);
+}
+
+#define SIMD_TYPE_LIST(V) \
+  V(F32x4)                \
+  V(I32x4)                \
+  V(I16x8)                \
+  V(I8x16)
+
+#define SIMD_UNOP_LIST(V)                               \
+  V(F64x2Abs, kArmF64x2Abs)                             \
+  V(F64x2Neg, kArmF64x2Neg)                             \
+  V(F64x2Sqrt, kArmF64x2Sqrt)                           \
+  V(F32x4SConvertI32x4, kArmF32x4SConvertI32x4)         \
+  V(F32x4UConvertI32x4, kArmF32x4UConvertI32x4)         \
+  V(F32x4Abs, kArmF32x4Abs)                             \
+  V(F32x4Neg, kArmF32x4Neg)                             \
+  V(F32x4RecipApprox, kArmF32x4RecipApprox)             \
+  V(F32x4RecipSqrtApprox, kArmF32x4RecipSqrtApprox)     \
+  V(I32x4SConvertF32x4, kArmI32x4SConvertF32x4)         \
+  V(I32x4SConvertI16x8Low, kArmI32x4SConvertI16x8Low)   \
+  V(I32x4SConvertI16x8High, kArmI32x4SConvertI16x8High) \
+  V(I32x4Neg, kArmI32x4Neg)                             \
+  V(I32x4UConvertF32x4, kArmI32x4UConvertF32x4)         \
+  V(I32x4UConvertI16x8Low, kArmI32x4UConvertI16x8Low)   \
+  V(I32x4UConvertI16x8High, kArmI32x4UConvertI16x8High) \
+  V(I32x4Abs, kArmI32x4Abs)                             \
+  V(I16x8SConvertI8x16Low, kArmI16x8SConvertI8x16Low)   \
+  V(I16x8SConvertI8x16High, kArmI16x8SConvertI8x16High) \
+  V(I16x8Neg, kArmI16x8Neg)                             \
+  V(I16x8UConvertI8x16Low, kArmI16x8UConvertI8x16Low)   \
+  V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \
+  V(I16x8Abs, kArmI16x8Abs)                             \
+  V(I8x16Neg, kArmI8x16Neg)                             \
+  V(I8x16Abs, kArmI8x16Abs)                             \
+  V(S128Not, kArmS128Not)                               \
+  V(V32x4AnyTrue, kArmV32x4AnyTrue)                     \
+  V(V32x4AllTrue, kArmV32x4AllTrue)                     \
+  V(V16x8AnyTrue, kArmV16x8AnyTrue)                     \
+  V(V16x8AllTrue, kArmV16x8AllTrue)                     \
+  V(V8x16AnyTrue, kArmV8x16AnyTrue)                     \
+  V(V8x16AllTrue, kArmV8x16AllTrue)
+
+#define SIMD_SHIFT_OP_LIST(V) \
+  V(I64x2Shl, 64)             \
+  V(I64x2ShrS, 64)            \
+  V(I64x2ShrU, 64)            \
+  V(I32x4Shl, 32)             \
+  V(I32x4ShrS, 32)            \
+  V(I32x4ShrU, 32)            \
+  V(I16x8Shl, 16)             \
+  V(I16x8ShrS, 16)            \
+  V(I16x8ShrU, 16)            \
+  V(I8x16Shl, 8)              \
+  V(I8x16ShrS, 8)             \
+  V(I8x16ShrU, 8)
+
+#define SIMD_BINOP_LIST(V)                            \
+  V(F64x2Add, kArmF64x2Add)                           \
+  V(F64x2Sub, kArmF64x2Sub)                           \
+  V(F64x2Mul, kArmF64x2Mul)                           \
+  V(F64x2Div, kArmF64x2Div)                           \
+  V(F64x2Min, kArmF64x2Min)                           \
+  V(F64x2Max, kArmF64x2Max)                           \
+  V(F64x2Eq, kArmF64x2Eq)                             \
+  V(F64x2Ne, kArmF64x2Ne)                             \
+  V(F64x2Lt, kArmF64x2Lt)                             \
+  V(F64x2Le, kArmF64x2Le)                             \
+  V(F32x4Add, kArmF32x4Add)                           \
+  V(F32x4AddHoriz, kArmF32x4AddHoriz)                 \
+  V(F32x4Sub, kArmF32x4Sub)                           \
+  V(F32x4Mul, kArmF32x4Mul)                           \
+  V(F32x4Min, kArmF32x4Min)                           \
+  V(F32x4Max, kArmF32x4Max)                           \
+  V(F32x4Eq, kArmF32x4Eq)                             \
+  V(F32x4Ne, kArmF32x4Ne)                             \
+  V(F32x4Lt, kArmF32x4Lt)                             \
+  V(F32x4Le, kArmF32x4Le)                             \
+  V(I64x2Add, kArmI64x2Add)                           \
+  V(I64x2Sub, kArmI64x2Sub)                           \
+  V(I32x4Add, kArmI32x4Add)                           \
+  V(I32x4AddHoriz, kArmI32x4AddHoriz)                 \
+  V(I32x4Sub, kArmI32x4Sub)                           \
+  V(I32x4Mul, kArmI32x4Mul)                           \
+  V(I32x4MinS, kArmI32x4MinS)                         \
+  V(I32x4MaxS, kArmI32x4MaxS)                         \
+  V(I32x4Eq, kArmI32x4Eq)                             \
+  V(I32x4Ne, kArmI32x4Ne)                             \
+  V(I32x4GtS, kArmI32x4GtS)                           \
+  V(I32x4GeS, kArmI32x4GeS)                           \
+  V(I32x4MinU, kArmI32x4MinU)                         \
+  V(I32x4MaxU, kArmI32x4MaxU)                         \
+  V(I32x4GtU, kArmI32x4GtU)                           \
+  V(I32x4GeU, kArmI32x4GeU)                           \
+  V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4)       \
+  V(I16x8Add, kArmI16x8Add)                           \
+  V(I16x8AddSatS, kArmI16x8AddSatS)                   \
+  V(I16x8AddHoriz, kArmI16x8AddHoriz)                 \
+  V(I16x8Sub, kArmI16x8Sub)                           \
+  V(I16x8SubSatS, kArmI16x8SubSatS)                   \
+  V(I16x8Mul, kArmI16x8Mul)                           \
+  V(I16x8MinS, kArmI16x8MinS)                         \
+  V(I16x8MaxS, kArmI16x8MaxS)                         \
+  V(I16x8Eq, kArmI16x8Eq)                             \
+  V(I16x8Ne, kArmI16x8Ne)                             \
+  V(I16x8GtS, kArmI16x8GtS)                           \
+  V(I16x8GeS, kArmI16x8GeS)                           \
+  V(I16x8UConvertI32x4, kArmI16x8UConvertI32x4)       \
+  V(I16x8AddSatU, kArmI16x8AddSatU)                   \
+  V(I16x8SubSatU, kArmI16x8SubSatU)                   \
+  V(I16x8MinU, kArmI16x8MinU)                         \
+  V(I16x8MaxU, kArmI16x8MaxU)                         \
+  V(I16x8GtU, kArmI16x8GtU)                           \
+  V(I16x8GeU, kArmI16x8GeU)                           \
+  V(I16x8RoundingAverageU, kArmI16x8RoundingAverageU) \
+  V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8)       \
+  V(I8x16Add, kArmI8x16Add)                           \
+  V(I8x16AddSatS, kArmI8x16AddSatS)                   \
+  V(I8x16Sub, kArmI8x16Sub)                           \
+  V(I8x16SubSatS, kArmI8x16SubSatS)                   \
+  V(I8x16Mul, kArmI8x16Mul)                           \
+  V(I8x16MinS, kArmI8x16MinS)                         \
+  V(I8x16MaxS, kArmI8x16MaxS)                         \
+  V(I8x16Eq, kArmI8x16Eq)                             \
+  V(I8x16Ne, kArmI8x16Ne)                             \
+  V(I8x16GtS, kArmI8x16GtS)                           \
+  V(I8x16GeS, kArmI8x16GeS)                           \
+  V(I8x16UConvertI16x8, kArmI8x16UConvertI16x8)       \
+  V(I8x16AddSatU, kArmI8x16AddSatU)                   \
+  V(I8x16SubSatU, kArmI8x16SubSatU)                   \
+  V(I8x16MinU, kArmI8x16MinU)                         \
+  V(I8x16MaxU, kArmI8x16MaxU)                         \
+  V(I8x16GtU, kArmI8x16GtU)                           \
+  V(I8x16GeU, kArmI8x16GeU)                           \
+  V(I8x16RoundingAverageU, kArmI8x16RoundingAverageU) \
+  V(S128And, kArmS128And)                             \
+  V(S128Or, kArmS128Or)                               \
+  V(S128Xor, kArmS128Xor)                             \
+  V(S128AndNot, kArmS128AndNot)
+
+void InstructionSelector::VisitI32x4DotI16x8S(Node* node) {
+  ArmOperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kArmI32x4DotI16x8S, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitS128Const(Node* node) {
+  ArmOperandGenerator g(this);
+  uint32_t val[kSimd128Size / sizeof(uint32_t)];
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If all bytes are zeros or ones, avoid emitting code for generic constants.
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
+                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kArmS128Zero, dst);
+  } else if (all_ones) {
+    Emit(kArmS128AllOnes, dst);
+  } else {
+    Emit(kArmS128Const, dst, g.UseImmediate(val[0]), g.UseImmediate(val[1]),
+         g.UseImmediate(val[2]), g.UseImmediate(val[3]));
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmS128Zero, g.DefineAsRegister(node));
+}
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    VisitRR(this, kArm##Type##Splat, node);                  \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
+SIMD_VISIT_SPLAT(F64x2)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    VisitRRI(this, kArm##Type##ExtractLane##Sign, node);                 \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    VisitRRIR(this, kArm##Type##ReplaceLane, node);                \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
+SIMD_VISIT_REPLACE_LANE(F64x2)
+#undef SIMD_VISIT_REPLACE_LANE
+#undef SIMD_TYPE_LIST
+
+#define SIMD_VISIT_UNOP(Name, instruction)            \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, instruction, node);                 \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+#undef SIMD_UNOP_LIST
+
+#define SIMD_VISIT_SHIFT_OP(Name, width)              \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitSimdShiftRRR(this, kArm##Name, node, width); \
+  }
+SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
+#undef SIMD_VISIT_SHIFT_OP
+#undef SIMD_SHIFT_OP_LIST
+
+#define SIMD_VISIT_BINOP(Name, instruction)           \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, instruction, node);                \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+#undef SIMD_BINOP_LIST
+
+void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
+  ArmOperandGenerator g(this);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
+  Emit(kArmI64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
+}
+
+void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
+  ArmOperandGenerator g(this);
+  InstructionOperand operand = g.UseRegister(node->InputAt(0));
+  InstructionOperand lane = g.UseImmediate(OpParameter<int32_t>(node->op()));
+  InstructionOperand low = g.UseRegister(node->InputAt(1));
+  InstructionOperand high = g.UseRegister(node->InputAt(2));
+  Emit(kArmI64x2ReplaceLaneI32Pair, g.DefineSameAsFirst(node), operand, lane,
+       low, high);
+}
+
+void InstructionSelector::VisitI64x2Neg(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmI64x2Neg, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitI64x2Mul(Node* node) {
+  ArmOperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  Emit(kArmI64x2Mul, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitF32x4Sqrt(Node* node) {
+  ArmOperandGenerator g(this);
+  // Use fixed registers in the lower 8 Q-registers so we can directly access
+  // mapped registers S0-S31.
+  Emit(kArmF32x4Sqrt, g.DefineAsFixed(node, q0),
+       g.UseFixed(node->InputAt(0), q0));
+}
+
+void InstructionSelector::VisitF32x4Div(Node* node) {
+  ArmOperandGenerator g(this);
+  // Use fixed registers in the lower 8 Q-registers so we can directly access
+  // mapped registers S0-S31.
+  Emit(kArmF32x4Div, g.DefineAsFixed(node, q0),
+       g.UseFixed(node->InputAt(0), q0), g.UseFixed(node->InputAt(1), q1));
+}
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmS128Select, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       g.UseRegister(node->InputAt(2)));
+}
+
+namespace {
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+};
+
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kArmS32x4ZipLeft},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kArmS32x4ZipRight},
+    {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
+     kArmS32x4UnzipLeft},
+    {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
+     kArmS32x4UnzipRight},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
+     kArmS32x4TransposeLeft},
+    {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
+     kArmS32x4TransposeRight},
+    {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, kArmS32x2Reverse},
+
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kArmS16x8ZipLeft},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kArmS16x8ZipRight},
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kArmS16x8UnzipLeft},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kArmS16x8UnzipRight},
+    {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
+     kArmS16x8TransposeLeft},
+    {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
+     kArmS16x8TransposeRight},
+    {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kArmS16x4Reverse},
+    {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kArmS16x2Reverse},
+
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kArmS8x16ZipLeft},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kArmS8x16ZipRight},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kArmS8x16UnzipLeft},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kArmS8x16UnzipRight},
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kArmS8x16TransposeLeft},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kArmS8x16TransposeRight},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
+
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         ArchOpcode* opcode) {
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
+  for (size_t i = 0; i < num_entries; ++i) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; ++j) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *opcode = entry.opcode;
+      return true;
+    }
+  }
+  return false;
+}
+
+void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
+                         InstructionOperand* src0, InstructionOperand* src1) {
+  if (input0 == input1) {
+    // Unary, any q-register can be the table.
+    *src0 = *src1 = g->UseRegister(input0);
+  } else {
+    // Binary, table registers must be consecutive.
+    *src0 = g->UseFixed(input0, q0);
+    *src1 = g->UseFixed(input1, q1);
+  }
+}
+
+}  // namespace
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  uint8_t shuffle32x4[4];
+  ArmOperandGenerator g(this);
+  int index = 0;
+  if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle, &index)) {
+      DCHECK_GT(4, index);
+      Emit(kArmS128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+           g.UseImmediate(Neon32), g.UseImmediate(index % 4));
+    } else if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
+      EmitIdentity(node);
+    } else {
+      // 32x4 shuffles are implemented as s-register moves. To simplify these,
+      // make sure the destination is distinct from both sources.
+      InstructionOperand src0 = g.UseUniqueRegister(input0);
+      InstructionOperand src1 = is_swizzle ? src0 : g.UseUniqueRegister(input1);
+      Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), src0, src1,
+           g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4)));
+    }
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
+    DCHECK_GT(8, index);
+    Emit(kArmS128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseImmediate(Neon16), g.UseImmediate(index % 8));
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
+    DCHECK_GT(16, index);
+    Emit(kArmS128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseImmediate(Neon8), g.UseImmediate(index % 16));
+    return;
+  }
+  ArchOpcode opcode;
+  if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
+                          is_swizzle, &opcode)) {
+    VisitRRRShuffle(this, opcode, node);
+    return;
+  }
+  uint8_t offset;
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseRegister(input1), g.UseImmediate(offset));
+    return;
+  }
+  // The code generator uses vtbl; arrange sources into a valid lookup table.
+  InstructionOperand src0, src1;
+  ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
+  Emit(kArmI8x16Shuffle, g.DefineAsRegister(node), src0, src1,
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12)));
+}
+
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  ArmOperandGenerator g(this);
+  // We don't want input 0 (the table) to be the same as output, since we will
+  // modify output twice (low and high), and need to keep the table the same.
+  Emit(kArmI8x16Swizzle, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmSxtb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.TempImmediate(0));
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
+  ArmOperandGenerator g(this);
+  Emit(kArmSxth, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.TempImmediate(0));
+}
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+namespace {
+template <ArchOpcode opcode>
+void VisitBitMask(InstructionSelector* selector, Node* node) {
+  ArmOperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitI8x16BitMask(Node* node) {
+  VisitBitMask<kArmI8x16BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI16x8BitMask(Node* node) {
+  VisitBitMask<kArmI16x8BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI32x4BitMask(Node* node) {
+  VisitBitMask<kArmI32x4BitMask>(this, node);
+}
+
+namespace {
+void VisitF32x4PminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+                          Node* node) {
+  ArmOperandGenerator g(selector);
+  // All registers must be unique: the two inputs are compared first, and they
+  // must remain unchanged for the bitselect that follows.
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitF64x2PminOrPMax(InstructionSelector* selector, ArchOpcode opcode,
+                          Node* node) {
+  ArmOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+}  // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitF32x4PminOrPmax(this, kArmF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitF32x4PminOrPmax(this, kArmF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitF64x2PminOrPMax(this, kArmF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitF64x2PminOrPMax(this, kArmF64x2Pmax, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  ArmOperandGenerator g(this);
+
+  InstructionCode opcode = kArmVcvtS32F32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  ArmOperandGenerator g(this);
+
+  InstructionCode opcode = kArmVcvtU32F32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  MachineOperatorBuilder::Flags flags = MachineOperatorBuilder::kNoFlags;
+  if (CpuFeatures::IsSupported(SUDIV)) {
+    // The sdiv and udiv instructions correctly return 0 if the divisor is 0,
+    // but the fall-back implementation does not.
+    flags |= MachineOperatorBuilder::kInt32DivIsSafe |
+             MachineOperatorBuilder::kUint32DivIsSafe;
+  }
+  if (CpuFeatures::IsSupported(ARMv7)) {
+    flags |= MachineOperatorBuilder::kWord32ReverseBits;
+  }
+  if (CpuFeatures::IsSupported(ARMv8)) {
+    flags |= MachineOperatorBuilder::kFloat32RoundDown |
+             MachineOperatorBuilder::kFloat64RoundDown |
+             MachineOperatorBuilder::kFloat32RoundUp |
+             MachineOperatorBuilder::kFloat64RoundUp |
+             MachineOperatorBuilder::kFloat32RoundTruncate |
+             MachineOperatorBuilder::kFloat64RoundTruncate |
+             MachineOperatorBuilder::kFloat64RoundTiesAway |
+             MachineOperatorBuilder::kFloat32RoundTiesEven |
+             MachineOperatorBuilder::kFloat64RoundTiesEven;
+  }
+  flags |= MachineOperatorBuilder::kSatConversionIsSafe;
+  return flags;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  base::EnumSet<MachineRepresentation> req_aligned;
+  req_aligned.Add(MachineRepresentation::kFloat32);
+  req_aligned.Add(MachineRepresentation::kFloat64);
+  return MachineOperatorBuilder::AlignmentRequirements::
+      SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm/unwinding-info-writer-arm.cc b/src/compiler/backend/arm/unwinding-info-writer-arm.cc
new file mode 100644
index 0000000..be43c18
--- /dev/null
+++ b/src/compiler/backend/arm/unwinding-info-writer-arm.cc
@@ -0,0 +1,102 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/arm/unwinding-info-writer-arm.h"
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+void UnwindingInfoWriter::BeginInstructionBlock(int pc_offset,
+                                                const InstructionBlock* block) {
+  if (!enabled()) return;
+
+  block_will_exit_ = false;
+
+  DCHECK_LT(block->rpo_number().ToInt(),
+            static_cast<int>(block_initial_states_.size()));
+  const BlockInitialState* initial_state =
+      block_initial_states_[block->rpo_number().ToInt()];
+  if (!initial_state) return;
+  if (initial_state->saved_lr_ != saved_lr_) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    if (initial_state->saved_lr_) {
+      eh_frame_writer_.RecordRegisterSavedToStack(lr, kSystemPointerSize);
+    } else {
+      eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+    }
+    saved_lr_ = initial_state->saved_lr_;
+  }
+}
+
+void UnwindingInfoWriter::EndInstructionBlock(const InstructionBlock* block) {
+  if (!enabled() || block_will_exit_) return;
+
+  for (const RpoNumber& successor : block->successors()) {
+    int successor_index = successor.ToInt();
+    DCHECK_LT(successor_index, static_cast<int>(block_initial_states_.size()));
+    const BlockInitialState* existing_state =
+        block_initial_states_[successor_index];
+
+    // If an entry already exists for this block, check that its values match
+    // the ones we are trying to insert.
+    if (existing_state) {
+      DCHECK_EQ(existing_state->saved_lr_, saved_lr_);
+    } else {
+      block_initial_states_[successor_index] =
+          zone_->New<BlockInitialState>(saved_lr_);
+    }
+  }
+}
+
+void UnwindingInfoWriter::MarkFrameConstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // Regardless of the type of frame constructed, the relevant part of the
+  // layout is always the one in the diagram:
+  //
+  // |   ....   |         higher addresses
+  // +----------+               ^
+  // |    LR    |               |            |
+  // +----------+               |            |
+  // | saved FP |               |            |
+  // +----------+ <-- FP                     v
+  // |   ....   |                       stack growth
+  //
+  // The LR is pushed on the stack, and we can record this fact at the end of
+  // the construction, since the LR itself is not modified in the process.
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterSavedToStack(lr, kSystemPointerSize);
+  saved_lr_ = true;
+}
+
+void UnwindingInfoWriter::MarkFrameDeconstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // The lr is restored by the last operation in LeaveFrame().
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+  saved_lr_ = false;
+}
+
+void UnwindingInfoWriter::MarkLinkRegisterOnTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(sp, 0);
+  eh_frame_writer_.RecordRegisterSavedToStack(lr, 0);
+}
+
+void UnwindingInfoWriter::MarkPopLinkRegisterFromTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(fp, 0);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm/unwinding-info-writer-arm.h b/src/compiler/backend/arm/unwinding-info-writer-arm.h
new file mode 100644
index 0000000..6b9ade0
--- /dev/null
+++ b/src/compiler/backend/arm/unwinding-info-writer-arm.h
@@ -0,0 +1,73 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_ARM_UNWINDING_INFO_WRITER_ARM_H_
+#define V8_COMPILER_BACKEND_ARM_UNWINDING_INFO_WRITER_ARM_H_
+
+#include "src/diagnostics/eh-frame.h"
+#include "src/flags/flags.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone* zone)
+      : zone_(zone),
+        eh_frame_writer_(zone),
+        saved_lr_(false),
+        block_will_exit_(false),
+        block_initial_states_(zone) {
+    if (enabled()) eh_frame_writer_.Initialize();
+  }
+
+  void SetNumberOfInstructionBlocks(int number) {
+    if (enabled()) block_initial_states_.resize(number);
+  }
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock* block);
+  void EndInstructionBlock(const InstructionBlock* block);
+
+  void MarkLinkRegisterOnTopOfStack(int pc_offset);
+  void MarkPopLinkRegisterFromTopOfStack(int pc_offset);
+
+  void MarkFrameConstructed(int at_pc);
+  void MarkFrameDeconstructed(int at_pc);
+
+  void MarkBlockWillExit() { block_will_exit_ = true; }
+
+  void Finish(int code_size) {
+    if (enabled()) eh_frame_writer_.Finish(code_size);
+  }
+
+  EhFrameWriter* eh_frame_writer() {
+    return enabled() ? &eh_frame_writer_ : nullptr;
+  }
+
+ private:
+  bool enabled() const { return FLAG_perf_prof_unwinding_info; }
+
+  class BlockInitialState : public ZoneObject {
+   public:
+    explicit BlockInitialState(bool saved_lr) : saved_lr_(saved_lr) {}
+
+    bool saved_lr_;
+  };
+
+  Zone* zone_;
+  EhFrameWriter eh_frame_writer_;
+  bool saved_lr_;
+  bool block_will_exit_;
+
+  ZoneVector<const BlockInitialState*> block_initial_states_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_ARM_UNWINDING_INFO_WRITER_ARM_H_
diff --git a/src/compiler/backend/arm64/code-generator-arm64.cc b/src/compiler/backend/arm64/code-generator-arm64.cc
new file mode 100644
index 0000000..0280994
--- /dev/null
+++ b/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -0,0 +1,3439 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/arm64/assembler-arm64-inl.h"
+#include "src/codegen/arm64/macro-assembler-arm64-inl.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/execution/frame-constants.h"
+#include "src/heap/memory-chunk.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// Adds Arm64-specific methods to convert InstructionOperands.
+class Arm64OperandConverter final : public InstructionOperandConverter {
+ public:
+  Arm64OperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  DoubleRegister InputFloat32Register(size_t index) {
+    return InputDoubleRegister(index).S();
+  }
+
+  DoubleRegister InputFloat64Register(size_t index) {
+    return InputDoubleRegister(index);
+  }
+
+  DoubleRegister InputSimd128Register(size_t index) {
+    return InputDoubleRegister(index).Q();
+  }
+
+  CPURegister InputFloat32OrZeroRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) {
+      DCHECK_EQ(0, bit_cast<int32_t>(InputFloat32(index)));
+      return wzr;
+    }
+    DCHECK(instr_->InputAt(index)->IsFPRegister());
+    return InputDoubleRegister(index).S();
+  }
+
+  CPURegister InputFloat64OrZeroRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) {
+      DCHECK_EQ(0, bit_cast<int64_t>(InputDouble(index)));
+      return xzr;
+    }
+    DCHECK(instr_->InputAt(index)->IsDoubleRegister());
+    return InputDoubleRegister(index);
+  }
+
+  size_t OutputCount() { return instr_->OutputCount(); }
+
+  DoubleRegister OutputFloat32Register() { return OutputDoubleRegister().S(); }
+
+  DoubleRegister OutputFloat64Register() { return OutputDoubleRegister(); }
+
+  DoubleRegister OutputSimd128Register() { return OutputDoubleRegister().Q(); }
+
+  Register InputRegister32(size_t index) {
+    return ToRegister(instr_->InputAt(index)).W();
+  }
+
+  Register InputOrZeroRegister32(size_t index) {
+    DCHECK(instr_->InputAt(index)->IsRegister() ||
+           (instr_->InputAt(index)->IsImmediate() && (InputInt32(index) == 0)));
+    if (instr_->InputAt(index)->IsImmediate()) {
+      return wzr;
+    }
+    return InputRegister32(index);
+  }
+
+  Register InputRegister64(size_t index) { return InputRegister(index); }
+
+  Register InputOrZeroRegister64(size_t index) {
+    DCHECK(instr_->InputAt(index)->IsRegister() ||
+           (instr_->InputAt(index)->IsImmediate() && (InputInt64(index) == 0)));
+    if (instr_->InputAt(index)->IsImmediate()) {
+      return xzr;
+    }
+    return InputRegister64(index);
+  }
+
+  Operand InputOperand(size_t index) {
+    return ToOperand(instr_->InputAt(index));
+  }
+
+  Operand InputOperand64(size_t index) { return InputOperand(index); }
+
+  Operand InputOperand32(size_t index) {
+    return ToOperand32(instr_->InputAt(index));
+  }
+
+  Register OutputRegister64() { return OutputRegister(); }
+
+  Register OutputRegister32() { return ToRegister(instr_->Output()).W(); }
+
+  Register TempRegister32(size_t index) {
+    return ToRegister(instr_->TempAt(index)).W();
+  }
+
+  Operand InputOperand2_32(size_t index) {
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+        return InputOperand32(index);
+      case kMode_Operand2_R_LSL_I:
+        return Operand(InputRegister32(index), LSL, InputInt5(index + 1));
+      case kMode_Operand2_R_LSR_I:
+        return Operand(InputRegister32(index), LSR, InputInt5(index + 1));
+      case kMode_Operand2_R_ASR_I:
+        return Operand(InputRegister32(index), ASR, InputInt5(index + 1));
+      case kMode_Operand2_R_ROR_I:
+        return Operand(InputRegister32(index), ROR, InputInt5(index + 1));
+      case kMode_Operand2_R_UXTB:
+        return Operand(InputRegister32(index), UXTB);
+      case kMode_Operand2_R_UXTH:
+        return Operand(InputRegister32(index), UXTH);
+      case kMode_Operand2_R_SXTB:
+        return Operand(InputRegister32(index), SXTB);
+      case kMode_Operand2_R_SXTH:
+        return Operand(InputRegister32(index), SXTH);
+      case kMode_Operand2_R_SXTW:
+        return Operand(InputRegister32(index), SXTW);
+      case kMode_MRI:
+      case kMode_MRR:
+      case kMode_Root:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputOperand2_64(size_t index) {
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+        return InputOperand64(index);
+      case kMode_Operand2_R_LSL_I:
+        return Operand(InputRegister64(index), LSL, InputInt6(index + 1));
+      case kMode_Operand2_R_LSR_I:
+        return Operand(InputRegister64(index), LSR, InputInt6(index + 1));
+      case kMode_Operand2_R_ASR_I:
+        return Operand(InputRegister64(index), ASR, InputInt6(index + 1));
+      case kMode_Operand2_R_ROR_I:
+        return Operand(InputRegister64(index), ROR, InputInt6(index + 1));
+      case kMode_Operand2_R_UXTB:
+        return Operand(InputRegister64(index), UXTB);
+      case kMode_Operand2_R_UXTH:
+        return Operand(InputRegister64(index), UXTH);
+      case kMode_Operand2_R_SXTB:
+        return Operand(InputRegister64(index), SXTB);
+      case kMode_Operand2_R_SXTH:
+        return Operand(InputRegister64(index), SXTH);
+      case kMode_Operand2_R_SXTW:
+        return Operand(InputRegister64(index), SXTW);
+      case kMode_MRI:
+      case kMode_MRR:
+      case kMode_Root:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(size_t index = 0) {
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+      case kMode_Operand2_R_LSR_I:
+      case kMode_Operand2_R_ASR_I:
+      case kMode_Operand2_R_ROR_I:
+      case kMode_Operand2_R_UXTB:
+      case kMode_Operand2_R_UXTH:
+      case kMode_Operand2_R_SXTB:
+      case kMode_Operand2_R_SXTH:
+      case kMode_Operand2_R_SXTW:
+        break;
+      case kMode_Root:
+        return MemOperand(kRootRegister, InputInt64(index));
+      case kMode_Operand2_R_LSL_I:
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
+                          LSL, InputInt32(index + 2));
+      case kMode_MRI:
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_MRR:
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
+    }
+    UNREACHABLE();
+  }
+
+  Operand ToOperand(InstructionOperand* op) {
+    if (op->IsRegister()) {
+      return Operand(ToRegister(op));
+    }
+    return ToImmediate(op);
+  }
+
+  Operand ToOperand32(InstructionOperand* op) {
+    if (op->IsRegister()) {
+      return Operand(ToRegister(op).W());
+    }
+    return ToImmediate(op);
+  }
+
+  Operand ToImmediate(InstructionOperand* operand) {
+    Constant constant = ToConstant(operand);
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Operand(constant.ToInt32());
+      case Constant::kInt64:
+        if (RelocInfo::IsWasmReference(constant.rmode())) {
+          return Operand(constant.ToInt64(), constant.rmode());
+        } else {
+          return Operand(constant.ToInt64());
+        }
+      case Constant::kFloat32:
+        return Operand(Operand::EmbeddedNumber(constant.ToFloat32()));
+      case Constant::kFloat64:
+        return Operand(Operand::EmbeddedNumber(constant.ToFloat64().value()));
+      case Constant::kExternalReference:
+        return Operand(constant.ToExternalReference());
+      case Constant::kCompressedHeapObject:  // Fall through.
+      case Constant::kHeapObject:
+        return Operand(constant.ToHeapObject());
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kRpoNumber:
+        UNREACHABLE();  // TODO(dcarney): RPO immediates on arm64.
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand ToMemOperand(InstructionOperand* op, TurboAssembler* tasm) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index(), tasm);
+  }
+
+  MemOperand SlotToMemOperand(int slot, TurboAssembler* tasm) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    if (offset.from_frame_pointer()) {
+      int from_sp = offset.offset() + frame_access_state()->GetSPToFPOffset();
+      // Convert FP-offsets to SP-offsets if it results in better code.
+      if (Assembler::IsImmLSUnscaled(from_sp) ||
+          Assembler::IsImmLSScaled(from_sp, 3)) {
+        offset = FrameOffset::FromStackPointer(from_sp);
+      }
+    }
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+};
+
+namespace {
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
+                       Register value, RecordWriteMode mode,
+                       StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(offset),
+        value_(value),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    if (COMPRESS_POINTERS_BOOL) {
+      __ DecompressTaggedPointer(value_, value_);
+    }
+    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, ne,
+                     exit());
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore lr if the frame was elided.
+      __ Push<TurboAssembler::kSignLR>(lr, padreg);
+      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset(), sp);
+    }
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
+      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
+    }
+  }
+
+ private:
+  Register const object_;
+  Operand const offset_;
+  Register const value_;
+  RecordWriteMode const mode_;
+  StubCallMode const stub_mode_;
+  bool must_save_lr_;
+  UnwindingInfoWriter* const unwinding_info_writer_;
+  Zone* zone_;
+};
+
+Condition FlagsConditionToCondition(FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kSignedLessThan:
+      return lt;
+    case kSignedGreaterThanOrEqual:
+      return ge;
+    case kSignedLessThanOrEqual:
+      return le;
+    case kSignedGreaterThan:
+      return gt;
+    case kUnsignedLessThan:
+      return lo;
+    case kUnsignedGreaterThanOrEqual:
+      return hs;
+    case kUnsignedLessThanOrEqual:
+      return ls;
+    case kUnsignedGreaterThan:
+      return hi;
+    case kFloatLessThanOrUnordered:
+      return lt;
+    case kFloatGreaterThanOrEqual:
+      return ge;
+    case kFloatLessThanOrEqual:
+      return ls;
+    case kFloatGreaterThanOrUnordered:
+      return hi;
+    case kFloatLessThan:
+      return lo;
+    case kFloatGreaterThanOrEqualOrUnordered:
+      return hs;
+    case kFloatLessThanOrEqualOrUnordered:
+      return le;
+    case kFloatGreaterThan:
+      return gt;
+    case kOverflow:
+      return vs;
+    case kNotOverflow:
+      return vc;
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      break;
+    case kPositiveOrZero:
+      return pl;
+    case kNegative:
+      return mi;
+  }
+  UNREACHABLE();
+}
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode, Instruction* instr,
+                                   Arm64OperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    Register poison = value.Is64Bits() ? kSpeculationPoisonRegister
+                                       : kSpeculationPoisonRegister.W();
+    codegen->tasm()->And(value, value, Operand(poison));
+  }
+}
+
+void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
+                             Arm64OperandConverter* i, VRegister output_reg) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  AddressingMode address_mode = AddressingModeField::decode(opcode);
+  if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) {
+    UseScratchRegisterScope temps(codegen->tasm());
+    Register address = temps.AcquireX();
+    switch (address_mode) {
+      case kMode_MRI:  // Fall through.
+      case kMode_MRR:
+        codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1));
+        break;
+      case kMode_Operand2_R_LSL_I:
+        codegen->tasm()->Add(address, i->InputRegister(0),
+                             i->InputOperand2_64(1));
+        break;
+      default:
+        // Note: poisoning is not needed for kMode_Root loads, as those loads
+        // target a fixed offset from the root register, which is set once
+        // when the VM is initialized.
+        UNREACHABLE();
+    }
+    codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister));
+    codegen->tasm()->Ldr(output_reg, MemOperand(address));
+  } else {
+    codegen->tasm()->Ldr(output_reg, i->MemoryOperand());
+  }
+}
+
+// Handles unary ops that work for float (scalar), double (scalar), or NEON.
+template <typename Fn>
+void EmitFpOrNeonUnop(TurboAssembler* tasm, Fn fn, Instruction* instr,
+                      Arm64OperandConverter i, VectorFormat scalar,
+                      VectorFormat vector) {
+  VectorFormat f = instr->InputAt(0)->IsSimd128Register() ? vector : scalar;
+
+  VRegister output = VRegister::Create(i.OutputDoubleRegister().code(), f);
+  VRegister input = VRegister::Create(i.InputDoubleRegister(0).code(), f);
+  (tasm->*fn)(output, input);
+}
+
+}  // namespace
+
+#define ASSEMBLE_SHIFT(asm_instr, width)                                    \
+  do {                                                                      \
+    if (instr->InputAt(1)->IsRegister()) {                                  \
+      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
+                   i.InputRegister##width(1));                              \
+    } else {                                                                \
+      uint32_t imm =                                                        \
+          static_cast<uint32_t>(i.InputOperand##width(1).ImmediateValue()); \
+      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
+                   imm % (width));                                          \
+    }                                                                       \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr, reg)                   \
+  do {                                                                 \
+    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
+    __ asm_instr(i.Output##reg(), i.TempRegister(0));                  \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, reg)                  \
+  do {                                                                 \
+    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
+    __ asm_instr(i.Input##reg(2), i.TempRegister(0));                  \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr, reg)       \
+  do {                                                                       \
+    Label exchange;                                                          \
+    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    __ Bind(&exchange);                                                      \
+    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
+    __ store_instr(i.TempRegister32(1), i.Input##reg(2), i.TempRegister(0)); \
+    __ Cbnz(i.TempRegister32(1), &exchange);                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, ext, \
+                                                 reg)                          \
+  do {                                                                         \
+    Label compareExchange;                                                     \
+    Label exit;                                                                \
+    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));         \
+    __ Bind(&compareExchange);                                                 \
+    __ load_instr(i.Output##reg(), i.TempRegister(0));                         \
+    __ Cmp(i.Output##reg(), Operand(i.Input##reg(2), ext));                    \
+    __ B(ne, &exit);                                                           \
+    __ store_instr(i.TempRegister32(1), i.Input##reg(3), i.TempRegister(0));   \
+    __ Cbnz(i.TempRegister32(1), &compareExchange);                            \
+    __ Bind(&exit);                                                            \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr, reg)       \
+  do {                                                                       \
+    Label binop;                                                             \
+    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    __ Bind(&binop);                                                         \
+    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
+    __ bin_instr(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
+    __ store_instr(i.TempRegister32(2), i.Temp##reg(1), i.TempRegister(0));  \
+    __ Cbnz(i.TempRegister32(2), &binop);                                    \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                        \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                         \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
+  } while (0)
+
+// If the shift value is an immediate, call asm_imm, taking the shift value
+// modulo 2^width. Otherwise, emit code to perform the modulus operation and
+// then call asm_shl.
+#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, format, asm_shl, gp)       \
+  do {                                                                      \
+    if (instr->InputAt(1)->IsImmediate()) {                                 \
+      __ asm_imm(i.OutputSimd128Register().format(),                        \
+                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
+    } else {                                                                \
+      UseScratchRegisterScope temps(tasm());                                \
+      VRegister tmp = temps.AcquireQ();                                     \
+      Register shift = temps.Acquire##gp();                                 \
+      constexpr int mask = (1 << width) - 1;                                \
+      __ And(shift, i.InputRegister32(1), mask);                            \
+      __ Dup(tmp.format(), shift);                                          \
+      __ asm_shl(i.OutputSimd128Register().format(),                        \
+                 i.InputSimd128Register(0).format(), tmp.format());         \
+    }                                                                       \
+  } while (0)
+
+// If the shift value is an immediate, we can call asm_imm, taking the shift
+// value modulo 2^width. Otherwise, emit code to perform the modulus
+// operation, and call asm_shl, passing in the negated shift value (which is
+// treated as a right shift).
+#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, format, asm_shl, gp)      \
+  do {                                                                      \
+    if (instr->InputAt(1)->IsImmediate()) {                                 \
+      __ asm_imm(i.OutputSimd128Register().format(),                        \
+                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
+    } else {                                                                \
+      UseScratchRegisterScope temps(tasm());                                \
+      VRegister tmp = temps.AcquireQ();                                     \
+      Register shift = temps.Acquire##gp();                                 \
+      constexpr int mask = (1 << width) - 1;                                \
+      __ And(shift, i.InputRegister32(1), mask);                            \
+      __ Dup(tmp.format(), shift);                                          \
+      __ Neg(tmp.format(), tmp.format());                                   \
+      __ asm_shl(i.OutputSimd128Register().format(),                        \
+                 i.InputSimd128Register(0).format(), tmp.format());         \
+    }                                                                       \
+  } while (0)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ Mov(sp, fp);
+  __ Pop<TurboAssembler::kAuthLR>(fp, lr);
+
+  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ RestoreFPAndLR();
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if the current frame is an arguments adaptor frame.
+  __ Ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ Cmp(scratch1,
+         Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ B(ne, &done);
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ Ldr(caller_args_count_reg,
+         MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
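+// Adjusts sp with Claim (grow) or Drop (shrink) so that the first unused
+// stack slot ends up at new_slot_above_sp, and keeps the frame access
+// state's SP delta in sync. The DCHECK requires an even slot delta, which
+// preserves 16-byte sp alignment (stack slots are 8 bytes on arm64).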
+void AdjustStackPointerForTailCall(TurboAssembler* tasm,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  DCHECK_EQ(stack_slot_delta % 2, 0);
+  if (stack_slot_delta > 0) {
+    tasm->Claim(stack_slot_delta);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    tasm->Drop(-stack_slot_delta);
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  DCHECK_EQ(first_unused_stack_slot % 2, 0);
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+  DCHECK(instr->IsTailCall());
+  InstructionOperandConverter g(this, instr);
+  int optional_padding_slot = g.InputInt32(instr->InputCount() - 2);
+  if (optional_padding_slot % 2) {
+    __ Poke(padreg, optional_padding_slot * kSystemPointerSize);
+  }
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.AcquireX();
+  __ ComputeCodeStartAddress(scratch);
+  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+//    1. read from memory the word that contains that bit, which can be found in
+//       the flags in the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero then it jumps to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.AcquireX();
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ LoadTaggedPointerField(
+      scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
+  __ Ldr(scratch.W(),
+         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
+  Label not_deoptimized;
+  __ Tbz(scratch.W(), Code::kMarkedForDeoptimizationBit, &not_deoptimized);
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET);
+  __ Bind(&not_deoptimized);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.AcquireX();
+
+  // Set a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  __ ComputeCodeStartAddress(scratch);
+  __ Cmp(kJavaScriptCallCodeStartRegister, scratch);
+  __ Csetm(kSpeculationPoisonRegister, eq);
+  __ Csdb();
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  UseScratchRegisterScope temps(tasm());
+  Register scratch = temps.AcquireX();
+
+  __ Mov(scratch, sp);
+  __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ And(scratch, scratch, kSpeculationPoisonRegister);
+  __ Mov(sp, scratch);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  Arm64OperandConverter i(this, instr);
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ CallCodeObject(reg);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        Register target = i.InputRegister(0);
+        __ Call(target);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ JumpCodeObject(reg);
+      }
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        Register target = i.InputRegister(0);
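+        // Route the indirect tail call through x17, which is excluded from
+        // the scratch register pool below; presumably so that, with BTI
+        // enabled, a branch through x16/x17 may target the same "BTI c"
+        // landing pad as an indirect call.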
+        UseScratchRegisterScope temps(tasm());
+        temps.Exclude(x17);
+        __ Mov(x17, target);
+        __ Jump(x17);
+      }
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      UseScratchRegisterScope temps(tasm());
+      temps.Exclude(x17);
+      __ Mov(x17, reg);
+      __ Jump(x17);
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        UseScratchRegisterScope scope(tasm());
+        Register temp = scope.AcquireX();
+        __ LoadTaggedPointerField(
+            temp, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ cmp(cp, temp);
+        __ Assert(eq, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
+      __ LoadTaggedPointerField(x2,
+                                FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(x2);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction:
+      // We don't need kArchPrepareCallCFunction on arm64 as the instruction
+      // selector has already performed a Claim to reserve space on the stack.
+      // Frame alignment is always 16 bytes, and the stack pointer is already
+      // 16-byte aligned, therefore we do not need to align the stack pointer
+      // by an unknown value, and it is safe to continue accessing the frame
+      // via the stack pointer.
+      UNREACHABLE();
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label return_location;
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        // Put the return address in a stack slot.
+        __ StoreReturnAddressInWasmExitFrame(&return_location);
+      }
+
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters, 0);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters, 0);
+      }
+      __ Bind(&return_location);
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, for certain architectures
+      // (e.g. ARM), there may be a stricter alignment requirement, causing
+      // the old SP to be saved on the stack. In those cases, we cannot
+      // calculate the SP delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK_EQ(i.InputRegister(0), x1);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ Debug("kArchAbortCSAAssert", 0, BREAK);
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+      break;
+    case kArchThrowTerminator:
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchNop:
+      // Don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ B(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchFramePointer:
+      __ mov(i.OutputRegister(), fp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ mov(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison to account for the size difference between an optimized
+      // frame and the unoptimized frames it contains.
+
+      Register lhs_register = sp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = i.TempRegister(0);
+        __ Sub(lhs_register, sp, offset);
+      }
+
+      constexpr size_t kValueIndex = 0;
+      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
+      __ Cmp(lhs_register, i.InputRegister(kValueIndex));
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode(),
+                           frame_access_state()->has_frame()
+                               ? kLRHasBeenSaved
+                               : kLRHasNotBeenSaved);
+
+      break;
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      AddressingMode addressing_mode =
+          AddressingModeField::decode(instr->opcode());
+      Register object = i.InputRegister(0);
+      Operand offset(0);
+      if (addressing_mode == kMode_MRI) {
+        offset = Operand(i.InputInt64(1));
+      } else {
+        DCHECK_EQ(addressing_mode, kMode_MRR);
+        offset = Operand(i.InputRegister(1));
+      }
+      Register value = i.InputRegister(2);
+      auto ool = zone()->New<OutOfLineRecordWrite>(
+          this, object, offset, value, mode, DetermineStubCallMode(),
+          &unwinding_info_writer_);
+      __ StoreTaggedField(value, MemOperand(object, offset));
+      __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
+                       eq, ool->entry());
+      __ Bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base = offset.from_stack_pointer() ? sp : fp;
+      __ Add(i.OutputRegister(0), base, Operand(offset.offset()));
+      break;
+    }
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kArm64Float32RoundDown:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatS,
+                       kFormat4S);
+      break;
+    case kArm64Float64RoundDown:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatD,
+                       kFormat2D);
+      break;
+    case kArm64Float32RoundUp:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatS,
+                       kFormat4S);
+      break;
+    case kArm64Float64RoundUp:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatD,
+                       kFormat2D);
+      break;
+    case kArm64Float64RoundTiesAway:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frinta, instr, i, kFormatD,
+                       kFormat2D);
+      break;
+    case kArm64Float32RoundTruncate:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatS,
+                       kFormat4S);
+      break;
+    case kArm64Float64RoundTruncate:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatD,
+                       kFormat2D);
+      break;
+    case kArm64Float32RoundTiesEven:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatS,
+                       kFormat4S);
+      break;
+    case kArm64Float64RoundTiesEven:
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatD,
+                       kFormat2D);
+      break;
+    case kArm64Add:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        __ Adds(i.OutputRegister(), i.InputOrZeroRegister64(0),
+                i.InputOperand2_64(1));
+      } else {
+        __ Add(i.OutputRegister(), i.InputOrZeroRegister64(0),
+               i.InputOperand2_64(1));
+      }
+      break;
+    case kArm64Add32:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        __ Adds(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+                i.InputOperand2_32(1));
+      } else {
+        __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+               i.InputOperand2_32(1));
+      }
+      break;
+    case kArm64And:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        // The ands instruction only sets N and Z, so only the following
+        // conditions make sense.
+        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
+               FlagsConditionField::decode(opcode) == kNotEqual ||
+               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
+               FlagsConditionField::decode(opcode) == kNegative);
+        __ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
+                i.InputOperand2_64(1));
+      } else {
+        __ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
+               i.InputOperand2_64(1));
+      }
+      break;
+    case kArm64And32:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        // The ands instruction only sets N and Z, so only the following
+        // conditions make sense.
+        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
+               FlagsConditionField::decode(opcode) == kNotEqual ||
+               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
+               FlagsConditionField::decode(opcode) == kNegative);
+        __ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+                i.InputOperand2_32(1));
+      } else {
+        __ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+               i.InputOperand2_32(1));
+      }
+      break;
+    case kArm64Bic:
+      __ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
+             i.InputOperand2_64(1));
+      break;
+    case kArm64Bic32:
+      __ Bic(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+             i.InputOperand2_32(1));
+      break;
+    case kArm64Mul:
+      __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      break;
+    case kArm64Mul32:
+      __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
+      break;
+    case kArm64Saddlp: {
+      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
+      __ Saddlp(i.OutputSimd128Register().Format(dst_f),
+                i.InputSimd128Register(0).Format(src_f));
+      break;
+    }
+    case kArm64Uaddlp: {
+      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
+      __ Uaddlp(i.OutputSimd128Register().Format(dst_f),
+                i.InputSimd128Register(0).Format(src_f));
+      break;
+    }
+    case kArm64Smull: {
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Smull(i.OutputRegister(), i.InputRegister32(0),
+                 i.InputRegister32(1));
+      } else {
+        DCHECK(instr->InputAt(0)->IsSimd128Register());
+        VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+        VectorFormat src_f = VectorFormatHalfWidth(dst_f);
+        __ Smull(i.OutputSimd128Register().Format(dst_f),
+                 i.InputSimd128Register(0).Format(src_f),
+                 i.InputSimd128Register(1).Format(src_f));
+      }
+      break;
+    }
+    case kArm64Smull2: {
+      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
+      __ Smull2(i.OutputSimd128Register().Format(dst_f),
+                i.InputSimd128Register(0).Format(src_f),
+                i.InputSimd128Register(1).Format(src_f));
+      break;
+    }
+    case kArm64Umull: {
+      if (instr->InputAt(0)->IsRegister()) {
+        __ Umull(i.OutputRegister(), i.InputRegister32(0),
+                 i.InputRegister32(1));
+      } else {
+        DCHECK(instr->InputAt(0)->IsSimd128Register());
+        VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+        VectorFormat src_f = VectorFormatHalfWidth(dst_f);
+        __ Umull(i.OutputSimd128Register().Format(dst_f),
+                 i.InputSimd128Register(0).Format(src_f),
+                 i.InputSimd128Register(1).Format(src_f));
+      }
+      break;
+    }
+    case kArm64Umull2: {
+      VectorFormat dst_f = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat src_f = VectorFormatHalfWidthDoubleLanes(dst_f);
+      __ Umull2(i.OutputSimd128Register().Format(dst_f),
+                i.InputSimd128Register(0).Format(src_f),
+                i.InputSimd128Register(1).Format(src_f));
+      break;
+    }
+    case kArm64Madd:
+      __ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+              i.InputRegister(2));
+      break;
+    case kArm64Madd32:
+      __ Madd(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
+              i.InputRegister32(2));
+      break;
+    case kArm64Msub:
+      __ Msub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+              i.InputRegister(2));
+      break;
+    case kArm64Msub32:
+      __ Msub(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
+              i.InputRegister32(2));
+      break;
+    case kArm64Mneg:
+      __ Mneg(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      break;
+    case kArm64Mneg32:
+      __ Mneg(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
+      break;
+    case kArm64Idiv:
+      __ Sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      break;
+    case kArm64Idiv32:
+      __ Sdiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
+      break;
+    case kArm64Udiv:
+      __ Udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      break;
+    case kArm64Udiv32:
+      __ Udiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
+      break;
+    case kArm64Imod: {
+      UseScratchRegisterScope scope(tasm());
+      Register temp = scope.AcquireX();
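+      // Arm64 has no integer remainder instruction, so compute
+      // dividend - (dividend / divisor) * divisor with Sdiv followed by Msub.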
+      __ Sdiv(temp, i.InputRegister(0), i.InputRegister(1));
+      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
+      break;
+    }
+    case kArm64Imod32: {
+      UseScratchRegisterScope scope(tasm());
+      Register temp = scope.AcquireW();
+      __ Sdiv(temp, i.InputRegister32(0), i.InputRegister32(1));
+      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
+              i.InputRegister32(0));
+      break;
+    }
+    case kArm64Umod: {
+      UseScratchRegisterScope scope(tasm());
+      Register temp = scope.AcquireX();
+      __ Udiv(temp, i.InputRegister(0), i.InputRegister(1));
+      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
+      break;
+    }
+    case kArm64Umod32: {
+      UseScratchRegisterScope scope(tasm());
+      Register temp = scope.AcquireW();
+      __ Udiv(temp, i.InputRegister32(0), i.InputRegister32(1));
+      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
+              i.InputRegister32(0));
+      break;
+    }
+    case kArm64Not:
+      __ Mvn(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kArm64Not32:
+      __ Mvn(i.OutputRegister32(), i.InputOperand32(0));
+      break;
+    case kArm64Or:
+      __ Orr(i.OutputRegister(), i.InputOrZeroRegister64(0),
+             i.InputOperand2_64(1));
+      break;
+    case kArm64Or32:
+      __ Orr(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+             i.InputOperand2_32(1));
+      break;
+    case kArm64Orn:
+      __ Orn(i.OutputRegister(), i.InputOrZeroRegister64(0),
+             i.InputOperand2_64(1));
+      break;
+    case kArm64Orn32:
+      __ Orn(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+             i.InputOperand2_32(1));
+      break;
+    case kArm64Eor:
+      __ Eor(i.OutputRegister(), i.InputOrZeroRegister64(0),
+             i.InputOperand2_64(1));
+      break;
+    case kArm64Eor32:
+      __ Eor(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+             i.InputOperand2_32(1));
+      break;
+    case kArm64Eon:
+      __ Eon(i.OutputRegister(), i.InputOrZeroRegister64(0),
+             i.InputOperand2_64(1));
+      break;
+    case kArm64Eon32:
+      __ Eon(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+             i.InputOperand2_32(1));
+      break;
+    case kArm64Sub:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        __ Subs(i.OutputRegister(), i.InputOrZeroRegister64(0),
+                i.InputOperand2_64(1));
+      } else {
+        __ Sub(i.OutputRegister(), i.InputOrZeroRegister64(0),
+               i.InputOperand2_64(1));
+      }
+      break;
+    case kArm64Sub32:
+      if (FlagsModeField::decode(opcode) != kFlags_none) {
+        __ Subs(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+                i.InputOperand2_32(1));
+      } else {
+        __ Sub(i.OutputRegister32(), i.InputOrZeroRegister32(0),
+               i.InputOperand2_32(1));
+      }
+      break;
+    case kArm64Lsl:
+      ASSEMBLE_SHIFT(Lsl, 64);
+      break;
+    case kArm64Lsl32:
+      ASSEMBLE_SHIFT(Lsl, 32);
+      break;
+    case kArm64Lsr:
+      ASSEMBLE_SHIFT(Lsr, 64);
+      break;
+    case kArm64Lsr32:
+      ASSEMBLE_SHIFT(Lsr, 32);
+      break;
+    case kArm64Asr:
+      ASSEMBLE_SHIFT(Asr, 64);
+      break;
+    case kArm64Asr32:
+      ASSEMBLE_SHIFT(Asr, 32);
+      break;
+    case kArm64Ror:
+      ASSEMBLE_SHIFT(Ror, 64);
+      break;
+    case kArm64Ror32:
+      ASSEMBLE_SHIFT(Ror, 32);
+      break;
+    case kArm64Mov32:
+      __ Mov(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Sxtb32:
+      __ Sxtb(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Sxth32:
+      __ Sxth(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Sxtb:
+      __ Sxtb(i.OutputRegister(), i.InputRegister32(0));
+      break;
+    case kArm64Sxth:
+      __ Sxth(i.OutputRegister(), i.InputRegister32(0));
+      break;
+    case kArm64Sxtw:
+      __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
+      break;
+    case kArm64Sbfx:
+      __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
+              i.InputInt6(2));
+      break;
+    case kArm64Sbfx32:
+      __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
+              i.InputInt5(2));
+      break;
+    case kArm64Ubfx:
+      __ Ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
+              i.InputInt32(2));
+      break;
+    case kArm64Ubfx32:
+      __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
+              i.InputInt32(2));
+      break;
+    case kArm64Ubfiz32:
+      __ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
+               i.InputInt5(2));
+      break;
+    case kArm64Bfi:
+      __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
+             i.InputInt6(3));
+      break;
+    case kArm64TestAndBranch32:
+    case kArm64TestAndBranch:
+      // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
+      break;
+    case kArm64CompareAndBranch32:
+    case kArm64CompareAndBranch:
+      // Pseudo instruction handled in AssembleArchBranch.
+      break;
+    case kArm64Claim: {
+      int count = i.InputInt32(0);
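+      // The claim count is in stack slots and must be even so that sp stays
+      // 16-byte aligned; AssertSpAligned checks the alignment below.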
+      DCHECK_EQ(count % 2, 0);
+      __ AssertSpAligned();
+      if (count > 0) {
+        __ Claim(count);
+        frame_access_state()->IncreaseSPDelta(count);
+      }
+      break;
+    }
+    case kArm64Poke: {
+      Operand operand(i.InputInt32(1) * kSystemPointerSize);
+      if (instr->InputAt(0)->IsSimd128Register()) {
+        __ Poke(i.InputSimd128Register(0), operand);
+      } else if (instr->InputAt(0)->IsFPRegister()) {
+        __ Poke(i.InputFloat64Register(0), operand);
+      } else {
+        __ Poke(i.InputOrZeroRegister64(0), operand);
+      }
+      break;
+    }
+    case kArm64PokePair: {
+      int slot = i.InputInt32(2) - 1;
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ PokePair(i.InputFloat64Register(1), i.InputFloat64Register(0),
+                    slot * kSystemPointerSize);
+      } else {
+        __ PokePair(i.InputRegister(1), i.InputRegister(0),
+                    slot * kSystemPointerSize);
+      }
+      break;
+    }
+    case kArm64Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ Ldr(i.OutputSimd128Register(), MemOperand(fp, offset));
+        }
+      } else {
+        __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
+      }
+      break;
+    }
+    case kArm64Clz:
+      __ Clz(i.OutputRegister64(), i.InputRegister64(0));
+      break;
+    case kArm64Clz32:
+      __ Clz(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Rbit:
+      __ Rbit(i.OutputRegister64(), i.InputRegister64(0));
+      break;
+    case kArm64Rbit32:
+      __ Rbit(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Rev:
+      __ Rev(i.OutputRegister64(), i.InputRegister64(0));
+      break;
+    case kArm64Rev32:
+      __ Rev(i.OutputRegister32(), i.InputRegister32(0));
+      break;
+    case kArm64Cmp:
+      __ Cmp(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
+      break;
+    case kArm64Cmp32:
+      __ Cmp(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
+      break;
+    case kArm64Cmn:
+      __ Cmn(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
+      break;
+    case kArm64Cmn32:
+      __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
+      break;
+    case kArm64Cnt: {
+      VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
+      __ Cnt(i.OutputSimd128Register().Format(f),
+             i.InputSimd128Register(0).Format(f));
+      break;
+    }
+    case kArm64Tst:
+      __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
+      break;
+    case kArm64Tst32:
+      __ Tst(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
+      break;
+    case kArm64Float32Cmp:
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));
+      } else {
+        DCHECK(instr->InputAt(1)->IsImmediate());
+        // 0.0 is the only immediate supported by fcmp instructions.
+        DCHECK_EQ(0.0f, i.InputFloat32(1));
+        __ Fcmp(i.InputFloat32Register(0), i.InputFloat32(1));
+      }
+      break;
+    case kArm64Float32Add:
+      __ Fadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    case kArm64Float32Sub:
+      __ Fsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    case kArm64Float32Mul:
+      __ Fmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    case kArm64Float32Div:
+      __ Fdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    case kArm64Float32Abs:
+      __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      break;
+    case kArm64Float32Neg:
+      __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      break;
+    case kArm64Float32Sqrt:
+      __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      break;
+    case kArm64Float32Fnmul: {
+      __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
+               i.InputFloat32Register(1));
+      break;
+    }
+    case kArm64Float64Cmp:
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        DCHECK(instr->InputAt(1)->IsImmediate());
+        // 0.0 is the only immediate supported by fcmp instructions.
+        DCHECK_EQ(0.0, i.InputDouble(1));
+        __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));
+      }
+      break;
+    case kArm64Float64Add:
+      __ Fadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    case kArm64Float64Sub:
+      __ Fsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    case kArm64Float64Mul:
+      __ Fmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    case kArm64Float64Div:
+      __ Fdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    case kArm64Float64Mod: {
+      // TODO(turbofan): implement directly.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      DCHECK_EQ(d0, i.InputDoubleRegister(0));
+      DCHECK_EQ(d1, i.InputDoubleRegister(1));
+      DCHECK_EQ(d0, i.OutputDoubleRegister());
+      // TODO(turbofan): make sure this saves all relevant registers.
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      break;
+    }
+    case kArm64Float32Max: {
+      __ Fmax(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    }
+    case kArm64Float64Max: {
+      __ Fmax(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    }
+    case kArm64Float32Min: {
+      __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),
+              i.InputFloat32Register(1));
+      break;
+    }
+    case kArm64Float64Min: {
+      __ Fmin(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+              i.InputDoubleRegister(1));
+      break;
+    }
+    case kArm64Float64Abs:
+      __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float64Neg:
+      __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float64Sqrt:
+      __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float64Fnmul:
+      __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kArm64Float32ToFloat64:
+      __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
+      break;
+    case kArm64Float64ToFloat32:
+      __ Fcvt(i.OutputDoubleRegister().S(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float32ToInt32: {
+      __ Fcvtzs(i.OutputRegister32(), i.InputFloat32Register(0));
+      bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_i32) {
+        // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+        // because INT32_MIN allows easier out-of-bounds detection.
+        __ Cmn(i.OutputRegister32(), 1);
+        __ Csinc(i.OutputRegister32(), i.OutputRegister32(),
+                 i.OutputRegister32(), vc);
+      }
+      break;
+    }
+    case kArm64Float64ToInt32:
+      __ Fcvtzs(i.OutputRegister32(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float32ToUint32: {
+      __ Fcvtzu(i.OutputRegister32(), i.InputFloat32Register(0));
+      bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_u32) {
+        // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+        // because 0 allows easier out-of-bounds detection.
+        __ Cmn(i.OutputRegister32(), 1);
+        __ Adc(i.OutputRegister32(), i.OutputRegister32(), Operand(0));
+      }
+      break;
+    }
+    case kArm64Float64ToUint32:
+      __ Fcvtzu(i.OutputRegister32(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Float32ToInt64:
+      __ Fcvtzs(i.OutputRegister64(), i.InputFloat32Register(0));
+      if (i.OutputCount() > 1) {
+        // Check for inputs below INT64_MIN and NaN.
+        __ Fcmp(i.InputFloat32Register(0), static_cast<float>(INT64_MIN));
+        // Check overflow.
+        // -1 value is used to indicate a possible overflow which will occur
+        // when subtracting (-1) from the provided INT64_MAX operand.
+        // OutputRegister(1) is set to 0 if the input was out of range or NaN.
+        __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
+        __ Cset(i.OutputRegister(1), vc);
+      }
+      break;
+    case kArm64Float64ToInt64:
+      __ Fcvtzs(i.OutputRegister(0), i.InputDoubleRegister(0));
+      if (i.OutputCount() > 1) {
+        // See kArm64Float32ToInt64 for a detailed description.
+        __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT64_MIN));
+        __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
+        __ Cset(i.OutputRegister(1), vc);
+      }
+      break;
+    case kArm64Float32ToUint64:
+      __ Fcvtzu(i.OutputRegister64(), i.InputFloat32Register(0));
+      if (i.OutputCount() > 1) {
+        // See kArm64Float32ToInt64 for a detailed description.
+        __ Fcmp(i.InputFloat32Register(0), -1.0);
+        __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
+        __ Cset(i.OutputRegister(1), ne);
+      }
+      break;
+    case kArm64Float64ToUint64:
+      __ Fcvtzu(i.OutputRegister64(), i.InputDoubleRegister(0));
+      if (i.OutputCount() > 1) {
+        // See kArm64Float32ToInt64 for a detailed description.
+        __ Fcmp(i.InputDoubleRegister(0), -1.0);
+        __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
+        __ Cset(i.OutputRegister(1), ne);
+      }
+      break;
+    case kArm64Int32ToFloat32:
+      __ Scvtf(i.OutputFloat32Register(), i.InputRegister32(0));
+      break;
+    case kArm64Int32ToFloat64:
+      __ Scvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
+      break;
+    case kArm64Int64ToFloat32:
+      __ Scvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
+      break;
+    case kArm64Int64ToFloat64:
+      __ Scvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
+      break;
+    case kArm64Uint32ToFloat32:
+      __ Ucvtf(i.OutputFloat32Register(), i.InputRegister32(0));
+      break;
+    case kArm64Uint32ToFloat64:
+      __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
+      break;
+    case kArm64Uint64ToFloat32:
+      __ Ucvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
+      break;
+    case kArm64Uint64ToFloat64:
+      __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
+      break;
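+    // The next four cases access the float64 bit pattern in 32-bit halves:
+    // the low word via the S view / lane 0, the high word via lane 1 of the
+    // V2S view.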
+    case kArm64Float64ExtractLowWord32:
+      __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
+      break;
+    case kArm64Float64ExtractHighWord32:
+      __ Umov(i.OutputRegister32(), i.InputFloat64Register(0).V2S(), 1);
+      break;
+    case kArm64Float64InsertLowWord32:
+      DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
+      __ Ins(i.OutputFloat64Register().V2S(), 0, i.InputRegister32(1));
+      break;
+    case kArm64Float64InsertHighWord32:
+      DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
+      __ Ins(i.OutputFloat64Register().V2S(), 1, i.InputRegister32(1));
+      break;
+    case kArm64Float64MoveU64:
+      __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
+      break;
+    case kArm64Float64SilenceNaN:
+      __ CanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArm64U64MoveFloat64:
+      __ Fmov(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kArm64Ldrb:
+      __ Ldrb(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64Ldrsb:
+      __ Ldrsb(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64Strb:
+      __ Strb(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
+      break;
+    case kArm64Ldrh:
+      __ Ldrh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64Ldrsh:
+      __ Ldrsh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64Strh:
+      __ Strh(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
+      break;
+    case kArm64Ldrsw:
+      __ Ldrsw(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64LdrW:
+      __ Ldr(i.OutputRegister32(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64StrW:
+      __ Str(i.InputOrZeroRegister32(0), i.MemoryOperand(1));
+      break;
+    case kArm64Ldr:
+      __ Ldr(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64LdrDecompressTaggedSigned:
+      __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64LdrDecompressTaggedPointer:
+      __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64LdrDecompressAnyTagged:
+      __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kArm64Str:
+      __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
+      break;
+    case kArm64StrCompressTagged:
+      __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
+      break;
+    case kArm64LdrS:
+      EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S());
+      break;
+    case kArm64StrS:
+      __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
+      break;
+    case kArm64LdrD:
+      EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister());
+      break;
+    case kArm64StrD:
+      __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
+      break;
+    case kArm64LdrQ:
+      __ Ldr(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    case kArm64StrQ:
+      __ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
+      break;
+    case kArm64DmbIsh:
+      __ Dmb(InnerShareable, BarrierAll);
+      break;
+    case kArm64DsbIsb:
+      __ Dsb(FullSystem, BarrierAll);
+      __ Isb();
+      break;
+    case kArchWordPoisonOnSpeculation:
+      __ And(i.OutputRegister(0), i.InputRegister(0),
+             Operand(kSpeculationPoisonRegister));
+      break;
+    case kWord32AtomicLoadInt8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
+      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicLoadUint8:
+    case kArm64Word64AtomicLoadUint8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
+      break;
+    case kWord32AtomicLoadInt16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
+      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicLoadUint16:
+    case kArm64Word64AtomicLoadUint16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
+      break;
+    case kWord32AtomicLoadWord32:
+    case kArm64Word64AtomicLoadUint32:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register32);
+      break;
+    case kArm64Word64AtomicLoadUint64:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register);
+      break;
+    case kWord32AtomicStoreWord8:
+    case kArm64Word64AtomicStoreWord8:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrb, Register32);
+      break;
+    case kWord32AtomicStoreWord16:
+    case kArm64Word64AtomicStoreWord16:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrh, Register32);
+      break;
+    case kWord32AtomicStoreWord32:
+    case kArm64Word64AtomicStoreWord32:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register32);
+      break;
+    case kArm64Word64AtomicStoreWord64:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register);
+      break;
+    case kWord32AtomicExchangeInt8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
+      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicExchangeUint8:
+    case kArm64Word64AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
+      break;
+    case kWord32AtomicExchangeInt16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
+      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicExchangeUint16:
+    case kArm64Word64AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
+      break;
+    case kWord32AtomicExchangeWord32:
+    case kArm64Word64AtomicExchangeUint32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register32);
+      break;
+    case kArm64Word64AtomicExchangeUint64:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register);
+      break;
+    case kWord32AtomicCompareExchangeInt8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
+                                               Register32);
+      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicCompareExchangeUint8:
+    case kArm64Word64AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
+                                               Register32);
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
+                                               Register32);
+      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kWord32AtomicCompareExchangeUint16:
+    case kArm64Word64AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
+                                               Register32);
+      break;
+    case kWord32AtomicCompareExchangeWord32:
+    case kArm64Word64AtomicCompareExchangeUint32:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTW, Register32);
+      break;
+    case kArm64Word64AtomicCompareExchangeUint64:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTX, Register);
+      break;
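+// ATOMIC_BINOP_CASE expands one atomic read-modify-write operation into the
+// cases for all access widths (8/16/32/64 bit), reusing the exclusive
+// load/store loop from ASSEMBLE_ATOMIC_BINOP and sign-extending the result
+// for the signed narrow variants.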
+#define ATOMIC_BINOP_CASE(op, inst)                          \
+  case kWord32Atomic##op##Int8:                              \
+    ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
+    __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));       \
+    break;                                                   \
+  case kWord32Atomic##op##Uint8:                             \
+  case kArm64Word64Atomic##op##Uint8:                        \
+    ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
+    break;                                                   \
+  case kWord32Atomic##op##Int16:                             \
+    ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
+    __ Sxth(i.OutputRegister(0), i.OutputRegister(0));       \
+    break;                                                   \
+  case kWord32Atomic##op##Uint16:                            \
+  case kArm64Word64Atomic##op##Uint16:                       \
+    ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
+    break;                                                   \
+  case kWord32Atomic##op##Word32:                            \
+  case kArm64Word64Atomic##op##Uint32:                       \
+    ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register32);   \
+    break;                                                   \
+  case kArm64Word64Atomic##op##Uint64:                       \
+    ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register);     \
+    break;
+      ATOMIC_BINOP_CASE(Add, Add)
+      ATOMIC_BINOP_CASE(Sub, Sub)
+      ATOMIC_BINOP_CASE(And, And)
+      ATOMIC_BINOP_CASE(Or, Orr)
+      ATOMIC_BINOP_CASE(Xor, Eor)
+#undef ATOMIC_BINOP_CASE
+#undef ASSEMBLE_SHIFT
+#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
+#undef ASSEMBLE_ATOMIC_STORE_INTEGER
+#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+
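+// The SIMD_*_CASE macros expand a NEON unary or binary opcode into its switch
+// case. SIMD_DESTRUCTIVE_BINOP_CASE covers instructions such as the fused
+// multiply-accumulate forms that overwrite their accumulator, so the DCHECK
+// requires the output register to alias input 0.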
+#define SIMD_UNOP_CASE(Op, Instr, FORMAT)            \
+  case Op:                                           \
+    __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
+             i.InputSimd128Register(0).V##FORMAT()); \
+    break;
+#define SIMD_BINOP_CASE(Op, Instr, FORMAT)           \
+  case Op:                                           \
+    __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
+             i.InputSimd128Register(0).V##FORMAT(),  \
+             i.InputSimd128Register(1).V##FORMAT()); \
+    break;
+#define SIMD_DESTRUCTIVE_BINOP_CASE(Op, Instr, FORMAT)     \
+  case Op: {                                               \
+    VRegister dst = i.OutputSimd128Register().V##FORMAT(); \
+    DCHECK_EQ(dst, i.InputSimd128Register(0).V##FORMAT()); \
+    __ Instr(dst, i.InputSimd128Register(1).V##FORMAT(),   \
+             i.InputSimd128Register(2).V##FORMAT());       \
+    break;                                                 \
+  }
+
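+    // Sxtl/Uxtl sign- resp. zero-extend the low half of the input vector into
+    // full-width lanes; the *2 variants do the same with the high half.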
+    case kArm64Sxtl: {
+      VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat narrow = VectorFormatHalfWidth(wide);
+      __ Sxtl(i.OutputSimd128Register().Format(wide),
+              i.InputSimd128Register(0).Format(narrow));
+      break;
+    }
+    case kArm64Sxtl2: {
+      VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
+      __ Sxtl2(i.OutputSimd128Register().Format(wide),
+               i.InputSimd128Register(0).Format(narrow));
+      break;
+    }
+    case kArm64Uxtl: {
+      VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat narrow = VectorFormatHalfWidth(wide);
+      __ Uxtl(i.OutputSimd128Register().Format(wide),
+              i.InputSimd128Register(0).Format(narrow));
+      break;
+    }
+    case kArm64Uxtl2: {
+      VectorFormat wide = VectorFormatFillQ(MiscField::decode(opcode));
+      VectorFormat narrow = VectorFormatHalfWidthDoubleLanes(wide);
+      __ Uxtl2(i.OutputSimd128Register().Format(wide),
+               i.InputSimd128Register(0).Format(narrow));
+      break;
+    }
+    case kArm64F64x2Splat: {
+      __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
+      break;
+    }
+    case kArm64F64x2ExtractLane: {
+      __ Mov(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D(),
+             i.InputInt8(1));
+      break;
+    }
+    case kArm64F64x2ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V2D(),
+                src1 = i.InputSimd128Register(0).V2D();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V2D(), 0);
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D);
+      SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D);
+      SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Div, Fdiv, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Min, Fmin, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Max, Fmax, 2D);
+      SIMD_BINOP_CASE(kArm64F64x2Eq, Fcmeq, 2D);
+    case kArm64F64x2Ne: {
+      VRegister dst = i.OutputSimd128Register().V2D();
+      __ Fcmeq(dst, i.InputSimd128Register(0).V2D(),
+               i.InputSimd128Register(1).V2D());
+      __ Mvn(dst, dst);
+      break;
+    }
+    case kArm64F64x2Lt: {
+      __ Fcmgt(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
+               i.InputSimd128Register(0).V2D());
+      break;
+    }
+    case kArm64F64x2Le: {
+      __ Fcmge(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
+               i.InputSimd128Register(0).V2D());
+      break;
+    }
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfma, Fmla, 2D);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F64x2Qfms, Fmls, 2D);
+    case kArm64F64x2Pmin: {
+      VRegister dst = i.OutputSimd128Register().V2D();
+      VRegister lhs = i.InputSimd128Register(0).V2D();
+      VRegister rhs = i.InputSimd128Register(1).V2D();
+      // f64x2.pmin(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f64x2.lt(rhs,lhs))
+      // = v128.bitselect(rhs, lhs, f64x2.gt(lhs,rhs))
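+      // Fcmgt sets all bits of a lane where lhs > rhs; Bsl then selects rhs
+      // for exactly those lanes and lhs for the rest, which implements the
+      // bitselect above.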
+      __ Fcmgt(dst, lhs, rhs);
+      __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+      break;
+    }
+    case kArm64F64x2Pmax: {
+      VRegister dst = i.OutputSimd128Register().V2D();
+      VRegister lhs = i.InputSimd128Register(0).V2D();
+      VRegister rhs = i.InputSimd128Register(1).V2D();
+      // f64x2.pmax(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f64x2.gt(rhs, lhs))
+      __ Fcmgt(dst, rhs, lhs);
+      __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+      break;
+    }
+    case kArm64F32x4Splat: {
+      __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
+      break;
+    }
+    case kArm64F32x4ExtractLane: {
+      __ Mov(i.OutputSimd128Register().S(), i.InputSimd128Register(0).V4S(),
+             i.InputInt8(1));
+      break;
+    }
+    case kArm64F32x4ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V4S(),
+                src1 = i.InputSimd128Register(0).V4S();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V4S(), 0);
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64F32x4SConvertI32x4, Scvtf, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
+      SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S);
+      SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S);
+    case kArm64F32x4Ne: {
+      VRegister dst = i.OutputSimd128Register().V4S();
+      __ Fcmeq(dst, i.InputSimd128Register(0).V4S(),
+               i.InputSimd128Register(1).V4S());
+      __ Mvn(dst, dst);
+      break;
+    }
+    case kArm64F32x4Lt: {
+      __ Fcmgt(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
+               i.InputSimd128Register(0).V4S());
+      break;
+    }
+    case kArm64F32x4Le: {
+      __ Fcmge(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
+               i.InputSimd128Register(0).V4S());
+      break;
+    }
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfma, Fmla, 4S);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64F32x4Qfms, Fmls, 4S);
+    case kArm64F32x4Pmin: {
+      VRegister dst = i.OutputSimd128Register().V4S();
+      VRegister lhs = i.InputSimd128Register(0).V4S();
+      VRegister rhs = i.InputSimd128Register(1).V4S();
+      // f32x4.pmin(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.lt(rhs, lhs))
+      // = v128.bitselect(rhs, lhs, f32x4.gt(lhs, rhs))
+      __ Fcmgt(dst, lhs, rhs);
+      __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+      break;
+    }
+    case kArm64F32x4Pmax: {
+      VRegister dst = i.OutputSimd128Register().V4S();
+      VRegister lhs = i.InputSimd128Register(0).V4S();
+      VRegister rhs = i.InputSimd128Register(1).V4S();
+      // f32x4.pmax(lhs, rhs)
+      // = v128.bitselect(rhs, lhs, f32x4.gt(rhs, lhs))
+      __ Fcmgt(dst, rhs, lhs);
+      __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
+      break;
+    }
+    case kArm64I64x2Splat: {
+      __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
+      break;
+    }
+    case kArm64I64x2ExtractLane: {
+      __ Mov(i.OutputRegister64(), i.InputSimd128Register(0).V2D(),
+             i.InputInt8(1));
+      break;
+    }
+    case kArm64I64x2ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V2D(),
+                src1 = i.InputSimd128Register(0).V2D();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputRegister64(2));
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
+    case kArm64I64x2Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 6, V2D, Sshl, X);
+      break;
+    }
+    case kArm64I64x2ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 6, V2D, Sshl, X);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D);
+      SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D);
+    case kArm64I64x2Mul: {
+      UseScratchRegisterScope scope(tasm());
+      VRegister dst = i.OutputSimd128Register();
+      VRegister src1 = i.InputSimd128Register(0);
+      VRegister src2 = i.InputSimd128Register(1);
+      VRegister tmp1 = scope.AcquireSameSizeAs(dst);
+      VRegister tmp2 = scope.AcquireSameSizeAs(dst);
+      VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));
+
+      // This 2x64-bit multiplication is performed with several 32-bit
+      // multiplications.
+
+      // 64-bit numbers x and y, can be represented as:
+      //   x = a + 2^32(b)
+      //   y = c + 2^32(d)
+
+      // A 64-bit multiplication is:
+      //   x * y = ac + 2^32(ad + bc) + 2^64(bd)
+      // Note: the `2^64(bd)` term can be ignored; it is too large to fit in
+      // 64 bits.
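+      // For example (one lane, illustrative): with x = 0x00000002'00000003
+      // (a = 3, b = 2) and y = 0x00000004'00000005 (c = 5, d = 4),
+      // ac = 15 and ad + bc = 12 + 10 = 22, so
+      // x * y mod 2^64 = 22 * 2^32 + 15 = 0x00000016'0000000F.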
+
+      // This sequence implements a 2x64-bit multiply, where the registers
+      // `src1` and `src2` are split up into 32-bit components:
+      //   src1 = |d|c|b|a|
+      //   src2 = |h|g|f|e|
+      //
+      //   src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+
+      // Reverse the 32-bit elements in the 64-bit words.
+      //   tmp2 = |g|h|e|f|
+      __ Rev64(tmp2.V4S(), src2.V4S());
+
+      // Calculate the high half components.
+      //   tmp2 = |dg|ch|be|af|
+      __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
+
+      // Extract the low half components of src1.
+      //   tmp1 = |c|a|
+      __ Xtn(tmp1.V2S(), src1.V2D());
+
+      // Sum the respective high half components.
+      //   tmp2 = |dg+ch|be+af||dg+ch|be+af|
+      __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
+
+      // Extract the low half components of src2.
+      //   tmp3 = |g|e|
+      __ Xtn(tmp3.V2S(), src2.V2D());
+
+      // Shift the high half components, into the high half.
+      //   dst = |dg+ch << 32|be+af << 32|
+      __ Shll(dst.V2D(), tmp2.V2S(), 32);
+
+      // Multiply the low components together, and accumulate with the high
+      // half.
+      //   dst = |dst[1] + cg|dst[0] + ae|
+      __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
+
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
+    case kArm64I64x2ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
+      break;
+    }
+    case kArm64I32x4Splat: {
+      __ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0));
+      break;
+    }
+    case kArm64I32x4ExtractLane: {
+      __ Mov(i.OutputRegister32(), i.InputSimd128Register(0).V4S(),
+             i.InputInt8(1));
+      break;
+    }
+    case kArm64I32x4ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V4S(),
+                src1 = i.InputSimd128Register(0).V4S();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
+      SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
+    case kArm64I32x4Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
+      break;
+    }
+    case kArm64I32x4ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 5, V4S, Sshl, W);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4AddHoriz, Addp, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4Sub, Sub, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mla, Mla, 4S);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I32x4Mls, Mls, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4MinS, Smin, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4MaxS, Smax, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4Eq, Cmeq, 4S);
+    case kArm64I32x4Ne: {
+      VRegister dst = i.OutputSimd128Register().V4S();
+      __ Cmeq(dst, i.InputSimd128Register(0).V4S(),
+              i.InputSimd128Register(1).V4S());
+      __ Mvn(dst, dst);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
+      SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
+    case kArm64I32x4ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I32x4MinU, Umin, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4MaxU, Umax, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4GtU, Cmhi, 4S);
+      SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
+      SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
+    case kArm64I32x4BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      __ Sshr(tmp.V4S(), src.V4S(), 31);
+      // The mask sets bit i in lane i. ANDing it with tmp (all ones for
+      // negative lanes, all zeros otherwise) leaves bit i set exactly for the
+      // negative lanes.
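+      // For example, lanes {-1, 7, -9, 3} give tmp = {~0, 0, ~0, 0}; the AND
+      // leaves {1, 0, 4, 0} and ADDV produces the bitmask 0b0101.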
+      __ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
+      __ Addv(tmp.S(), tmp.V4S());
+      __ Mov(dst.W(), tmp.V4S(), 0);
+      break;
+    }
+    case kArm64I32x4DotI16x8S: {
+      UseScratchRegisterScope scope(tasm());
+      VRegister lhs = i.InputSimd128Register(0);
+      VRegister rhs = i.InputSimd128Register(1);
+      VRegister tmp1 = scope.AcquireV(kFormat4S);
+      VRegister tmp2 = scope.AcquireV(kFormat4S);
+      __ Smull(tmp1, lhs.V4H(), rhs.V4H());
+      __ Smull2(tmp2, lhs.V8H(), rhs.V8H());
+      __ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
+      break;
+    }
+    case kArm64I16x8Splat: {
+      __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
+      break;
+    }
+    case kArm64I16x8ExtractLaneU: {
+      __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
+              i.InputInt8(1));
+      break;
+    }
+    case kArm64I16x8ExtractLaneS: {
+      __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
+              i.InputInt8(1));
+      break;
+    }
+    case kArm64I16x8ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V8H(),
+                src1 = i.InputSimd128Register(0).V8H();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
+    case kArm64I16x8Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
+      break;
+    }
+    case kArm64I16x8ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 4, V8H, Sshl, W);
+      break;
+    }
+    case kArm64I16x8SConvertI32x4: {
+      VRegister dst = i.OutputSimd128Register(),
+                src0 = i.InputSimd128Register(0),
+                src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat4S);
+      if (dst == src1) {
+        __ Mov(temp, src1.V4S());
+        src1 = temp;
+      }
+      __ Sqxtn(dst.V4H(), src0.V4S());
+      __ Sqxtn2(dst.V8H(), src1.V4S());
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I16x8Add, Add, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8AddSatS, Sqadd, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8AddHoriz, Addp, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8Sub, Sub, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8SubSatS, Sqsub, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mla, Mla, 8H);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I16x8Mls, Mls, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8MinS, Smin, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8MaxS, Smax, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8Eq, Cmeq, 8H);
+    case kArm64I16x8Ne: {
+      VRegister dst = i.OutputSimd128Register().V8H();
+      __ Cmeq(dst, i.InputSimd128Register(0).V8H(),
+              i.InputSimd128Register(1).V8H());
+      __ Mvn(dst, dst);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
+    case kArm64I16x8ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
+      break;
+    }
+    case kArm64I16x8UConvertI32x4: {
+      VRegister dst = i.OutputSimd128Register(),
+                src0 = i.InputSimd128Register(0),
+                src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat4S);
+      if (dst == src1) {
+        __ Mov(temp, src1.V4S());
+        src1 = temp;
+      }
+      __ Sqxtun(dst.V4H(), src0.V4S());
+      __ Sqxtun2(dst.V8H(), src1.V4S());
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I16x8AddSatU, Uqadd, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8SubSatU, Uqsub, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8MinU, Umin, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8MaxU, Umax, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8GtU, Cmhi, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
+      SIMD_BINOP_CASE(kArm64I16x8Q15MulRSatS, Sqrdmulh, 8H);
+      SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
+    case kArm64I16x8BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      __ Sshr(tmp.V8H(), src.V8H(), 15);
+      // The mask sets bit i in lane i. ANDing it with tmp (all ones for
+      // negative lanes, all zeros otherwise) leaves bit i set exactly for the
+      // negative lanes.
+      __ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
+      __ Addv(tmp.H(), tmp.V8H());
+      __ Mov(dst.W(), tmp.V8H(), 0);
+      break;
+    }
+    case kArm64I8x16Splat: {
+      __ Dup(i.OutputSimd128Register().V16B(), i.InputRegister32(0));
+      break;
+    }
+    case kArm64I8x16ExtractLaneU: {
+      __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
+              i.InputInt8(1));
+      break;
+    }
+    case kArm64I8x16ExtractLaneS: {
+      __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
+              i.InputInt8(1));
+      break;
+    }
+    case kArm64I8x16ReplaceLane: {
+      VRegister dst = i.OutputSimd128Register().V16B(),
+                src1 = i.InputSimd128Register(0).V16B();
+      if (dst != src1) {
+        __ Mov(dst, src1);
+      }
+      __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
+    case kArm64I8x16Shl: {
+      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 3, V16B, Sshl, W);
+      break;
+    }
+    case kArm64I8x16ShrS: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 3, V16B, Sshl, W);
+      break;
+    }
+    case kArm64I8x16SConvertI16x8: {
+      VRegister dst = i.OutputSimd128Register(),
+                src0 = i.InputSimd128Register(0),
+                src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat8H);
+      if (dst == src1) {
+        __ Mov(temp, src1.V8H());
+        src1 = temp;
+      }
+      __ Sqxtn(dst.V8B(), src0.V8H());
+      __ Sqxtn2(dst.V16B(), src1.V8H());
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I8x16Add, Add, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16AddSatS, Sqadd, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16Sub, Sub, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16SubSatS, Sqsub, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16Mul, Mul, 16B);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mla, Mla, 16B);
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64I8x16Mls, Mls, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16MinS, Smin, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16MaxS, Smax, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16Eq, Cmeq, 16B);
+    case kArm64I8x16Ne: {
+      VRegister dst = i.OutputSimd128Register().V16B();
+      __ Cmeq(dst, i.InputSimd128Register(0).V16B(),
+              i.InputSimd128Register(1).V16B());
+      __ Mvn(dst, dst);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I8x16GtS, Cmgt, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
+    case kArm64I8x16ShrU: {
+      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 3, V16B, Ushl, W);
+      break;
+    }
+    case kArm64I8x16UConvertI16x8: {
+      VRegister dst = i.OutputSimd128Register(),
+                src0 = i.InputSimd128Register(0),
+                src1 = i.InputSimd128Register(1);
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat8H);
+      if (dst == src1) {
+        __ Mov(temp, src1.V8H());
+        src1 = temp;
+      }
+      __ Sqxtun(dst.V8B(), src0.V8H());
+      __ Sqxtun2(dst.V16B(), src1.V8H());
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64I8x16AddSatU, Uqadd, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16SubSatU, Uqsub, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16MinU, Umin, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16MaxU, Umax, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16GtU, Cmhi, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16GeU, Cmhs, 16B);
+      SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
+      SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
+    case kArm64I8x16BitMask: {
+      Register dst = i.OutputRegister32();
+      VRegister src = i.InputSimd128Register(0);
+      VRegister tmp = i.TempSimd128Register(0);
+      VRegister mask = i.TempSimd128Register(1);
+
+      // The mask sets bit i in lane i. ANDing it with tmp (all ones for
+      // negative lanes, all zeros otherwise) leaves bit i set exactly for the
+      // negative lanes.
+      __ Sshr(tmp.V16B(), src.V16B(), 7);
+      __ Movi(mask.V2D(), 0x8040'2010'0804'0201);
+      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
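+      // The mask pattern repeats in both 64-bit halves, so after the AND, for
+      // i < 8, bytes i and i+8 each hold 2^i or 0. Ext and Zip1 then pair
+      // byte i with byte i+8 into one halfword carrying bits i and i+8, so a
+      // single Addv.H accumulates the full 16-bit mask.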
+      __ Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
+      __ Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
+      __ Addv(tmp.H(), tmp.V8H());
+      __ Mov(dst.W(), tmp.V8H(), 0);
+      break;
+    }
+    case kArm64S128Const: {
+      uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
+      uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
+      __ Movi(i.OutputSimd128Register().V16B(), imm2, imm1);
+      break;
+    }
+    case kArm64S128Zero: {
+      VRegister dst = i.OutputSimd128Register().V16B();
+      __ Eor(dst, dst, dst);
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64S128And, And, 16B);
+      SIMD_BINOP_CASE(kArm64S128Or, Orr, 16B);
+      SIMD_BINOP_CASE(kArm64S128Xor, Eor, 16B);
+      SIMD_UNOP_CASE(kArm64S128Not, Mvn, 16B);
+    case kArm64S128Dup: {
+      VRegister dst = i.OutputSimd128Register(),
+                src = i.InputSimd128Register(0);
+      int lanes = i.InputInt32(1);
+      int index = i.InputInt32(2);
+      switch (lanes) {
+        case 4:
+          __ Dup(dst.V4S(), src.V4S(), index);
+          break;
+        case 8:
+          __ Dup(dst.V8H(), src.V8H(), index);
+          break;
+        case 16:
+          __ Dup(dst.V16B(), src.V16B(), index);
+          break;
+        default:
+          UNREACHABLE();
+          break;
+      }
+      break;
+    }
+      SIMD_DESTRUCTIVE_BINOP_CASE(kArm64S128Select, Bsl, 16B);
+      SIMD_BINOP_CASE(kArm64S128AndNot, Bic, 16B);
+    case kArm64S32x4Shuffle: {
+      Simd128Register dst = i.OutputSimd128Register().V4S(),
+                      src0 = i.InputSimd128Register(0).V4S(),
+                      src1 = i.InputSimd128Register(1).V4S();
+      // Check for in-place shuffles.
+      // If dst == src0 == src1, then the shuffle is unary and we only use src0.
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat4S);
+      if (dst == src0) {
+        __ Mov(temp, src0);
+        src0 = temp;
+      } else if (dst == src1) {
+        __ Mov(temp, src1);
+        src1 = temp;
+      }
+      // Perform shuffle as a vmov per lane.
+      int32_t shuffle = i.InputInt32(2);
+      for (int i = 0; i < 4; i++) {
+        VRegister src = src0;
+        int lane = shuffle & 0x7;
+        if (lane >= 4) {
+          src = src1;
+          lane &= 0x3;
+        }
+        __ Mov(dst, i, src, lane);
+        shuffle >>= 8;
+      }
+      break;
+    }
+      SIMD_BINOP_CASE(kArm64S32x4ZipLeft, Zip1, 4S);
+      SIMD_BINOP_CASE(kArm64S32x4ZipRight, Zip2, 4S);
+      SIMD_BINOP_CASE(kArm64S32x4UnzipLeft, Uzp1, 4S);
+      SIMD_BINOP_CASE(kArm64S32x4UnzipRight, Uzp2, 4S);
+      SIMD_BINOP_CASE(kArm64S32x4TransposeLeft, Trn1, 4S);
+      SIMD_BINOP_CASE(kArm64S32x4TransposeRight, Trn2, 4S);
+      SIMD_BINOP_CASE(kArm64S16x8ZipLeft, Zip1, 8H);
+      SIMD_BINOP_CASE(kArm64S16x8ZipRight, Zip2, 8H);
+      SIMD_BINOP_CASE(kArm64S16x8UnzipLeft, Uzp1, 8H);
+      SIMD_BINOP_CASE(kArm64S16x8UnzipRight, Uzp2, 8H);
+      SIMD_BINOP_CASE(kArm64S16x8TransposeLeft, Trn1, 8H);
+      SIMD_BINOP_CASE(kArm64S16x8TransposeRight, Trn2, 8H);
+      SIMD_BINOP_CASE(kArm64S8x16ZipLeft, Zip1, 16B);
+      SIMD_BINOP_CASE(kArm64S8x16ZipRight, Zip2, 16B);
+      SIMD_BINOP_CASE(kArm64S8x16UnzipLeft, Uzp1, 16B);
+      SIMD_BINOP_CASE(kArm64S8x16UnzipRight, Uzp2, 16B);
+      SIMD_BINOP_CASE(kArm64S8x16TransposeLeft, Trn1, 16B);
+      SIMD_BINOP_CASE(kArm64S8x16TransposeRight, Trn2, 16B);
+    case kArm64S8x16Concat: {
+      __ Ext(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
+             i.InputSimd128Register(1).V16B(), i.InputInt4(2));
+      break;
+    }
+    case kArm64I8x16Swizzle: {
+      __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
+             i.InputSimd128Register(1).V16B());
+      break;
+    }
+    case kArm64I8x16Shuffle: {
+      Simd128Register dst = i.OutputSimd128Register().V16B(),
+                      src0 = i.InputSimd128Register(0).V16B(),
+                      src1 = i.InputSimd128Register(1).V16B();
+      // For a unary shuffle the table is src0; for a binary shuffle the table
+      // is src0 and src1, which must be consecutive registers.
+      if (src0 != src1) {
+        DCHECK(AreConsecutive(src0, src1));
+      }
+
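+      // The shuffle immediates pack one index byte per lane. A unary shuffle
+      // only uses indices 0-15 and a binary shuffle indices 0-31, which the
+      // DCHECK below verifies via the unused high bits of each index byte.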
+      int64_t imm1 = make_uint64(i.InputInt32(3), i.InputInt32(2));
+      int64_t imm2 = make_uint64(i.InputInt32(5), i.InputInt32(4));
+      DCHECK_EQ(0, (imm1 | imm2) & (src0 == src1 ? 0xF0F0F0F0F0F0F0F0
+                                                 : 0xE0E0E0E0E0E0E0E0));
+
+      UseScratchRegisterScope scope(tasm());
+      VRegister temp = scope.AcquireV(kFormat16B);
+      __ Movi(temp, imm2, imm1);
+
+      if (src0 == src1) {
+        __ Tbl(dst, src0, temp.V16B());
+      } else {
+        __ Tbl(dst, src0, src1, temp.V16B());
+      }
+      break;
+    }
+      SIMD_UNOP_CASE(kArm64S32x2Reverse, Rev64, 4S);
+      SIMD_UNOP_CASE(kArm64S16x4Reverse, Rev64, 8H);
+      SIMD_UNOP_CASE(kArm64S16x2Reverse, Rev32, 8H);
+      SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
+      SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
+      SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
+    case kArm64LoadSplat: {
+      VectorFormat f = VectorFormatFillQ(MiscField::decode(opcode));
+      __ ld1r(i.OutputSimd128Register().Format(f), i.MemoryOperand(0));
+      break;
+    }
+    case kArm64S128Load8x8S: {
+      __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+      __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
+      break;
+    }
+    case kArm64S128Load8x8U: {
+      __ Ldr(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
+      __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
+      break;
+    }
+    case kArm64S128Load16x4S: {
+      __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+      __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
+      break;
+    }
+    case kArm64S128Load16x4U: {
+      __ Ldr(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
+      __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
+      break;
+    }
+    case kArm64S128Load32x2S: {
+      __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+      __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
+      break;
+    }
+    case kArm64S128Load32x2U: {
+      __ Ldr(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
+      __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
+      break;
+    }
+    case kArm64S128Load32Zero: {
+      __ Ldr(i.OutputSimd128Register().S(), i.MemoryOperand(0));
+      break;
+    }
+    case kArm64S128Load64Zero: {
+      __ Ldr(i.OutputSimd128Register().D(), i.MemoryOperand(0));
+      break;
+    }
+#define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT)     \
+  case Op: {                                               \
+    UseScratchRegisterScope scope(tasm());                 \
+    VRegister temp = scope.AcquireV(format);               \
+    __ Instr(temp, i.InputSimd128Register(0).V##FORMAT()); \
+    __ Umov(i.OutputRegister32(), temp, 0);                \
+    __ Cmp(i.OutputRegister32(), 0);                       \
+    __ Cset(i.OutputRegister32(), ne);                     \
+    break;                                                 \
+  }
+      // For AnyTrue, the format does not matter: Umaxv is non-zero iff any
+      // lane is non-zero. The AllTrue cases use Uminv, which is non-zero iff
+      // every lane is non-zero.
+      SIMD_REDUCE_OP_CASE(kArm64V128AnyTrue, Umaxv, kFormatS, 4S);
+      SIMD_REDUCE_OP_CASE(kArm64V32x4AllTrue, Uminv, kFormatS, 4S);
+      SIMD_REDUCE_OP_CASE(kArm64V16x8AllTrue, Uminv, kFormatH, 8H);
+      SIMD_REDUCE_OP_CASE(kArm64V8x16AllTrue, Uminv, kFormatB, 16B);
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+#undef SIMD_UNOP_CASE
+#undef SIMD_BINOP_CASE
+#undef SIMD_DESTRUCTIVE_BINOP_CASE
+#undef SIMD_REDUCE_OP_CASE
+#undef ASSEMBLE_SIMD_SHIFT_LEFT
+#undef ASSEMBLE_SIMD_SHIFT_RIGHT
+
+// Assemble branches after this instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Arm64OperandConverter i(this, instr);
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  FlagsCondition condition = branch->condition;
+  ArchOpcode opcode = instr->arch_opcode();
+
+  if (opcode == kArm64CompareAndBranch32) {
+    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
+    switch (condition) {
+      case kEqual:
+        __ Cbz(i.InputRegister32(0), tlabel);
+        break;
+      case kNotEqual:
+        __ Cbnz(i.InputRegister32(0), tlabel);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else if (opcode == kArm64CompareAndBranch) {
+    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
+    switch (condition) {
+      case kEqual:
+        __ Cbz(i.InputRegister64(0), tlabel);
+        break;
+      case kNotEqual:
+        __ Cbnz(i.InputRegister64(0), tlabel);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else if (opcode == kArm64TestAndBranch32) {
+    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
+    switch (condition) {
+      case kEqual:
+        __ Tbz(i.InputRegister32(0), i.InputInt5(1), tlabel);
+        break;
+      case kNotEqual:
+        __ Tbnz(i.InputRegister32(0), i.InputInt5(1), tlabel);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else if (opcode == kArm64TestAndBranch) {
+    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
+    switch (condition) {
+      case kEqual:
+        __ Tbz(i.InputRegister64(0), i.InputInt6(1), tlabel);
+        break;
+      case kNotEqual:
+        __ Tbnz(i.InputRegister64(0), i.InputInt6(1), tlabel);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else {
+    Condition cc = FlagsConditionToCondition(condition);
+    __ B(cc, tlabel);
+  }
+  if (!branch->fallthru) __ B(flabel);  // no fallthru to flabel.
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
+    return;
+  }
+
+  condition = NegateFlagsCondition(condition);
+  __ CmovX(kSpeculationPoisonRegister, xzr,
+           FlagsConditionToCondition(condition));
+  __ Csdb();
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ B(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+    void Generate() final {
+      Arm64OperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        pop_count += (pop_count & 1);  // align
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          // The trap code should never return.
+          __ Brk(0);
+        }
+      }
+    }
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Condition cc = FlagsConditionToCondition(condition);
+  __ B(cc, tlabel);
+}
+
+// Assemble boolean materializations after this instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  Arm64OperandConverter i(this, instr);
+
+  // Materialize a full 64-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
+  Condition cc = FlagsConditionToCondition(condition);
+  __ Cset(reg, cc);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  Arm64OperandConverter i(this, instr);
+  Register input = i.InputRegister32(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  Arm64OperandConverter i(this, instr);
+  UseScratchRegisterScope scope(tasm());
+  Register input = i.InputRegister32(0);
+  Register temp = scope.AcquireX();
+  size_t const case_count = instr->InputCount() - 2;
+  Label table;
+  __ Cmp(input, case_count);
+  __ B(hs, GetLabel(i.InputRpo(1)));
+  __ Adr(temp, &table);
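+  // Each jump table entry below is a single B instruction (4 bytes); when
+  // control-flow integrity is enabled, the BTI landing pad emitted by
+  // JumpTarget() doubles the entry size.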
+  int entry_size_log2 = 2;
+#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
+  ++entry_size_log2;  // Account for BTI.
+#endif
+  __ Add(temp, temp, Operand(input, UXTW, entry_size_log2));
+  __ Br(temp);
+  {
+    TurboAssembler::BlockPoolsScope block_pools(tasm(),
+                                                case_count * kInstrSize);
+    __ Bind(&table);
+    for (size_t index = 0; index < case_count; ++index) {
+      __ JumpTarget();
+      __ B(GetLabel(i.InputRpo(index + 2)));
+    }
+    __ JumpTarget();
+  }
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  frame->AlignFrame(16);
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  // Save FP registers.
+  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
+                                   call_descriptor->CalleeSavedFPRegisters());
+  int saved_count = saves_fp.Count();
+  if (saved_count != 0) {
+    DCHECK(saves_fp.list() == CPURegList::GetCalleeSavedV().list());
+    DCHECK_EQ(saved_count % 2, 0);
+    frame->AllocateSavedCalleeRegisterSlots(saved_count *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+
+  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
+                                call_descriptor->CalleeSavedRegisters());
+  saved_count = saves.Count();
+  if (saved_count != 0) {
+    DCHECK_EQ(saved_count % 2, 0);
+    frame->AllocateSavedCalleeRegisterSlots(saved_count);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  __ AssertSpAligned();
+
+  // The frame has been previously padded in CodeGenerator::FinishFrame().
+  DCHECK_EQ(frame()->GetTotalFrameSlotCount() % 2, 0);
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
+                                call_descriptor->CalleeSavedRegisters());
+  DCHECK_EQ(saves.Count() % 2, 0);
+  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
+                                   call_descriptor->CalleeSavedFPRegisters());
+  DCHECK_EQ(saves_fp.Count() % 2, 0);
+  // The number of slots for returns has to be even to ensure the correct stack
+  // alignment.
+  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
+
+  if (frame_access_state()->has_frame()) {
+    // Link the frame
+    if (call_descriptor->IsJSFunctionCall()) {
+      STATIC_ASSERT(InterpreterFrameConstants::kFixedFrameSize % 16 == 8);
+      DCHECK_EQ(required_slots % 2, 1);
+      __ Prologue();
+      // Update required_slots count since we have just claimed one extra slot.
+      STATIC_ASSERT(TurboAssembler::kExtraSlotClaimedByPrologue == 1);
+      required_slots -= TurboAssembler::kExtraSlotClaimedByPrologue;
+    } else {
+      __ Push<TurboAssembler::kSignLR>(lr, fp);
+      __ Mov(fp, sp);
+    }
+    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
+
+    // Create OSR entry if applicable
+    if (info()->is_osr()) {
+      // TurboFan OSR-compiled functions cannot be entered directly.
+      __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+      // Unoptimized code jumps directly to this entrypoint while the
+      // unoptimized frame is still on the stack. Optimized code uses OSR values
+      // directly from the unoptimized frame. Thus, all that needs to be done is
+      // to allocate the remaining stack slots.
+      if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+      osr_pc_offset_ = __ pc_offset();
+      size_t unoptimized_frame_slots = osr_helper()->UnoptimizedFrameSlots();
+      DCHECK(call_descriptor->IsJSFunctionCall());
+      DCHECK_EQ(unoptimized_frame_slots % 2, 1);
+      // One unoptimized frame slot has already been claimed when the actual
+      // arguments count was pushed.
+      required_slots -=
+          unoptimized_frame_slots - TurboAssembler::kExtraSlotClaimedByPrologue;
+      ResetSpeculationPoison();
+    }
+
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
+        UseScratchRegisterScope scope(tasm());
+        Register scratch = scope.AcquireX();
+        __ Ldr(scratch, FieldMemOperand(
+                            kWasmInstanceRegister,
+                            WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ Ldr(scratch, MemOperand(scratch));
+        __ Add(scratch, scratch, required_slots * kSystemPointerSize);
+        __ Cmp(sp, scratch);
+        __ B(hs, &done);
+      }
+
+      {
+        // Finish the frame that hasn't been fully built yet.
+        UseScratchRegisterScope temps(tasm());
+        Register scratch = temps.AcquireX();
+        __ Mov(scratch,
+               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
+        __ Push(scratch, kWasmInstanceRegister);
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly; there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ Brk(0);
+      }
+      __ Bind(&done);
+    }
+
+    // Skip callee-saved slots, which are pushed below.
+    required_slots -= saves.Count();
+    required_slots -= saves_fp.Count();
+    required_slots -= returns;
+
+    // Build remainder of frame, including accounting for and filling-in
+    // frame-specific header information, i.e. claiming the extra slot that
+    // other platforms explicitly push for STUB (code object) frames and frames
+    // recording their argument count.
+    switch (call_descriptor->kind()) {
+      case CallDescriptor::kCallJSFunction:
+        __ Claim(required_slots);
+        break;
+      case CallDescriptor::kCallCodeObject: {
+        UseScratchRegisterScope temps(tasm());
+        Register scratch = temps.AcquireX();
+        __ Mov(scratch,
+               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
+        __ Push(scratch, padreg);
+        // One of the extra slots has just been claimed when pushing the frame
+        // type marker above. We also know that we have at least one slot to
+        // claim here, as the typed frame has an odd number of fixed slots, and
+        // all other parts of the total frame slots are even, leaving
+        // {required_slots} to be odd.
+        DCHECK_GE(required_slots, 1);
+        __ Claim(required_slots - 1);
+      } break;
+      case CallDescriptor::kCallWasmFunction: {
+        UseScratchRegisterScope temps(tasm());
+        Register scratch = temps.AcquireX();
+        __ Mov(scratch,
+               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
+        __ Push(scratch, kWasmInstanceRegister);
+        __ Claim(required_slots);
+      } break;
+      case CallDescriptor::kCallWasmImportWrapper:
+      case CallDescriptor::kCallWasmCapiFunction: {
+        UseScratchRegisterScope temps(tasm());
+        __ LoadTaggedPointerField(
+            kJSFunctionRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ LoadTaggedPointerField(
+            kWasmInstanceRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        Register scratch = temps.AcquireX();
+        __ Mov(scratch,
+               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
+        __ Push(scratch, kWasmInstanceRegister);
+        int extra_slots =
+            call_descriptor->kind() == CallDescriptor::kCallWasmImportWrapper
+                ? 0   // Import wrapper: none.
+                : 1;  // C-API function: PC.
+        __ Claim(required_slots + extra_slots);
+      } break;
+      case CallDescriptor::kCallAddress:
+        if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+          UseScratchRegisterScope temps(tasm());
+          Register scratch = temps.AcquireX();
+          __ Mov(scratch, StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY));
+          __ Push(scratch, padreg);
+          // The additional slot will be used for the saved c_entry_fp.
+        }
+        __ Claim(required_slots);
+        break;
+      default:
+        UNREACHABLE();
+    }
+  }
+
+  // Save FP registers.
+  DCHECK_IMPLIES(saves_fp.Count() != 0,
+                 saves_fp.list() == CPURegList::GetCalleeSavedV().list());
+  __ PushCPURegList(saves_fp);
+
+  // Save registers.
+  DCHECK_IMPLIES(!saves.IsEmpty(),
+                 saves.list() == CPURegList::GetCalleeSaved().list());
+  __ PushCPURegList<TurboAssembler::kSignLR>(saves);
+
+  if (returns != 0) {
+    __ Claim(returns);
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);
+  if (returns != 0) {
+    __ Drop(returns);
+  }
+
+  // Restore registers.
+  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
+                                call_descriptor->CalleeSavedRegisters());
+  __ PopCPURegList<TurboAssembler::kAuthLR>(saves);
+
+  // Restore fp registers.
+  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
+                                   call_descriptor->CalleeSavedFPRegisters());
+  __ PopCPURegList(saves_fp);
+
+  unwinding_info_writer_.MarkBlockWillExit();
+
+  // We might need x3 for scratch.
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & x3.bit());
+  const int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+  Arm64OperandConverter g(this, nullptr);
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} is
+  // zero. Check RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ cmp(g.ToRegister(additional_pop_count), Operand(0));
+      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue);
+    }
+  }
+
+  Register argc_reg = x3;
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, it means it is a builtin with
+  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
+  // itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now unless they have a
+    // variable number of stack slot pops.
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ B(&return_label_);
+        return;
+      } else {
+        __ Bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count.
+      __ Ldr(argc_reg, MemOperand(fp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    Label argc_reg_has_final_count;
+    __ Add(argc_reg, argc_reg, 1);  // Consider the receiver.
+    if (parameter_count > 1) {
+      __ Cmp(argc_reg, Operand(parameter_count));
+      __ B(&argc_reg_has_final_count, ge);
+      __ Mov(argc_reg, Operand(parameter_count));
+      __ Bind(&argc_reg_has_final_count);
+    }
+    __ DropArguments(argc_reg);
+  } else if (additional_pop_count->IsImmediate()) {
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    __ DropArguments(parameter_count + additional_count);
+  } else if (parameter_count == 0) {
+    __ DropArguments(g.ToRegister(additional_pop_count));
+  } else {
+    // {additional_pop_count} is guaranteed to be zero if {parameter_count !=
+    // 0}. Check RawMachineAssembler::PopAndReturn.
+    __ DropArguments(parameter_count);
+  }
+  __ AssertSpAligned();
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {
+  __ ForceConstantPoolEmissionWithoutJump();
+  // We are conservative here, assuming all deopts are lazy deopts.
+  DCHECK_GE(Deoptimizer::kLazyDeoptExitSize,
+            Deoptimizer::kNonLazyDeoptExitSize);
+  __ CheckVeneerPool(
+      false, false,
+      static_cast<int>(exits->size()) * Deoptimizer::kLazyDeoptExitSize);
+
+  // Check which deopt kinds exist in this Code object, to avoid emitting jumps
+  // to unused entries.
+  bool saw_deopt_kind[kDeoptimizeKindCount] = {false};
+  for (auto exit : *exits) {
+    saw_deopt_kind[static_cast<int>(exit->kind())] = true;
+  }
+
+  // Emit the jumps to deoptimization entries.
+  UseScratchRegisterScope scope(tasm());
+  Register scratch = scope.AcquireX();
+  STATIC_ASSERT(static_cast<int>(kFirstDeoptimizeKind) == 0);
+  for (int i = 0; i < kDeoptimizeKindCount; i++) {
+    if (!saw_deopt_kind[i]) continue;
+    __ bind(&jump_deoptimization_entry_labels_[i]);
+    __ LoadEntryFromBuiltinIndex(Deoptimizer::GetDeoptimizationEntry(
+                                     isolate(), static_cast<DeoptimizeKind>(i)),
+                                 scratch);
+    __ Jump(scratch);
+  }
+}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  Arm64OperandConverter g(this, nullptr);
+  // Helper function to write the given constant to the dst register.
+  auto MoveConstantToRegister = [&](Register dst, Constant src) {
+    if (src.type() == Constant::kHeapObject) {
+      Handle<HeapObject> src_object = src.ToHeapObject();
+      RootIndex index;
+      if (IsMaterializableFromRoot(src_object, &index)) {
+        __ LoadRoot(dst, index);
+      } else {
+        __ Mov(dst, src_object);
+      }
+    } else if (src.type() == Constant::kCompressedHeapObject) {
+      Handle<HeapObject> src_object = src.ToHeapObject();
+      RootIndex index;
+      if (IsMaterializableFromRoot(src_object, &index)) {
+        __ LoadRoot(dst, index);
+      } else {
+        // TODO(v8:8977): Even though this mov happens on 32 bits (Note the
+        // .W()) and we are passing along the RelocInfo, we still haven't made
+        // the address embedded in the code-stream actually be compressed.
+        __ Mov(dst.W(),
+               Immediate(src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT));
+      }
+    } else {
+      __ Mov(dst, g.ToImmediate(source));
+    }
+  };
+  switch (MoveType::InferMove(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ Mov(g.ToRegister(destination), g.ToRegister(source));
+      } else if (source->IsFloatRegister() || source->IsDoubleRegister()) {
+        __ Mov(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+      } else {
+        DCHECK(source->IsSimd128Register());
+        __ Mov(g.ToDoubleRegister(destination).Q(),
+               g.ToDoubleRegister(source).Q());
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      MemOperand dst = g.ToMemOperand(destination, tasm());
+      if (source->IsRegister()) {
+        __ Str(g.ToRegister(source), dst);
+      } else {
+        VRegister src = g.ToDoubleRegister(source);
+        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
+          __ Str(src, dst);
+        } else {
+          DCHECK(source->IsSimd128Register());
+          __ Str(src.Q(), dst);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToRegister: {
+      MemOperand src = g.ToMemOperand(source, tasm());
+      if (destination->IsRegister()) {
+        __ Ldr(g.ToRegister(destination), src);
+      } else {
+        VRegister dst = g.ToDoubleRegister(destination);
+        if (destination->IsFloatRegister() || destination->IsDoubleRegister()) {
+          __ Ldr(dst, src);
+        } else {
+          DCHECK(destination->IsSimd128Register());
+          __ Ldr(dst.Q(), src);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      MemOperand src = g.ToMemOperand(source, tasm());
+      MemOperand dst = g.ToMemOperand(destination, tasm());
+      if (source->IsSimd128StackSlot()) {
+        UseScratchRegisterScope scope(tasm());
+        VRegister temp = scope.AcquireQ();
+        __ Ldr(temp, src);
+        __ Str(temp, dst);
+      } else {
+        UseScratchRegisterScope scope(tasm());
+        Register temp = scope.AcquireX();
+        __ Ldr(temp, src);
+        __ Str(temp, dst);
+      }
+      return;
+    }
+    case MoveType::kConstantToRegister: {
+      Constant src = g.ToConstant(source);
+      if (destination->IsRegister()) {
+        MoveConstantToRegister(g.ToRegister(destination), src);
+      } else {
+        VRegister dst = g.ToDoubleRegister(destination);
+        if (destination->IsFloatRegister()) {
+          __ Fmov(dst.S(), src.ToFloat32());
+        } else {
+          DCHECK(destination->IsDoubleRegister());
+          __ Fmov(dst, src.ToFloat64().value());
+        }
+      }
+      return;
+    }
+    case MoveType::kConstantToStack: {
+      Constant src = g.ToConstant(source);
+      MemOperand dst = g.ToMemOperand(destination, tasm());
+      if (destination->IsStackSlot()) {
+        UseScratchRegisterScope scope(tasm());
+        Register temp = scope.AcquireX();
+        MoveConstantToRegister(temp, src);
+        __ Str(temp, dst);
+      } else if (destination->IsFloatStackSlot()) {
+        if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
+          __ Str(wzr, dst);
+        } else {
+          UseScratchRegisterScope scope(tasm());
+          VRegister temp = scope.AcquireS();
+          __ Fmov(temp, src.ToFloat32());
+          __ Str(temp, dst);
+        }
+      } else {
+        DCHECK(destination->IsDoubleStackSlot());
+        if (src.ToFloat64().AsUint64() == 0) {
+          __ Str(xzr, dst);
+        } else {
+          UseScratchRegisterScope scope(tasm());
+          VRegister temp = scope.AcquireD();
+          __ Fmov(temp, src.ToFloat64().value());
+          __ Str(temp, dst);
+        }
+      }
+      return;
+    }
+  }
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  Arm64OperandConverter g(this, nullptr);
+  switch (MoveType::InferSwap(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ Swap(g.ToRegister(source), g.ToRegister(destination));
+      } else {
+        VRegister src = g.ToDoubleRegister(source);
+        VRegister dst = g.ToDoubleRegister(destination);
+        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
+          __ Swap(src, dst);
+        } else {
+          DCHECK(source->IsSimd128Register());
+          __ Swap(src.Q(), dst.Q());
+        }
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      UseScratchRegisterScope scope(tasm());
+      MemOperand dst = g.ToMemOperand(destination, tasm());
+      if (source->IsRegister()) {
+        Register temp = scope.AcquireX();
+        Register src = g.ToRegister(source);
+        __ Mov(temp, src);
+        __ Ldr(src, dst);
+        __ Str(temp, dst);
+      } else {
+        UseScratchRegisterScope scope(tasm());
+        VRegister src = g.ToDoubleRegister(source);
+        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
+          VRegister temp = scope.AcquireD();
+          __ Mov(temp, src);
+          __ Ldr(src, dst);
+          __ Str(temp, dst);
+        } else {
+          DCHECK(source->IsSimd128Register());
+          VRegister temp = scope.AcquireQ();
+          __ Mov(temp, src.Q());
+          __ Ldr(src.Q(), dst);
+          __ Str(temp, dst);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      UseScratchRegisterScope scope(tasm());
+      MemOperand src = g.ToMemOperand(source, tasm());
+      MemOperand dst = g.ToMemOperand(destination, tasm());
+      VRegister temp_0 = scope.AcquireD();
+      VRegister temp_1 = scope.AcquireD();
+      if (source->IsSimd128StackSlot()) {
+        __ Ldr(temp_0.Q(), src);
+        __ Ldr(temp_1.Q(), dst);
+        __ Str(temp_0.Q(), dst);
+        __ Str(temp_1.Q(), src);
+      } else {
+        __ Ldr(temp_0, src);
+        __ Ldr(temp_1, dst);
+        __ Str(temp_0, dst);
+        __ Str(temp_1, src);
+      }
+      return;
+    }
+    default:
+      UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  // On 64-bit ARM we emit the jump tables inline.
+  UNREACHABLE();
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm64/instruction-codes-arm64.h b/src/compiler/backend/arm64/instruction-codes-arm64.h
new file mode 100644
index 0000000..c80538f
--- /dev/null
+++ b/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -0,0 +1,448 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_ARM64_INSTRUCTION_CODES_ARM64_H_
+#define V8_COMPILER_BACKEND_ARM64_INSTRUCTION_CODES_ARM64_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// ARM64-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V)          \
+  V(Arm64Add)                               \
+  V(Arm64Add32)                             \
+  V(Arm64And)                               \
+  V(Arm64And32)                             \
+  V(Arm64Bic)                               \
+  V(Arm64Bic32)                             \
+  V(Arm64Clz)                               \
+  V(Arm64Clz32)                             \
+  V(Arm64Cmp)                               \
+  V(Arm64Cmp32)                             \
+  V(Arm64Cmn)                               \
+  V(Arm64Cmn32)                             \
+  V(Arm64Cnt)                               \
+  V(Arm64Tst)                               \
+  V(Arm64Tst32)                             \
+  V(Arm64Or)                                \
+  V(Arm64Or32)                              \
+  V(Arm64Orn)                               \
+  V(Arm64Orn32)                             \
+  V(Arm64Eor)                               \
+  V(Arm64Eor32)                             \
+  V(Arm64Eon)                               \
+  V(Arm64Eon32)                             \
+  V(Arm64Saddlp)                            \
+  V(Arm64Sub)                               \
+  V(Arm64Sub32)                             \
+  V(Arm64Mul)                               \
+  V(Arm64Mul32)                             \
+  V(Arm64Smull)                             \
+  V(Arm64Smull2)                            \
+  V(Arm64Uaddlp)                            \
+  V(Arm64Umull)                             \
+  V(Arm64Umull2)                            \
+  V(Arm64Madd)                              \
+  V(Arm64Madd32)                            \
+  V(Arm64Msub)                              \
+  V(Arm64Msub32)                            \
+  V(Arm64Mneg)                              \
+  V(Arm64Mneg32)                            \
+  V(Arm64Idiv)                              \
+  V(Arm64Idiv32)                            \
+  V(Arm64Udiv)                              \
+  V(Arm64Udiv32)                            \
+  V(Arm64Imod)                              \
+  V(Arm64Imod32)                            \
+  V(Arm64Umod)                              \
+  V(Arm64Umod32)                            \
+  V(Arm64Not)                               \
+  V(Arm64Not32)                             \
+  V(Arm64Lsl)                               \
+  V(Arm64Lsl32)                             \
+  V(Arm64Lsr)                               \
+  V(Arm64Lsr32)                             \
+  V(Arm64Asr)                               \
+  V(Arm64Asr32)                             \
+  V(Arm64Ror)                               \
+  V(Arm64Ror32)                             \
+  V(Arm64Mov32)                             \
+  V(Arm64Sxtb32)                            \
+  V(Arm64Sxth32)                            \
+  V(Arm64Sxtb)                              \
+  V(Arm64Sxth)                              \
+  V(Arm64Sxtw)                              \
+  V(Arm64Sbfx)                              \
+  V(Arm64Sbfx32)                            \
+  V(Arm64Ubfx)                              \
+  V(Arm64Ubfx32)                            \
+  V(Arm64Ubfiz32)                           \
+  V(Arm64Bfi)                               \
+  V(Arm64Rbit)                              \
+  V(Arm64Rbit32)                            \
+  V(Arm64Rev)                               \
+  V(Arm64Rev32)                             \
+  V(Arm64TestAndBranch32)                   \
+  V(Arm64TestAndBranch)                     \
+  V(Arm64CompareAndBranch32)                \
+  V(Arm64CompareAndBranch)                  \
+  V(Arm64Claim)                             \
+  V(Arm64Poke)                              \
+  V(Arm64PokePair)                          \
+  V(Arm64Peek)                              \
+  V(Arm64Float32Cmp)                        \
+  V(Arm64Float32Add)                        \
+  V(Arm64Float32Sub)                        \
+  V(Arm64Float32Mul)                        \
+  V(Arm64Float32Div)                        \
+  V(Arm64Float32Abs)                        \
+  V(Arm64Float32Neg)                        \
+  V(Arm64Float32Sqrt)                       \
+  V(Arm64Float32Fnmul)                      \
+  V(Arm64Float32RoundDown)                  \
+  V(Arm64Float32Max)                        \
+  V(Arm64Float32Min)                        \
+  V(Arm64Float64Cmp)                        \
+  V(Arm64Float64Add)                        \
+  V(Arm64Float64Sub)                        \
+  V(Arm64Float64Mul)                        \
+  V(Arm64Float64Div)                        \
+  V(Arm64Float64Mod)                        \
+  V(Arm64Float64Max)                        \
+  V(Arm64Float64Min)                        \
+  V(Arm64Float64Abs)                        \
+  V(Arm64Float64Neg)                        \
+  V(Arm64Float64Sqrt)                       \
+  V(Arm64Float64Fnmul)                      \
+  V(Arm64Float64RoundDown)                  \
+  V(Arm64Float32RoundUp)                    \
+  V(Arm64Float64RoundUp)                    \
+  V(Arm64Float64RoundTiesAway)              \
+  V(Arm64Float32RoundTruncate)              \
+  V(Arm64Float64RoundTruncate)              \
+  V(Arm64Float32RoundTiesEven)              \
+  V(Arm64Float64RoundTiesEven)              \
+  V(Arm64Float64SilenceNaN)                 \
+  V(Arm64Float32ToFloat64)                  \
+  V(Arm64Float64ToFloat32)                  \
+  V(Arm64Float32ToInt32)                    \
+  V(Arm64Float64ToInt32)                    \
+  V(Arm64Float32ToUint32)                   \
+  V(Arm64Float64ToUint32)                   \
+  V(Arm64Float32ToInt64)                    \
+  V(Arm64Float64ToInt64)                    \
+  V(Arm64Float32ToUint64)                   \
+  V(Arm64Float64ToUint64)                   \
+  V(Arm64Int32ToFloat32)                    \
+  V(Arm64Int32ToFloat64)                    \
+  V(Arm64Int64ToFloat32)                    \
+  V(Arm64Int64ToFloat64)                    \
+  V(Arm64Uint32ToFloat32)                   \
+  V(Arm64Uint32ToFloat64)                   \
+  V(Arm64Uint64ToFloat32)                   \
+  V(Arm64Uint64ToFloat64)                   \
+  V(Arm64Float64ExtractLowWord32)           \
+  V(Arm64Float64ExtractHighWord32)          \
+  V(Arm64Float64InsertLowWord32)            \
+  V(Arm64Float64InsertHighWord32)           \
+  V(Arm64Float64MoveU64)                    \
+  V(Arm64U64MoveFloat64)                    \
+  V(Arm64LdrS)                              \
+  V(Arm64StrS)                              \
+  V(Arm64LdrD)                              \
+  V(Arm64StrD)                              \
+  V(Arm64LdrQ)                              \
+  V(Arm64StrQ)                              \
+  V(Arm64Ldrb)                              \
+  V(Arm64Ldrsb)                             \
+  V(Arm64Strb)                              \
+  V(Arm64Ldrh)                              \
+  V(Arm64Ldrsh)                             \
+  V(Arm64Strh)                              \
+  V(Arm64Ldrsw)                             \
+  V(Arm64LdrW)                              \
+  V(Arm64StrW)                              \
+  V(Arm64Ldr)                               \
+  V(Arm64LdrDecompressTaggedSigned)         \
+  V(Arm64LdrDecompressTaggedPointer)        \
+  V(Arm64LdrDecompressAnyTagged)            \
+  V(Arm64Str)                               \
+  V(Arm64StrCompressTagged)                 \
+  V(Arm64DmbIsh)                            \
+  V(Arm64DsbIsb)                            \
+  V(Arm64Sxtl)                              \
+  V(Arm64Sxtl2)                             \
+  V(Arm64Uxtl)                              \
+  V(Arm64Uxtl2)                             \
+  V(Arm64F64x2Splat)                        \
+  V(Arm64F64x2ExtractLane)                  \
+  V(Arm64F64x2ReplaceLane)                  \
+  V(Arm64F64x2Abs)                          \
+  V(Arm64F64x2Neg)                          \
+  V(Arm64F64x2Sqrt)                         \
+  V(Arm64F64x2Add)                          \
+  V(Arm64F64x2Sub)                          \
+  V(Arm64F64x2Mul)                          \
+  V(Arm64F64x2Div)                          \
+  V(Arm64F64x2Min)                          \
+  V(Arm64F64x2Max)                          \
+  V(Arm64F64x2Eq)                           \
+  V(Arm64F64x2Ne)                           \
+  V(Arm64F64x2Lt)                           \
+  V(Arm64F64x2Le)                           \
+  V(Arm64F64x2Qfma)                         \
+  V(Arm64F64x2Qfms)                         \
+  V(Arm64F64x2Pmin)                         \
+  V(Arm64F64x2Pmax)                         \
+  V(Arm64F32x4Splat)                        \
+  V(Arm64F32x4ExtractLane)                  \
+  V(Arm64F32x4ReplaceLane)                  \
+  V(Arm64F32x4SConvertI32x4)                \
+  V(Arm64F32x4UConvertI32x4)                \
+  V(Arm64F32x4Abs)                          \
+  V(Arm64F32x4Neg)                          \
+  V(Arm64F32x4Sqrt)                         \
+  V(Arm64F32x4RecipApprox)                  \
+  V(Arm64F32x4RecipSqrtApprox)              \
+  V(Arm64F32x4Add)                          \
+  V(Arm64F32x4AddHoriz)                     \
+  V(Arm64F32x4Sub)                          \
+  V(Arm64F32x4Mul)                          \
+  V(Arm64F32x4Div)                          \
+  V(Arm64F32x4Min)                          \
+  V(Arm64F32x4Max)                          \
+  V(Arm64F32x4Eq)                           \
+  V(Arm64F32x4Ne)                           \
+  V(Arm64F32x4Lt)                           \
+  V(Arm64F32x4Le)                           \
+  V(Arm64F32x4Qfma)                         \
+  V(Arm64F32x4Qfms)                         \
+  V(Arm64F32x4Pmin)                         \
+  V(Arm64F32x4Pmax)                         \
+  V(Arm64I64x2Splat)                        \
+  V(Arm64I64x2ExtractLane)                  \
+  V(Arm64I64x2ReplaceLane)                  \
+  V(Arm64I64x2Neg)                          \
+  V(Arm64I64x2Shl)                          \
+  V(Arm64I64x2ShrS)                         \
+  V(Arm64I64x2Add)                          \
+  V(Arm64I64x2Sub)                          \
+  V(Arm64I64x2Mul)                          \
+  V(Arm64I64x2Eq)                           \
+  V(Arm64I64x2ShrU)                         \
+  V(Arm64I32x4Splat)                        \
+  V(Arm64I32x4ExtractLane)                  \
+  V(Arm64I32x4ReplaceLane)                  \
+  V(Arm64I32x4SConvertF32x4)                \
+  V(Arm64I32x4Neg)                          \
+  V(Arm64I32x4Shl)                          \
+  V(Arm64I32x4ShrS)                         \
+  V(Arm64I32x4Add)                          \
+  V(Arm64I32x4AddHoriz)                     \
+  V(Arm64I32x4Sub)                          \
+  V(Arm64I32x4Mul)                          \
+  V(Arm64I32x4Mla)                          \
+  V(Arm64I32x4Mls)                          \
+  V(Arm64I32x4MinS)                         \
+  V(Arm64I32x4MaxS)                         \
+  V(Arm64I32x4Eq)                           \
+  V(Arm64I32x4Ne)                           \
+  V(Arm64I32x4GtS)                          \
+  V(Arm64I32x4GeS)                          \
+  V(Arm64I32x4UConvertF32x4)                \
+  V(Arm64I32x4ShrU)                         \
+  V(Arm64I32x4MinU)                         \
+  V(Arm64I32x4MaxU)                         \
+  V(Arm64I32x4GtU)                          \
+  V(Arm64I32x4GeU)                          \
+  V(Arm64I32x4Abs)                          \
+  V(Arm64I32x4BitMask)                      \
+  V(Arm64I32x4DotI16x8S)                    \
+  V(Arm64I16x8Splat)                        \
+  V(Arm64I16x8ExtractLaneU)                 \
+  V(Arm64I16x8ExtractLaneS)                 \
+  V(Arm64I16x8ReplaceLane)                  \
+  V(Arm64I16x8Neg)                          \
+  V(Arm64I16x8Shl)                          \
+  V(Arm64I16x8ShrS)                         \
+  V(Arm64I16x8SConvertI32x4)                \
+  V(Arm64I16x8Add)                          \
+  V(Arm64I16x8AddSatS)                      \
+  V(Arm64I16x8AddHoriz)                     \
+  V(Arm64I16x8Sub)                          \
+  V(Arm64I16x8SubSatS)                      \
+  V(Arm64I16x8Mul)                          \
+  V(Arm64I16x8Mla)                          \
+  V(Arm64I16x8Mls)                          \
+  V(Arm64I16x8MinS)                         \
+  V(Arm64I16x8MaxS)                         \
+  V(Arm64I16x8Eq)                           \
+  V(Arm64I16x8Ne)                           \
+  V(Arm64I16x8GtS)                          \
+  V(Arm64I16x8GeS)                          \
+  V(Arm64I16x8ShrU)                         \
+  V(Arm64I16x8UConvertI32x4)                \
+  V(Arm64I16x8AddSatU)                      \
+  V(Arm64I16x8SubSatU)                      \
+  V(Arm64I16x8MinU)                         \
+  V(Arm64I16x8MaxU)                         \
+  V(Arm64I16x8GtU)                          \
+  V(Arm64I16x8GeU)                          \
+  V(Arm64I16x8RoundingAverageU)             \
+  V(Arm64I16x8Q15MulRSatS)                  \
+  V(Arm64I16x8Abs)                          \
+  V(Arm64I16x8BitMask)                      \
+  V(Arm64I8x16Splat)                        \
+  V(Arm64I8x16ExtractLaneU)                 \
+  V(Arm64I8x16ExtractLaneS)                 \
+  V(Arm64I8x16ReplaceLane)                  \
+  V(Arm64I8x16Neg)                          \
+  V(Arm64I8x16Shl)                          \
+  V(Arm64I8x16ShrS)                         \
+  V(Arm64I8x16SConvertI16x8)                \
+  V(Arm64I8x16Add)                          \
+  V(Arm64I8x16AddSatS)                      \
+  V(Arm64I8x16Sub)                          \
+  V(Arm64I8x16SubSatS)                      \
+  V(Arm64I8x16Mul)                          \
+  V(Arm64I8x16Mla)                          \
+  V(Arm64I8x16Mls)                          \
+  V(Arm64I8x16MinS)                         \
+  V(Arm64I8x16MaxS)                         \
+  V(Arm64I8x16Eq)                           \
+  V(Arm64I8x16Ne)                           \
+  V(Arm64I8x16GtS)                          \
+  V(Arm64I8x16GeS)                          \
+  V(Arm64I8x16ShrU)                         \
+  V(Arm64I8x16UConvertI16x8)                \
+  V(Arm64I8x16AddSatU)                      \
+  V(Arm64I8x16SubSatU)                      \
+  V(Arm64I8x16MinU)                         \
+  V(Arm64I8x16MaxU)                         \
+  V(Arm64I8x16GtU)                          \
+  V(Arm64I8x16GeU)                          \
+  V(Arm64I8x16RoundingAverageU)             \
+  V(Arm64I8x16Abs)                          \
+  V(Arm64I8x16BitMask)                      \
+  V(Arm64S128Const)                         \
+  V(Arm64S128Zero)                          \
+  V(Arm64S128Dup)                           \
+  V(Arm64S128And)                           \
+  V(Arm64S128Or)                            \
+  V(Arm64S128Xor)                           \
+  V(Arm64S128Not)                           \
+  V(Arm64S128Select)                        \
+  V(Arm64S128AndNot)                        \
+  V(Arm64S32x4ZipLeft)                      \
+  V(Arm64S32x4ZipRight)                     \
+  V(Arm64S32x4UnzipLeft)                    \
+  V(Arm64S32x4UnzipRight)                   \
+  V(Arm64S32x4TransposeLeft)                \
+  V(Arm64S32x4TransposeRight)               \
+  V(Arm64S32x4Shuffle)                      \
+  V(Arm64S16x8ZipLeft)                      \
+  V(Arm64S16x8ZipRight)                     \
+  V(Arm64S16x8UnzipLeft)                    \
+  V(Arm64S16x8UnzipRight)                   \
+  V(Arm64S16x8TransposeLeft)                \
+  V(Arm64S16x8TransposeRight)               \
+  V(Arm64S8x16ZipLeft)                      \
+  V(Arm64S8x16ZipRight)                     \
+  V(Arm64S8x16UnzipLeft)                    \
+  V(Arm64S8x16UnzipRight)                   \
+  V(Arm64S8x16TransposeLeft)                \
+  V(Arm64S8x16TransposeRight)               \
+  V(Arm64S8x16Concat)                       \
+  V(Arm64I8x16Swizzle)                      \
+  V(Arm64I8x16Shuffle)                      \
+  V(Arm64S32x2Reverse)                      \
+  V(Arm64S16x4Reverse)                      \
+  V(Arm64S16x2Reverse)                      \
+  V(Arm64S8x8Reverse)                       \
+  V(Arm64S8x4Reverse)                       \
+  V(Arm64S8x2Reverse)                       \
+  V(Arm64V128AnyTrue)                       \
+  V(Arm64V32x4AllTrue)                      \
+  V(Arm64V16x8AllTrue)                      \
+  V(Arm64V8x16AllTrue)                      \
+  V(Arm64LoadSplat)                         \
+  V(Arm64S128Load8x8S)                      \
+  V(Arm64S128Load8x8U)                      \
+  V(Arm64S128Load16x4S)                     \
+  V(Arm64S128Load16x4U)                     \
+  V(Arm64S128Load32x2S)                     \
+  V(Arm64S128Load32x2U)                     \
+  V(Arm64S128Load32Zero)                    \
+  V(Arm64S128Load64Zero)                    \
+  V(Arm64Word64AtomicLoadUint8)             \
+  V(Arm64Word64AtomicLoadUint16)            \
+  V(Arm64Word64AtomicLoadUint32)            \
+  V(Arm64Word64AtomicLoadUint64)            \
+  V(Arm64Word64AtomicStoreWord8)            \
+  V(Arm64Word64AtomicStoreWord16)           \
+  V(Arm64Word64AtomicStoreWord32)           \
+  V(Arm64Word64AtomicStoreWord64)           \
+  V(Arm64Word64AtomicAddUint8)              \
+  V(Arm64Word64AtomicAddUint16)             \
+  V(Arm64Word64AtomicAddUint32)             \
+  V(Arm64Word64AtomicAddUint64)             \
+  V(Arm64Word64AtomicSubUint8)              \
+  V(Arm64Word64AtomicSubUint16)             \
+  V(Arm64Word64AtomicSubUint32)             \
+  V(Arm64Word64AtomicSubUint64)             \
+  V(Arm64Word64AtomicAndUint8)              \
+  V(Arm64Word64AtomicAndUint16)             \
+  V(Arm64Word64AtomicAndUint32)             \
+  V(Arm64Word64AtomicAndUint64)             \
+  V(Arm64Word64AtomicOrUint8)               \
+  V(Arm64Word64AtomicOrUint16)              \
+  V(Arm64Word64AtomicOrUint32)              \
+  V(Arm64Word64AtomicOrUint64)              \
+  V(Arm64Word64AtomicXorUint8)              \
+  V(Arm64Word64AtomicXorUint16)             \
+  V(Arm64Word64AtomicXorUint32)             \
+  V(Arm64Word64AtomicXorUint64)             \
+  V(Arm64Word64AtomicExchangeUint8)         \
+  V(Arm64Word64AtomicExchangeUint16)        \
+  V(Arm64Word64AtomicExchangeUint32)        \
+  V(Arm64Word64AtomicExchangeUint64)        \
+  V(Arm64Word64AtomicCompareExchangeUint8)  \
+  V(Arm64Word64AtomicCompareExchangeUint16) \
+  V(Arm64Word64AtomicCompareExchangeUint32) \
+  V(Arm64Word64AtomicCompareExchangeUint64)
+
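+// The list above expands into the kArm64* ArchOpcode enumerators that the
+// instruction selector emits and that the code generator and the instruction
+// scheduler switch on.
+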
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// R = register
+// O = register or stack slot
+// D = double register
+// I = immediate (handle, external, int32)
+// MRI = [register + immediate]
+// MRR = [register + register]
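+//
+// The instruction selector folds a mode into the opcode, e.g.
+//   opcode |= AddressingModeField::encode(kMode_MRI);
+// and the code generator decodes it again when emitting the instruction.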
+#define TARGET_ADDRESSING_MODE_LIST(V)                          \
+  V(MRI)              /* [%r0 + K] */                           \
+  V(MRR)              /* [%r0 + %r1] */                         \
+  V(Operand2_R_LSL_I) /* %r0 LSL K */                           \
+  V(Operand2_R_LSR_I) /* %r0 LSR K */                           \
+  V(Operand2_R_ASR_I) /* %r0 ASR K */                           \
+  V(Operand2_R_ROR_I) /* %r0 ROR K */                           \
+  V(Operand2_R_UXTB)  /* %r0 UXTB (unsigned extend byte) */     \
+  V(Operand2_R_UXTH)  /* %r0 UXTH (unsigned extend halfword) */ \
+  V(Operand2_R_SXTB)  /* %r0 SXTB (signed extend byte) */       \
+  V(Operand2_R_SXTH)  /* %r0 SXTH (signed extend halfword) */   \
+  V(Operand2_R_SXTW)  /* %r0 SXTW (signed extend word) */       \
+  V(Root)             /* [%rr + K] */
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_ARM64_INSTRUCTION_CODES_ARM64_H_
diff --git a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
new file mode 100644
index 0000000..9d53074
--- /dev/null
+++ b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -0,0 +1,591 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kArm64Add:
+    case kArm64Add32:
+    case kArm64And:
+    case kArm64And32:
+    case kArm64Bic:
+    case kArm64Bic32:
+    case kArm64Clz:
+    case kArm64Clz32:
+    case kArm64Cmp:
+    case kArm64Cmp32:
+    case kArm64Cmn:
+    case kArm64Cmn32:
+    case kArm64Cnt:
+    case kArm64Tst:
+    case kArm64Tst32:
+    case kArm64Or:
+    case kArm64Or32:
+    case kArm64Orn:
+    case kArm64Orn32:
+    case kArm64Eor:
+    case kArm64Eor32:
+    case kArm64Eon:
+    case kArm64Eon32:
+    case kArm64Saddlp:
+    case kArm64Sub:
+    case kArm64Sub32:
+    case kArm64Mul:
+    case kArm64Mul32:
+    case kArm64Smull:
+    case kArm64Smull2:
+    case kArm64Uaddlp:
+    case kArm64Umull:
+    case kArm64Umull2:
+    case kArm64Madd:
+    case kArm64Madd32:
+    case kArm64Msub:
+    case kArm64Msub32:
+    case kArm64Mneg:
+    case kArm64Mneg32:
+    case kArm64Idiv:
+    case kArm64Idiv32:
+    case kArm64Udiv:
+    case kArm64Udiv32:
+    case kArm64Imod:
+    case kArm64Imod32:
+    case kArm64Umod:
+    case kArm64Umod32:
+    case kArm64Not:
+    case kArm64Not32:
+    case kArm64Lsl:
+    case kArm64Lsl32:
+    case kArm64Lsr:
+    case kArm64Lsr32:
+    case kArm64Asr:
+    case kArm64Asr32:
+    case kArm64Ror:
+    case kArm64Ror32:
+    case kArm64Mov32:
+    case kArm64Sxtb:
+    case kArm64Sxtb32:
+    case kArm64Sxth:
+    case kArm64Sxth32:
+    case kArm64Sxtw:
+    case kArm64Sbfx:
+    case kArm64Sbfx32:
+    case kArm64Ubfx:
+    case kArm64Ubfx32:
+    case kArm64Ubfiz32:
+    case kArm64Bfi:
+    case kArm64Rbit:
+    case kArm64Rbit32:
+    case kArm64Rev:
+    case kArm64Rev32:
+    case kArm64Float32Cmp:
+    case kArm64Float32Add:
+    case kArm64Float32Sub:
+    case kArm64Float32Mul:
+    case kArm64Float32Div:
+    case kArm64Float32Abs:
+    case kArm64Float32Neg:
+    case kArm64Float32Sqrt:
+    case kArm64Float32Fnmul:
+    case kArm64Float32RoundDown:
+    case kArm64Float32Max:
+    case kArm64Float32Min:
+    case kArm64Float64Cmp:
+    case kArm64Float64Add:
+    case kArm64Float64Sub:
+    case kArm64Float64Mul:
+    case kArm64Float64Div:
+    case kArm64Float64Max:
+    case kArm64Float64Min:
+    case kArm64Float64Abs:
+    case kArm64Float64Neg:
+    case kArm64Float64Sqrt:
+    case kArm64Float64Fnmul:
+    case kArm64Float64RoundDown:
+    case kArm64Float64RoundTiesAway:
+    case kArm64Float64RoundTruncate:
+    case kArm64Float64RoundTiesEven:
+    case kArm64Float64RoundUp:
+    case kArm64Float32RoundTiesEven:
+    case kArm64Float32RoundTruncate:
+    case kArm64Float32RoundUp:
+    case kArm64Float32ToFloat64:
+    case kArm64Float64ToFloat32:
+    case kArm64Float32ToInt32:
+    case kArm64Float64ToInt32:
+    case kArm64Float32ToUint32:
+    case kArm64Float64ToUint32:
+    case kArm64Float32ToInt64:
+    case kArm64Float64ToInt64:
+    case kArm64Float32ToUint64:
+    case kArm64Float64ToUint64:
+    case kArm64Int32ToFloat32:
+    case kArm64Int32ToFloat64:
+    case kArm64Int64ToFloat32:
+    case kArm64Int64ToFloat64:
+    case kArm64Uint32ToFloat32:
+    case kArm64Uint32ToFloat64:
+    case kArm64Uint64ToFloat32:
+    case kArm64Uint64ToFloat64:
+    case kArm64Float64ExtractLowWord32:
+    case kArm64Float64ExtractHighWord32:
+    case kArm64Float64InsertLowWord32:
+    case kArm64Float64InsertHighWord32:
+    case kArm64Float64Mod:
+    case kArm64Float64MoveU64:
+    case kArm64U64MoveFloat64:
+    case kArm64Float64SilenceNaN:
+    case kArm64F64x2Splat:
+    case kArm64F64x2ExtractLane:
+    case kArm64F64x2ReplaceLane:
+    case kArm64F64x2Abs:
+    case kArm64F64x2Neg:
+    case kArm64F64x2Sqrt:
+    case kArm64F64x2Add:
+    case kArm64F64x2Sub:
+    case kArm64F64x2Mul:
+    case kArm64F64x2Div:
+    case kArm64F64x2Min:
+    case kArm64F64x2Max:
+    case kArm64F64x2Eq:
+    case kArm64F64x2Ne:
+    case kArm64F64x2Lt:
+    case kArm64F64x2Le:
+    case kArm64F64x2Qfma:
+    case kArm64F64x2Qfms:
+    case kArm64F64x2Pmin:
+    case kArm64F64x2Pmax:
+    case kArm64F32x4Splat:
+    case kArm64F32x4ExtractLane:
+    case kArm64F32x4ReplaceLane:
+    case kArm64F32x4SConvertI32x4:
+    case kArm64F32x4UConvertI32x4:
+    case kArm64F32x4Abs:
+    case kArm64F32x4Neg:
+    case kArm64F32x4Sqrt:
+    case kArm64F32x4RecipApprox:
+    case kArm64F32x4RecipSqrtApprox:
+    case kArm64F32x4Add:
+    case kArm64F32x4AddHoriz:
+    case kArm64F32x4Sub:
+    case kArm64F32x4Mul:
+    case kArm64F32x4Div:
+    case kArm64F32x4Min:
+    case kArm64F32x4Max:
+    case kArm64F32x4Eq:
+    case kArm64F32x4Ne:
+    case kArm64F32x4Lt:
+    case kArm64F32x4Le:
+    case kArm64F32x4Qfma:
+    case kArm64F32x4Qfms:
+    case kArm64F32x4Pmin:
+    case kArm64F32x4Pmax:
+    case kArm64I64x2Splat:
+    case kArm64I64x2ExtractLane:
+    case kArm64I64x2ReplaceLane:
+    case kArm64I64x2Neg:
+    case kArm64I64x2Shl:
+    case kArm64I64x2ShrS:
+    case kArm64I64x2Add:
+    case kArm64I64x2Sub:
+    case kArm64I64x2Mul:
+    case kArm64I64x2Eq:
+    case kArm64I64x2ShrU:
+    case kArm64I32x4Splat:
+    case kArm64I32x4ExtractLane:
+    case kArm64I32x4ReplaceLane:
+    case kArm64I32x4SConvertF32x4:
+    case kArm64Sxtl:
+    case kArm64Sxtl2:
+    case kArm64Uxtl:
+    case kArm64Uxtl2:
+    case kArm64I32x4Neg:
+    case kArm64I32x4Shl:
+    case kArm64I32x4ShrS:
+    case kArm64I32x4Add:
+    case kArm64I32x4AddHoriz:
+    case kArm64I32x4Sub:
+    case kArm64I32x4Mul:
+    case kArm64I32x4Mla:
+    case kArm64I32x4Mls:
+    case kArm64I32x4MinS:
+    case kArm64I32x4MaxS:
+    case kArm64I32x4Eq:
+    case kArm64I32x4Ne:
+    case kArm64I32x4GtS:
+    case kArm64I32x4GeS:
+    case kArm64I32x4UConvertF32x4:
+    case kArm64I32x4ShrU:
+    case kArm64I32x4MinU:
+    case kArm64I32x4MaxU:
+    case kArm64I32x4GtU:
+    case kArm64I32x4GeU:
+    case kArm64I32x4Abs:
+    case kArm64I32x4BitMask:
+    case kArm64I32x4DotI16x8S:
+    case kArm64I16x8Splat:
+    case kArm64I16x8ExtractLaneU:
+    case kArm64I16x8ExtractLaneS:
+    case kArm64I16x8ReplaceLane:
+    case kArm64I16x8Neg:
+    case kArm64I16x8Shl:
+    case kArm64I16x8ShrS:
+    case kArm64I16x8SConvertI32x4:
+    case kArm64I16x8Add:
+    case kArm64I16x8AddSatS:
+    case kArm64I16x8AddHoriz:
+    case kArm64I16x8Sub:
+    case kArm64I16x8SubSatS:
+    case kArm64I16x8Mul:
+    case kArm64I16x8Mla:
+    case kArm64I16x8Mls:
+    case kArm64I16x8MinS:
+    case kArm64I16x8MaxS:
+    case kArm64I16x8Eq:
+    case kArm64I16x8Ne:
+    case kArm64I16x8GtS:
+    case kArm64I16x8GeS:
+    case kArm64I16x8ShrU:
+    case kArm64I16x8UConvertI32x4:
+    case kArm64I16x8AddSatU:
+    case kArm64I16x8SubSatU:
+    case kArm64I16x8MinU:
+    case kArm64I16x8MaxU:
+    case kArm64I16x8GtU:
+    case kArm64I16x8GeU:
+    case kArm64I16x8RoundingAverageU:
+    case kArm64I16x8Q15MulRSatS:
+    case kArm64I16x8Abs:
+    case kArm64I16x8BitMask:
+    case kArm64I8x16Splat:
+    case kArm64I8x16ExtractLaneU:
+    case kArm64I8x16ExtractLaneS:
+    case kArm64I8x16ReplaceLane:
+    case kArm64I8x16Neg:
+    case kArm64I8x16Shl:
+    case kArm64I8x16ShrS:
+    case kArm64I8x16SConvertI16x8:
+    case kArm64I8x16Add:
+    case kArm64I8x16AddSatS:
+    case kArm64I8x16Sub:
+    case kArm64I8x16SubSatS:
+    case kArm64I8x16Mul:
+    case kArm64I8x16Mla:
+    case kArm64I8x16Mls:
+    case kArm64I8x16MinS:
+    case kArm64I8x16MaxS:
+    case kArm64I8x16Eq:
+    case kArm64I8x16Ne:
+    case kArm64I8x16GtS:
+    case kArm64I8x16GeS:
+    case kArm64I8x16UConvertI16x8:
+    case kArm64I8x16AddSatU:
+    case kArm64I8x16SubSatU:
+    case kArm64I8x16ShrU:
+    case kArm64I8x16MinU:
+    case kArm64I8x16MaxU:
+    case kArm64I8x16GtU:
+    case kArm64I8x16GeU:
+    case kArm64I8x16RoundingAverageU:
+    case kArm64I8x16Abs:
+    case kArm64I8x16BitMask:
+    case kArm64S128Const:
+    case kArm64S128Zero:
+    case kArm64S128Dup:
+    case kArm64S128And:
+    case kArm64S128Or:
+    case kArm64S128Xor:
+    case kArm64S128Not:
+    case kArm64S128Select:
+    case kArm64S128AndNot:
+    case kArm64S32x4ZipLeft:
+    case kArm64S32x4ZipRight:
+    case kArm64S32x4UnzipLeft:
+    case kArm64S32x4UnzipRight:
+    case kArm64S32x4TransposeLeft:
+    case kArm64S32x4TransposeRight:
+    case kArm64S32x4Shuffle:
+    case kArm64S16x8ZipLeft:
+    case kArm64S16x8ZipRight:
+    case kArm64S16x8UnzipLeft:
+    case kArm64S16x8UnzipRight:
+    case kArm64S16x8TransposeLeft:
+    case kArm64S16x8TransposeRight:
+    case kArm64S8x16ZipLeft:
+    case kArm64S8x16ZipRight:
+    case kArm64S8x16UnzipLeft:
+    case kArm64S8x16UnzipRight:
+    case kArm64S8x16TransposeLeft:
+    case kArm64S8x16TransposeRight:
+    case kArm64S8x16Concat:
+    case kArm64I8x16Swizzle:
+    case kArm64I8x16Shuffle:
+    case kArm64S32x2Reverse:
+    case kArm64S16x4Reverse:
+    case kArm64S16x2Reverse:
+    case kArm64S8x8Reverse:
+    case kArm64S8x4Reverse:
+    case kArm64S8x2Reverse:
+    case kArm64V128AnyTrue:
+    case kArm64V32x4AllTrue:
+    case kArm64V16x8AllTrue:
+    case kArm64V8x16AllTrue:
+    case kArm64TestAndBranch32:
+    case kArm64TestAndBranch:
+    case kArm64CompareAndBranch32:
+    case kArm64CompareAndBranch:
+      return kNoOpcodeFlags;
+
+    case kArm64LdrS:
+    case kArm64LdrD:
+    case kArm64LdrQ:
+    case kArm64Ldrb:
+    case kArm64Ldrsb:
+    case kArm64Ldrh:
+    case kArm64Ldrsh:
+    case kArm64Ldrsw:
+    case kArm64LdrW:
+    case kArm64Ldr:
+    case kArm64LdrDecompressTaggedSigned:
+    case kArm64LdrDecompressTaggedPointer:
+    case kArm64LdrDecompressAnyTagged:
+    case kArm64Peek:
+    case kArm64LoadSplat:
+    case kArm64S128Load8x8S:
+    case kArm64S128Load8x8U:
+    case kArm64S128Load16x4S:
+    case kArm64S128Load16x4U:
+    case kArm64S128Load32x2S:
+    case kArm64S128Load32x2U:
+    case kArm64S128Load32Zero:
+    case kArm64S128Load64Zero:
+      return kIsLoadOperation;
+
+    case kArm64Claim:
+    case kArm64Poke:
+    case kArm64PokePair:
+    case kArm64StrS:
+    case kArm64StrD:
+    case kArm64StrQ:
+    case kArm64Strb:
+    case kArm64Strh:
+    case kArm64StrW:
+    case kArm64Str:
+    case kArm64StrCompressTagged:
+    case kArm64DmbIsh:
+    case kArm64DsbIsb:
+      return kHasSideEffect;
+
+    case kArm64Word64AtomicLoadUint8:
+    case kArm64Word64AtomicLoadUint16:
+    case kArm64Word64AtomicLoadUint32:
+    case kArm64Word64AtomicLoadUint64:
+      return kIsLoadOperation;
+
+    case kArm64Word64AtomicStoreWord8:
+    case kArm64Word64AtomicStoreWord16:
+    case kArm64Word64AtomicStoreWord32:
+    case kArm64Word64AtomicStoreWord64:
+    case kArm64Word64AtomicAddUint8:
+    case kArm64Word64AtomicAddUint16:
+    case kArm64Word64AtomicAddUint32:
+    case kArm64Word64AtomicAddUint64:
+    case kArm64Word64AtomicSubUint8:
+    case kArm64Word64AtomicSubUint16:
+    case kArm64Word64AtomicSubUint32:
+    case kArm64Word64AtomicSubUint64:
+    case kArm64Word64AtomicAndUint8:
+    case kArm64Word64AtomicAndUint16:
+    case kArm64Word64AtomicAndUint32:
+    case kArm64Word64AtomicAndUint64:
+    case kArm64Word64AtomicOrUint8:
+    case kArm64Word64AtomicOrUint16:
+    case kArm64Word64AtomicOrUint32:
+    case kArm64Word64AtomicOrUint64:
+    case kArm64Word64AtomicXorUint8:
+    case kArm64Word64AtomicXorUint16:
+    case kArm64Word64AtomicXorUint32:
+    case kArm64Word64AtomicXorUint64:
+    case kArm64Word64AtomicExchangeUint8:
+    case kArm64Word64AtomicExchangeUint16:
+    case kArm64Word64AtomicExchangeUint32:
+    case kArm64Word64AtomicExchangeUint64:
+    case kArm64Word64AtomicCompareExchangeUint8:
+    case kArm64Word64AtomicCompareExchangeUint16:
+    case kArm64Word64AtomicCompareExchangeUint32:
+    case kArm64Word64AtomicCompareExchangeUint64:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // Basic latency modeling for arm64 instructions. The latencies were
+  // determined empirically.
+  switch (instr->arch_opcode()) {
+    case kArm64Add:
+    case kArm64Add32:
+    case kArm64And:
+    case kArm64And32:
+    case kArm64Bic:
+    case kArm64Bic32:
+    case kArm64Cmn:
+    case kArm64Cmn32:
+    case kArm64Cmp:
+    case kArm64Cmp32:
+    case kArm64Eon:
+    case kArm64Eon32:
+    case kArm64Eor:
+    case kArm64Eor32:
+    case kArm64Not:
+    case kArm64Not32:
+    case kArm64Or:
+    case kArm64Or32:
+    case kArm64Orn:
+    case kArm64Orn32:
+    case kArm64Sub:
+    case kArm64Sub32:
+    case kArm64Tst:
+    case kArm64Tst32:
+      if (instr->addressing_mode() != kMode_None) {
+        return 3;
+      } else {
+        return 1;
+      }
+
+    case kArm64Clz:
+    case kArm64Clz32:
+    case kArm64Sbfx:
+    case kArm64Sbfx32:
+    case kArm64Sxtb32:
+    case kArm64Sxth32:
+    case kArm64Sxtw:
+    case kArm64Ubfiz32:
+    case kArm64Ubfx:
+    case kArm64Ubfx32:
+      return 1;
+
+    case kArm64Lsl:
+    case kArm64Lsl32:
+    case kArm64Lsr:
+    case kArm64Lsr32:
+    case kArm64Asr:
+    case kArm64Asr32:
+    case kArm64Ror:
+    case kArm64Ror32:
+      return 1;
+
+    case kArm64LdrDecompressTaggedSigned:
+    case kArm64LdrDecompressTaggedPointer:
+    case kArm64LdrDecompressAnyTagged:
+    case kArm64Ldr:
+    case kArm64LdrD:
+    case kArm64LdrS:
+    case kArm64LdrW:
+    case kArm64Ldrb:
+    case kArm64Ldrh:
+    case kArm64Ldrsb:
+    case kArm64Ldrsh:
+    case kArm64Ldrsw:
+      return 11;
+
+    case kArm64Str:
+    case kArm64StrD:
+    case kArm64StrS:
+    case kArm64StrW:
+    case kArm64Strb:
+    case kArm64Strh:
+      return 1;
+
+    case kArm64Madd32:
+    case kArm64Mneg32:
+    case kArm64Msub32:
+    case kArm64Mul32:
+      return 3;
+
+    case kArm64Madd:
+    case kArm64Mneg:
+    case kArm64Msub:
+    case kArm64Mul:
+      return 5;
+
+    case kArm64Idiv32:
+    case kArm64Udiv32:
+      return 12;
+
+    case kArm64Idiv:
+    case kArm64Udiv:
+      return 20;
+
+    case kArm64Float32Add:
+    case kArm64Float32Sub:
+    case kArm64Float64Add:
+    case kArm64Float64Sub:
+      return 5;
+
+    case kArm64Float32Abs:
+    case kArm64Float32Cmp:
+    case kArm64Float32Neg:
+    case kArm64Float64Abs:
+    case kArm64Float64Cmp:
+    case kArm64Float64Neg:
+      return 3;
+
+    case kArm64Float32Div:
+    case kArm64Float32Sqrt:
+      return 12;
+
+    case kArm64Float64Div:
+    case kArm64Float64Sqrt:
+      return 19;
+
+    case kArm64Float32RoundDown:
+    case kArm64Float32RoundTiesEven:
+    case kArm64Float32RoundTruncate:
+    case kArm64Float32RoundUp:
+    case kArm64Float64RoundDown:
+    case kArm64Float64RoundTiesAway:
+    case kArm64Float64RoundTiesEven:
+    case kArm64Float64RoundTruncate:
+    case kArm64Float64RoundUp:
+      return 5;
+
+    case kArm64Float32ToFloat64:
+    case kArm64Float64ToFloat32:
+    case kArm64Float64ToInt32:
+    case kArm64Float64ToUint32:
+    case kArm64Float32ToInt64:
+    case kArm64Float64ToInt64:
+    case kArm64Float32ToUint64:
+    case kArm64Float64ToUint64:
+    case kArm64Int32ToFloat64:
+    case kArm64Int64ToFloat32:
+    case kArm64Int64ToFloat64:
+    case kArm64Uint32ToFloat64:
+    case kArm64Uint64ToFloat32:
+    case kArm64Uint64ToFloat64:
+      return 5;
+
+    default:
+      return 2;
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm64/instruction-selector-arm64.cc b/src/compiler/backend/arm64/instruction-selector-arm64.cc
new file mode 100644
index 0000000..584cfb6
--- /dev/null
+++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -0,0 +1,3892 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/bits.h"
+#include "src/codegen/assembler-inl.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+enum ImmediateMode {
+  kArithmeticImm,  // 12 bit unsigned immediate shifted left 0 or 12 bits
+  kShift32Imm,     // 0 - 31
+  kShift64Imm,     // 0 - 63
+  kLogical32Imm,   // bitmask immediate (see Assembler::IsImmLogical)
+  kLogical64Imm,   // bitmask immediate (see Assembler::IsImmLogical)
+  kLoadStoreImm8,  // signed 8 bit or 12 bit unsigned scaled by access size
+  kLoadStoreImm16,
+  kLoadStoreImm32,
+  kLoadStoreImm64,
+  kNoImmediate
+};
+
+// Adds Arm64-specific methods for generating operands.
+class Arm64OperandGenerator final : public OperandGenerator {
+ public:
+  explicit Arm64OperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseOperand(Node* node, ImmediateMode mode) {
+    if (CanBeImmediate(node, mode)) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  // Use the zero register if the node has the immediate value zero, otherwise
+  // assign a register.
+  InstructionOperand UseRegisterOrImmediateZero(Node* node) {
+    if ((IsIntegerConstant(node) && (GetIntegerConstantValue(node) == 0)) ||
+        (IsFloatConstant(node) &&
+         (bit_cast<int64_t>(GetFloatConstantValue(node)) == 0))) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  // Use the provided node if it has the required value, or create a
+  // TempImmediate otherwise.
+  InstructionOperand UseImmediateOrTemp(Node* node, int32_t value) {
+    if (GetIntegerConstantValue(node) == value) {
+      return UseImmediate(node);
+    }
+    return TempImmediate(value);
+  }
+
+  bool IsIntegerConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kInt32Constant) ||
+           (node->opcode() == IrOpcode::kInt64Constant);
+  }
+
+  int64_t GetIntegerConstantValue(Node* node) {
+    if (node->opcode() == IrOpcode::kInt32Constant) {
+      return OpParameter<int32_t>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode());
+    return OpParameter<int64_t>(node->op());
+  }
+
+  bool IsFloatConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kFloat32Constant) ||
+           (node->opcode() == IrOpcode::kFloat64Constant);
+  }
+
+  double GetFloatConstantValue(Node* node) {
+    if (node->opcode() == IrOpcode::kFloat32Constant) {
+      return OpParameter<float>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kFloat64Constant, node->opcode());
+    return OpParameter<double>(node->op());
+  }
+
+  bool CanBeImmediate(Node* node, ImmediateMode mode) {
+    return IsIntegerConstant(node) &&
+           CanBeImmediate(GetIntegerConstantValue(node), mode);
+  }
+
+  bool CanBeImmediate(int64_t value, ImmediateMode mode) {
+    unsigned ignored;
+    switch (mode) {
+      case kLogical32Imm:
+        // TODO(dcarney): some unencodable values can be handled by
+        // switching instructions.
+        return Assembler::IsImmLogical(static_cast<uint64_t>(value), 32,
+                                       &ignored, &ignored, &ignored);
+      case kLogical64Imm:
+        return Assembler::IsImmLogical(static_cast<uint64_t>(value), 64,
+                                       &ignored, &ignored, &ignored);
+      case kArithmeticImm:
+        return Assembler::IsImmAddSub(value);
+      case kLoadStoreImm8:
+        return IsLoadStoreImmediate(value, 0);
+      case kLoadStoreImm16:
+        return IsLoadStoreImmediate(value, 1);
+      case kLoadStoreImm32:
+        return IsLoadStoreImmediate(value, 2);
+      case kLoadStoreImm64:
+        return IsLoadStoreImmediate(value, 3);
+      case kNoImmediate:
+        return false;
+      case kShift32Imm:  // Fall through.
+      case kShift64Imm:
+        // Shift operations only observe the bottom 5 or 6 bits of the value.
+        // All possible shifts can be encoded by discarding bits which have no
+        // effect.
+        return true;
+    }
+    return false;
+  }
+
+  bool CanBeLoadStoreShiftImmediate(Node* node, MachineRepresentation rep) {
+    // TODO(arm64): Load and Store on 128 bit Q registers is not supported yet.
+    DCHECK_GT(MachineRepresentation::kSimd128, rep);
+    return IsIntegerConstant(node) &&
+           (GetIntegerConstantValue(node) == ElementSizeLog2Of(rep));
+  }
+
+ private:
+  bool IsLoadStoreImmediate(int64_t value, unsigned size) {
+    return Assembler::IsImmLSScaled(value, size) ||
+           Assembler::IsImmLSUnscaled(value);
+  }
+};
+
+namespace {
+
+void VisitRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  Arm64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRR(InstructionSelector* selector, InstructionCode opcode,
+             Node* node) {
+  Arm64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  Arm64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+void VisitRRR(InstructionSelector* selector, InstructionCode opcode,
+              Node* node) {
+  Arm64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
+                       Node* node, int width) {
+  Arm64OperandGenerator g(selector);
+  if (g.IsIntegerConstant(node->InputAt(1))) {
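+    // Shift counts are taken modulo the lane width, so a constant shift by a
+    // multiple of the width is the identity.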
+    if (g.GetIntegerConstantValue(node->InputAt(1)) % width == 0) {
+      selector->EmitIdentity(node);
+    } else {
+      selector->Emit(opcode, g.DefineAsRegister(node),
+                     g.UseRegister(node->InputAt(0)),
+                     g.UseImmediate(node->InputAt(1)));
+    }
+  } else {
+    selector->Emit(opcode, g.DefineAsRegister(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(1)));
+  }
+}
+
+void VisitRRI(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  Arm64OperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm));
+}
+
+void VisitRRO(InstructionSelector* selector, ArchOpcode opcode, Node* node,
+              ImmediateMode operand_mode) {
+  Arm64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseOperand(node->InputAt(1), operand_mode));
+}
+
+void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  Arm64OperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+struct ExtendingLoadMatcher {
+  ExtendingLoadMatcher(Node* node, InstructionSelector* selector)
+      : matches_(false), selector_(selector), base_(nullptr), immediate_(0) {
+    Initialize(node);
+  }
+
+  bool Matches() const { return matches_; }
+
+  Node* base() const {
+    DCHECK(Matches());
+    return base_;
+  }
+  int64_t immediate() const {
+    DCHECK(Matches());
+    return immediate_;
+  }
+  ArchOpcode opcode() const {
+    DCHECK(Matches());
+    return opcode_;
+  }
+
+ private:
+  bool matches_;
+  InstructionSelector* selector_;
+  Node* base_;
+  int64_t immediate_;
+  ArchOpcode opcode_;
+
+  void Initialize(Node* node) {
+    Int64BinopMatcher m(node);
+    // When loading a 64-bit value and shifting by 32, we should
+    // just load and sign-extend the interesting 4 bytes instead.
+    // This happens, for example, when we're loading and untagging SMIs.
+    DCHECK(m.IsWord64Sar());
+    if (m.left().IsLoad() && m.right().Is(32) &&
+        selector_->CanCover(m.node(), m.left().node())) {
+      Arm64OperandGenerator g(selector_);
+      Node* load = m.left().node();
+      Node* offset = load->InputAt(1);
+      base_ = load->InputAt(0);
+      opcode_ = kArm64Ldrsw;
+      if (g.IsIntegerConstant(offset)) {
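+        // Offset by 4 so that only the upper half of the 64-bit value is
+        // loaded and sign-extended (arm64 is little-endian).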
+        immediate_ = g.GetIntegerConstantValue(offset) + 4;
+        matches_ = g.CanBeImmediate(immediate_, kLoadStoreImm32);
+      }
+    }
+  }
+};
+
+bool TryMatchExtendingLoad(InstructionSelector* selector, Node* node) {
+  ExtendingLoadMatcher m(node, selector);
+  return m.Matches();
+}
+
+bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node) {
+  ExtendingLoadMatcher m(node, selector);
+  Arm64OperandGenerator g(selector);
+  if (m.Matches()) {
+    InstructionOperand inputs[2];
+    inputs[0] = g.UseRegister(m.base());
+    InstructionCode opcode =
+        m.opcode() | AddressingModeField::encode(kMode_MRI);
+    DCHECK(is_int32(m.immediate()));
+    inputs[1] = g.TempImmediate(static_cast<int32_t>(m.immediate()));
+    InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+    selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs),
+                   inputs);
+    return true;
+  }
+  return false;
+}
+
+bool TryMatchAnyShift(InstructionSelector* selector, Node* node,
+                      Node* input_node, InstructionCode* opcode, bool try_ror) {
+  Arm64OperandGenerator g(selector);
+
+  if (!selector->CanCover(node, input_node)) return false;
+  if (input_node->InputCount() != 2) return false;
+  if (!g.IsIntegerConstant(input_node->InputAt(1))) return false;
+
+  switch (input_node->opcode()) {
+    case IrOpcode::kWord32Shl:
+    case IrOpcode::kWord64Shl:
+      *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
+      return true;
+    case IrOpcode::kWord32Shr:
+    case IrOpcode::kWord64Shr:
+      *opcode |= AddressingModeField::encode(kMode_Operand2_R_LSR_I);
+      return true;
+    case IrOpcode::kWord32Sar:
+      *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
+      return true;
+    case IrOpcode::kWord64Sar:
+      if (TryMatchExtendingLoad(selector, input_node)) return false;
+      *opcode |= AddressingModeField::encode(kMode_Operand2_R_ASR_I);
+      return true;
+    case IrOpcode::kWord32Ror:
+    case IrOpcode::kWord64Ror:
+      if (try_ror) {
+        *opcode |= AddressingModeField::encode(kMode_Operand2_R_ROR_I);
+        return true;
+      }
+      return false;
+    default:
+      return false;
+  }
+}
+
+bool TryMatchAnyExtend(Arm64OperandGenerator* g, InstructionSelector* selector,
+                       Node* node, Node* left_node, Node* right_node,
+                       InstructionOperand* left_op,
+                       InstructionOperand* right_op, InstructionCode* opcode) {
+  if (!selector->CanCover(node, right_node)) return false;
+
+  NodeMatcher nm(right_node);
+
+  if (nm.IsWord32And()) {
+    Int32BinopMatcher mright(right_node);
+    if (mright.right().Is(0xFF) || mright.right().Is(0xFFFF)) {
+      int32_t mask = mright.right().ResolvedValue();
+      *left_op = g->UseRegister(left_node);
+      *right_op = g->UseRegister(mright.left().node());
+      *opcode |= AddressingModeField::encode(
+          (mask == 0xFF) ? kMode_Operand2_R_UXTB : kMode_Operand2_R_UXTH);
+      return true;
+    }
+  } else if (nm.IsWord32Sar()) {
+    Int32BinopMatcher mright(right_node);
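+    // (x << 24) >> 24 sign-extends the low byte and (x << 16) >> 16 the low
+    // halfword, so these patterns can be folded into an SXTB/SXTH operand.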
+    if (selector->CanCover(mright.node(), mright.left().node()) &&
+        mright.left().IsWord32Shl()) {
+      Int32BinopMatcher mleft_of_right(mright.left().node());
+      if ((mright.right().Is(16) && mleft_of_right.right().Is(16)) ||
+          (mright.right().Is(24) && mleft_of_right.right().Is(24))) {
+        int32_t shift = mright.right().ResolvedValue();
+        *left_op = g->UseRegister(left_node);
+        *right_op = g->UseRegister(mleft_of_right.left().node());
+        *opcode |= AddressingModeField::encode(
+            (shift == 24) ? kMode_Operand2_R_SXTB : kMode_Operand2_R_SXTH);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool TryMatchLoadStoreShift(Arm64OperandGenerator* g,
+                            InstructionSelector* selector,
+                            MachineRepresentation rep, Node* node, Node* index,
+                            InstructionOperand* index_op,
+                            InstructionOperand* shift_immediate_op) {
+  if (!selector->CanCover(node, index)) return false;
+  if (index->InputCount() != 2) return false;
+  Node* left = index->InputAt(0);
+  Node* right = index->InputAt(1);
+  switch (index->opcode()) {
+    case IrOpcode::kWord32Shl:
+    case IrOpcode::kWord64Shl:
+      if (!g->CanBeLoadStoreShiftImmediate(right, rep)) {
+        return false;
+      }
+      *index_op = g->UseRegister(left);
+      *shift_immediate_op = g->UseImmediate(right);
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Bitfields describing binary operator properties:
+// CanCommuteField is true if we can switch the two operands, potentially
+// requiring commuting the flags continuation condition.
+using CanCommuteField = base::BitField8<bool, 1, 1>;
+// MustCommuteCondField is true when we need to commute the flags continuation
+// condition in order to switch the operands.
+using MustCommuteCondField = base::BitField8<bool, 2, 1>;
+// IsComparisonField is true when the operation is a comparison and has no
+// result other than the condition.
+using IsComparisonField = base::BitField8<bool, 3, 1>;
+// IsAddSubField is true when an instruction is encoded as ADD or SUB.
+using IsAddSubField = base::BitField8<bool, 4, 1>;
+
+// Get properties of a binary operator.
+uint8_t GetBinopProperties(InstructionCode opcode) {
+  uint8_t result = 0;
+  switch (opcode) {
+    case kArm64Cmp32:
+    case kArm64Cmp:
+      // We can commute CMP by switching the inputs and commuting
+      // the flags continuation.
+      result = CanCommuteField::update(result, true);
+      result = MustCommuteCondField::update(result, true);
+      result = IsComparisonField::update(result, true);
+      // The CMP and CMN instructions are encoded as SUB or ADD
+      // with zero output register, and therefore support the same
+      // operand modes.
+      result = IsAddSubField::update(result, true);
+      break;
+    case kArm64Cmn32:
+    case kArm64Cmn:
+      result = CanCommuteField::update(result, true);
+      result = IsComparisonField::update(result, true);
+      result = IsAddSubField::update(result, true);
+      break;
+    case kArm64Add32:
+    case kArm64Add:
+      result = CanCommuteField::update(result, true);
+      result = IsAddSubField::update(result, true);
+      break;
+    case kArm64Sub32:
+    case kArm64Sub:
+      result = IsAddSubField::update(result, true);
+      break;
+    case kArm64Tst32:
+    case kArm64Tst:
+      result = CanCommuteField::update(result, true);
+      result = IsComparisonField::update(result, true);
+      break;
+    case kArm64And32:
+    case kArm64And:
+    case kArm64Or32:
+    case kArm64Or:
+    case kArm64Eor32:
+    case kArm64Eor:
+      result = CanCommuteField::update(result, true);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  DCHECK_IMPLIES(MustCommuteCondField::decode(result),
+                 CanCommuteField::decode(result));
+  return result;
+}
+
+// Shared routine for multiple binary operations.
+template <typename Matcher>
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, ImmediateMode operand_mode,
+                FlagsContinuation* cont) {
+  Arm64OperandGenerator g(selector);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  Node* left_node = node->InputAt(0);
+  Node* right_node = node->InputAt(1);
+
+  uint8_t properties = GetBinopProperties(opcode);
+  bool can_commute = CanCommuteField::decode(properties);
+  bool must_commute_cond = MustCommuteCondField::decode(properties);
+  bool is_add_sub = IsAddSubField::decode(properties);
+
+  if (g.CanBeImmediate(right_node, operand_mode)) {
+    inputs[input_count++] = g.UseRegister(left_node);
+    inputs[input_count++] = g.UseImmediate(right_node);
+  } else if (can_commute && g.CanBeImmediate(left_node, operand_mode)) {
+    if (must_commute_cond) cont->Commute();
+    inputs[input_count++] = g.UseRegister(right_node);
+    inputs[input_count++] = g.UseImmediate(left_node);
+  } else if (is_add_sub &&
+             TryMatchAnyExtend(&g, selector, node, left_node, right_node,
+                               &inputs[0], &inputs[1], &opcode)) {
+    input_count += 2;
+  } else if (is_add_sub && can_commute &&
+             TryMatchAnyExtend(&g, selector, node, right_node, left_node,
+                               &inputs[0], &inputs[1], &opcode)) {
+    if (must_commute_cond) cont->Commute();
+    input_count += 2;
+  } else if (TryMatchAnyShift(selector, node, right_node, &opcode,
+                              !is_add_sub)) {
+    Matcher m_shift(right_node);
+    inputs[input_count++] = g.UseRegisterOrImmediateZero(left_node);
+    inputs[input_count++] = g.UseRegister(m_shift.left().node());
+    // We only need at most the last 6 bits of the shift.
+    inputs[input_count++] = g.UseImmediate(
+        static_cast<int>(m_shift.right().ResolvedValue() & 0x3F));
+  } else if (can_commute && TryMatchAnyShift(selector, node, left_node, &opcode,
+                                             !is_add_sub)) {
+    if (must_commute_cond) cont->Commute();
+    Matcher m_shift(left_node);
+    inputs[input_count++] = g.UseRegisterOrImmediateZero(right_node);
+    inputs[input_count++] = g.UseRegister(m_shift.left().node());
+    // We only need at most the last 6 bits of the shift.
+    inputs[input_count++] = g.UseImmediate(
+        static_cast<int>(m_shift.right().ResolvedValue() & 0x3F));
+  } else {
+    inputs[input_count++] = g.UseRegisterOrImmediateZero(left_node);
+    inputs[input_count++] = g.UseRegister(right_node);
+  }
+
+  if (!IsComparisonField::decode(properties)) {
+    outputs[output_count++] = g.DefineAsRegister(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK((output_count != 0) || IsComparisonField::decode(properties));
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+// Shared routine for multiple binary operations.
+template <typename Matcher>
+void VisitBinop(InstructionSelector* selector, Node* node, ArchOpcode opcode,
+                ImmediateMode operand_mode) {
+  FlagsContinuation cont;
+  VisitBinop<Matcher>(selector, node, opcode, operand_mode, &cont);
+}
+
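+// Shared helper for add/sub selection: if the right operand is a negative
+// immediate whose negation fits kArithmeticImm, emit the negated opcode
+// instead, e.g. Add(x, -4) is selected as Sub(x, #4).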
+template <typename Matcher>
+void VisitAddSub(InstructionSelector* selector, Node* node, ArchOpcode opcode,
+                 ArchOpcode negate_opcode) {
+  Arm64OperandGenerator g(selector);
+  Matcher m(node);
+  if (m.right().HasResolvedValue() && (m.right().ResolvedValue() < 0) &&
+      (m.right().ResolvedValue() > std::numeric_limits<int>::min()) &&
+      g.CanBeImmediate(-m.right().ResolvedValue(), kArithmeticImm)) {
+    selector->Emit(
+        negate_opcode, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+        g.TempImmediate(static_cast<int32_t>(-m.right().ResolvedValue())));
+  } else {
+    VisitBinop<Matcher>(selector, node, opcode, kArithmeticImm);
+  }
+}
+
+// For multiplications by immediate of the form x * (2^k + 1), where k > 0,
+// return the value of k; otherwise return zero. This is used to reduce the
+// multiplication to addition with left shift: x + (x << k).
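+// For example, 9 == 2^3 + 1, so for x * 9 this returns 3 and the multiply
+// can be selected as Add(x, x << 3).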
+template <typename Matcher>
+int32_t LeftShiftForReducedMultiply(Matcher* m) {
+  DCHECK(m->IsInt32Mul() || m->IsInt64Mul());
+  if (m->right().HasResolvedValue() && m->right().ResolvedValue() >= 3) {
+    uint64_t value_minus_one = m->right().ResolvedValue() - 1;
+    if (base::bits::IsPowerOfTwo(value_minus_one)) {
+      return base::bits::WhichPowerOfTwo(value_minus_one);
+    }
+  }
+  return 0;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  Arm64OperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), x1));
+}
+
+void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
+              ImmediateMode immediate_mode, MachineRepresentation rep,
+              Node* output = nullptr) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+
+  // If output is not nullptr, use that as the output register. This
+  // is used when we merge a conversion into the load.
+  outputs[0] = g.DefineAsRegister(output == nullptr ? node : output);
+
+  ExternalReferenceMatcher m(base);
+  if (m.HasResolvedValue() && g.IsIntegerConstant(index) &&
+      selector->CanAddressRelativeToRootsRegister(m.ResolvedValue())) {
+    ptrdiff_t const delta =
+        g.GetIntegerConstantValue(index) +
+        TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+            selector->isolate(), m.ResolvedValue());
+    input_count = 1;
+    // Check that the delta is a 32-bit integer due to the limitations of
+    // immediate operands.
+    if (is_int32(delta)) {
+      inputs[0] = g.UseImmediate(static_cast<int32_t>(delta));
+      opcode |= AddressingModeField::encode(kMode_Root);
+      selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
+      return;
+    }
+  }
+
+  inputs[0] = g.UseRegister(base);
+
+  if (g.CanBeImmediate(index, immediate_mode)) {
+    input_count = 2;
+    inputs[1] = g.UseImmediate(index);
+    opcode |= AddressingModeField::encode(kMode_MRI);
+  } else if (TryMatchLoadStoreShift(&g, selector, rep, node, index, &inputs[1],
+                                    &inputs[2])) {
+    input_count = 3;
+    opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
+  } else {
+    input_count = 2;
+    inputs[1] = g.UseRegister(index);
+    opcode |= AddressingModeField::encode(kMode_MRR);
+  }
+
+  selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  InstructionCode opcode = kArchNop;
+  bool require_add = false;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kArm64LoadSplat;
+      opcode |= MiscField::encode(8);
+      require_add = true;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kArm64LoadSplat;
+      opcode |= MiscField::encode(16);
+      require_add = true;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kArm64LoadSplat;
+      opcode |= MiscField::encode(32);
+      require_add = true;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kArm64LoadSplat;
+      opcode |= MiscField::encode(64);
+      require_add = true;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kArm64S128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kArm64S128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kArm64S128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kArm64S128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kArm64S128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kArm64S128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kArm64S128Load32Zero;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kArm64S128Load64Zero;
+      break;
+    default:
+      UNIMPLEMENTED();
+  }
+  // ARM64 supports unaligned loads, so the kUnaligned access kind should
+  // never reach this point.
+  DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
+
+  Arm64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  InstructionOperand inputs[2];
+  InstructionOperand outputs[1];
+
+  inputs[0] = g.UseRegister(base);
+  inputs[1] = g.UseRegister(index);
+  outputs[0] = g.DefineAsRegister(node);
+
+  if (require_add) {
+    // ld1r uses post-index, so construct address first.
+    // TODO(v8:9886) If index can be immediate, use vldr without this add.
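+    // (ld1r loads a single element and replicates it into every lane of the
+    // destination vector register.)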
+    InstructionOperand addr = g.TempRegister();
+    Emit(kArm64Add, 1, &addr, 2, inputs);
+    inputs[0] = addr;
+    inputs[1] = g.TempImmediate(0);
+    opcode |= AddressingModeField::encode(kMode_MRI);
+  } else {
+    opcode |= AddressingModeField::encode(kMode_MRR);
+  }
+  Emit(opcode, 1, outputs, 2, inputs);
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  InstructionCode opcode = kArchNop;
+  ImmediateMode immediate_mode = kNoImmediate;
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  MachineRepresentation rep = load_rep.representation();
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+      opcode = kArm64LdrS;
+      immediate_mode = kLoadStoreImm32;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kArm64LdrD;
+      immediate_mode = kLoadStoreImm64;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsSigned() ? kArm64Ldrsb : kArm64Ldrb;
+      immediate_mode = kLoadStoreImm8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kArm64Ldrsh : kArm64Ldrh;
+      immediate_mode = kLoadStoreImm16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kArm64LdrW;
+      immediate_mode = kLoadStoreImm32;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+      opcode = kArm64LdrW;
+      immediate_mode = kLoadStoreImm32;
+      break;
+#else
+      UNREACHABLE();
+#endif
+#ifdef V8_COMPRESS_POINTERS
+    case MachineRepresentation::kTaggedSigned:
+      opcode = kArm64LdrDecompressTaggedSigned;
+      immediate_mode = kLoadStoreImm32;
+      break;
+    case MachineRepresentation::kTaggedPointer:
+      opcode = kArm64LdrDecompressTaggedPointer;
+      immediate_mode = kLoadStoreImm32;
+      break;
+    case MachineRepresentation::kTagged:
+      opcode = kArm64LdrDecompressAnyTagged;
+      immediate_mode = kLoadStoreImm32;
+      break;
+#else
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+#endif
+    case MachineRepresentation::kWord64:
+      opcode = kArm64Ldr;
+      immediate_mode = kLoadStoreImm64;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kArm64LdrQ;
+      immediate_mode = kNoImmediate;
+      break;
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+
+  EmitLoad(this, node, opcode, immediate_mode, rep);
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers &&
+      CanBeTaggedOrCompressedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  // TODO(arm64): This could probably be done in a better way.
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedOrCompressedPointer(rep));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    // OutOfLineRecordWrite uses the index in an add or sub instruction, but we
+    // can trust the assembler to generate extra instructions if the index does
+    // not fit into add or sub. So here we only need to check whether the
+    // immediate fits a store.
+    if (g.CanBeImmediate(index, COMPRESS_POINTERS_BOOL ? kLoadStoreImm32
+                                                       : kLoadStoreImm64)) {
+      inputs[input_count++] = g.UseImmediate(index);
+      addressing_mode = kMode_MRI;
+    } else {
+      inputs[input_count++] = g.UseUniqueRegister(index);
+      addressing_mode = kMode_MRR;
+    }
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, input_count, inputs);
+  } else {
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    InstructionCode opcode = kArchNop;
+    ImmediateMode immediate_mode = kNoImmediate;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kArm64StrS;
+        immediate_mode = kLoadStoreImm32;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kArm64StrD;
+        immediate_mode = kLoadStoreImm64;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kArm64Strb;
+        immediate_mode = kLoadStoreImm8;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kArm64Strh;
+        immediate_mode = kLoadStoreImm16;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kArm64StrW;
+        immediate_mode = kLoadStoreImm32;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+        opcode = kArm64StrCompressTagged;
+        immediate_mode = kLoadStoreImm32;
+        break;
+#else
+        UNREACHABLE();
+#endif
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:
+        opcode = kArm64StrCompressTagged;
+        immediate_mode =
+            COMPRESS_POINTERS_BOOL ? kLoadStoreImm32 : kLoadStoreImm64;
+        break;
+      case MachineRepresentation::kWord64:
+        opcode = kArm64Str;
+        immediate_mode = kLoadStoreImm64;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kArm64StrQ;
+        immediate_mode = kNoImmediate;
+        break;
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    ExternalReferenceMatcher m(base);
+    if (m.HasResolvedValue() && g.IsIntegerConstant(index) &&
+        CanAddressRelativeToRootsRegister(m.ResolvedValue())) {
+      ptrdiff_t const delta =
+          g.GetIntegerConstantValue(index) +
+          TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+              isolate(), m.ResolvedValue());
+      if (is_int32(delta)) {
+        input_count = 2;
+        InstructionOperand inputs[2];
+        inputs[0] = g.UseRegister(value);
+        inputs[1] = g.UseImmediate(static_cast<int32_t>(delta));
+        opcode |= AddressingModeField::encode(kMode_Root);
+        Emit(opcode, 0, nullptr, input_count, inputs);
+        return;
+      }
+    }
+
+    inputs[0] = g.UseRegisterOrImmediateZero(value);
+    inputs[1] = g.UseRegister(base);
+
+    if (g.CanBeImmediate(index, immediate_mode)) {
+      input_count = 3;
+      inputs[2] = g.UseImmediate(index);
+      opcode |= AddressingModeField::encode(kMode_MRI);
+    } else if (TryMatchLoadStoreShift(&g, this, rep, node, index, &inputs[2],
+                                      &inputs[3])) {
+      input_count = 4;
+      opcode |= AddressingModeField::encode(kMode_Operand2_R_LSL_I);
+    } else {
+      input_count = 3;
+      inputs[2] = g.UseRegister(index);
+      opcode |= AddressingModeField::encode(kMode_MRR);
+    }
+
+    Emit(opcode, 0, nullptr, input_count, inputs);
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+// The architecture supports unaligned access; VisitLoad is used instead.
+void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
+
+// The architecture supports unaligned access; VisitStore is used instead.
+void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
+
+template <typename Matcher>
+static void VisitLogical(InstructionSelector* selector, Node* node, Matcher* m,
+                         ArchOpcode opcode, bool left_can_cover,
+                         bool right_can_cover, ImmediateMode imm_mode) {
+  Arm64OperandGenerator g(selector);
+
+  // Map instruction to equivalent operation with inverted right input.
+  ArchOpcode inv_opcode = opcode;
+  switch (opcode) {
+    case kArm64And32:
+      inv_opcode = kArm64Bic32;
+      break;
+    case kArm64And:
+      inv_opcode = kArm64Bic;
+      break;
+    case kArm64Or32:
+      inv_opcode = kArm64Orn32;
+      break;
+    case kArm64Or:
+      inv_opcode = kArm64Orn;
+      break;
+    case kArm64Eor32:
+      inv_opcode = kArm64Eon32;
+      break;
+    case kArm64Eor:
+      inv_opcode = kArm64Eon;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  // Select Logical(y, ~x) for Logical(Xor(x, -1), y).
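+  // For example, And(Xor(x, -1), y) is selected as Bic(y, x).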
+  if ((m->left().IsWord32Xor() || m->left().IsWord64Xor()) && left_can_cover) {
+    Matcher mleft(m->left().node());
+    if (mleft.right().Is(-1)) {
+      // TODO(all): support shifted operand on right.
+      selector->Emit(inv_opcode, g.DefineAsRegister(node),
+                     g.UseRegister(m->right().node()),
+                     g.UseRegister(mleft.left().node()));
+      return;
+    }
+  }
+
+  // Select Logical(x, ~y) for Logical(x, Xor(y, -1)).
+  if ((m->right().IsWord32Xor() || m->right().IsWord64Xor()) &&
+      right_can_cover) {
+    Matcher mright(m->right().node());
+    if (mright.right().Is(-1)) {
+      // TODO(all): support shifted operand on right.
+      selector->Emit(inv_opcode, g.DefineAsRegister(node),
+                     g.UseRegister(m->left().node()),
+                     g.UseRegister(mright.left().node()));
+      return;
+    }
+  }
+
+  if (m->IsWord32Xor() && m->right().Is(-1)) {
+    selector->Emit(kArm64Not32, g.DefineAsRegister(node),
+                   g.UseRegister(m->left().node()));
+  } else if (m->IsWord64Xor() && m->right().Is(-1)) {
+    selector->Emit(kArm64Not, g.DefineAsRegister(node),
+                   g.UseRegister(m->left().node()));
+  } else {
+    VisitBinop<Matcher>(selector, node, opcode, imm_mode);
+  }
+}
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Shr() && CanCover(node, m.left().node()) &&
+      m.right().HasResolvedValue()) {
+    uint32_t mask = m.right().ResolvedValue();
+    uint32_t mask_width = base::bits::CountPopulation(mask);
+    uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+    if ((mask_width != 0) && (mask_width != 32) &&
+        (mask_msb + mask_width == 32)) {
+      // The mask must be contiguous, and occupy the least-significant bits.
+      DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+
+      // Select Ubfx for And(Shr(x, imm), mask) where the mask is in the least
+      // significant bits.
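+      // For example, And(Shr(x, 16), 0xFF) is selected as Ubfx(x, 16, 8).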
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue()) {
+        // Any shift value can match; int32 shifts use `value % 32`.
+        uint32_t lsb = mleft.right().ResolvedValue() & 0x1F;
+
+        // Ubfx cannot extract bits past the register size. However, since
+        // shifting the original value would have introduced some zeros, we can
+        // still use ubfx with a smaller mask and the remaining bits will be
+        // zeros.
+        if (lsb + mask_width > 32) mask_width = 32 - lsb;
+
+        Emit(kArm64Ubfx32, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseImmediateOrTemp(mleft.right().node(), lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+      // Other cases fall through to the normal And operation.
+    }
+  }
+  VisitLogical<Int32BinopMatcher>(
+      this, node, &m, kArm64And32, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical32Imm);
+}
+
+void InstructionSelector::VisitWord64And(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64Shr() && CanCover(node, m.left().node()) &&
+      m.right().HasResolvedValue()) {
+    uint64_t mask = m.right().ResolvedValue();
+    uint64_t mask_width = base::bits::CountPopulation(mask);
+    uint64_t mask_msb = base::bits::CountLeadingZeros64(mask);
+    if ((mask_width != 0) && (mask_width != 64) &&
+        (mask_msb + mask_width == 64)) {
+      // The mask must be contiguous, and occupy the least-significant bits.
+      DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask));
+
+      // Select Ubfx for And(Shr(x, imm), mask) where the mask is in the least
+      // significant bits.
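+      // For example, And(Shr(x, 60), 0xFF) is selected as Ubfx(x, 60, 4);
+      // the field width is clamped below to the 4 bits left after the shift.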
+      Int64BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue()) {
+        // Any shift value can match; int64 shifts use `value % 64`.
+        uint32_t lsb =
+            static_cast<uint32_t>(mleft.right().ResolvedValue() & 0x3F);
+
+        // Ubfx cannot extract bits past the register size. However, since
+        // shifting the original value would have introduced some zeros, we can
+        // still use ubfx with a smaller mask and the remaining bits will be
+        // zeros.
+        if (lsb + mask_width > 64) mask_width = 64 - lsb;
+
+        Emit(kArm64Ubfx, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseImmediateOrTemp(mleft.right().node(), lsb),
+             g.TempImmediate(static_cast<int32_t>(mask_width)));
+        return;
+      }
+      // Other cases fall through to the normal And operation.
+    }
+  }
+  VisitLogical<Int64BinopMatcher>(
+      this, node, &m, kArm64And, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical64Imm);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  Int32BinopMatcher m(node);
+  VisitLogical<Int32BinopMatcher>(
+      this, node, &m, kArm64Or32, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical32Imm);
+}
+
+void InstructionSelector::VisitWord64Or(Node* node) {
+  Int64BinopMatcher m(node);
+  VisitLogical<Int64BinopMatcher>(
+      this, node, &m, kArm64Or, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical64Imm);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  Int32BinopMatcher m(node);
+  VisitLogical<Int32BinopMatcher>(
+      this, node, &m, kArm64Eor32, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical32Imm);
+}
+
+void InstructionSelector::VisitWord64Xor(Node* node) {
+  Int64BinopMatcher m(node);
+  VisitLogical<Int64BinopMatcher>(
+      this, node, &m, kArm64Eor, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kLogical64Imm);
+}
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && CanCover(node, m.left().node()) &&
+      m.right().IsInRange(1, 31)) {
+    Arm64OperandGenerator g(this);
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue()) {
+      uint32_t mask = mleft.right().ResolvedValue();
+      uint32_t mask_width = base::bits::CountPopulation(mask);
+      uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
+        uint32_t shift = m.right().ResolvedValue();
+        DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+        DCHECK_NE(0u, shift);
+
+        if ((shift + mask_width) >= 32) {
+          // If the mask is contiguous and reaches or extends beyond the top
+          // bit, only the shift is needed.
+          Emit(kArm64Lsl32, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()),
+               g.UseImmediate(m.right().node()));
+          return;
+        } else {
+          // Select Ubfiz for Shl(And(x, mask), imm) where the mask is
+          // contiguous and the shift immediate is non-zero.
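+          // For example, Shl(And(x, 0xFF), 8) is selected as Ubfiz(x, 8, 8).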
+          Emit(kArm64Ubfiz32, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()),
+               g.UseImmediate(m.right().node()), g.TempImmediate(mask_width));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kArm64Lsl32, node, kShift32Imm);
+}
+
+void InstructionSelector::VisitWord64Shl(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if ((m.left().IsChangeInt32ToInt64() || m.left().IsChangeUint32ToUint64()) &&
+      m.right().IsInRange(32, 63) && CanCover(node, m.left().node())) {
+    // There's no need to sign/zero-extend to 64-bit if we shift out the upper
+    // 32 bits anyway.
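+    // For example, for Shl(ChangeInt32ToInt64(x), 48) only bits 0-15 of x
+    // reach the result, so the sign-extension can be elided.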
+    Emit(kArm64Lsl, g.DefineAsRegister(node),
+         g.UseRegister(m.left().node()->InputAt(0)),
+         g.UseImmediate(m.right().node()));
+    return;
+  }
+  VisitRRO(this, kArm64Lsl, node, kShift64Imm);
+}
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  Arm64OperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+namespace {
+
+bool TryEmitBitfieldExtract32(InstructionSelector* selector, Node* node) {
+  Arm64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  if (selector->CanCover(node, m.left().node()) && m.left().IsWord32Shl()) {
+    // Select Ubfx or Sbfx for (x << (K & 0x1F)) OP (K & 0x1F), where
+    // OP is >>> or >> and (K & 0x1F) != 0.
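+    // For example, (x << 24) >> 24 sign-extends the low byte and is selected
+    // as Sbfx(x, 0, 8); the unsigned shift variant becomes Ubfx(x, 0, 8).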
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() && m.right().HasResolvedValue() &&
+        (mleft.right().ResolvedValue() & 0x1F) != 0 &&
+        (mleft.right().ResolvedValue() & 0x1F) ==
+            (m.right().ResolvedValue() & 0x1F)) {
+      DCHECK(m.IsWord32Shr() || m.IsWord32Sar());
+      ArchOpcode opcode = m.IsWord32Sar() ? kArm64Sbfx32 : kArm64Ubfx32;
+
+      int right_val = m.right().ResolvedValue() & 0x1F;
+      DCHECK_NE(right_val, 0);
+
+      selector->Emit(opcode, g.DefineAsRegister(node),
+                     g.UseRegister(mleft.left().node()), g.TempImmediate(0),
+                     g.TempImmediate(32 - right_val));
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && m.right().HasResolvedValue()) {
+    uint32_t lsb = m.right().ResolvedValue() & 0x1F;
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() &&
+        mleft.right().ResolvedValue() != 0) {
+      // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
+      // shifted into the least-significant bits.
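+      // For example, Shr(And(x, 0xFF00), 8) is selected as Ubfx(x, 8, 8).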
+      uint32_t mask =
+          static_cast<uint32_t>(mleft.right().ResolvedValue() >> lsb) << lsb;
+      unsigned mask_width = base::bits::CountPopulation(mask);
+      unsigned mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_msb + mask_width + lsb) == 32) {
+        Arm64OperandGenerator g(this);
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask));
+        Emit(kArm64Ubfx32, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseImmediateOrTemp(m.right().node(), lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+    }
+  } else if (TryEmitBitfieldExtract32(this, node)) {
+    return;
+  }
+
+  if (m.left().IsUint32MulHigh() && m.right().HasResolvedValue() &&
+      CanCover(node, node->InputAt(0))) {
+    // Combine this shift with the multiply and shift that would be generated
+    // by Uint32MulHigh.
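+    // For example, Shr(Uint32MulHigh(a, b), 1) is selected as a umull of a
+    // and b followed by a 64-bit lsr by 32 + 1 = 33.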
+    Arm64OperandGenerator g(this);
+    Node* left = m.left().node();
+    int shift = m.right().ResolvedValue() & 0x1F;
+    InstructionOperand const smull_operand = g.TempRegister();
+    Emit(kArm64Umull, smull_operand, g.UseRegister(left->InputAt(0)),
+         g.UseRegister(left->InputAt(1)));
+    Emit(kArm64Lsr, g.DefineAsRegister(node), smull_operand,
+         g.TempImmediate(32 + shift));
+    return;
+  }
+
+  VisitRRO(this, kArm64Lsr32, node, kShift32Imm);
+}
+
+void InstructionSelector::VisitWord64Shr(Node* node) {
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64And() && m.right().HasResolvedValue()) {
+    uint32_t lsb = m.right().ResolvedValue() & 0x3F;
+    Int64BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() &&
+        mleft.right().ResolvedValue() != 0) {
+      // Select Ubfx for Shr(And(x, mask), imm) where the result of the mask is
+      // shifted into the least-significant bits.
+      uint64_t mask =
+          static_cast<uint64_t>(mleft.right().ResolvedValue() >> lsb) << lsb;
+      unsigned mask_width = base::bits::CountPopulation(mask);
+      unsigned mask_msb = base::bits::CountLeadingZeros64(mask);
+      if ((mask_msb + mask_width + lsb) == 64) {
+        Arm64OperandGenerator g(this);
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros64(mask));
+        Emit(kArm64Ubfx, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()),
+             g.UseImmediateOrTemp(m.right().node(), lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kArm64Lsr, node, kShift64Imm);
+}
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  if (TryEmitBitfieldExtract32(this, node)) {
+    return;
+  }
+
+  Int32BinopMatcher m(node);
+  if (m.left().IsInt32MulHigh() && m.right().HasResolvedValue() &&
+      CanCover(node, node->InputAt(0))) {
+    // Combine this shift with the multiply and shift that would be generated
+    // by Int32MulHigh.
+    Arm64OperandGenerator g(this);
+    Node* left = m.left().node();
+    int shift = m.right().ResolvedValue() & 0x1F;
+    InstructionOperand const smull_operand = g.TempRegister();
+    Emit(kArm64Smull, smull_operand, g.UseRegister(left->InputAt(0)),
+         g.UseRegister(left->InputAt(1)));
+    Emit(kArm64Asr, g.DefineAsRegister(node), smull_operand,
+         g.TempImmediate(32 + shift));
+    return;
+  }
+
+  if (m.left().IsInt32Add() && m.right().HasResolvedValue() &&
+      CanCover(node, node->InputAt(0))) {
+    Node* add_node = m.left().node();
+    Int32BinopMatcher madd_node(add_node);
+    if (madd_node.left().IsInt32MulHigh() &&
+        CanCover(add_node, madd_node.left().node())) {
+      // Combine the shift that would be generated by Int32MulHigh with the add
+      // on the left of this Sar operation. We do it here, as the result of the
+      // add potentially has 33 bits, so we have to ensure the result is
+      // truncated by being the input to this 32-bit Sar operation.
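+      // For example, Sar(Add(Int32MulHigh(a, b), c), 2) is selected as
+      // smull t, a, b; add t2, c, t, asr #32; asr result, t2, #2
+      // (register names here are illustrative).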
+      Arm64OperandGenerator g(this);
+      Node* mul_node = madd_node.left().node();
+
+      InstructionOperand const smull_operand = g.TempRegister();
+      Emit(kArm64Smull, smull_operand, g.UseRegister(mul_node->InputAt(0)),
+           g.UseRegister(mul_node->InputAt(1)));
+
+      InstructionOperand const add_operand = g.TempRegister();
+      Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_ASR_I),
+           add_operand, g.UseRegister(add_node->InputAt(1)), smull_operand,
+           g.TempImmediate(32));
+
+      Emit(kArm64Asr32, g.DefineAsRegister(node), add_operand,
+           g.UseImmediate(node->InputAt(1)));
+      return;
+    }
+  }
+
+  VisitRRO(this, kArm64Asr32, node, kShift32Imm);
+}
+
+void InstructionSelector::VisitWord64Sar(Node* node) {
+  if (TryEmitExtendingLoad(this, node)) return;
+  VisitRRO(this, kArm64Asr, node, kShift64Imm);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitRRO(this, kArm64Ror32, node, kShift32Imm);
+}
+
+void InstructionSelector::VisitWord64Ror(Node* node) {
+  VisitRRO(this, kArm64Ror, node, kShift64Imm);
+}
+
+#define RR_OP_LIST(V)                                         \
+  V(Word64Clz, kArm64Clz)                                     \
+  V(Word32Clz, kArm64Clz32)                                   \
+  V(Word32ReverseBits, kArm64Rbit32)                          \
+  V(Word64ReverseBits, kArm64Rbit)                            \
+  V(Word32ReverseBytes, kArm64Rev32)                          \
+  V(Word64ReverseBytes, kArm64Rev)                            \
+  V(ChangeFloat32ToFloat64, kArm64Float32ToFloat64)           \
+  V(RoundInt32ToFloat32, kArm64Int32ToFloat32)                \
+  V(RoundUint32ToFloat32, kArm64Uint32ToFloat32)              \
+  V(ChangeInt32ToFloat64, kArm64Int32ToFloat64)               \
+  V(ChangeInt64ToFloat64, kArm64Int64ToFloat64)               \
+  V(ChangeUint32ToFloat64, kArm64Uint32ToFloat64)             \
+  V(ChangeFloat64ToInt32, kArm64Float64ToInt32)               \
+  V(ChangeFloat64ToInt64, kArm64Float64ToInt64)               \
+  V(ChangeFloat64ToUint32, kArm64Float64ToUint32)             \
+  V(ChangeFloat64ToUint64, kArm64Float64ToUint64)             \
+  V(TruncateFloat64ToInt64, kArm64Float64ToInt64)             \
+  V(TruncateFloat64ToUint32, kArm64Float64ToUint32)           \
+  V(TruncateFloat64ToFloat32, kArm64Float64ToFloat32)         \
+  V(TruncateFloat64ToWord32, kArchTruncateDoubleToI)          \
+  V(RoundFloat64ToInt32, kArm64Float64ToInt32)                \
+  V(RoundInt64ToFloat32, kArm64Int64ToFloat32)                \
+  V(RoundInt64ToFloat64, kArm64Int64ToFloat64)                \
+  V(RoundUint64ToFloat32, kArm64Uint64ToFloat32)              \
+  V(RoundUint64ToFloat64, kArm64Uint64ToFloat64)              \
+  V(BitcastFloat32ToInt32, kArm64Float64ExtractLowWord32)     \
+  V(BitcastFloat64ToInt64, kArm64U64MoveFloat64)              \
+  V(BitcastInt32ToFloat32, kArm64Float64MoveU64)              \
+  V(BitcastInt64ToFloat64, kArm64Float64MoveU64)              \
+  V(Float32Abs, kArm64Float32Abs)                             \
+  V(Float64Abs, kArm64Float64Abs)                             \
+  V(Float32Sqrt, kArm64Float32Sqrt)                           \
+  V(Float64Sqrt, kArm64Float64Sqrt)                           \
+  V(Float32RoundDown, kArm64Float32RoundDown)                 \
+  V(Float64RoundDown, kArm64Float64RoundDown)                 \
+  V(Float32RoundUp, kArm64Float32RoundUp)                     \
+  V(Float64RoundUp, kArm64Float64RoundUp)                     \
+  V(Float32RoundTruncate, kArm64Float32RoundTruncate)         \
+  V(Float64RoundTruncate, kArm64Float64RoundTruncate)         \
+  V(Float64RoundTiesAway, kArm64Float64RoundTiesAway)         \
+  V(Float32RoundTiesEven, kArm64Float32RoundTiesEven)         \
+  V(Float64RoundTiesEven, kArm64Float64RoundTiesEven)         \
+  V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32)   \
+  V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
+  V(Float64SilenceNaN, kArm64Float64SilenceNaN)               \
+  V(F32x4Ceil, kArm64Float32RoundUp)                          \
+  V(F32x4Floor, kArm64Float32RoundDown)                       \
+  V(F32x4Trunc, kArm64Float32RoundTruncate)                   \
+  V(F32x4NearestInt, kArm64Float32RoundTiesEven)              \
+  V(F64x2Ceil, kArm64Float64RoundUp)                          \
+  V(F64x2Floor, kArm64Float64RoundDown)                       \
+  V(F64x2Trunc, kArm64Float64RoundTruncate)                   \
+  V(F64x2NearestInt, kArm64Float64RoundTiesEven)
+
+#define RRR_OP_LIST(V)            \
+  V(Int32Div, kArm64Idiv32)       \
+  V(Int64Div, kArm64Idiv)         \
+  V(Uint32Div, kArm64Udiv32)      \
+  V(Uint64Div, kArm64Udiv)        \
+  V(Int32Mod, kArm64Imod32)       \
+  V(Int64Mod, kArm64Imod)         \
+  V(Uint32Mod, kArm64Umod32)      \
+  V(Uint64Mod, kArm64Umod)        \
+  V(Float32Add, kArm64Float32Add) \
+  V(Float64Add, kArm64Float64Add) \
+  V(Float32Sub, kArm64Float32Sub) \
+  V(Float64Sub, kArm64Float64Sub) \
+  V(Float32Div, kArm64Float32Div) \
+  V(Float64Div, kArm64Float64Div) \
+  V(Float32Max, kArm64Float32Max) \
+  V(Float64Max, kArm64Float64Max) \
+  V(Float32Min, kArm64Float32Min) \
+  V(Float64Min, kArm64Float64Min) \
+  V(I8x16Swizzle, kArm64I8x16Swizzle)
+
+#define RR_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, opcode, node);                      \
+  }
+RR_OP_LIST(RR_VISITOR)
+#undef RR_VISITOR
+#undef RR_OP_LIST
+
+#define RRR_VISITOR(Name, opcode)                     \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, opcode, node);                     \
+  }
+RRR_OP_LIST(RRR_VISITOR)
+#undef RRR_VISITOR
+#undef RRR_OP_LIST
+
+void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Popcnt(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  // Select Madd(x, y, z) for Add(Mul(x, y), z).
+  if (m.left().IsInt32Mul() && CanCover(node, m.left().node())) {
+    Int32BinopMatcher mleft(m.left().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mleft) == 0) {
+      Emit(kArm64Madd32, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()),
+           g.UseRegister(m.right().node()));
+      return;
+    }
+  }
+  // Select Madd(x, y, z) for Add(z, Mul(x, y)).
+  if (m.right().IsInt32Mul() && CanCover(node, m.right().node())) {
+    Int32BinopMatcher mright(m.right().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mright) == 0) {
+      Emit(kArm64Madd32, g.DefineAsRegister(node),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()),
+           g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+  VisitAddSub<Int32BinopMatcher>(this, node, kArm64Add32, kArm64Sub32);
+}
+
+void InstructionSelector::VisitInt64Add(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  // Select Madd(x, y, z) for Add(Mul(x, y), z).
+  if (m.left().IsInt64Mul() && CanCover(node, m.left().node())) {
+    Int64BinopMatcher mleft(m.left().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mleft) == 0) {
+      Emit(kArm64Madd, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()),
+           g.UseRegister(m.right().node()));
+      return;
+    }
+  }
+  // Select Madd(x, y, z) for Add(z, Mul(x, y)).
+  if (m.right().IsInt64Mul() && CanCover(node, m.right().node())) {
+    Int64BinopMatcher mright(m.right().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mright) == 0) {
+      Emit(kArm64Madd, g.DefineAsRegister(node),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()),
+           g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+  VisitAddSub<Int64BinopMatcher>(this, node, kArm64Add, kArm64Sub);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+
+  // Select Msub(x, y, a) for Sub(a, Mul(x, y)).
+  if (m.right().IsInt32Mul() && CanCover(node, m.right().node())) {
+    Int32BinopMatcher mright(m.right().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mright) == 0) {
+      Emit(kArm64Msub32, g.DefineAsRegister(node),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()),
+           g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+
+  VisitAddSub<Int32BinopMatcher>(this, node, kArm64Sub32, kArm64Add32);
+}
+
+void InstructionSelector::VisitInt64Sub(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+
+  // Select Msub(x, y, a) for Sub(a, Mul(x, y)).
+  if (m.right().IsInt64Mul() && CanCover(node, m.right().node())) {
+    Int64BinopMatcher mright(m.right().node());
+    // Check multiply can't be later reduced to addition with shift.
+    if (LeftShiftForReducedMultiply(&mright) == 0) {
+      Emit(kArm64Msub, g.DefineAsRegister(node),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()),
+           g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+
+  VisitAddSub<Int64BinopMatcher>(this, node, kArm64Sub, kArm64Add);
+}
+
+namespace {
+
+void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node,
+                              FlagsContinuation* cont) {
+  Arm64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand result = g.DefineAsRegister(node);
+  InstructionOperand left = g.UseRegister(m.left().node());
+  InstructionOperand right = g.UseRegister(m.right().node());
+  selector->Emit(kArm64Smull, result, left, right);
+
+  InstructionCode opcode =
+      kArm64Cmp | AddressingModeField::encode(kMode_Operand2_R_SXTW);
+  selector->EmitWithContinuation(opcode, result, result, cont);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+
+  // First, try to reduce the multiplication to addition with left shift.
+  // x * (2^k + 1) -> x + (x << k)
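+  // For example, x * 5 is selected as Add(x, x << 2).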
+  int32_t shift = LeftShiftForReducedMultiply(&m);
+  if (shift > 0) {
+    Emit(kArm64Add32 | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
+         g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+         g.UseRegister(m.left().node()), g.TempImmediate(shift));
+    return;
+  }
+
+  if (m.left().IsInt32Sub() && CanCover(node, m.left().node())) {
+    Int32BinopMatcher mleft(m.left().node());
+
+    // Select Mneg(x, y) for Mul(Sub(0, x), y).
+    if (mleft.left().Is(0)) {
+      Emit(kArm64Mneg32, g.DefineAsRegister(node),
+           g.UseRegister(mleft.right().node()),
+           g.UseRegister(m.right().node()));
+      return;
+    }
+  }
+
+  if (m.right().IsInt32Sub() && CanCover(node, m.right().node())) {
+    Int32BinopMatcher mright(m.right().node());
+
+    // Select Mneg(x, y) for Mul(x, Sub(0, y)).
+    if (mright.left().Is(0)) {
+      Emit(kArm64Mneg32, g.DefineAsRegister(node),
+           g.UseRegister(m.left().node()),
+           g.UseRegister(mright.right().node()));
+      return;
+    }
+  }
+
+  VisitRRR(this, kArm64Mul32, node);
+}
+
+void InstructionSelector::VisitInt64Mul(Node* node) {
+  Arm64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+
+  // First, try to reduce the multiplication to addition with left shift.
+  // x * (2^k + 1) -> x + (x << k)
+  int32_t shift = LeftShiftForReducedMultiply(&m);
+  if (shift > 0) {
+    Emit(kArm64Add | AddressingModeField::encode(kMode_Operand2_R_LSL_I),
+         g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+         g.UseRegister(m.left().node()), g.TempImmediate(shift));
+    return;
+  }
+
+  if (m.left().IsInt64Sub() && CanCover(node, m.left().node())) {
+    Int64BinopMatcher mleft(m.left().node());
+
+    // Select Mneg(x, y) for Mul(Sub(0, x), y).
+    if (mleft.left().Is(0)) {
+      Emit(kArm64Mneg, g.DefineAsRegister(node),
+           g.UseRegister(mleft.right().node()),
+           g.UseRegister(m.right().node()));
+      return;
+    }
+  }
+
+  if (m.right().IsInt64Sub() && CanCover(node, m.right().node())) {
+    Int64BinopMatcher mright(m.right().node());
+
+    // Select Mneg(x, y) for Mul(x, Sub(0, y)).
+    if (mright.left().Is(0)) {
+      Emit(kArm64Mneg, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(mright.right().node()));
+      return;
+    }
+  }
+
+  VisitRRR(this, kArm64Mul, node);
+}
+
+namespace {
+void VisitExtMul(InstructionSelector* selector, ArchOpcode opcode, Node* node,
+                 int dst_lane_size) {
+  InstructionCode code = opcode;
+  code |= MiscField::encode(dst_lane_size);
+  VisitRRR(selector, code, node);
+}
+}  // namespace
+
+void InstructionSelector::VisitI16x8ExtMulLowI8x16S(Node* node) {
+  VisitExtMul(this, kArm64Smull, node, 16);
+}
+
+void InstructionSelector::VisitI16x8ExtMulHighI8x16S(Node* node) {
+  VisitExtMul(this, kArm64Smull2, node, 16);
+}
+
+void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
+  VisitExtMul(this, kArm64Umull, node, 16);
+}
+
+void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
+  VisitExtMul(this, kArm64Umull2, node, 16);
+}
+
+void InstructionSelector::VisitI32x4ExtMulLowI16x8S(Node* node) {
+  VisitExtMul(this, kArm64Smull, node, 32);
+}
+
+void InstructionSelector::VisitI32x4ExtMulHighI16x8S(Node* node) {
+  VisitExtMul(this, kArm64Smull2, node, 32);
+}
+
+void InstructionSelector::VisitI32x4ExtMulLowI16x8U(Node* node) {
+  VisitExtMul(this, kArm64Umull, node, 32);
+}
+
+void InstructionSelector::VisitI32x4ExtMulHighI16x8U(Node* node) {
+  VisitExtMul(this, kArm64Umull2, node, 32);
+}
+
+void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
+  VisitExtMul(this, kArm64Smull, node, 64);
+}
+
+void InstructionSelector::VisitI64x2ExtMulHighI32x4S(Node* node) {
+  VisitExtMul(this, kArm64Smull2, node, 64);
+}
+
+void InstructionSelector::VisitI64x2ExtMulLowI32x4U(Node* node) {
+  VisitExtMul(this, kArm64Umull, node, 64);
+}
+
+void InstructionSelector::VisitI64x2ExtMulHighI32x4U(Node* node) {
+  VisitExtMul(this, kArm64Umull2, node, 64);
+}
+
+namespace {
+void VisitExtAddPairwise(InstructionSelector* selector, ArchOpcode opcode,
+                         Node* node, int dst_lane_size) {
+  InstructionCode code = opcode;
+  code |= MiscField::encode(dst_lane_size);
+  VisitRR(selector, code, node);
+}
+}  // namespace
+
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
+  VisitExtAddPairwise(this, kArm64Saddlp, node, 32);
+}
+
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) {
+  VisitExtAddPairwise(this, kArm64Uaddlp, node, 32);
+}
+
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
+  VisitExtAddPairwise(this, kArm64Saddlp, node, 16);
+}
+
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
+  VisitExtAddPairwise(this, kArm64Uaddlp, node, 16);
+}
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  Arm64OperandGenerator g(this);
+  InstructionOperand const smull_operand = g.TempRegister();
+  Emit(kArm64Smull, smull_operand, g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)));
+  Emit(kArm64Asr, g.DefineAsRegister(node), smull_operand, g.TempImmediate(32));
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  Arm64OperandGenerator g(this);
+  InstructionOperand const smull_operand = g.TempRegister();
+  Emit(kArm64Umull, smull_operand, g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)));
+  Emit(kArm64Lsr, g.DefineAsRegister(node), smull_operand, g.TempImmediate(32));
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionCode opcode = kArm64Float32ToInt32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  opcode |= MiscField::encode(kind == TruncateKind::kSetOverflowToMin);
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionCode opcode = kArm64Float32ToUint32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kArm64Float32ToInt64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kArm64Float64ToInt64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kArm64Float32ToUint64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  Arm64OperandGenerator g(this);
+
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kArm64Float64ToUint64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  DCHECK(SmiValuesAre31Bits());
+  DCHECK(COMPRESS_POINTERS_BOOL);
+  EmitIdentity(node);
+}
+
+void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
+  Node* value = node->InputAt(0);
+  if (value->opcode() == IrOpcode::kLoad && CanCover(node, value)) {
+    // Generate sign-extending load.
+    LoadRepresentation load_rep = LoadRepresentationOf(value->op());
+    MachineRepresentation rep = load_rep.representation();
+    InstructionCode opcode = kArchNop;
+    ImmediateMode immediate_mode = kNoImmediate;
+    switch (rep) {
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = load_rep.IsSigned() ? kArm64Ldrsb : kArm64Ldrb;
+        immediate_mode = kLoadStoreImm8;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = load_rep.IsSigned() ? kArm64Ldrsh : kArm64Ldrh;
+        immediate_mode = kLoadStoreImm16;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kArm64Ldrsw;
+        immediate_mode = kLoadStoreImm32;
+        break;
+      default:
+        UNREACHABLE();
+    }
+    EmitLoad(this, value, opcode, immediate_mode, rep, node);
+    return;
+  }
+
+  if (value->opcode() == IrOpcode::kWord32Sar && CanCover(node, value)) {
+    Int32BinopMatcher m(value);
+    if (m.right().HasResolvedValue()) {
+      Arm64OperandGenerator g(this);
+      // Mask the shift amount, to keep the same semantics as Word32Sar.
+      int right = m.right().ResolvedValue() & 0x1F;
+      Emit(kArm64Sbfx, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(right), g.TempImmediate(32 - right));
+      return;
+    }
+  }
+
+  VisitRR(this, kArm64Sxtw, node);
+}
+
+bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) {
+  DCHECK_NE(node->opcode(), IrOpcode::kPhi);
+  switch (node->opcode()) {
+    case IrOpcode::kWord32And:
+    case IrOpcode::kWord32Or:
+    case IrOpcode::kWord32Xor:
+    case IrOpcode::kWord32Shl:
+    case IrOpcode::kWord32Shr:
+    case IrOpcode::kWord32Sar:
+    case IrOpcode::kWord32Ror:
+    case IrOpcode::kWord32Equal:
+    case IrOpcode::kInt32Add:
+    case IrOpcode::kInt32AddWithOverflow:
+    case IrOpcode::kInt32Sub:
+    case IrOpcode::kInt32SubWithOverflow:
+    case IrOpcode::kInt32Mul:
+    case IrOpcode::kInt32MulHigh:
+    case IrOpcode::kInt32Div:
+    case IrOpcode::kInt32Mod:
+    case IrOpcode::kInt32LessThan:
+    case IrOpcode::kInt32LessThanOrEqual:
+    case IrOpcode::kUint32Div:
+    case IrOpcode::kUint32LessThan:
+    case IrOpcode::kUint32LessThanOrEqual:
+    case IrOpcode::kUint32Mod:
+    case IrOpcode::kUint32MulHigh: {
+      // 32-bit operations will write their result in a W register (implicitly
+      // clearing the top 32 bits of the corresponding X register), so the
+      // zero-extension is a no-op.
+      return true;
+    }
+    case IrOpcode::kLoad: {
+      // As for the operations above, a 32-bit load will implicitly clear the
+      // top 32 bits of the destination register.
+      LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+      switch (load_rep.representation()) {
+        case MachineRepresentation::kWord8:
+        case MachineRepresentation::kWord16:
+        case MachineRepresentation::kWord32:
+          return true;
+        default:
+          return false;
+      }
+    }
+    default:
+      return false;
+  }
+}
+
+void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  if (ZeroExtendsWord32ToWord64(value)) {
+    return EmitIdentity(node);
+  }
+  Emit(kArm64Mov32, g.DefineAsRegister(node), g.UseRegister(value));
+}
+
+void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
+  Arm64OperandGenerator g(this);
+  // The top 32 bits in the 64-bit register will be undefined, and
+  // must not be used by a dependent node.
+  EmitIdentity(node);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  Arm64OperandGenerator g(this);
+  Emit(kArm64Float64Mod, g.DefineAsFixed(node, d0),
+       g.UseFixed(node->InputAt(0), d0), g.UseFixed(node->InputAt(1), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  Arm64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d0), g.UseFixed(node->InputAt(0), d0),
+       g.UseFixed(node->InputAt(1), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  Arm64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d0), g.UseFixed(node->InputAt(0), d0))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  Arm64OperandGenerator g(this);
+
+  // `arguments` includes alignment "holes". This means that slots bigger than
+  // kSystemPointerSize, e.g. Simd128, will span across multiple arguments.
+  int claim_count = static_cast<int>(arguments->size());
+  bool needs_padding = claim_count % 2 != 0;
+  int slot = claim_count - 1;
+  claim_count = RoundUp(claim_count, 2);
+  // Bump the stack pointer.
+  if (claim_count > 0) {
+    // TODO(titzer): claim and poke probably take small immediates.
+    // TODO(titzer): it would be better to bump the sp here only
+    //               and emit paired stores with increment for non c frames.
+    Emit(kArm64Claim, g.NoOutput(), g.TempImmediate(claim_count));
+
+    if (needs_padding) {
+      Emit(kArm64Poke, g.NoOutput(), g.UseImmediate(0),
+           g.TempImmediate(claim_count - 1));
+    }
+  }
+
+  // Poke the arguments into the stack.
+  while (slot >= 0) {
+    PushParameter input0 = (*arguments)[slot];
+    PushParameter input1 = slot > 0 ? (*arguments)[slot - 1] : PushParameter();
+    // Emit a poke-pair if consecutive parameters have the same type.
+    // TODO(arm): Support consecutive Simd128 parameters.
+    if (input0.node != nullptr && input1.node != nullptr &&
+        input0.location.GetType() == input1.location.GetType()) {
+      Emit(kArm64PokePair, g.NoOutput(), g.UseRegister(input0.node),
+           g.UseRegister(input1.node), g.TempImmediate(slot));
+      slot -= 2;
+    } else if (input0.node != nullptr) {
+      Emit(kArm64Poke, g.NoOutput(), g.UseRegister(input0.node),
+           g.TempImmediate(slot));
+      slot--;
+    } else {
+      // Skip any alignment holes in pushed nodes.
+      slot--;
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  Arm64OperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+
+      Emit(kArm64Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+namespace {
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// This function checks whether we can convert:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>.
+// We only generate conditions <cond'> that are a combination of the N
+// and Z flags. This avoids the need to make this function dependent on
+// the flag-setting operation.
+bool CanUseFlagSettingBinop(FlagsCondition cond) {
+  switch (cond) {
+    case kEqual:
+    case kNotEqual:
+    case kSignedLessThan:
+    case kSignedGreaterThanOrEqual:
+    case kUnsignedLessThanOrEqual:  // x <= 0 -> x == 0
+    case kUnsignedGreaterThan:      // x > 0 -> x != 0
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Map <cond> to <cond'> so that the following transformation is possible:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>.
+FlagsCondition MapForFlagSettingBinop(FlagsCondition cond) {
+  DCHECK(CanUseFlagSettingBinop(cond));
+  switch (cond) {
+    case kEqual:
+    case kNotEqual:
+      return cond;
+    case kSignedLessThan:
+      return kNegative;
+    case kSignedGreaterThanOrEqual:
+      return kPositiveOrZero;
+    case kUnsignedLessThanOrEqual:  // x <= 0 -> x == 0
+      return kEqual;
+    case kUnsignedGreaterThan:  // x > 0 -> x != 0
+      return kNotEqual;
+    default:
+      UNREACHABLE();
+  }
+}
+
+// This function checks if we can perform the transformation:
+// ((a <op> b) cmp 0), b.<cond>
+// to:
+// (a <ops> b), b.<cond'>
+// where <ops> is the flag setting version of <op>, and if so,
+// updates {node}, {opcode} and {cont} accordingly.
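+// For example, when a kInt32Add whose only use is a signed-less-than
+// comparison against zero feeds a branch, this lets us emit
+//   cmn w_lhs, w_rhs
+//   b.mi <target>
+// (register names illustrative) instead of a separate add and cmp.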
+void MaybeReplaceCmpZeroWithFlagSettingBinop(InstructionSelector* selector,
+                                             Node** node, Node* binop,
+                                             ArchOpcode* opcode,
+                                             FlagsCondition cond,
+                                             FlagsContinuation* cont,
+                                             ImmediateMode* immediate_mode) {
+  ArchOpcode binop_opcode;
+  ArchOpcode no_output_opcode;
+  ImmediateMode binop_immediate_mode;
+  switch (binop->opcode()) {
+    case IrOpcode::kInt32Add:
+      binop_opcode = kArm64Add32;
+      no_output_opcode = kArm64Cmn32;
+      binop_immediate_mode = kArithmeticImm;
+      break;
+    case IrOpcode::kWord32And:
+      binop_opcode = kArm64And32;
+      no_output_opcode = kArm64Tst32;
+      binop_immediate_mode = kLogical32Imm;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  if (selector->CanCover(*node, binop)) {
+    // The comparison is the only user of the add or and, so we can generate
+    // a cmn or tst instead.
+    cont->Overwrite(MapForFlagSettingBinop(cond));
+    *opcode = no_output_opcode;
+    *node = binop;
+    *immediate_mode = binop_immediate_mode;
+  } else if (selector->IsOnlyUserOfNodeInSameBlock(*node, binop)) {
+    // We can also handle the case where the add and the compare are in the
+    // same basic block, and the compare is the only use of the add in this
+    // basic block (the add has users in other basic blocks).
+    cont->Overwrite(MapForFlagSettingBinop(cond));
+    *opcode = binop_opcode;
+    *node = binop;
+    *immediate_mode = binop_immediate_mode;
+  }
+}
+
+// Map {cond} to kEqual or kNotEqual, so that we can select
+// either TBZ or TBNZ when generating code for:
+// (x cmp 0), b.{cond}
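+// For example, for 32-bit {x}, (x < 0) only tests the sign bit and can be
+// emitted as "tbnz w_x, #31, <target>" (register name illustrative).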
+FlagsCondition MapForTbz(FlagsCondition cond) {
+  switch (cond) {
+    case kSignedLessThan:  // generate TBNZ
+      return kNotEqual;
+    case kSignedGreaterThanOrEqual:  // generate TBZ
+      return kEqual;
+    default:
+      UNREACHABLE();
+  }
+}
+
+// Map {cond} to kEqual or kNotEqual, so that we can select
+// either CBZ or CBNZ when generating code for:
+// (x cmp 0), b.{cond}
+FlagsCondition MapForCbz(FlagsCondition cond) {
+  switch (cond) {
+    case kEqual:     // generate CBZ
+    case kNotEqual:  // generate CBNZ
+      return cond;
+    case kUnsignedLessThanOrEqual:  // generate CBZ
+      return kEqual;
+    case kUnsignedGreaterThan:  // generate CBNZ
+      return kNotEqual;
+    default:
+      UNREACHABLE();
+  }
+}
+
+void EmitBranchOrDeoptimize(InstructionSelector* selector,
+                            InstructionCode opcode, InstructionOperand value,
+                            FlagsContinuation* cont) {
+  DCHECK(cont->IsBranch() || cont->IsDeoptimize());
+  selector->EmitWithContinuation(opcode, value, cont);
+}
+
+template <int N>
+struct CbzOrTbzMatchTrait {};
+
+template <>
+struct CbzOrTbzMatchTrait<32> {
+  using IntegralType = uint32_t;
+  using BinopMatcher = Int32BinopMatcher;
+  static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord32And;
+  static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch32;
+  static constexpr ArchOpcode kCompareAndBranchOpcode =
+      kArm64CompareAndBranch32;
+  static constexpr unsigned kSignBit = kWSignBit;
+};
+
+template <>
+struct CbzOrTbzMatchTrait<64> {
+  using IntegralType = uint64_t;
+  using BinopMatcher = Int64BinopMatcher;
+  static constexpr IrOpcode::Value kAndOpcode = IrOpcode::kWord64And;
+  static constexpr ArchOpcode kTestAndBranchOpcode = kArm64TestAndBranch;
+  static constexpr ArchOpcode kCompareAndBranchOpcode = kArm64CompareAndBranch;
+  static constexpr unsigned kSignBit = kXSignBit;
+};
+
+// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
+// against {value}, depending on the condition.
+template <int N>
+bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node,
+                     typename CbzOrTbzMatchTrait<N>::IntegralType value,
+                     Node* user, FlagsCondition cond, FlagsContinuation* cont) {
+  // Branch poisoning requires flags to be set, so when it's enabled for
+  // a particular branch, we shouldn't be applying the cbz/tbz optimization.
+  DCHECK(!cont->IsPoisoned());
+  // Only handle branches and deoptimisations.
+  if (!cont->IsBranch() && !cont->IsDeoptimize()) return false;
+
+  switch (cond) {
+    case kSignedLessThan:
+    case kSignedGreaterThanOrEqual: {
+      // Here we handle sign tests, i.e. comparisons with zero.
+      if (value != 0) return false;
+      // We don't generate TBZ/TBNZ for deoptimisations, as they have a
+      // shorter range than conditional branches and generating them for
+      // deoptimisations results in more veneers.
+      if (cont->IsDeoptimize()) return false;
+      Arm64OperandGenerator g(selector);
+      cont->Overwrite(MapForTbz(cond));
+
+      if (N == 32) {
+        Int32Matcher m(node);
+        if (m.IsFloat64ExtractHighWord32() && selector->CanCover(user, node)) {
+          // SignedLessThan(Float64ExtractHighWord32(x), 0) and
+          // SignedGreaterThanOrEqual(Float64ExtractHighWord32(x), 0)
+          // essentially check the sign bit of a 64-bit floating point value.
+          InstructionOperand temp = g.TempRegister();
+          selector->Emit(kArm64U64MoveFloat64, temp,
+                         g.UseRegister(node->InputAt(0)));
+          selector->EmitWithContinuation(kArm64TestAndBranch, temp,
+                                         g.TempImmediate(kDSignBit), cont);
+          return true;
+        }
+      }
+
+      selector->EmitWithContinuation(
+          CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode, g.UseRegister(node),
+          g.TempImmediate(CbzOrTbzMatchTrait<N>::kSignBit), cont);
+      return true;
+    }
+    case kEqual:
+    case kNotEqual: {
+      if (node->opcode() == CbzOrTbzMatchTrait<N>::kAndOpcode) {
+        // Emit a tbz/tbnz if we are comparing with a single-bit mask:
+        //   Branch(WordEqual(WordAnd(x, 1 << N), 1 << N), true, false)
+        typename CbzOrTbzMatchTrait<N>::BinopMatcher m_and(node);
+        if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
+            m_and.right().Is(value) && selector->CanCover(user, node)) {
+          Arm64OperandGenerator g(selector);
+          // In the code generator, Equal refers to a bit being cleared. We want
+          // the opposite here so negate the condition.
+          cont->Negate();
+          selector->EmitWithContinuation(
+              CbzOrTbzMatchTrait<N>::kTestAndBranchOpcode,
+              g.UseRegister(m_and.left().node()),
+              g.TempImmediate(base::bits::CountTrailingZeros(value)), cont);
+          return true;
+        }
+      }
+      V8_FALLTHROUGH;
+    }
+    case kUnsignedLessThanOrEqual:
+    case kUnsignedGreaterThan: {
+      if (value != 0) return false;
+      Arm64OperandGenerator g(selector);
+      cont->Overwrite(MapForCbz(cond));
+      EmitBranchOrDeoptimize(selector,
+                             CbzOrTbzMatchTrait<N>::kCompareAndBranchOpcode,
+                             g.UseRegister(node), cont);
+      return true;
+    }
+    default:
+      return false;
+  }
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont,
+                      ImmediateMode immediate_mode) {
+  Arm64OperandGenerator g(selector);
+
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  // If one of the two inputs is an immediate, make sure it's on the right.
+  if (!g.CanBeImmediate(right, immediate_mode) &&
+      g.CanBeImmediate(left, immediate_mode)) {
+    cont->Commute();
+    std::swap(left, right);
+  }
+
+  if (opcode == kArm64Cmp && !cont->IsPoisoned()) {
+    Int64Matcher m(right);
+    if (m.HasResolvedValue()) {
+      if (TryEmitCbzOrTbz<64>(selector, left, m.ResolvedValue(), node,
+                              cont->condition(), cont)) {
+        return;
+      }
+    }
+  }
+
+  VisitCompare(selector, opcode, g.UseRegister(left),
+               g.UseOperand(right, immediate_mode), cont);
+}
+
+void VisitWord32Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  Int32BinopMatcher m(node);
+  FlagsCondition cond = cont->condition();
+  if (!cont->IsPoisoned()) {
+    if (m.right().HasResolvedValue()) {
+      if (TryEmitCbzOrTbz<32>(selector, m.left().node(),
+                              m.right().ResolvedValue(), node, cond, cont)) {
+        return;
+      }
+    } else if (m.left().HasResolvedValue()) {
+      FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
+      if (TryEmitCbzOrTbz<32>(selector, m.right().node(),
+                              m.left().ResolvedValue(), node, commuted_cond,
+                              cont)) {
+        return;
+      }
+    }
+  }
+  ArchOpcode opcode = kArm64Cmp32;
+  ImmediateMode immediate_mode = kArithmeticImm;
+  if (m.right().Is(0) && (m.left().IsInt32Add() || m.left().IsWord32And())) {
+    // Emit flag setting add/and instructions for comparisons against zero.
+    if (CanUseFlagSettingBinop(cond)) {
+      Node* binop = m.left().node();
+      MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
+                                              cond, cont, &immediate_mode);
+    }
+  } else if (m.left().Is(0) &&
+             (m.right().IsInt32Add() || m.right().IsWord32And())) {
+    // Same as above, but we need to commute the condition before we
+    // continue with the rest of the checks.
+    FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
+    if (CanUseFlagSettingBinop(commuted_cond)) {
+      Node* binop = m.right().node();
+      MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
+                                              commuted_cond, cont,
+                                              &immediate_mode);
+    }
+  } else if (m.right().IsInt32Sub() && (cond == kEqual || cond == kNotEqual)) {
+    // Select negated compare for comparisons with negated right input.
+    // Only do this for kEqual and kNotEqual, which do not depend on the
+    // C and V flags, as those flags will be different with CMN when the
+    // right-hand side of the original subtraction is INT_MIN.
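+    // For example, Word32Equal(a, Int32Sub(0, b)) tests a + b == 0, so it can
+    // be emitted as a cmn of a and b followed by a check of the Z flag.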
+    Node* sub = m.right().node();
+    Int32BinopMatcher msub(sub);
+    if (msub.left().Is(0)) {
+      bool can_cover = selector->CanCover(node, sub);
+      node->ReplaceInput(1, msub.right().node());
+      // Even if the comparison node covers the subtraction, after the input
+      // replacement above, the node still won't cover the input to the
+      // subtraction; the subtraction still uses it.
+      // In order to get shifted operations to work, we must remove the rhs
+      // input to the subtraction, as TryMatchAnyShift requires this node to
+      // cover the input shift. We do this by setting it to the lhs input,
+      // as we know it's zero, and the result of the subtraction isn't used by
+      // any other node.
+      if (can_cover) sub->ReplaceInput(1, msub.left().node());
+      opcode = kArm64Cmn32;
+    }
+  }
+  VisitBinop<Int32BinopMatcher>(selector, node, opcode, immediate_mode, cont);
+}
+
+void VisitWordTest(InstructionSelector* selector, Node* node,
+                   InstructionCode opcode, FlagsContinuation* cont) {
+  Arm64OperandGenerator g(selector);
+  VisitCompare(selector, opcode, g.UseRegister(node), g.UseRegister(node),
+               cont);
+}
+
+void VisitWord32Test(InstructionSelector* selector, Node* node,
+                     FlagsContinuation* cont) {
+  VisitWordTest(selector, node, kArm64Tst32, cont);
+}
+
+void VisitWord64Test(InstructionSelector* selector, Node* node,
+                     FlagsContinuation* cont) {
+  VisitWordTest(selector, node, kArm64Tst, cont);
+}
+
+template <typename Matcher>
+struct TestAndBranchMatcher {
+  TestAndBranchMatcher(Node* node, FlagsContinuation* cont)
+      : matches_(false), cont_(cont), matcher_(node) {
+    Initialize();
+  }
+  bool Matches() const { return matches_; }
+
+  unsigned bit() const {
+    DCHECK(Matches());
+    return base::bits::CountTrailingZeros(matcher_.right().ResolvedValue());
+  }
+
+  Node* input() const {
+    DCHECK(Matches());
+    return matcher_.left().node();
+  }
+
+ private:
+  bool matches_;
+  FlagsContinuation* cont_;
+  Matcher matcher_;
+
+  void Initialize() {
+    if (cont_->IsBranch() && !cont_->IsPoisoned() &&
+        matcher_.right().HasResolvedValue() &&
+        base::bits::IsPowerOfTwo(matcher_.right().ResolvedValue())) {
+      // If the mask has only one bit set, we can use tbz/tbnz.
+      DCHECK((cont_->condition() == kEqual) ||
+             (cont_->condition() == kNotEqual));
+      matches_ = true;
+    } else {
+      matches_ = false;
+    }
+  }
+};
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Arm64OperandGenerator g(selector);
+  Float32BinopMatcher m(node);
+  if (m.right().Is(0.0f)) {
+    VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(m.left().node()),
+                 g.UseImmediate(m.right().node()), cont);
+  } else if (m.left().Is(0.0f)) {
+    cont->Commute();
+    VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(m.right().node()),
+                 g.UseImmediate(m.left().node()), cont);
+  } else {
+    VisitCompare(selector, kArm64Float32Cmp, g.UseRegister(m.left().node()),
+                 g.UseRegister(m.right().node()), cont);
+  }
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Arm64OperandGenerator g(selector);
+  Float64BinopMatcher m(node);
+  if (m.right().Is(0.0)) {
+    VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(m.left().node()),
+                 g.UseImmediate(m.right().node()), cont);
+  } else if (m.left().Is(0.0)) {
+    cont->Commute();
+    VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(m.right().node()),
+                 g.UseImmediate(m.left().node()), cont);
+  } else {
+    VisitCompare(selector, kArm64Float64Cmp, g.UseRegister(m.left().node()),
+                 g.UseRegister(m.right().node()), cont);
+  }
+}
+
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
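+  // {value} gets a unique register because the code generator writes the
+  // loaded (old) value into the output before storing {value}, so the two
+  // must not share a register.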
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseUniqueRegister(value)};
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temps), temps);
+}
+
+void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node,
+                                ArchOpcode opcode) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseUniqueRegister(old_value),
+                                 g.UseUniqueRegister(new_value)};
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temps), temps);
+}
+
+void VisitAtomicLoad(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index)};
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand temps[] = {g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temps), temps);
+}
+
+void VisitAtomicStore(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseUniqueRegister(value)};
+  InstructionOperand temps[] = {g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(kMode_MRR);
+  selector->Emit(code, 0, nullptr, arraysize(inputs), inputs, arraysize(temps),
+                 temps);
+}
+
+void VisitAtomicBinop(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  Arm64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  AddressingMode addressing_mode = kMode_MRR;
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseUniqueRegister(value)};
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand temps[] = {g.TempRegister(), g.TempRegister(),
+                                g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temps), temps);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  Arm64OperandGenerator g(this);
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  // Try to match bit checks to create TBZ/TBNZ instructions.
+  // Unlike the switch below, the CanCover check is not needed here.
+  // If there are several uses of the given operation, we will generate a TBZ
+  // instruction for each. This is useful even if there are other uses of the
+  // arithmetic result, because it moves dependencies further back.
+  switch (value->opcode()) {
+    case IrOpcode::kWord64Equal: {
+      Int64BinopMatcher m(value);
+      if (m.right().Is(0)) {
+        Node* const left = m.left().node();
+        if (left->opcode() == IrOpcode::kWord64And) {
+          // Attempt to merge the Word64Equal(Word64And(x, y), 0) comparison
+          // into a tbz/tbnz instruction.
+          TestAndBranchMatcher<Uint64BinopMatcher> tbm(left, cont);
+          if (tbm.Matches()) {
+            Arm64OperandGenerator gen(this);
+            cont->OverwriteAndNegateIfEqual(kEqual);
+            this->EmitWithContinuation(kArm64TestAndBranch,
+                                       gen.UseRegister(tbm.input()),
+                                       gen.TempImmediate(tbm.bit()), cont);
+            return;
+          }
+        }
+      }
+      break;
+    }
+    case IrOpcode::kWord32And: {
+      TestAndBranchMatcher<Uint32BinopMatcher> tbm(value, cont);
+      if (tbm.Matches()) {
+        Arm64OperandGenerator gen(this);
+        this->EmitWithContinuation(kArm64TestAndBranch32,
+                                   gen.UseRegister(tbm.input()),
+                                   gen.TempImmediate(tbm.bit()), cont);
+        return;
+      }
+      break;
+    }
+    case IrOpcode::kWord64And: {
+      TestAndBranchMatcher<Uint64BinopMatcher> tbm(value, cont);
+      if (tbm.Matches()) {
+        Arm64OperandGenerator gen(this);
+        this->EmitWithContinuation(kArm64TestAndBranch,
+                                   gen.UseRegister(tbm.input()),
+                                   gen.TempImmediate(tbm.bit()), cont);
+        return;
+      }
+      break;
+    }
+    default:
+      break;
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kWord64Equal: {
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        Int64BinopMatcher m(value);
+        if (m.right().Is(0)) {
+          Node* const left = m.left().node();
+          if (CanCover(value, left) && left->opcode() == IrOpcode::kWord64And) {
+            return VisitWordCompare(this, left, kArm64Tst, cont, kLogical64Imm);
+          }
+        }
+        return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
+      }
+      case IrOpcode::kInt64LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
+      case IrOpcode::kInt64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
+      case IrOpcode::kUint64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
+      case IrOpcode::kUint64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kArm64Cmp, cont, kArithmeticImm);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kFloatLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of
+          // the actual value, or was already defined, which means it is
+          // scheduled *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int32BinopMatcher>(this, node, kArm64Add32,
+                                                     kArithmeticImm, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int32BinopMatcher>(this, node, kArm64Sub32,
+                                                     kArithmeticImm, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                // ARM64 doesn't set the overflow flag for multiplication, so we
+                // need to test on kNotEqual. Here is the code sequence used:
+                //   smull result, left, right
+                //   cmp result.X(), Operand(result, SXTW)
+                cont->OverwriteAndNegateIfEqual(kNotEqual);
+                return EmitInt32MulWithOverflow(this, node, cont);
+              case IrOpcode::kInt64AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int64BinopMatcher>(this, node, kArm64Add,
+                                                     kArithmeticImm, cont);
+              case IrOpcode::kInt64SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int64BinopMatcher>(this, node, kArm64Sub,
+                                                     kArithmeticImm, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Add:
+        return VisitWordCompare(this, value, kArm64Cmn32, cont, kArithmeticImm);
+      case IrOpcode::kInt32Sub:
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kWord32And:
+        return VisitWordCompare(this, value, kArm64Tst32, cont, kLogical32Imm);
+      case IrOpcode::kWord64And:
+        return VisitWordCompare(this, value, kArm64Tst, cont, kLogical64Imm);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Branch could not be combined with a compare; compare against 0 and branch.
+  if (!cont->IsPoisoned() && cont->IsBranch()) {
+    Emit(cont->Encode(kArm64CompareAndBranch32), g.NoOutput(),
+         g.UseRegister(value), g.Label(cont->true_block()),
+         g.Label(cont->false_block()));
+  } else {
+    EmitWithContinuation(cont->Encode(kArm64Tst32), g.UseRegister(value),
+                         g.UseRegister(value), cont);
+  }
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  Arm64OperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
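+    // Rough cost model: a jump table needs one entry per value in the range
+    // plus a small dispatch sequence, while a binary-search tree needs a
+    // compare-and-branch per case; pick whichever is estimated to be cheaper.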
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
+    if (sw.case_count() > 4 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kArm64Sub32, index_operand, value_operand,
+             g.TempImmediate(sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  Node* const user = node;
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(user);
+  if (m.right().Is(0)) {
+    Node* const value = m.left().node();
+    if (CanCover(user, value)) {
+      switch (value->opcode()) {
+        case IrOpcode::kInt32Add:
+        case IrOpcode::kWord32And:
+          return VisitWord32Compare(this, node, &cont);
+        case IrOpcode::kInt32Sub:
+          return VisitWordCompare(this, value, kArm64Cmp32, &cont,
+                                  kArithmeticImm);
+        case IrOpcode::kWord32Equal: {
+          // Word32Equal(Word32Equal(x, y), 0) => Word32Compare(x, y, ne).
+          Int32BinopMatcher mequal(value);
+          node->ReplaceInput(0, mequal.left().node());
+          node->ReplaceInput(1, mequal.right().node());
+          cont.Negate();
+          // {node} still does not cover its new operands, because {mequal} is
+          // still using them.
+          // Since we won't generate any more code for {mequal}, set its
+          // operands to zero to make sure {node} can cover them.
+          // This improves pattern matching in VisitWord32Compare.
+          mequal.node()->ReplaceInput(0, m.right().node());
+          mequal.node()->ReplaceInput(1, m.right().node());
+          return VisitWord32Compare(this, node, &cont);
+        }
+        default:
+          break;
+      }
+      return VisitWord32Test(this, value, &cont);
+    }
+  }
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitWord64Equal(Node* const node) {
+  Node* const user = node;
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int64BinopMatcher m(user);
+  if (m.right().Is(0)) {
+    Node* const value = m.left().node();
+    if (CanCover(user, value)) {
+      switch (value->opcode()) {
+        case IrOpcode::kWord64And:
+          return VisitWordCompare(this, value, kArm64Tst, &cont, kLogical64Imm);
+        default:
+          break;
+      }
+      return VisitWord64Test(this, value, &cont);
+    }
+  }
+  VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int32BinopMatcher>(this, node, kArm64Add32,
+                                         kArithmeticImm, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int32BinopMatcher>(this, node, kArm64Add32, kArithmeticImm, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int32BinopMatcher>(this, node, kArm64Sub32,
+                                         kArithmeticImm, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int32BinopMatcher>(this, node, kArm64Sub32, kArithmeticImm, &cont);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    // ARM64 doesn't set the overflow flag for multiplication, so we need to
+    // test on kNotEqual. Here is the code sequence used:
+    //   smull result, left, right
+    //   cmp result.X(), Operand(result, SXTW)
+    FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
+    return EmitInt32MulWithOverflow(this, node, &cont);
+  }
+  FlagsContinuation cont;
+  EmitInt32MulWithOverflow(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int64BinopMatcher>(this, node, kArm64Add, kArithmeticImm,
+                                         &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int64BinopMatcher>(this, node, kArm64Add, kArithmeticImm, &cont);
+}
+
+void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int64BinopMatcher>(this, node, kArm64Sub, kArithmeticImm,
+                                         &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int64BinopMatcher>(this, node, kArm64Sub, kArithmeticImm, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
+}
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
+}
+
+void InstructionSelector::VisitUint64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
+}
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kArm64Cmp, &cont, kArithmeticImm);
+}
+
+void InstructionSelector::VisitFloat32Neg(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* in = node->InputAt(0);
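+  // -(a * b) can be emitted as a single fnmul when the multiplication has no
+  // other users.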
+  if (in->opcode() == IrOpcode::kFloat32Mul && CanCover(node, in)) {
+    Float32BinopMatcher m(in);
+    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.left().node()), g.UseRegister(m.right().node()));
+    return;
+  }
+  VisitRR(this, kArm64Float32Neg, node);
+}
+
+void InstructionSelector::VisitFloat32Mul(Node* node) {
+  Arm64OperandGenerator g(this);
+  Float32BinopMatcher m(node);
+
+  if (m.left().IsFloat32Neg() && CanCover(node, m.left().node())) {
+    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.left().node()->InputAt(0)),
+         g.UseRegister(m.right().node()));
+    return;
+  }
+
+  if (m.right().IsFloat32Neg() && CanCover(node, m.right().node())) {
+    Emit(kArm64Float32Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.right().node()->InputAt(0)),
+         g.UseRegister(m.left().node()));
+    return;
+  }
+  return VisitRRR(this, kArm64Float32Mul, node);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kFloatLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kFloatLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
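+  // Float64InsertLowWord32(Float64InsertHighWord32(x, hi), lo) replaces both
+  // halves, so the result can be built with a single bfi on the integer side
+  // and then moved into the FP register.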
+  if (left->opcode() == IrOpcode::kFloat64InsertHighWord32 &&
+      CanCover(node, left)) {
+    Node* right_of_left = left->InputAt(1);
+    Emit(kArm64Bfi, g.DefineSameAsFirst(right), g.UseRegister(right),
+         g.UseRegister(right_of_left), g.TempImmediate(32),
+         g.TempImmediate(32));
+    Emit(kArm64Float64MoveU64, g.DefineAsRegister(node), g.UseRegister(right));
+    return;
+  }
+  Emit(kArm64Float64InsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kFloat64InsertLowWord32 &&
+      CanCover(node, left)) {
+    Node* right_of_left = left->InputAt(1);
+    Emit(kArm64Bfi, g.DefineSameAsFirst(left), g.UseRegister(right_of_left),
+         g.UseRegister(right), g.TempImmediate(32), g.TempImmediate(32));
+    Emit(kArm64Float64MoveU64, g.DefineAsRegister(node), g.UseRegister(left));
+    return;
+  }
+  Emit(kArm64Float64InsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64Neg(Node* node) {
+  Arm64OperandGenerator g(this);
+  Node* in = node->InputAt(0);
+  if (in->opcode() == IrOpcode::kFloat64Mul && CanCover(node, in)) {
+    Float64BinopMatcher m(in);
+    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.left().node()), g.UseRegister(m.right().node()));
+    return;
+  }
+  VisitRR(this, kArm64Float64Neg, node);
+}
+
+void InstructionSelector::VisitFloat64Mul(Node* node) {
+  Arm64OperandGenerator g(this);
+  Float64BinopMatcher m(node);
+
+  if (m.left().IsFloat64Neg() && CanCover(node, m.left().node())) {
+    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.left().node()->InputAt(0)),
+         g.UseRegister(m.right().node()));
+    return;
+  }
+
+  if (m.right().IsFloat64Neg() && CanCover(node, m.right().node())) {
+    Emit(kArm64Float64Fnmul, g.DefineAsRegister(node),
+         g.UseRegister(m.right().node()->InputAt(0)),
+         g.UseRegister(m.left().node()));
+    return;
+  }
+  return VisitRRR(this, kArm64Float64Mul, node);
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  Arm64OperandGenerator g(this);
+  Emit(kArm64DmbIsh, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode =
+          load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kWord32AtomicLoadInt16
+                                   : kWord32AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicLoadWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode = kArm64Word64AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kArm64Word64AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kArm64Word64AtomicLoadUint32;
+      break;
+    case MachineRepresentation::kWord64:
+      opcode = kArm64Word64AtomicLoadUint64;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicStoreWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicStore(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kArm64Word64AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kArm64Word64AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kArm64Word64AtomicStoreWord32;
+      break;
+    case MachineRepresentation::kWord64:
+      opcode = kArm64Word64AtomicStoreWord64;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicStore(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kArm64Word64AtomicExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kArm64Word64AtomicExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kArm64Word64AtomicExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kArm64Word64AtomicExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kArm64Word64AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kArm64Word64AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kArm64Word64AtomicCompareExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kArm64Word64AtomicCompareExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord64AtomicBinaryOperation(
+    Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op,
+    ArchOpcode uint64_op) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Uint32()) {
+    opcode = uint32_op;
+  } else if (type == MachineType::Uint64()) {
+    opcode = uint64_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                               \
+  void InstructionSelector::VisitWord64Atomic##op(Node* node) {              \
+    VisitWord64AtomicBinaryOperation(                                        \
+        node, kArm64Word64Atomic##op##Uint8, kArm64Word64Atomic##op##Uint16, \
+        kArm64Word64Atomic##op##Uint32, kArm64Word64Atomic##op##Uint64);     \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+#define SIMD_TYPE_LIST(V) \
+  V(F64x2)                \
+  V(F32x4)                \
+  V(I64x2)                \
+  V(I32x4)                \
+  V(I16x8)                \
+  V(I8x16)
+
+#define SIMD_UNOP_LIST(V)                                 \
+  V(F64x2Abs, kArm64F64x2Abs)                             \
+  V(F64x2Neg, kArm64F64x2Neg)                             \
+  V(F64x2Sqrt, kArm64F64x2Sqrt)                           \
+  V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4)         \
+  V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4)         \
+  V(F32x4Abs, kArm64F32x4Abs)                             \
+  V(F32x4Neg, kArm64F32x4Neg)                             \
+  V(F32x4Sqrt, kArm64F32x4Sqrt)                           \
+  V(F32x4RecipApprox, kArm64F32x4RecipApprox)             \
+  V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox)     \
+  V(I64x2Neg, kArm64I64x2Neg)                             \
+  V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4)         \
+  V(I32x4Neg, kArm64I32x4Neg)                             \
+  V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4)         \
+  V(I32x4Abs, kArm64I32x4Abs)                             \
+  V(I16x8Neg, kArm64I16x8Neg)                             \
+  V(I16x8Abs, kArm64I16x8Abs)                             \
+  V(I8x16Neg, kArm64I8x16Neg)                             \
+  V(I8x16Abs, kArm64I8x16Abs)                             \
+  V(S128Not, kArm64S128Not)                               \
+  V(V32x4AnyTrue, kArm64V128AnyTrue)                      \
+  V(V32x4AllTrue, kArm64V32x4AllTrue)                     \
+  V(V16x8AnyTrue, kArm64V128AnyTrue)                      \
+  V(V16x8AllTrue, kArm64V16x8AllTrue)                     \
+  V(V8x16AnyTrue, kArm64V128AnyTrue)                      \
+  V(V8x16AllTrue, kArm64V8x16AllTrue)
+
+#define SIMD_SHIFT_OP_LIST(V) \
+  V(I64x2Shl, 64)             \
+  V(I64x2ShrS, 64)            \
+  V(I64x2ShrU, 64)            \
+  V(I32x4Shl, 32)             \
+  V(I32x4ShrS, 32)            \
+  V(I32x4ShrU, 32)            \
+  V(I16x8Shl, 16)             \
+  V(I16x8ShrS, 16)            \
+  V(I16x8ShrU, 16)            \
+  V(I8x16Shl, 8)              \
+  V(I8x16ShrS, 8)             \
+  V(I8x16ShrU, 8)
+
+#define SIMD_BINOP_LIST(V)                              \
+  V(F64x2Add, kArm64F64x2Add)                           \
+  V(F64x2Sub, kArm64F64x2Sub)                           \
+  V(F64x2Mul, kArm64F64x2Mul)                           \
+  V(F64x2Div, kArm64F64x2Div)                           \
+  V(F64x2Min, kArm64F64x2Min)                           \
+  V(F64x2Max, kArm64F64x2Max)                           \
+  V(F64x2Eq, kArm64F64x2Eq)                             \
+  V(F64x2Ne, kArm64F64x2Ne)                             \
+  V(F64x2Lt, kArm64F64x2Lt)                             \
+  V(F64x2Le, kArm64F64x2Le)                             \
+  V(F32x4Add, kArm64F32x4Add)                           \
+  V(F32x4AddHoriz, kArm64F32x4AddHoriz)                 \
+  V(F32x4Sub, kArm64F32x4Sub)                           \
+  V(F32x4Mul, kArm64F32x4Mul)                           \
+  V(F32x4Div, kArm64F32x4Div)                           \
+  V(F32x4Min, kArm64F32x4Min)                           \
+  V(F32x4Max, kArm64F32x4Max)                           \
+  V(F32x4Eq, kArm64F32x4Eq)                             \
+  V(F32x4Ne, kArm64F32x4Ne)                             \
+  V(F32x4Lt, kArm64F32x4Lt)                             \
+  V(F32x4Le, kArm64F32x4Le)                             \
+  V(I64x2Add, kArm64I64x2Add)                           \
+  V(I64x2Sub, kArm64I64x2Sub)                           \
+  V(I64x2Eq, kArm64I64x2Eq)                             \
+  V(I32x4AddHoriz, kArm64I32x4AddHoriz)                 \
+  V(I32x4Mul, kArm64I32x4Mul)                           \
+  V(I32x4MinS, kArm64I32x4MinS)                         \
+  V(I32x4MaxS, kArm64I32x4MaxS)                         \
+  V(I32x4Eq, kArm64I32x4Eq)                             \
+  V(I32x4Ne, kArm64I32x4Ne)                             \
+  V(I32x4GtS, kArm64I32x4GtS)                           \
+  V(I32x4GeS, kArm64I32x4GeS)                           \
+  V(I32x4MinU, kArm64I32x4MinU)                         \
+  V(I32x4MaxU, kArm64I32x4MaxU)                         \
+  V(I32x4GtU, kArm64I32x4GtU)                           \
+  V(I32x4GeU, kArm64I32x4GeU)                           \
+  V(I32x4DotI16x8S, kArm64I32x4DotI16x8S)               \
+  V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4)       \
+  V(I16x8AddSatS, kArm64I16x8AddSatS)                   \
+  V(I16x8AddHoriz, kArm64I16x8AddHoriz)                 \
+  V(I16x8SubSatS, kArm64I16x8SubSatS)                   \
+  V(I16x8Mul, kArm64I16x8Mul)                           \
+  V(I16x8MinS, kArm64I16x8MinS)                         \
+  V(I16x8MaxS, kArm64I16x8MaxS)                         \
+  V(I16x8Eq, kArm64I16x8Eq)                             \
+  V(I16x8Ne, kArm64I16x8Ne)                             \
+  V(I16x8GtS, kArm64I16x8GtS)                           \
+  V(I16x8GeS, kArm64I16x8GeS)                           \
+  V(I16x8UConvertI32x4, kArm64I16x8UConvertI32x4)       \
+  V(I16x8AddSatU, kArm64I16x8AddSatU)                   \
+  V(I16x8SubSatU, kArm64I16x8SubSatU)                   \
+  V(I16x8MinU, kArm64I16x8MinU)                         \
+  V(I16x8MaxU, kArm64I16x8MaxU)                         \
+  V(I16x8GtU, kArm64I16x8GtU)                           \
+  V(I16x8GeU, kArm64I16x8GeU)                           \
+  V(I16x8RoundingAverageU, kArm64I16x8RoundingAverageU) \
+  V(I16x8Q15MulRSatS, kArm64I16x8Q15MulRSatS)           \
+  V(I8x16SConvertI16x8, kArm64I8x16SConvertI16x8)       \
+  V(I8x16AddSatS, kArm64I8x16AddSatS)                   \
+  V(I8x16SubSatS, kArm64I8x16SubSatS)                   \
+  V(I8x16Mul, kArm64I8x16Mul)                           \
+  V(I8x16MinS, kArm64I8x16MinS)                         \
+  V(I8x16MaxS, kArm64I8x16MaxS)                         \
+  V(I8x16Eq, kArm64I8x16Eq)                             \
+  V(I8x16Ne, kArm64I8x16Ne)                             \
+  V(I8x16GtS, kArm64I8x16GtS)                           \
+  V(I8x16GeS, kArm64I8x16GeS)                           \
+  V(I8x16UConvertI16x8, kArm64I8x16UConvertI16x8)       \
+  V(I8x16AddSatU, kArm64I8x16AddSatU)                   \
+  V(I8x16SubSatU, kArm64I8x16SubSatU)                   \
+  V(I8x16MinU, kArm64I8x16MinU)                         \
+  V(I8x16MaxU, kArm64I8x16MaxU)                         \
+  V(I8x16GtU, kArm64I8x16GtU)                           \
+  V(I8x16GeU, kArm64I8x16GeU)                           \
+  V(I8x16RoundingAverageU, kArm64I8x16RoundingAverageU) \
+  V(S128And, kArm64S128And)                             \
+  V(S128Or, kArm64S128Or)                               \
+  V(S128Xor, kArm64S128Xor)                             \
+  V(S128AndNot, kArm64S128AndNot)
+
+void InstructionSelector::VisitS128Const(Node* node) {
+  Arm64OperandGenerator g(this);
+  static const int kUint32Immediates = 4;
+  uint32_t val[kUint32Immediates];
+  STATIC_ASSERT(sizeof(val) == kSimd128Size);
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If all bytes are zeros, avoid emitting code for generic constants
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kArm64S128Zero, dst);
+  } else {
+    Emit(kArm64S128Const, dst, g.UseImmediate(val[0]), g.UseImmediate(val[1]),
+         g.UseImmediate(val[2]), g.UseImmediate(val[3]));
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  Arm64OperandGenerator g(this);
+  Emit(kArm64S128Zero, g.DefineAsRegister(node));
+}
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    VisitRR(this, kArm64##Type##Splat, node);                \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    VisitRRI(this, kArm64##Type##ExtractLane##Sign, node);               \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I64x2, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    VisitRRIR(this, kArm64##Type##ReplaceLane, node);              \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
+#undef SIMD_VISIT_REPLACE_LANE
+#undef SIMD_TYPE_LIST
+
+#define SIMD_VISIT_UNOP(Name, instruction)            \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, instruction, node);                 \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+#undef SIMD_UNOP_LIST
+
+#define SIMD_VISIT_SHIFT_OP(Name, width)                \
+  void InstructionSelector::Visit##Name(Node* node) {   \
+    VisitSimdShiftRRR(this, kArm64##Name, node, width); \
+  }
+SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
+#undef SIMD_VISIT_SHIFT_OP
+#undef SIMD_SHIFT_OP_LIST
+
+#define SIMD_VISIT_BINOP(Name, instruction)           \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, instruction, node);                \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+#undef SIMD_BINOP_LIST
+
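+// AArch64 NEON has no 64x2 integer multiply, so the code generator lowers
+// I64x2Mul to a sequence of 32-bit multiplies and needs one scratch SIMD
+// register for the intermediate products.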
+void InstructionSelector::VisitI64x2Mul(Node* node) {
+  Arm64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kArm64I64x2Mul, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+#define VISIT_SIMD_ADD(Type)                                                   \
+  void InstructionSelector::Visit##Type##Add(Node* node) {                     \
+    Arm64OperandGenerator g(this);                                             \
+    Node* left = node->InputAt(0);                                             \
+    Node* right = node->InputAt(1);                                            \
+    /* Select Mla(z, x, y) for Add(Mul(x, y), z). */                           \
+    if (left->opcode() == IrOpcode::k##Type##Mul && CanCover(node, left)) {    \
+      Emit(kArm64##Type##Mla, g.DefineSameAsFirst(node), g.UseRegister(right), \
+           g.UseRegister(left->InputAt(0)), g.UseRegister(left->InputAt(1)));  \
+      return;                                                                  \
+    }                                                                          \
+    /* Select Mla(z, x, y) for Add(z, Mul(x, y)). */                           \
+    if (right->opcode() == IrOpcode::k##Type##Mul && CanCover(node, right)) {  \
+      Emit(kArm64##Type##Mla, g.DefineSameAsFirst(node), g.UseRegister(left),  \
+           g.UseRegister(right->InputAt(0)),                                   \
+           g.UseRegister(right->InputAt(1)));                                  \
+      return;                                                                  \
+    }                                                                          \
+    VisitRRR(this, kArm64##Type##Add, node);                                   \
+  }
+
+VISIT_SIMD_ADD(I32x4)
+VISIT_SIMD_ADD(I16x8)
+VISIT_SIMD_ADD(I8x16)
+#undef VISIT_SIMD_ADD
+
+#define VISIT_SIMD_SUB(Type)                                                  \
+  void InstructionSelector::Visit##Type##Sub(Node* node) {                    \
+    Arm64OperandGenerator g(this);                                            \
+    Node* left = node->InputAt(0);                                            \
+    Node* right = node->InputAt(1);                                           \
+    /* Select Mls(z, x, y) for Sub(z, Mul(x, y)). */                          \
+    if (right->opcode() == IrOpcode::k##Type##Mul && CanCover(node, right)) { \
+      Emit(kArm64##Type##Mls, g.DefineSameAsFirst(node), g.UseRegister(left), \
+           g.UseRegister(right->InputAt(0)),                                  \
+           g.UseRegister(right->InputAt(1)));                                 \
+      return;                                                                 \
+    }                                                                         \
+    VisitRRR(this, kArm64##Type##Sub, node);                                  \
+  }
+
+VISIT_SIMD_SUB(I32x4)
+VISIT_SIMD_SUB(I16x8)
+VISIT_SIMD_SUB(I8x16)
+#undef VISIT_SIMD_SUB
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  Arm64OperandGenerator g(this);
+  Emit(kArm64S128Select, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       g.UseRegister(node->InputAt(2)));
+}
+
+#define VISIT_SIMD_QFMOP(op)                                               \
+  void InstructionSelector::Visit##op(Node* node) {                        \
+    Arm64OperandGenerator g(this);                                         \
+    Emit(kArm64##op, g.DefineSameAsFirst(node),                            \
+         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+         g.UseRegister(node->InputAt(2)));                                 \
+  }
+VISIT_SIMD_QFMOP(F64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms)
+#undef VISIT_SIMD_QFMOP
+
+namespace {
+template <ArchOpcode opcode>
+void VisitBitMask(InstructionSelector* selector, Node* node) {
+  Arm64OperandGenerator g(selector);
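+  // A bitmask extracts each lane's sign bit and folds the bits into a scalar;
+  // the code generator's lowering needs two scratch SIMD registers for the
+  // per-lane bit weights and the intermediate result.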
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitI8x16BitMask(Node* node) {
+  VisitBitMask<kArm64I8x16BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI16x8BitMask(Node* node) {
+  VisitBitMask<kArm64I16x8BitMask>(this, node);
+}
+
+void InstructionSelector::VisitI32x4BitMask(Node* node) {
+  VisitBitMask<kArm64I32x4BitMask>(this, node);
+}
+
+namespace {
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+};
+
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kArm64S32x4ZipLeft},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kArm64S32x4ZipRight},
+    {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
+     kArm64S32x4UnzipLeft},
+    {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
+     kArm64S32x4UnzipRight},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
+     kArm64S32x4TransposeLeft},
+    {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
+     kArm64S32x4TransposeRight},
+    {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
+     kArm64S32x2Reverse},
+
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kArm64S16x8ZipLeft},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kArm64S16x8ZipRight},
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kArm64S16x8UnzipLeft},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kArm64S16x8UnzipRight},
+    {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
+     kArm64S16x8TransposeLeft},
+    {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
+     kArm64S16x8TransposeRight},
+    {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9},
+     kArm64S16x4Reverse},
+    {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
+     kArm64S16x2Reverse},
+
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kArm64S8x16ZipLeft},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kArm64S8x16ZipRight},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kArm64S8x16UnzipLeft},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kArm64S8x16UnzipRight},
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kArm64S8x16TransposeLeft},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kArm64S8x16TransposeRight},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArm64S8x8Reverse},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArm64S8x4Reverse},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
+     kArm64S8x2Reverse}};
+
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         ArchOpcode* opcode) {
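+  // For a swizzle both inputs are the same vector, so lane indices are only
+  // meaningful modulo 16; a two-input shuffle indexes the full 0..31 range.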
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
+  for (size_t i = 0; i < num_entries; i++) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; j++) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *opcode = entry.opcode;
+      return true;
+    }
+  }
+  return false;
+}
+
+void ArrangeShuffleTable(Arm64OperandGenerator* g, Node* input0, Node* input1,
+                         InstructionOperand* src0, InstructionOperand* src1) {
+  if (input0 == input1) {
+    // Unary, any q-register can be the table.
+    *src0 = *src1 = g->UseRegister(input0);
+  } else {
+    // Binary, table registers must be consecutive.
+    *src0 = g->UseFixed(input0, fp_fixed1);
+    *src1 = g->UseFixed(input1, fp_fixed2);
+  }
+}
+
+}  // namespace
+
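+// Shuffle selection tries progressively more general lowerings: a table-driven
+// match against single-instruction zips/unzips/transposes/reverses, then a
+// byte-wise concat (EXT), then 32x4 shuffles and splats, then 8- and 16-lane
+// splats, and finally a generic TBL lookup.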
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  uint8_t shuffle32x4[4];
+  Arm64OperandGenerator g(this);
+  ArchOpcode opcode;
+  if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
+                          is_swizzle, &opcode)) {
+    VisitRRR(this, opcode, node);
+    return;
+  }
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  uint8_t offset;
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    Emit(kArm64S8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseRegister(input1), g.UseImmediate(offset));
+    return;
+  }
+  int index = 0;
+  if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    if (wasm::SimdShuffle::TryMatchSplat<4>(shuffle, &index)) {
+      DCHECK_GT(4, index);
+      Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+           g.UseImmediate(4), g.UseImmediate(index % 4));
+    } else if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
+      EmitIdentity(node);
+    } else {
+      Emit(kArm64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
+           g.UseRegister(input1),
+           g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4)));
+    }
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
+    DCHECK_GT(8, index);
+    Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseImmediate(8), g.UseImmediate(index % 8));
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
+    DCHECK_GT(16, index);
+    Emit(kArm64S128Dup, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseImmediate(16), g.UseImmediate(index % 16));
+    return;
+  }
+  // The code generator uses a TBL lookup; arrange the sources to form a
+  // valid lookup table.
+  InstructionOperand src0, src1;
+  ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
+  Emit(kArm64I8x16Shuffle, g.DefineAsRegister(node), src0, src1,
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12)));
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
+  VisitRR(this, kArm64Sxtb32, node);
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
+  VisitRR(this, kArm64Sxth32, node);
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) {
+  VisitRR(this, kArm64Sxtb, node);
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) {
+  VisitRR(this, kArm64Sxth, node);
+}
+
+void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
+  VisitRR(this, kArm64Sxtw, node);
+}
+
+namespace {
+void VisitPminOrPmax(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  Arm64OperandGenerator g(selector);
+  // Need all unique registers because we first compare the two inputs, then we
+  // need the inputs to remain unchanged for the bitselect later.
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+}  // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitPminOrPmax(this, kArm64F32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitPminOrPmax(this, kArm64F32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitPminOrPmax(this, kArm64F64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitPminOrPmax(this, kArm64F64x2Pmax, node);
+}
+
+namespace {
+void VisitSignExtendLong(InstructionSelector* selector, ArchOpcode opcode,
+                         Node* node, int lane_size) {
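+  // The destination lane size travels in MiscField so the code generator can
+  // pick the matching SXTL/SXTL2 (or UXTL/UXTL2) arrangement.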
+  InstructionCode code = opcode;
+  code |= MiscField::encode(lane_size);
+  VisitRR(selector, code, node);
+}
+}  // namespace
+
+void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl, node, 64);
+}
+
+void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl2, node, 64);
+}
+
+void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl, node, 64);
+}
+
+void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl2, node, 64);
+}
+
+void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl, node, 32);
+}
+
+void InstructionSelector::VisitI32x4SConvertI16x8High(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl2, node, 32);
+}
+
+void InstructionSelector::VisitI32x4UConvertI16x8Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl, node, 32);
+}
+
+void InstructionSelector::VisitI32x4UConvertI16x8High(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl2, node, 32);
+}
+
+void InstructionSelector::VisitI16x8SConvertI8x16Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl, node, 16);
+}
+
+void InstructionSelector::VisitI16x8SConvertI8x16High(Node* node) {
+  VisitSignExtendLong(this, kArm64Sxtl2, node, 16);
+}
+
+void InstructionSelector::VisitI16x8UConvertI8x16Low(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl, node, 16);
+}
+
+void InstructionSelector::VisitI16x8UConvertI8x16High(Node* node) {
+  VisitSignExtendLong(this, kArm64Uxtl2, node, 16);
+}
+
+void InstructionSelector::VisitI8x16Popcnt(Node* node) {
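+  // CNT counts set bits per byte, so the encoded lane size is always 8 here.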
+  InstructionCode code = kArm64Cnt;
+  code |= MiscField::encode(8);
+  VisitRR(this, code, node);
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  return MachineOperatorBuilder::kFloat32RoundDown |
+         MachineOperatorBuilder::kFloat64RoundDown |
+         MachineOperatorBuilder::kFloat32RoundUp |
+         MachineOperatorBuilder::kFloat64RoundUp |
+         MachineOperatorBuilder::kFloat32RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTiesAway |
+         MachineOperatorBuilder::kFloat32RoundTiesEven |
+         MachineOperatorBuilder::kFloat64RoundTiesEven |
+         MachineOperatorBuilder::kWord32ShiftIsSafe |
+         MachineOperatorBuilder::kInt32DivIsSafe |
+         MachineOperatorBuilder::kUint32DivIsSafe |
+         MachineOperatorBuilder::kWord32ReverseBits |
+         MachineOperatorBuilder::kWord64ReverseBits |
+         MachineOperatorBuilder::kSatConversionIsSafe;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm64/unwinding-info-writer-arm64.cc b/src/compiler/backend/arm64/unwinding-info-writer-arm64.cc
new file mode 100644
index 0000000..bc3a91a
--- /dev/null
+++ b/src/compiler/backend/arm64/unwinding-info-writer-arm64.cc
@@ -0,0 +1,108 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/arm64/unwinding-info-writer-arm64.h"
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// TODO(v8:10026): When using CFI, we need to generate unwinding info to tell
+// the unwinder that return addresses are signed.
+
+void UnwindingInfoWriter::BeginInstructionBlock(int pc_offset,
+                                                const InstructionBlock* block) {
+  if (!enabled()) return;
+
+  block_will_exit_ = false;
+
+  DCHECK_LT(block->rpo_number().ToInt(),
+            static_cast<int>(block_initial_states_.size()));
+  const BlockInitialState* initial_state =
+      block_initial_states_[block->rpo_number().ToInt()];
+  if (!initial_state) return;
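+  // Only emit unwinding directives when the tracked LR state actually changes
+  // at this block boundary.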
+  if (initial_state->saved_lr_ != saved_lr_) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    if (initial_state->saved_lr_) {
+      eh_frame_writer_.RecordRegisterSavedToStack(lr, kSystemPointerSize);
+      eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+    } else {
+      eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+    }
+    saved_lr_ = initial_state->saved_lr_;
+  }
+}
+
+void UnwindingInfoWriter::EndInstructionBlock(const InstructionBlock* block) {
+  if (!enabled() || block_will_exit_) return;
+
+  for (const RpoNumber& successor : block->successors()) {
+    int successor_index = successor.ToInt();
+    DCHECK_LT(successor_index, static_cast<int>(block_initial_states_.size()));
+    const BlockInitialState* existing_state =
+        block_initial_states_[successor_index];
+
+    // If we already had an entry for this BB, check that the values are the
+    // same as the ones we are trying to insert.
+    if (existing_state) {
+      DCHECK_EQ(existing_state->saved_lr_, saved_lr_);
+    } else {
+      block_initial_states_[successor_index] =
+          zone_->New<BlockInitialState>(saved_lr_);
+    }
+  }
+}
+
+void UnwindingInfoWriter::MarkFrameConstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // Regardless of the type of frame constructed, the relevant part of the
+  // layout is always the one in the diagram:
+  //
+  // |   ....   |         higher addresses
+  // +----------+               ^
+  // |    LR    |               |            |
+  // +----------+               |            |
+  // | saved FP |               |            |
+  // +----------+ <-- FP                     v
+  // |   ....   |                       stack growth
+  //
+  // The LR is pushed on the stack, and we can record this fact at the end of
+  // the construction, since the LR itself is not modified in the process.
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterSavedToStack(lr, kSystemPointerSize);
+  eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+  saved_lr_ = true;
+}
+
+void UnwindingInfoWriter::MarkFrameDeconstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // The lr is restored by the last operation in LeaveFrame().
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+  saved_lr_ = false;
+}
+
+void UnwindingInfoWriter::MarkLinkRegisterOnTopOfStack(int pc_offset,
+                                                       const Register& sp) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(sp, 0);
+  eh_frame_writer_.RecordRegisterSavedToStack(lr, 0);
+}
+
+void UnwindingInfoWriter::MarkPopLinkRegisterFromTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(fp, 0);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(lr);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/arm64/unwinding-info-writer-arm64.h b/src/compiler/backend/arm64/unwinding-info-writer-arm64.h
new file mode 100644
index 0000000..3678873
--- /dev/null
+++ b/src/compiler/backend/arm64/unwinding-info-writer-arm64.h
@@ -0,0 +1,73 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_ARM64_UNWINDING_INFO_WRITER_ARM64_H_
+#define V8_COMPILER_BACKEND_ARM64_UNWINDING_INFO_WRITER_ARM64_H_
+
+#include "src/diagnostics/eh-frame.h"
+#include "src/flags/flags.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone* zone)
+      : zone_(zone),
+        eh_frame_writer_(zone),
+        saved_lr_(false),
+        block_will_exit_(false),
+        block_initial_states_(zone) {
+    if (enabled()) eh_frame_writer_.Initialize();
+  }
+
+  void SetNumberOfInstructionBlocks(int number) {
+    if (enabled()) block_initial_states_.resize(number);
+  }
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock* block);
+  void EndInstructionBlock(const InstructionBlock* block);
+
+  void MarkLinkRegisterOnTopOfStack(int pc_offset, const Register& sp);
+  void MarkPopLinkRegisterFromTopOfStack(int pc_offset);
+
+  void MarkFrameConstructed(int at_pc);
+  void MarkFrameDeconstructed(int at_pc);
+
+  void MarkBlockWillExit() { block_will_exit_ = true; }
+
+  void Finish(int code_size) {
+    if (enabled()) eh_frame_writer_.Finish(code_size);
+  }
+
+  EhFrameWriter* eh_frame_writer() {
+    return enabled() ? &eh_frame_writer_ : nullptr;
+  }
+
+ private:
+  bool enabled() const { return FLAG_perf_prof_unwinding_info; }
+
+  class BlockInitialState : public ZoneObject {
+   public:
+    explicit BlockInitialState(bool saved_lr) : saved_lr_(saved_lr) {}
+
+    bool saved_lr_;
+  };
+
+  Zone* zone_;
+  EhFrameWriter eh_frame_writer_;
+  bool saved_lr_;
+  bool block_will_exit_;
+
+  ZoneVector<const BlockInitialState*> block_initial_states_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_ARM64_UNWINDING_INFO_WRITER_ARM64_H_
diff --git a/src/compiler/backend/code-generator-impl.h b/src/compiler/backend/code-generator-impl.h
new file mode 100644
index 0000000..93113b9
--- /dev/null
+++ b/src/compiler/backend/code-generator-impl.h
@@ -0,0 +1,264 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_CODE_GENERATOR_IMPL_H_
+#define V8_COMPILER_BACKEND_CODE_GENERATOR_IMPL_H_
+
+#include "src/codegen/macro-assembler.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/linkage.h"
+#include "src/compiler/opcodes.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Converts InstructionOperands from a given instruction to
+// architecture-specific registers and operands after they have been assigned
+// by the register allocator.
+class InstructionOperandConverter {
+ public:
+  InstructionOperandConverter(CodeGenerator* gen, Instruction* instr)
+      : gen_(gen), instr_(instr) {}
+
+  // -- Instruction operand accesses with conversions --------------------------
+
+  Register InputRegister(size_t index) const {
+    return ToRegister(instr_->InputAt(index));
+  }
+
+  FloatRegister InputFloatRegister(size_t index) {
+    return ToFloatRegister(instr_->InputAt(index));
+  }
+
+  DoubleRegister InputDoubleRegister(size_t index) {
+    return ToDoubleRegister(instr_->InputAt(index));
+  }
+
+  Simd128Register InputSimd128Register(size_t index) {
+    return ToSimd128Register(instr_->InputAt(index));
+  }
+
+  double InputDouble(size_t index) { return ToDouble(instr_->InputAt(index)); }
+
+  float InputFloat32(size_t index) { return ToFloat32(instr_->InputAt(index)); }
+
+  int32_t InputInt32(size_t index) {
+    return ToConstant(instr_->InputAt(index)).ToInt32();
+  }
+
+  uint32_t InputUint32(size_t index) {
+    return bit_cast<uint32_t>(InputInt32(index));
+  }
+
+  int64_t InputInt64(size_t index) {
+    return ToConstant(instr_->InputAt(index)).ToInt64();
+  }
+
+  int8_t InputInt8(size_t index) {
+    return static_cast<int8_t>(InputInt32(index));
+  }
+
+  uint8_t InputUint8(size_t index) {
+    return bit_cast<uint8_t>(InputInt8(index));
+  }
+
+  int16_t InputInt16(size_t index) {
+    return static_cast<int16_t>(InputInt32(index));
+  }
+
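+  // Accessors that mask the input down to its low bits; the backends use
+  // these for shift amounts and other small immediates.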
+  uint8_t InputInt3(size_t index) {
+    return static_cast<uint8_t>(InputInt32(index) & 0x7);
+  }
+
+  uint8_t InputInt4(size_t index) {
+    return static_cast<uint8_t>(InputInt32(index) & 0xF);
+  }
+
+  uint8_t InputInt5(size_t index) {
+    return static_cast<uint8_t>(InputInt32(index) & 0x1F);
+  }
+
+  uint8_t InputInt6(size_t index) {
+    return static_cast<uint8_t>(InputInt32(index) & 0x3F);
+  }
+
+  ExternalReference InputExternalReference(size_t index) {
+    return ToExternalReference(instr_->InputAt(index));
+  }
+
+  Handle<Code> InputCode(size_t index) {
+    return ToCode(instr_->InputAt(index));
+  }
+
+  Label* InputLabel(size_t index) { return ToLabel(instr_->InputAt(index)); }
+
+  RpoNumber InputRpo(size_t index) {
+    return ToRpoNumber(instr_->InputAt(index));
+  }
+
+  Register OutputRegister(size_t index = 0) const {
+    return ToRegister(instr_->OutputAt(index));
+  }
+
+  Register TempRegister(size_t index) {
+    return ToRegister(instr_->TempAt(index));
+  }
+
+  FloatRegister OutputFloatRegister() {
+    return ToFloatRegister(instr_->Output());
+  }
+
+  DoubleRegister OutputDoubleRegister() {
+    return ToDoubleRegister(instr_->Output());
+  }
+
+  Simd128Register OutputSimd128Register() {
+    return ToSimd128Register(instr_->Output());
+  }
+
+  Simd128Register TempSimd128Register(size_t index) {
+    return ToSimd128Register(instr_->TempAt(index));
+  }
+
+  // -- Conversions for operands -----------------------------------------------
+
+  Label* ToLabel(InstructionOperand* op) {
+    return gen_->GetLabel(ToRpoNumber(op));
+  }
+
+  RpoNumber ToRpoNumber(InstructionOperand* op) {
+    return ToConstant(op).ToRpoNumber();
+  }
+
+  Register ToRegister(InstructionOperand* op) const {
+    return LocationOperand::cast(op)->GetRegister();
+  }
+
+  FloatRegister ToFloatRegister(InstructionOperand* op) {
+    return LocationOperand::cast(op)->GetFloatRegister();
+  }
+
+  DoubleRegister ToDoubleRegister(InstructionOperand* op) {
+    return LocationOperand::cast(op)->GetDoubleRegister();
+  }
+
+  Simd128Register ToSimd128Register(InstructionOperand* op) {
+    return LocationOperand::cast(op)->GetSimd128Register();
+  }
+
+  Constant ToConstant(InstructionOperand* op) const {
+    if (op->IsImmediate()) {
+      return gen_->instructions()->GetImmediate(ImmediateOperand::cast(op));
+    }
+    return gen_->instructions()->GetConstant(
+        ConstantOperand::cast(op)->virtual_register());
+  }
+
+  double ToDouble(InstructionOperand* op) {
+    return ToConstant(op).ToFloat64().value();
+  }
+
+  float ToFloat32(InstructionOperand* op) { return ToConstant(op).ToFloat32(); }
+
+  ExternalReference ToExternalReference(InstructionOperand* op) {
+    return ToConstant(op).ToExternalReference();
+  }
+
+  Handle<Code> ToCode(InstructionOperand* op) {
+    return ToConstant(op).ToCode();
+  }
+
+  const Frame* frame() const { return gen_->frame(); }
+  FrameAccessState* frame_access_state() const {
+    return gen_->frame_access_state();
+  }
+  Isolate* isolate() const { return gen_->isolate(); }
+  Linkage* linkage() const { return gen_->linkage(); }
+
+ protected:
+  CodeGenerator* gen_;
+  Instruction* instr_;
+};
+
+// Deoptimization exit.
+class DeoptimizationExit : public ZoneObject {
+ public:
+  explicit DeoptimizationExit(SourcePosition pos, BailoutId bailout_id,
+                              int translation_id, int pc_offset,
+                              DeoptimizeKind kind, DeoptimizeReason reason)
+      : deoptimization_id_(kNoDeoptIndex),
+        pos_(pos),
+        bailout_id_(bailout_id),
+        translation_id_(translation_id),
+        pc_offset_(pc_offset),
+        kind_(kind),
+        reason_(reason),
+        emitted_(false) {}
+
+  bool has_deoptimization_id() const {
+    return deoptimization_id_ != kNoDeoptIndex;
+  }
+  int deoptimization_id() const {
+    DCHECK(has_deoptimization_id());
+    return deoptimization_id_;
+  }
+  void set_deoptimization_id(int deoptimization_id) {
+    deoptimization_id_ = deoptimization_id;
+  }
+  SourcePosition pos() const { return pos_; }
+  Label* label() { return &label_; }
+  BailoutId bailout_id() const { return bailout_id_; }
+  int translation_id() const { return translation_id_; }
+  int pc_offset() const { return pc_offset_; }
+  DeoptimizeKind kind() const { return kind_; }
+  DeoptimizeReason reason() const { return reason_; }
+  // Returns whether the deopt exit has already been emitted. Most deopt exits
+  // are emitted contiguously at the end of the code, but unconditional deopt
+  // exits (kArchDeoptimize) may be inlined where they are encountered.
+  bool emitted() const { return emitted_; }
+  void set_emitted() { emitted_ = true; }
+
+ private:
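+  // Sentinel for "no deoptimization id assigned yet", chosen just above the
+  // signed 16-bit id range.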
+  static const int kNoDeoptIndex = kMaxInt16 + 1;
+  int deoptimization_id_;
+  const SourcePosition pos_;
+  Label label_;
+  const BailoutId bailout_id_;
+  const int translation_id_;
+  const int pc_offset_;
+  const DeoptimizeKind kind_;
+  const DeoptimizeReason reason_;
+  bool emitted_;
+};
+
+// Generator for out-of-line code that is emitted after the main code is done.
+class OutOfLineCode : public ZoneObject {
+ public:
+  explicit OutOfLineCode(CodeGenerator* gen);
+  virtual ~OutOfLineCode();
+
+  virtual void Generate() = 0;
+
+  Label* entry() { return &entry_; }
+  Label* exit() { return &exit_; }
+  const Frame* frame() const { return frame_; }
+  TurboAssembler* tasm() { return tasm_; }
+  OutOfLineCode* next() const { return next_; }
+
+ private:
+  Label entry_;
+  Label exit_;
+  const Frame* const frame_;
+  TurboAssembler* const tasm_;
+  OutOfLineCode* const next_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_CODE_GENERATOR_IMPL_H_
diff --git a/src/compiler/backend/code-generator.cc b/src/compiler/backend/code-generator.cc
new file mode 100644
index 0000000..0cb0e61
--- /dev/null
+++ b/src/compiler/backend/code-generator.cc
@@ -0,0 +1,1385 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/code-generator.h"
+
+#include "src/base/iterator.h"
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/macro-assembler-inl.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/codegen/string-constants.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/globals.h"
+#include "src/compiler/linkage.h"
+#include "src/compiler/pipeline.h"
+#include "src/compiler/wasm-compiler.h"
+#include "src/diagnostics/eh-frame.h"
+#include "src/execution/frames.h"
+#include "src/logging/counters.h"
+#include "src/logging/log.h"
+#include "src/objects/smi.h"
+#include "src/utils/address-map.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class CodeGenerator::JumpTable final : public ZoneObject {
+ public:
+  JumpTable(JumpTable* next, Label** targets, size_t target_count)
+      : next_(next), targets_(targets), target_count_(target_count) {}
+
+  Label* label() { return &label_; }
+  JumpTable* next() const { return next_; }
+  Label** targets() const { return targets_; }
+  size_t target_count() const { return target_count_; }
+
+ private:
+  Label label_;
+  JumpTable* const next_;
+  Label** const targets_;
+  size_t const target_count_;
+};
+
+CodeGenerator::CodeGenerator(
+    Zone* codegen_zone, Frame* frame, Linkage* linkage,
+    InstructionSequence* instructions, OptimizedCompilationInfo* info,
+    Isolate* isolate, base::Optional<OsrHelper> osr_helper,
+    int start_source_position, JumpOptimizationInfo* jump_opt,
+    PoisoningMitigationLevel poisoning_level, const AssemblerOptions& options,
+    int32_t builtin_index, size_t max_unoptimized_frame_height,
+    size_t max_pushed_argument_count, std::unique_ptr<AssemblerBuffer> buffer,
+    const char* debug_name)
+    : zone_(codegen_zone),
+      isolate_(isolate),
+      frame_access_state_(nullptr),
+      linkage_(linkage),
+      instructions_(instructions),
+      unwinding_info_writer_(codegen_zone),
+      info_(info),
+      labels_(
+          codegen_zone->NewArray<Label>(instructions->InstructionBlockCount())),
+      current_block_(RpoNumber::Invalid()),
+      start_source_position_(start_source_position),
+      current_source_position_(SourcePosition::Unknown()),
+      tasm_(isolate, options, CodeObjectRequired::kNo, std::move(buffer)),
+      resolver_(this),
+      safepoints_(codegen_zone),
+      handlers_(codegen_zone),
+      deoptimization_exits_(codegen_zone),
+      deoptimization_literals_(codegen_zone),
+      translations_(codegen_zone),
+      max_unoptimized_frame_height_(max_unoptimized_frame_height),
+      max_pushed_argument_count_(max_pushed_argument_count),
+      caller_registers_saved_(false),
+      jump_tables_(nullptr),
+      ools_(nullptr),
+      osr_helper_(std::move(osr_helper)),
+      osr_pc_offset_(-1),
+      optimized_out_literal_id_(-1),
+      source_position_table_builder_(
+          codegen_zone, SourcePositionTableBuilder::RECORD_SOURCE_POSITIONS),
+      protected_instructions_(codegen_zone),
+      result_(kSuccess),
+      poisoning_level_(poisoning_level),
+      block_starts_(codegen_zone),
+      instr_starts_(codegen_zone),
+      debug_name_(debug_name) {
+  for (int i = 0; i < instructions->InstructionBlockCount(); ++i) {
+    new (&labels_[i]) Label;
+  }
+  CreateFrameAccessState(frame);
+  CHECK_EQ(info->is_osr(), osr_helper_.has_value());
+  tasm_.set_jump_optimization_info(jump_opt);
+  CodeKind code_kind = info->code_kind();
+  if (code_kind == CodeKind::WASM_FUNCTION ||
+      code_kind == CodeKind::WASM_TO_CAPI_FUNCTION ||
+      code_kind == CodeKind::WASM_TO_JS_FUNCTION ||
+      code_kind == CodeKind::JS_TO_WASM_FUNCTION) {
+    tasm_.set_abort_hard(true);
+  }
+  tasm_.set_builtin_index(builtin_index);
+}
+
+bool CodeGenerator::wasm_runtime_exception_support() const {
+  DCHECK_NOT_NULL(info_);
+  return info_->wasm_runtime_exception_support();
+}
+
+void CodeGenerator::AddProtectedInstructionLanding(uint32_t instr_offset,
+                                                   uint32_t landing_offset) {
+  protected_instructions_.push_back({instr_offset, landing_offset});
+}
+
+void CodeGenerator::CreateFrameAccessState(Frame* frame) {
+  FinishFrame(frame);
+  frame_access_state_ = zone()->New<FrameAccessState>(frame);
+}
+
+bool CodeGenerator::ShouldApplyOffsetToStackCheck(Instruction* instr,
+                                                  uint32_t* offset) {
+  DCHECK_EQ(instr->arch_opcode(), kArchStackPointerGreaterThan);
+
+  StackCheckKind kind =
+      static_cast<StackCheckKind>(MiscField::decode(instr->opcode()));
+  if (kind != StackCheckKind::kJSFunctionEntry) return false;
+
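+  // Only fold the offset into the stack check when it exceeds the slack
+  // already reserved for deoptimization; smaller frames are covered by the
+  // plain limit check.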
+  uint32_t stack_check_offset = *offset = GetStackCheckOffset();
+  return stack_check_offset > kStackLimitSlackForDeoptimizationInBytes;
+}
+
+uint32_t CodeGenerator::GetStackCheckOffset() {
+  if (!frame_access_state()->has_frame()) {
+    DCHECK_EQ(max_unoptimized_frame_height_, 0);
+    DCHECK_EQ(max_pushed_argument_count_, 0);
+    return 0;
+  }
+
+  int32_t optimized_frame_height =
+      frame()->GetTotalFrameSlotCount() * kSystemPointerSize;
+  DCHECK(is_int32(max_unoptimized_frame_height_));
+  int32_t signed_max_unoptimized_frame_height =
+      static_cast<int32_t>(max_unoptimized_frame_height_);
+
+  // The offset is either the delta between the optimized frames and the
+  // interpreted frame, or the maximal number of bytes pushed to the stack
+  // while preparing for function calls, whichever is bigger.
+  uint32_t frame_height_delta = static_cast<uint32_t>(std::max(
+      signed_max_unoptimized_frame_height - optimized_frame_height, 0));
+  uint32_t max_pushed_argument_bytes =
+      static_cast<uint32_t>(max_pushed_argument_count_ * kSystemPointerSize);
+  return std::max(frame_height_delta, max_pushed_argument_bytes);
+}
+
+CodeGenerator::CodeGenResult CodeGenerator::AssembleDeoptimizerCall(
+    DeoptimizationExit* exit) {
+  int deoptimization_id = exit->deoptimization_id();
+  if (deoptimization_id > Deoptimizer::kMaxNumberOfEntries) {
+    return kTooManyDeoptimizationBailouts;
+  }
+
+  DeoptimizeKind deopt_kind = exit->kind();
+
+  DeoptimizeReason deoptimization_reason = exit->reason();
+  Builtins::Name deopt_entry =
+      Deoptimizer::GetDeoptimizationEntry(tasm()->isolate(), deopt_kind);
+  Label* jump_deoptimization_entry_label =
+      &jump_deoptimization_entry_labels_[static_cast<int>(deopt_kind)];
+  if (info()->source_positions()) {
+    tasm()->RecordDeoptReason(deoptimization_reason, exit->pos(),
+                              deoptimization_id);
+  }
+
+  if (deopt_kind == DeoptimizeKind::kLazy) {
+    tasm()->BindExceptionHandler(exit->label());
+  } else {
+    ++non_lazy_deopt_count_;
+    tasm()->bind(exit->label());
+  }
+
+  tasm()->CallForDeoptimization(deopt_entry, deoptimization_id, exit->label(),
+                                deopt_kind, jump_deoptimization_entry_label);
+  exit->set_emitted();
+  return kSuccess;
+}
+
+void CodeGenerator::MaybeEmitOutOfLineConstantPool() {
+  tasm()->MaybeEmitOutOfLineConstantPool();
+}
+
+void CodeGenerator::AssembleCode() {
+  OptimizedCompilationInfo* info = this->info();
+
+  // Open a frame scope to indicate that there is a frame on the stack.  The
+  // MANUAL indicates that the scope shouldn't actually generate code to set up
+  // the frame (that is done in AssemblePrologue).
+  FrameScope frame_scope(tasm(), StackFrame::MANUAL);
+
+  if (info->source_positions()) {
+    AssembleSourcePosition(start_source_position());
+  }
+  offsets_info_.code_start_register_check = tasm()->pc_offset();
+
+  tasm()->CodeEntry();
+
+  // Check that {kJavaScriptCallCodeStartRegister} has been set correctly.
+  if (FLAG_debug_code && info->called_with_code_start_register()) {
+    tasm()->RecordComment("-- Prologue: check code start register --");
+    AssembleCodeStartRegisterCheck();
+  }
+
+  offsets_info_.deopt_check = tasm()->pc_offset();
+  // We want to bailout only from JS functions, which are the only ones
+  // that are optimized.
+  if (info->IsOptimizing()) {
+    DCHECK(linkage()->GetIncomingDescriptor()->IsJSFunctionCall());
+    tasm()->RecordComment("-- Prologue: check for deoptimization --");
+    BailoutIfDeoptimized();
+  }
+
+  offsets_info_.init_poison = tasm()->pc_offset();
+  InitializeSpeculationPoison();
+
+  // Define deoptimization literals for all inlined functions.
+  DCHECK_EQ(0u, deoptimization_literals_.size());
+  for (OptimizedCompilationInfo::InlinedFunctionHolder& inlined :
+       info->inlined_functions()) {
+    if (!inlined.shared_info.equals(info->shared_info())) {
+      int index = DefineDeoptimizationLiteral(
+          DeoptimizationLiteral(inlined.shared_info));
+      inlined.RegisterInlinedFunctionId(index);
+    }
+  }
+  inlined_function_count_ = deoptimization_literals_.size();
+
+  // Define deoptimization literals for all BytecodeArrays to which we might
+  // deopt to ensure they are strongly held by the optimized code.
+  if (info->has_bytecode_array()) {
+    DefineDeoptimizationLiteral(DeoptimizationLiteral(info->bytecode_array()));
+  }
+  for (OptimizedCompilationInfo::InlinedFunctionHolder& inlined :
+       info->inlined_functions()) {
+    DefineDeoptimizationLiteral(DeoptimizationLiteral(inlined.bytecode_array));
+  }
+
+  unwinding_info_writer_.SetNumberOfInstructionBlocks(
+      instructions()->InstructionBlockCount());
+
+  if (info->trace_turbo_json()) {
+    block_starts_.assign(instructions()->instruction_blocks().size(), -1);
+    instr_starts_.assign(instructions()->instructions().size(), {});
+  }
+  // Assemble instructions in assembly order.
+  offsets_info_.blocks_start = tasm()->pc_offset();
+  for (const InstructionBlock* block : instructions()->ao_blocks()) {
+    // Align loop headers on vendor recommended boundaries.
+    if (block->ShouldAlign() && !tasm()->jump_optimization_info()) {
+      tasm()->CodeTargetAlign();
+    }
+    if (info->trace_turbo_json()) {
+      block_starts_[block->rpo_number().ToInt()] = tasm()->pc_offset();
+    }
+    // Bind a label for a block.
+    current_block_ = block->rpo_number();
+    unwinding_info_writer_.BeginInstructionBlock(tasm()->pc_offset(), block);
+    if (FLAG_code_comments) {
+      std::ostringstream buffer;
+      buffer << "-- B" << block->rpo_number().ToInt() << " start";
+      if (block->IsDeferred()) buffer << " (deferred)";
+      if (!block->needs_frame()) buffer << " (no frame)";
+      if (block->must_construct_frame()) buffer << " (construct frame)";
+      if (block->must_deconstruct_frame()) buffer << " (deconstruct frame)";
+
+      if (block->IsLoopHeader()) {
+        buffer << " (loop up to " << block->loop_end().ToInt() << ")";
+      }
+      if (block->loop_header().IsValid()) {
+        buffer << " (in loop " << block->loop_header().ToInt() << ")";
+      }
+      buffer << " --";
+      tasm()->RecordComment(buffer.str().c_str());
+    }
+
+    frame_access_state()->MarkHasFrame(block->needs_frame());
+
+    tasm()->bind(GetLabel(current_block_));
+
+    TryInsertBranchPoisoning(block);
+
+    if (block->must_construct_frame()) {
+      AssembleConstructFrame();
+      // We need to setup the root register after we assemble the prologue, to
+      // avoid clobbering callee saved registers in case of C linkage and
+      // using the roots.
+      // TODO(mtrofin): investigate how we can avoid doing this repeatedly.
+      if (linkage()->GetIncomingDescriptor()->InitializeRootRegister()) {
+        tasm()->InitializeRootRegister();
+      }
+    }
+
+    if (FLAG_enable_embedded_constant_pool && !block->needs_frame()) {
+      ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
+      result_ = AssembleBlock(block);
+    } else {
+      result_ = AssembleBlock(block);
+    }
+    if (result_ != kSuccess) return;
+    unwinding_info_writer_.EndInstructionBlock(block);
+  }
+
+  // Assemble all out-of-line code.
+  offsets_info_.out_of_line_code = tasm()->pc_offset();
+  if (ools_) {
+    tasm()->RecordComment("-- Out of line code --");
+    for (OutOfLineCode* ool = ools_; ool; ool = ool->next()) {
+      tasm()->bind(ool->entry());
+      ool->Generate();
+      if (ool->exit()->is_bound()) tasm()->jmp(ool->exit());
+    }
+  }
+
+  // This nop operation is needed to ensure that the trampoline is not
+  // confused with the pc of the call before deoptimization.
+  // The test regress/regress-259 is an example of where we need it.
+  tasm()->nop();
+
+  // For some targets, we must make sure that constant and veneer pools are
+  // emitted before emitting the deoptimization exits.
+  PrepareForDeoptimizationExits(&deoptimization_exits_);
+
+  if (Deoptimizer::kSupportsFixedDeoptExitSizes) {
+    deopt_exit_start_offset_ = tasm()->pc_offset();
+  }
+
+  // Assemble deoptimization exits.
+  offsets_info_.deoptimization_exits = tasm()->pc_offset();
+  int last_updated = 0;
+  // We sort the deoptimization exits here so that the lazy ones will
+  // be visited last. We need this as on architectures where
+  // Deoptimizer::kSupportsFixedDeoptExitSizes is true, lazy deopts
+  // might need additional instructions.
+  auto cmp = [](const DeoptimizationExit* a, const DeoptimizationExit* b) {
+    static_assert(DeoptimizeKind::kLazy == kLastDeoptimizeKind,
+                  "lazy deopts are expected to be emitted last");
+    if (a->kind() != b->kind()) {
+      return a->kind() < b->kind();
+    }
+    return a->pc_offset() < b->pc_offset();
+  };
+  if (Deoptimizer::kSupportsFixedDeoptExitSizes) {
+    std::sort(deoptimization_exits_.begin(), deoptimization_exits_.end(), cmp);
+  }
+
+  for (DeoptimizationExit* exit : deoptimization_exits_) {
+    if (exit->emitted()) continue;
+    if (Deoptimizer::kSupportsFixedDeoptExitSizes) {
+      exit->set_deoptimization_id(next_deoptimization_id_++);
+    }
+    result_ = AssembleDeoptimizerCall(exit);
+    if (result_ != kSuccess) return;
+
+    // UpdateDeoptimizationInfo expects lazy deopts to be visited in pc_offset
+    // order, which is always the case since they are added to
+    // deoptimization_exits_ in that order, and the optional sort operation
+    // above preserves that order.
+    if (exit->kind() == DeoptimizeKind::kLazy) {
+      int trampoline_pc = exit->label()->pos();
+      last_updated = safepoints()->UpdateDeoptimizationInfo(
+          exit->pc_offset(), trampoline_pc, last_updated,
+          exit->deoptimization_id());
+    }
+  }
+
+  offsets_info_.pools = tasm()->pc_offset();
+  // TODO(jgruber): Move all inlined metadata generation into a new,
+  // architecture-independent version of FinishCode. Currently, this includes
+  // the safepoint table, handler table, constant pool, and code comments, in
+  // that order.
+  FinishCode();
+
+  offsets_info_.jump_tables = tasm()->pc_offset();
+  // Emit the jump tables.
+  if (jump_tables_) {
+    tasm()->Align(kSystemPointerSize);
+    for (JumpTable* table = jump_tables_; table; table = table->next()) {
+      tasm()->bind(table->label());
+      AssembleJumpTable(table->targets(), table->target_count());
+    }
+  }
+
+  // The PerfJitLogger logs code up until here, excluding the safepoint
+  // table. Resolve the unwinding info now so it is aware of the same code
+  // size as reported by perf.
+  unwinding_info_writer_.Finish(tasm()->pc_offset());
+
+  // Final alignment before starting on the metadata section.
+  tasm()->Align(Code::kMetadataAlignment);
+
+  safepoints()->Emit(tasm(), frame()->GetTotalFrameSlotCount());
+
+  // Emit the exception handler table.
+  if (!handlers_.empty()) {
+    handler_table_offset_ = HandlerTable::EmitReturnTableStart(tasm());
+    for (size_t i = 0; i < handlers_.size(); ++i) {
+      HandlerTable::EmitReturnEntry(tasm(), handlers_[i].pc_offset,
+                                    handlers_[i].handler->pos());
+    }
+  }
+
+  tasm()->MaybeEmitOutOfLineConstantPool();
+  tasm()->FinalizeJumpOptimizationInfo();
+
+  result_ = kSuccess;
+}
+
+void CodeGenerator::TryInsertBranchPoisoning(const InstructionBlock* block) {
+  // See if our predecessor was a basic block terminated by a branch_and_poison
+  // instruction. If yes, then perform the masking based on the flags.
+  if (block->PredecessorCount() != 1) return;
+  RpoNumber pred_rpo = (block->predecessors())[0];
+  const InstructionBlock* pred = instructions()->InstructionBlockAt(pred_rpo);
+  if (pred->code_start() == pred->code_end()) return;
+  Instruction* instr = instructions()->InstructionAt(pred->code_end() - 1);
+  FlagsMode mode = FlagsModeField::decode(instr->opcode());
+  switch (mode) {
+    case kFlags_branch_and_poison: {
+      BranchInfo branch;
+      RpoNumber target = ComputeBranchInfo(&branch, instr);
+      if (!target.IsValid()) {
+        // Non-trivial branch, add the masking code.
+        FlagsCondition condition = branch.condition;
+        if (branch.false_label == GetLabel(block->rpo_number())) {
+          condition = NegateFlagsCondition(condition);
+        }
+        AssembleBranchPoisoning(condition, instr);
+      }
+      break;
+    }
+    case kFlags_deoptimize_and_poison: {
+      UNREACHABLE();
+    }
+    default:
+      break;
+  }
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitchRange(
+    Register input, RpoNumber def_block, std::pair<int32_t, Label*>* begin,
+    std::pair<int32_t, Label*>* end) {
+  if (end - begin < kBinarySearchSwitchMinimalCases) {
+    while (begin != end) {
+      tasm()->JumpIfEqual(input, begin->first, begin->second);
+      ++begin;
+    }
+    AssembleArchJump(def_block);
+    return;
+  }
+  auto middle = begin + (end - begin) / 2;
+  Label less_label;
+  tasm()->JumpIfLessThan(input, middle->first, &less_label);
+  AssembleArchBinarySearchSwitchRange(input, def_block, middle, end);
+  tasm()->bind(&less_label);
+  AssembleArchBinarySearchSwitchRange(input, def_block, begin, middle);
+}
+
+OwnedVector<byte> CodeGenerator::GetSourcePositionTable() {
+  return source_position_table_builder_.ToSourcePositionTableVector();
+}
+
+OwnedVector<byte> CodeGenerator::GetProtectedInstructionsData() {
+  return OwnedVector<byte>::Of(
+      Vector<byte>::cast(VectorOf(protected_instructions_)));
+}
+
+MaybeHandle<Code> CodeGenerator::FinalizeCode() {
+  if (result_ != kSuccess) {
+    tasm()->AbortedCodeGeneration();
+    return MaybeHandle<Code>();
+  }
+
+  // Allocate the source position table.
+  Handle<ByteArray> source_positions =
+      source_position_table_builder_.ToSourcePositionTable(isolate());
+
+  // Allocate deoptimization data.
+  Handle<DeoptimizationData> deopt_data = GenerateDeoptimizationData();
+
+  // Allocate and install the code.
+  CodeDesc desc;
+  tasm()->GetCode(isolate(), &desc, safepoints(), handler_table_offset_);
+
+#if defined(V8_OS_WIN64)
+  if (Builtins::IsBuiltinId(info_->builtin_index())) {
+    isolate_->SetBuiltinUnwindData(info_->builtin_index(),
+                                   tasm()->GetUnwindInfo());
+  }
+#endif  // V8_OS_WIN64
+
+  if (unwinding_info_writer_.eh_frame_writer()) {
+    unwinding_info_writer_.eh_frame_writer()->GetEhFrame(&desc);
+  }
+
+  MaybeHandle<Code> maybe_code =
+      Factory::CodeBuilder(isolate(), desc, info()->code_kind())
+          .set_builtin_index(info()->builtin_index())
+          .set_inlined_bytecode_size(info()->inlined_bytecode_size())
+          .set_source_position_table(source_positions)
+          .set_deoptimization_data(deopt_data)
+          .set_is_turbofanned()
+          .set_stack_slots(frame()->GetTotalFrameSlotCount())
+          .set_profiler_data(info()->profiler_data())
+          .TryBuild();
+
+  Handle<Code> code;
+  if (!maybe_code.ToHandle(&code)) {
+    tasm()->AbortedCodeGeneration();
+    return MaybeHandle<Code>();
+  }
+
+  // TODO(jgruber,v8:8888): Turn this into a DCHECK once confidence is
+  // high that the implementation is complete.
+  CHECK_IMPLIES(info()->IsNativeContextIndependent(),
+                code->IsNativeContextIndependent(isolate()));
+
+  // Counts both compiled code and metadata.
+  isolate()->counters()->total_compiled_code_size()->Increment(
+      code->raw_body_size());
+
+  LOG_CODE_EVENT(isolate(),
+                 CodeLinePosInfoRecordEvent(code->raw_instruction_start(),
+                                            *source_positions));
+
+  return code;
+}
+
+bool CodeGenerator::IsNextInAssemblyOrder(RpoNumber block) const {
+  return instructions()
+      ->InstructionBlockAt(current_block_)
+      ->ao_number()
+      .IsNext(instructions()->InstructionBlockAt(block)->ao_number());
+}
+
+void CodeGenerator::RecordSafepoint(ReferenceMap* references,
+                                    Safepoint::DeoptMode deopt_mode) {
+  Safepoint safepoint = safepoints()->DefineSafepoint(tasm(), deopt_mode);
+  int stackSlotToSpillSlotDelta =
+      frame()->GetTotalFrameSlotCount() - frame()->GetSpillSlotCount();
+  for (const InstructionOperand& operand : references->reference_operands()) {
+    if (operand.IsStackSlot()) {
+      int index = LocationOperand::cast(operand).index();
+      DCHECK_LE(0, index);
+      // We might index values in the fixed part of the frame (i.e. the
+      // closure pointer or the context pointer); these are not spill slots
+      // and therefore don't work with the SafepointTable currently, but
+      // we also don't need to worry about them, since the GC has special
+      // knowledge about those fields anyway.
+      if (index < stackSlotToSpillSlotDelta) continue;
+      safepoint.DefinePointerSlot(index);
+    }
+  }
+}
+
+bool CodeGenerator::IsMaterializableFromRoot(Handle<HeapObject> object,
+                                             RootIndex* index_return) {
+  const CallDescriptor* incoming_descriptor =
+      linkage()->GetIncomingDescriptor();
+  if (incoming_descriptor->flags() & CallDescriptor::kCanUseRoots) {
+    return isolate()->roots_table().IsRootHandle(object, index_return) &&
+           RootsTable::IsImmortalImmovable(*index_return);
+  }
+  return false;
+}
+
+CodeGenerator::CodeGenResult CodeGenerator::AssembleBlock(
+    const InstructionBlock* block) {
+  if (block->IsHandler()) {
+    tasm()->ExceptionHandler();
+  }
+  for (int i = block->code_start(); i < block->code_end(); ++i) {
+    CodeGenResult result = AssembleInstruction(i, block);
+    if (result != kSuccess) return result;
+  }
+  return kSuccess;
+}
+
+bool CodeGenerator::IsValidPush(InstructionOperand source,
+                                CodeGenerator::PushTypeFlags push_type) {
+  if (source.IsImmediate() &&
+      ((push_type & CodeGenerator::kImmediatePush) != 0)) {
+    return true;
+  }
+  if (source.IsRegister() &&
+      ((push_type & CodeGenerator::kRegisterPush) != 0)) {
+    return true;
+  }
+  if (source.IsStackSlot() &&
+      ((push_type & CodeGenerator::kStackSlotPush) != 0)) {
+    return true;
+  }
+  return false;
+}
+
+void CodeGenerator::GetPushCompatibleMoves(Instruction* instr,
+                                           PushTypeFlags push_type,
+                                           ZoneVector<MoveOperands*>* pushes) {
+  static constexpr int first_push_compatible_index =
+      kReturnAddressStackSlotCount;
+  pushes->clear();
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; ++i) {
+    Instruction::GapPosition inner_pos =
+        static_cast<Instruction::GapPosition>(i);
+    ParallelMove* parallel_move = instr->GetParallelMove(inner_pos);
+    if (parallel_move != nullptr) {
+      for (auto move : *parallel_move) {
+        InstructionOperand source = move->source();
+        InstructionOperand destination = move->destination();
+        // If there are any moves from slots that will be overridden by pushes,
+        // then the full gap resolver must be used, since the push optimization
+        // doesn't participate in the parallel move and might clobber values
+        // needed for the gap resolve.
+        if (source.IsAnyStackSlot() && LocationOperand::cast(source).index() >=
+                                           first_push_compatible_index) {
+          pushes->clear();
+          return;
+        }
+        // TODO(danno): Right now, only consider moves from the FIRST gap for
+        // pushes. Theoretically, we could extract pushes for both gaps (there
+        // are cases where this happens), but the logic for that would also have
+        // to check to make sure that non-memory inputs to the pushes from the
+        // LAST gap don't get clobbered in the FIRST gap.
+        if (i == Instruction::FIRST_GAP_POSITION) {
+          if (destination.IsStackSlot() &&
+              LocationOperand::cast(destination).index() >=
+                  first_push_compatible_index) {
+            int index = LocationOperand::cast(destination).index();
+            if (IsValidPush(source, push_type)) {
+              if (index >= static_cast<int>(pushes->size())) {
+                pushes->resize(index + 1);
+              }
+              (*pushes)[index] = move;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // For now, only support a set of contiguous pushes at the end of the list.
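+  // Worked example (hypothetical moves): if {pushes} ends up as
+  // {nullptr, m1, nullptr, m3, m4}, the reverse scan below stops at the
+  // nullptr in slot 2, so push_begin becomes 3 and only the contiguous tail
+  // {m3, m4} is kept; the isolated m1 is dropped.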
+  size_t push_count_upper_bound = pushes->size();
+  size_t push_begin = push_count_upper_bound;
+  for (auto move : base::Reversed(*pushes)) {
+    if (move == nullptr) break;
+    push_begin--;
+  }
+  size_t push_count = pushes->size() - push_begin;
+  std::copy(pushes->begin() + push_begin,
+            pushes->begin() + push_begin + push_count, pushes->begin());
+  pushes->resize(push_count);
+}
+
+CodeGenerator::MoveType::Type CodeGenerator::MoveType::InferMove(
+    InstructionOperand* source, InstructionOperand* destination) {
+  if (source->IsConstant()) {
+    if (destination->IsAnyRegister()) {
+      return MoveType::kConstantToRegister;
+    } else {
+      DCHECK(destination->IsAnyStackSlot());
+      return MoveType::kConstantToStack;
+    }
+  }
+  DCHECK(LocationOperand::cast(source)->IsCompatible(
+      LocationOperand::cast(destination)));
+  if (source->IsAnyRegister()) {
+    if (destination->IsAnyRegister()) {
+      return MoveType::kRegisterToRegister;
+    } else {
+      DCHECK(destination->IsAnyStackSlot());
+      return MoveType::kRegisterToStack;
+    }
+  } else {
+    DCHECK(source->IsAnyStackSlot());
+    if (destination->IsAnyRegister()) {
+      return MoveType::kStackToRegister;
+    } else {
+      DCHECK(destination->IsAnyStackSlot());
+      return MoveType::kStackToStack;
+    }
+  }
+}
+
+CodeGenerator::MoveType::Type CodeGenerator::MoveType::InferSwap(
+    InstructionOperand* source, InstructionOperand* destination) {
+  DCHECK(LocationOperand::cast(source)->IsCompatible(
+      LocationOperand::cast(destination)));
+  if (source->IsAnyRegister()) {
+    if (destination->IsAnyRegister()) {
+      return MoveType::kRegisterToRegister;
+    } else {
+      DCHECK(destination->IsAnyStackSlot());
+      return MoveType::kRegisterToStack;
+    }
+  } else {
+    DCHECK(source->IsAnyStackSlot());
+    DCHECK(destination->IsAnyStackSlot());
+    return MoveType::kStackToStack;
+  }
+}
+
+RpoNumber CodeGenerator::ComputeBranchInfo(BranchInfo* branch,
+                                           Instruction* instr) {
+  // Assemble a branch after this instruction.
+  InstructionOperandConverter i(this, instr);
+  RpoNumber true_rpo = i.InputRpo(instr->InputCount() - 2);
+  RpoNumber false_rpo = i.InputRpo(instr->InputCount() - 1);
+
+  if (true_rpo == false_rpo) {
+    return true_rpo;
+  }
+  FlagsCondition condition = FlagsConditionField::decode(instr->opcode());
+  if (IsNextInAssemblyOrder(true_rpo)) {
+    // The true block is next; we can fall through if the condition is
+    // negated.
+    std::swap(true_rpo, false_rpo);
+    condition = NegateFlagsCondition(condition);
+  }
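+  // Example (hypothetical blocks): if the original true target is the block
+  // emitted next, the swap above makes the branch test the negated condition
+  // and jump to the old false target, falling through into the true target
+  // when the original condition holds.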
+  branch->condition = condition;
+  branch->true_label = GetLabel(true_rpo);
+  branch->false_label = GetLabel(false_rpo);
+  branch->fallthru = IsNextInAssemblyOrder(false_rpo);
+  return RpoNumber::Invalid();
+}
+
+CodeGenerator::CodeGenResult CodeGenerator::AssembleInstruction(
+    int instruction_index, const InstructionBlock* block) {
+  Instruction* instr = instructions()->InstructionAt(instruction_index);
+  if (info()->trace_turbo_json()) {
+    instr_starts_[instruction_index].gap_pc_offset = tasm()->pc_offset();
+  }
+  int first_unused_stack_slot;
+  FlagsMode mode = FlagsModeField::decode(instr->opcode());
+  if (mode != kFlags_trap) {
+    AssembleSourcePosition(instr);
+  }
+  bool adjust_stack =
+      GetSlotAboveSPBeforeTailCall(instr, &first_unused_stack_slot);
+  if (adjust_stack) AssembleTailCallBeforeGap(instr, first_unused_stack_slot);
+  AssembleGaps(instr);
+  if (adjust_stack) AssembleTailCallAfterGap(instr, first_unused_stack_slot);
+  DCHECK_IMPLIES(
+      block->must_deconstruct_frame(),
+      instr != instructions()->InstructionAt(block->last_instruction_index()) ||
+          instr->IsRet() || instr->IsJump());
+  if (instr->IsJump() && block->must_deconstruct_frame()) {
+    AssembleDeconstructFrame();
+  }
+  if (info()->trace_turbo_json()) {
+    instr_starts_[instruction_index].arch_instr_pc_offset = tasm()->pc_offset();
+  }
+  // Assemble architecture-specific code for the instruction.
+  CodeGenResult result = AssembleArchInstruction(instr);
+  if (result != kSuccess) return result;
+
+  if (info()->trace_turbo_json()) {
+    instr_starts_[instruction_index].condition_pc_offset = tasm()->pc_offset();
+  }
+
+  FlagsCondition condition = FlagsConditionField::decode(instr->opcode());
+  switch (mode) {
+    case kFlags_branch:
+    case kFlags_branch_and_poison: {
+      BranchInfo branch;
+      RpoNumber target = ComputeBranchInfo(&branch, instr);
+      if (target.IsValid()) {
+        // Redundant branch.
+        if (!IsNextInAssemblyOrder(target)) {
+          AssembleArchJump(target);
+        }
+        return kSuccess;
+      }
+      // Assemble architecture-specific branch.
+      AssembleArchBranch(instr, &branch);
+      break;
+    }
+    case kFlags_deoptimize:
+    case kFlags_deoptimize_and_poison: {
+      // Assemble a conditional eager deoptimization after this instruction.
+      InstructionOperandConverter i(this, instr);
+      size_t frame_state_offset = MiscField::decode(instr->opcode());
+      DeoptimizationExit* const exit =
+          AddDeoptimizationExit(instr, frame_state_offset);
+      Label continue_label;
+      BranchInfo branch;
+      branch.condition = condition;
+      branch.true_label = exit->label();
+      branch.false_label = &continue_label;
+      branch.fallthru = true;
+      // Assemble architecture-specific branch.
+      AssembleArchDeoptBranch(instr, &branch);
+      tasm()->bind(&continue_label);
+      if (mode == kFlags_deoptimize_and_poison) {
+        AssembleBranchPoisoning(NegateFlagsCondition(branch.condition), instr);
+      }
+      break;
+    }
+    case kFlags_set: {
+      // Assemble a boolean materialization after this instruction.
+      AssembleArchBoolean(instr, condition);
+      break;
+    }
+    case kFlags_trap: {
+      AssembleArchTrap(instr, condition);
+      break;
+    }
+    case kFlags_none: {
+      break;
+    }
+  }
+
+  // TODO(jarin) We should thread the flag through rather than set it.
+  if (instr->IsCall()) {
+    ResetSpeculationPoison();
+  }
+
+  return kSuccess;
+}
+
+void CodeGenerator::AssembleSourcePosition(Instruction* instr) {
+  SourcePosition source_position = SourcePosition::Unknown();
+  if (instr->IsNop() && instr->AreMovesRedundant()) return;
+  if (!instructions()->GetSourcePosition(instr, &source_position)) return;
+  AssembleSourcePosition(source_position);
+}
+
+void CodeGenerator::AssembleSourcePosition(SourcePosition source_position) {
+  if (source_position == current_source_position_) return;
+  current_source_position_ = source_position;
+  if (!source_position.IsKnown()) return;
+  source_position_table_builder_.AddPosition(tasm()->pc_offset(),
+                                             source_position, false);
+  if (FLAG_code_comments) {
+    OptimizedCompilationInfo* info = this->info();
+    if (!info->IsOptimizing() && !info->IsWasm()) return;
+    std::ostringstream buffer;
+    buffer << "-- ";
+    // Turbolizer only needs the source position, as it can reconstruct
+    // the inlining stack from other information.
+    if (info->trace_turbo_json() || !tasm()->isolate() ||
+        tasm()->isolate()->concurrent_recompilation_enabled()) {
+      buffer << source_position;
+    } else {
+      AllowHeapAllocation allocation;
+      AllowHandleAllocation handles;
+      AllowHandleDereference deref;
+      buffer << source_position.InliningStack(info);
+    }
+    buffer << " --";
+    tasm()->RecordComment(buffer.str().c_str());
+  }
+}
+
+bool CodeGenerator::GetSlotAboveSPBeforeTailCall(Instruction* instr,
+                                                 int* slot) {
+  if (instr->IsTailCall()) {
+    InstructionOperandConverter g(this, instr);
+    *slot = g.InputInt32(instr->InputCount() - 1);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+StubCallMode CodeGenerator::DetermineStubCallMode() const {
+  CodeKind code_kind = info()->code_kind();
+  return (code_kind == CodeKind::WASM_FUNCTION ||
+          code_kind == CodeKind::WASM_TO_CAPI_FUNCTION ||
+          code_kind == CodeKind::WASM_TO_JS_FUNCTION)
+             ? StubCallMode::kCallWasmRuntimeStub
+             : StubCallMode::kCallCodeObject;
+}
+
+void CodeGenerator::AssembleGaps(Instruction* instr) {
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; i++) {
+    Instruction::GapPosition inner_pos =
+        static_cast<Instruction::GapPosition>(i);
+    ParallelMove* move = instr->GetParallelMove(inner_pos);
+    if (move != nullptr) resolver()->Resolve(move);
+  }
+}
+
+namespace {
+
+Handle<PodArray<InliningPosition>> CreateInliningPositions(
+    OptimizedCompilationInfo* info, Isolate* isolate) {
+  const OptimizedCompilationInfo::InlinedFunctionList& inlined_functions =
+      info->inlined_functions();
+  if (inlined_functions.size() == 0) {
+    return Handle<PodArray<InliningPosition>>::cast(
+        isolate->factory()->empty_byte_array());
+  }
+  Handle<PodArray<InliningPosition>> inl_positions =
+      PodArray<InliningPosition>::New(
+          isolate, static_cast<int>(inlined_functions.size()),
+          AllocationType::kOld);
+  for (size_t i = 0; i < inlined_functions.size(); ++i) {
+    inl_positions->set(static_cast<int>(i), inlined_functions[i].position);
+  }
+  return inl_positions;
+}
+
+}  // namespace
+
+Handle<DeoptimizationData> CodeGenerator::GenerateDeoptimizationData() {
+  OptimizedCompilationInfo* info = this->info();
+  int deopt_count = static_cast<int>(deoptimization_exits_.size());
+  if (deopt_count == 0 && !info->is_osr()) {
+    return DeoptimizationData::Empty(isolate());
+  }
+  Handle<DeoptimizationData> data =
+      DeoptimizationData::New(isolate(), deopt_count, AllocationType::kOld);
+
+  Handle<ByteArray> translation_array =
+      translations_.CreateByteArray(isolate()->factory());
+
+  data->SetTranslationByteArray(*translation_array);
+  data->SetInlinedFunctionCount(
+      Smi::FromInt(static_cast<int>(inlined_function_count_)));
+  data->SetOptimizationId(Smi::FromInt(info->optimization_id()));
+
+  data->SetDeoptExitStart(Smi::FromInt(deopt_exit_start_offset_));
+  data->SetNonLazyDeoptCount(Smi::FromInt(non_lazy_deopt_count_));
+
+  if (info->has_shared_info()) {
+    data->SetSharedFunctionInfo(*info->shared_info());
+  } else {
+    data->SetSharedFunctionInfo(Smi::zero());
+  }
+
+  Handle<FixedArray> literals = isolate()->factory()->NewFixedArray(
+      static_cast<int>(deoptimization_literals_.size()), AllocationType::kOld);
+  for (unsigned i = 0; i < deoptimization_literals_.size(); i++) {
+    Handle<Object> object = deoptimization_literals_[i].Reify(isolate());
+    CHECK(!object.is_null());
+    literals->set(i, *object);
+  }
+  data->SetLiteralArray(*literals);
+
+  Handle<PodArray<InliningPosition>> inl_pos =
+      CreateInliningPositions(info, isolate());
+  data->SetInliningPositions(*inl_pos);
+
+  if (info->is_osr()) {
+    DCHECK_LE(0, osr_pc_offset_);
+    data->SetOsrBytecodeOffset(Smi::FromInt(info_->osr_offset().ToInt()));
+    data->SetOsrPcOffset(Smi::FromInt(osr_pc_offset_));
+  } else {
+    BailoutId osr_offset = BailoutId::None();
+    data->SetOsrBytecodeOffset(Smi::FromInt(osr_offset.ToInt()));
+    data->SetOsrPcOffset(Smi::FromInt(-1));
+  }
+
+  // Populate deoptimization entries.
+  for (int i = 0; i < deopt_count; i++) {
+    DeoptimizationExit* deoptimization_exit = deoptimization_exits_[i];
+    CHECK_NOT_NULL(deoptimization_exit);
+    DCHECK_EQ(i, deoptimization_exit->deoptimization_id());
+    data->SetBytecodeOffset(i, deoptimization_exit->bailout_id());
+    data->SetTranslationIndex(
+        i, Smi::FromInt(deoptimization_exit->translation_id()));
+    data->SetPc(i, Smi::FromInt(deoptimization_exit->pc_offset()));
+  }
+
+  return data;
+}
+
+Label* CodeGenerator::AddJumpTable(Label** targets, size_t target_count) {
+  jump_tables_ = zone()->New<JumpTable>(jump_tables_, targets, target_count);
+  return jump_tables_->label();
+}
+
+void CodeGenerator::RecordCallPosition(Instruction* instr) {
+  const bool needs_frame_state =
+      instr->HasCallDescriptorFlag(CallDescriptor::kNeedsFrameState);
+  RecordSafepoint(instr->reference_map(), needs_frame_state
+                                              ? Safepoint::kLazyDeopt
+                                              : Safepoint::kNoLazyDeopt);
+
+  if (instr->HasCallDescriptorFlag(CallDescriptor::kHasExceptionHandler)) {
+    InstructionOperandConverter i(this, instr);
+    RpoNumber handler_rpo = i.InputRpo(instr->InputCount() - 1);
+    DCHECK(instructions()->InstructionBlockAt(handler_rpo)->IsHandler());
+    handlers_.push_back(
+        {GetLabel(handler_rpo), tasm()->pc_offset_for_safepoint()});
+  }
+
+  if (needs_frame_state) {
+    MarkLazyDeoptSite();
+    // If the frame state is present, it starts at argument 2, after the code
+    // address and the poison-alias index.
+    size_t frame_state_offset = 2;
+    FrameStateDescriptor* descriptor =
+        GetDeoptimizationEntry(instr, frame_state_offset).descriptor();
+    int pc_offset = tasm()->pc_offset_for_safepoint();
+    BuildTranslation(instr, pc_offset, frame_state_offset,
+                     descriptor->state_combine());
+  }
+}
+
+int CodeGenerator::DefineDeoptimizationLiteral(DeoptimizationLiteral literal) {
+  literal.Validate();
+  int result = static_cast<int>(deoptimization_literals_.size());
+  for (unsigned i = 0; i < deoptimization_literals_.size(); ++i) {
+    deoptimization_literals_[i].Validate();
+    if (deoptimization_literals_[i] == literal) return i;
+  }
+  deoptimization_literals_.push_back(literal);
+  return result;
+}
+
+DeoptimizationEntry const& CodeGenerator::GetDeoptimizationEntry(
+    Instruction* instr, size_t frame_state_offset) {
+  InstructionOperandConverter i(this, instr);
+  int const state_id = i.InputInt32(frame_state_offset);
+  return instructions()->GetDeoptimizationEntry(state_id);
+}
+
+void CodeGenerator::TranslateStateValueDescriptor(
+    StateValueDescriptor* desc, StateValueList* nested,
+    Translation* translation, InstructionOperandIterator* iter) {
+  // Note:
+  // If translation is null, we just skip the relevant instruction operands.
+  if (desc->IsNested()) {
+    if (translation != nullptr) {
+      translation->BeginCapturedObject(static_cast<int>(nested->size()));
+    }
+    for (auto field : *nested) {
+      TranslateStateValueDescriptor(field.desc, field.nested, translation,
+                                    iter);
+    }
+  } else if (desc->IsArgumentsElements()) {
+    if (translation != nullptr) {
+      translation->ArgumentsElements(desc->arguments_type());
+    }
+  } else if (desc->IsArgumentsLength()) {
+    if (translation != nullptr) {
+      translation->ArgumentsLength();
+    }
+  } else if (desc->IsDuplicate()) {
+    if (translation != nullptr) {
+      translation->DuplicateObject(static_cast<int>(desc->id()));
+    }
+  } else if (desc->IsPlain()) {
+    InstructionOperand* op = iter->Advance();
+    if (translation != nullptr) {
+      AddTranslationForOperand(translation, iter->instruction(), op,
+                               desc->type());
+    }
+  } else {
+    DCHECK(desc->IsOptimizedOut());
+    if (translation != nullptr) {
+      if (optimized_out_literal_id_ == -1) {
+        optimized_out_literal_id_ = DefineDeoptimizationLiteral(
+            DeoptimizationLiteral(isolate()->factory()->optimized_out()));
+      }
+      translation->StoreLiteral(optimized_out_literal_id_);
+    }
+  }
+}
+
+void CodeGenerator::TranslateFrameStateDescriptorOperands(
+    FrameStateDescriptor* desc, InstructionOperandIterator* iter,
+    Translation* translation) {
+  size_t index = 0;
+  StateValueList* values = desc->GetStateValueDescriptors();
+  for (StateValueList::iterator it = values->begin(); it != values->end();
+       ++it, ++index) {
+    TranslateStateValueDescriptor((*it).desc, (*it).nested, translation, iter);
+  }
+  DCHECK_EQ(desc->GetSize(), index);
+}
+
+void CodeGenerator::BuildTranslationForFrameStateDescriptor(
+    FrameStateDescriptor* descriptor, InstructionOperandIterator* iter,
+    Translation* translation, OutputFrameStateCombine state_combine) {
+  // Outer-most state must be added to translation first.
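+  // Example (hypothetical inlining): if f inlines g and we deopt inside g,
+  // g's descriptor has f's state as outer_state(), so the recursion below
+  // emits f's frame first and g's innermost frame last.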
+  if (descriptor->outer_state() != nullptr) {
+    BuildTranslationForFrameStateDescriptor(descriptor->outer_state(), iter,
+                                            translation, state_combine);
+  }
+
+  Handle<SharedFunctionInfo> shared_info;
+  if (!descriptor->shared_info().ToHandle(&shared_info)) {
+    if (!info()->has_shared_info()) {
+      return;  // Stub with no SharedFunctionInfo.
+    }
+    shared_info = info()->shared_info();
+  }
+
+  const BailoutId bailout_id = descriptor->bailout_id();
+  const int shared_info_id =
+      DefineDeoptimizationLiteral(DeoptimizationLiteral(shared_info));
+  const unsigned int height =
+      static_cast<unsigned int>(descriptor->GetHeight());
+
+  switch (descriptor->type()) {
+    case FrameStateType::kInterpretedFunction: {
+      int return_offset = 0;
+      int return_count = 0;
+      if (!state_combine.IsOutputIgnored()) {
+        return_offset = static_cast<int>(state_combine.GetOffsetToPokeAt());
+        return_count = static_cast<int>(iter->instruction()->OutputCount());
+      }
+      translation->BeginInterpretedFrame(bailout_id, shared_info_id, height,
+                                         return_offset, return_count);
+      break;
+    }
+    case FrameStateType::kArgumentsAdaptor:
+      translation->BeginArgumentsAdaptorFrame(shared_info_id, height);
+      break;
+    case FrameStateType::kConstructStub:
+      DCHECK(bailout_id.IsValidForConstructStub());
+      translation->BeginConstructStubFrame(bailout_id, shared_info_id, height);
+      break;
+    case FrameStateType::kBuiltinContinuation: {
+      translation->BeginBuiltinContinuationFrame(bailout_id, shared_info_id,
+                                                 height);
+      break;
+    }
+    case FrameStateType::kJavaScriptBuiltinContinuation: {
+      translation->BeginJavaScriptBuiltinContinuationFrame(
+          bailout_id, shared_info_id, height);
+      break;
+    }
+    case FrameStateType::kJavaScriptBuiltinContinuationWithCatch: {
+      translation->BeginJavaScriptBuiltinContinuationWithCatchFrame(
+          bailout_id, shared_info_id, height);
+      break;
+    }
+  }
+
+  TranslateFrameStateDescriptorOperands(descriptor, iter, translation);
+}
+
+DeoptimizationExit* CodeGenerator::BuildTranslation(
+    Instruction* instr, int pc_offset, size_t frame_state_offset,
+    OutputFrameStateCombine state_combine) {
+  DeoptimizationEntry const& entry =
+      GetDeoptimizationEntry(instr, frame_state_offset);
+  FrameStateDescriptor* const descriptor = entry.descriptor();
+  frame_state_offset++;
+
+  int update_feedback_count = entry.feedback().IsValid() ? 1 : 0;
+  Translation translation(&translations_,
+                          static_cast<int>(descriptor->GetFrameCount()),
+                          static_cast<int>(descriptor->GetJSFrameCount()),
+                          update_feedback_count, zone());
+  if (entry.feedback().IsValid()) {
+    DeoptimizationLiteral literal =
+        DeoptimizationLiteral(entry.feedback().vector);
+    int literal_id = DefineDeoptimizationLiteral(literal);
+    translation.AddUpdateFeedback(literal_id, entry.feedback().slot.ToInt());
+  }
+  InstructionOperandIterator iter(instr, frame_state_offset);
+  BuildTranslationForFrameStateDescriptor(descriptor, &iter, &translation,
+                                          state_combine);
+
+  DeoptimizationExit* const exit = zone()->New<DeoptimizationExit>(
+      current_source_position_, descriptor->bailout_id(), translation.index(),
+      pc_offset, entry.kind(), entry.reason());
+
+  if (!Deoptimizer::kSupportsFixedDeoptExitSizes) {
+    exit->set_deoptimization_id(next_deoptimization_id_++);
+  }
+
+  deoptimization_exits_.push_back(exit);
+  return exit;
+}
+
+void CodeGenerator::AddTranslationForOperand(Translation* translation,
+                                             Instruction* instr,
+                                             InstructionOperand* op,
+                                             MachineType type) {
+  if (op->IsStackSlot()) {
+    if (type.representation() == MachineRepresentation::kBit) {
+      translation->StoreBoolStackSlot(LocationOperand::cast(op)->index());
+    } else if (type == MachineType::Int8() || type == MachineType::Int16() ||
+               type == MachineType::Int32()) {
+      translation->StoreInt32StackSlot(LocationOperand::cast(op)->index());
+    } else if (type == MachineType::Uint8() || type == MachineType::Uint16() ||
+               type == MachineType::Uint32()) {
+      translation->StoreUint32StackSlot(LocationOperand::cast(op)->index());
+    } else if (type == MachineType::Int64()) {
+      translation->StoreInt64StackSlot(LocationOperand::cast(op)->index());
+    } else {
+#if defined(V8_COMPRESS_POINTERS)
+      CHECK(MachineRepresentation::kTagged == type.representation() ||
+            MachineRepresentation::kCompressed == type.representation());
+#else
+      CHECK(MachineRepresentation::kTagged == type.representation());
+#endif
+      translation->StoreStackSlot(LocationOperand::cast(op)->index());
+    }
+  } else if (op->IsFPStackSlot()) {
+    if (type.representation() == MachineRepresentation::kFloat64) {
+      translation->StoreDoubleStackSlot(LocationOperand::cast(op)->index());
+    } else {
+      CHECK_EQ(MachineRepresentation::kFloat32, type.representation());
+      translation->StoreFloatStackSlot(LocationOperand::cast(op)->index());
+    }
+  } else if (op->IsRegister()) {
+    InstructionOperandConverter converter(this, instr);
+    if (type.representation() == MachineRepresentation::kBit) {
+      translation->StoreBoolRegister(converter.ToRegister(op));
+    } else if (type == MachineType::Int8() || type == MachineType::Int16() ||
+               type == MachineType::Int32()) {
+      translation->StoreInt32Register(converter.ToRegister(op));
+    } else if (type == MachineType::Uint8() || type == MachineType::Uint16() ||
+               type == MachineType::Uint32()) {
+      translation->StoreUint32Register(converter.ToRegister(op));
+    } else if (type == MachineType::Int64()) {
+      translation->StoreInt64Register(converter.ToRegister(op));
+    } else {
+#if defined(V8_COMPRESS_POINTERS)
+      CHECK(MachineRepresentation::kTagged == type.representation() ||
+            MachineRepresentation::kCompressed == type.representation());
+#else
+      CHECK(MachineRepresentation::kTagged == type.representation());
+#endif
+      translation->StoreRegister(converter.ToRegister(op));
+    }
+  } else if (op->IsFPRegister()) {
+    InstructionOperandConverter converter(this, instr);
+    if (type.representation() == MachineRepresentation::kFloat64) {
+      translation->StoreDoubleRegister(converter.ToDoubleRegister(op));
+    } else {
+      CHECK_EQ(MachineRepresentation::kFloat32, type.representation());
+      translation->StoreFloatRegister(converter.ToFloatRegister(op));
+    }
+  } else {
+    CHECK(op->IsImmediate());
+    InstructionOperandConverter converter(this, instr);
+    Constant constant = converter.ToConstant(op);
+    DeoptimizationLiteral literal;
+    switch (constant.type()) {
+      case Constant::kInt32:
+        if (type.representation() == MachineRepresentation::kTagged) {
+          // When pointers are 4 bytes, we can use int32 constants to represent
+          // Smis.
+          DCHECK_EQ(4, kSystemPointerSize);
+          Smi smi(static_cast<Address>(constant.ToInt32()));
+          DCHECK(smi.IsSmi());
+          literal = DeoptimizationLiteral(smi.value());
+        } else if (type.representation() == MachineRepresentation::kBit) {
+          if (constant.ToInt32() == 0) {
+            literal =
+                DeoptimizationLiteral(isolate()->factory()->false_value());
+          } else {
+            DCHECK_EQ(1, constant.ToInt32());
+            literal = DeoptimizationLiteral(isolate()->factory()->true_value());
+          }
+        } else {
+          DCHECK(type == MachineType::Int32() ||
+                 type == MachineType::Uint32() ||
+                 type.representation() == MachineRepresentation::kWord32 ||
+                 type.representation() == MachineRepresentation::kNone);
+          DCHECK(type.representation() != MachineRepresentation::kNone ||
+                 constant.ToInt32() == FrameStateDescriptor::kImpossibleValue);
+          if (type == MachineType::Uint32()) {
+            literal = DeoptimizationLiteral(
+                static_cast<uint32_t>(constant.ToInt32()));
+          } else {
+            literal = DeoptimizationLiteral(constant.ToInt32());
+          }
+        }
+        break;
+      case Constant::kInt64:
+        DCHECK_EQ(8, kSystemPointerSize);
+        if (type.representation() == MachineRepresentation::kWord64) {
+          literal =
+              DeoptimizationLiteral(static_cast<double>(constant.ToInt64()));
+        } else {
+          // When pointers are 8 bytes, we can use int64 constants to represent
+          // Smis.
+          DCHECK_EQ(MachineRepresentation::kTagged, type.representation());
+          Smi smi(static_cast<Address>(constant.ToInt64()));
+          DCHECK(smi.IsSmi());
+          literal = DeoptimizationLiteral(smi.value());
+        }
+        break;
+      case Constant::kFloat32:
+        DCHECK(type.representation() == MachineRepresentation::kFloat32 ||
+               type.representation() == MachineRepresentation::kTagged);
+        literal = DeoptimizationLiteral(constant.ToFloat32());
+        break;
+      case Constant::kFloat64:
+        DCHECK(type.representation() == MachineRepresentation::kFloat64 ||
+               type.representation() == MachineRepresentation::kTagged);
+        literal = DeoptimizationLiteral(constant.ToFloat64().value());
+        break;
+      case Constant::kHeapObject:
+        DCHECK_EQ(MachineRepresentation::kTagged, type.representation());
+        literal = DeoptimizationLiteral(constant.ToHeapObject());
+        break;
+      case Constant::kCompressedHeapObject:
+        DCHECK_EQ(MachineType::AnyTagged(), type);
+        literal = DeoptimizationLiteral(constant.ToHeapObject());
+        break;
+      case Constant::kDelayedStringConstant:
+        DCHECK_EQ(MachineRepresentation::kTagged, type.representation());
+        literal = DeoptimizationLiteral(constant.ToDelayedStringConstant());
+        break;
+      default:
+        UNREACHABLE();
+    }
+    if (literal.object().equals(info()->closure())) {
+      translation->StoreJSFrameFunction();
+    } else {
+      int literal_id = DefineDeoptimizationLiteral(literal);
+      translation->StoreLiteral(literal_id);
+    }
+  }
+}
+
+void CodeGenerator::MarkLazyDeoptSite() {
+  last_lazy_deopt_pc_ = tasm()->pc_offset();
+}
+
+DeoptimizationExit* CodeGenerator::AddDeoptimizationExit(
+    Instruction* instr, size_t frame_state_offset) {
+  return BuildTranslation(instr, -1, frame_state_offset,
+                          OutputFrameStateCombine::Ignore());
+}
+
+void CodeGenerator::InitializeSpeculationPoison() {
+  if (poisoning_level_ == PoisoningMitigationLevel::kDontPoison) return;
+
+  // Initialize {kSpeculationPoisonRegister} either by comparing the expected
+  // with the actual call target, or by unconditionally using {-1} initially.
+  // Masking register arguments with it only makes sense in the first case.
+  if (info()->called_with_code_start_register()) {
+    tasm()->RecordComment("-- Prologue: generate speculation poison --");
+    GenerateSpeculationPoisonFromCodeStartRegister();
+    if (info()->poison_register_arguments()) {
+      AssembleRegisterArgumentPoisoning();
+    }
+  } else {
+    ResetSpeculationPoison();
+  }
+}
+
+void CodeGenerator::ResetSpeculationPoison() {
+  if (poisoning_level_ != PoisoningMitigationLevel::kDontPoison) {
+    tasm()->ResetSpeculationPoisonRegister();
+  }
+}
+
+OutOfLineCode::OutOfLineCode(CodeGenerator* gen)
+    : frame_(gen->frame()), tasm_(gen->tasm()), next_(gen->ools_) {
+  gen->ools_ = this;
+}
+
+OutOfLineCode::~OutOfLineCode() = default;
+
+Handle<Object> DeoptimizationLiteral::Reify(Isolate* isolate) const {
+  Validate();
+  switch (kind_) {
+    case DeoptimizationLiteralKind::kObject: {
+      return object_;
+    }
+    case DeoptimizationLiteralKind::kNumber: {
+      return isolate->factory()->NewNumber(number_);
+    }
+    case DeoptimizationLiteralKind::kString: {
+      return string_->AllocateStringConstant(isolate);
+    }
+    case DeoptimizationLiteralKind::kInvalid: {
+      UNREACHABLE();
+    }
+  }
+  UNREACHABLE();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/code-generator.h b/src/compiler/backend/code-generator.h
new file mode 100644
index 0000000..6181bc7
--- /dev/null
+++ b/src/compiler/backend/code-generator.h
@@ -0,0 +1,495 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_CODE_GENERATOR_H_
+#define V8_COMPILER_BACKEND_CODE_GENERATOR_H_
+
+#include <memory>
+
+#include "src/base/optional.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/safepoint-table.h"
+#include "src/codegen/source-position-table.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/backend/unwinding-info-writer.h"
+#include "src/compiler/osr.h"
+#include "src/deoptimizer/deoptimizer.h"
+#include "src/trap-handler/trap-handler.h"
+
+namespace v8 {
+namespace internal {
+
+class OptimizedCompilationInfo;
+
+namespace compiler {
+
+// Forward declarations.
+class DeoptimizationExit;
+class FrameAccessState;
+class Linkage;
+class OutOfLineCode;
+
+struct BranchInfo {
+  FlagsCondition condition;
+  Label* true_label;
+  Label* false_label;
+  bool fallthru;
+};
+
+class InstructionOperandIterator {
+ public:
+  InstructionOperandIterator(Instruction* instr, size_t pos)
+      : instr_(instr), pos_(pos) {}
+
+  Instruction* instruction() const { return instr_; }
+  InstructionOperand* Advance() { return instr_->InputAt(pos_++); }
+
+ private:
+  Instruction* instr_;
+  size_t pos_;
+};
+
+enum class DeoptimizationLiteralKind { kObject, kNumber, kString, kInvalid };
+
+// Either a non-null Handle<Object>, a double or a StringConstantBase.
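+// For example (hypothetical values), DeoptimizationLiteral(1.5) is a kNumber
+// literal whose Reify() allocates a fresh Number object, while the
+// default-constructed literal is kInvalid and fails Validate().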
+class DeoptimizationLiteral {
+ public:
+  DeoptimizationLiteral()
+      : kind_(DeoptimizationLiteralKind::kInvalid),
+        object_(),
+        number_(0),
+        string_(nullptr) {}
+  explicit DeoptimizationLiteral(Handle<Object> object)
+      : kind_(DeoptimizationLiteralKind::kObject), object_(object) {
+    CHECK(!object_.is_null());
+  }
+  explicit DeoptimizationLiteral(double number)
+      : kind_(DeoptimizationLiteralKind::kNumber), number_(number) {}
+  explicit DeoptimizationLiteral(const StringConstantBase* string)
+      : kind_(DeoptimizationLiteralKind::kString), string_(string) {}
+
+  Handle<Object> object() const { return object_; }
+  const StringConstantBase* string() const { return string_; }
+
+  bool operator==(const DeoptimizationLiteral& other) const {
+    return kind_ == other.kind_ && object_.equals(other.object_) &&
+           bit_cast<uint64_t>(number_) == bit_cast<uint64_t>(other.number_) &&
+           bit_cast<intptr_t>(string_) == bit_cast<intptr_t>(other.string_);
+  }
+
+  Handle<Object> Reify(Isolate* isolate) const;
+
+  void Validate() const {
+    CHECK_NE(kind_, DeoptimizationLiteralKind::kInvalid);
+  }
+
+  DeoptimizationLiteralKind kind() const {
+    Validate();
+    return kind_;
+  }
+
+ private:
+  DeoptimizationLiteralKind kind_;
+
+  Handle<Object> object_;
+  double number_ = 0;
+  const StringConstantBase* string_ = nullptr;
+};
+
+// These structs hold pc offsets for generated instructions and are only used
+// when tracing for turbolizer is enabled.
+struct TurbolizerCodeOffsetsInfo {
+  int code_start_register_check = -1;
+  int deopt_check = -1;
+  int init_poison = -1;
+  int blocks_start = -1;
+  int out_of_line_code = -1;
+  int deoptimization_exits = -1;
+  int pools = -1;
+  int jump_tables = -1;
+};
+
+struct TurbolizerInstructionStartInfo {
+  int gap_pc_offset = -1;
+  int arch_instr_pc_offset = -1;
+  int condition_pc_offset = -1;
+};
+
+// Generates native code for a sequence of instructions.
+class V8_EXPORT_PRIVATE CodeGenerator final : public GapResolver::Assembler {
+ public:
+  explicit CodeGenerator(
+      Zone* codegen_zone, Frame* frame, Linkage* linkage,
+      InstructionSequence* instructions, OptimizedCompilationInfo* info,
+      Isolate* isolate, base::Optional<OsrHelper> osr_helper,
+      int start_source_position, JumpOptimizationInfo* jump_opt,
+      PoisoningMitigationLevel poisoning_level, const AssemblerOptions& options,
+      int32_t builtin_index, size_t max_unoptimized_frame_height,
+      size_t max_pushed_argument_count, std::unique_ptr<AssemblerBuffer> = {},
+      const char* debug_name = nullptr);
+
+  // Generate native code. After calling AssembleCode, call FinalizeCode to
+  // produce the actual code object. If an error occurs during either phase,
+  // FinalizeCode returns an empty MaybeHandle.
+  void AssembleCode();  // Does not need to run on main thread.
+  MaybeHandle<Code> FinalizeCode();
+
+  OwnedVector<byte> GetSourcePositionTable();
+  OwnedVector<byte> GetProtectedInstructionsData();
+
+  InstructionSequence* instructions() const { return instructions_; }
+  FrameAccessState* frame_access_state() const { return frame_access_state_; }
+  const Frame* frame() const { return frame_access_state_->frame(); }
+  Isolate* isolate() const { return isolate_; }
+  Linkage* linkage() const { return linkage_; }
+
+  Label* GetLabel(RpoNumber rpo) { return &labels_[rpo.ToSize()]; }
+
+  void AddProtectedInstructionLanding(uint32_t instr_offset,
+                                      uint32_t landing_offset);
+
+  bool wasm_runtime_exception_support() const;
+
+  SourcePosition start_source_position() const {
+    return start_source_position_;
+  }
+
+  void AssembleSourcePosition(Instruction* instr);
+  void AssembleSourcePosition(SourcePosition source_position);
+
+  // Record a safepoint with the given pointer map.
+  void RecordSafepoint(ReferenceMap* references,
+                       Safepoint::DeoptMode deopt_mode);
+
+  Zone* zone() const { return zone_; }
+  TurboAssembler* tasm() { return &tasm_; }
+  SafepointTableBuilder* safepoint_table_builder() { return &safepoints_; }
+  size_t GetSafepointTableOffset() const { return safepoints_.GetCodeOffset(); }
+  size_t GetHandlerTableOffset() const { return handler_table_offset_; }
+
+  const ZoneVector<int>& block_starts() const { return block_starts_; }
+  const ZoneVector<TurbolizerInstructionStartInfo>& instr_starts() const {
+    return instr_starts_;
+  }
+
+  const TurbolizerCodeOffsetsInfo& offsets_info() const {
+    return offsets_info_;
+  }
+
+  static constexpr int kBinarySearchSwitchMinimalCases = 4;
+
+  // Returns true if an offset should be applied to the given stack check. There
+  // are two reasons that this could happen:
+  // 1. The optimized frame is smaller than the corresponding deoptimized frames
+  //    and an offset must be applied in order to be able to deopt safely.
+  // 2. The current function pushes a large number of arguments to the stack.
+  //    These are not accounted for by the initial frame setup.
+  bool ShouldApplyOffsetToStackCheck(Instruction* instr, uint32_t* offset);
+  uint32_t GetStackCheckOffset();
+
+ private:
+  GapResolver* resolver() { return &resolver_; }
+  SafepointTableBuilder* safepoints() { return &safepoints_; }
+  OptimizedCompilationInfo* info() const { return info_; }
+  OsrHelper* osr_helper() { return &(*osr_helper_); }
+
+  // Create the FrameAccessState object. The Frame is immutable from here on.
+  void CreateFrameAccessState(Frame* frame);
+
+  // Architecture-specific frame finalization.
+  void FinishFrame(Frame* frame);
+
+  // Checks if {block} will appear directly after {current_block_} when
+  // assembling code, in which case a fall-through can be used.
+  bool IsNextInAssemblyOrder(RpoNumber block) const;
+
+  // Check if a heap object can be materialized by loading from a heap root,
+  // which is cheaper on some platforms than materializing the actual heap
+  // object constant.
+  bool IsMaterializableFromRoot(Handle<HeapObject> object,
+                                RootIndex* index_return);
+
+  enum CodeGenResult { kSuccess, kTooManyDeoptimizationBailouts };
+
+  // Assemble instructions for the specified block.
+  CodeGenResult AssembleBlock(const InstructionBlock* block);
+
+  // Inserts a mask update at the beginning of an instruction block if the
+  // predecessor block ends with a masking branch.
+  void TryInsertBranchPoisoning(const InstructionBlock* block);
+
+  // Initializes the masking register in the prologue of a function.
+  void InitializeSpeculationPoison();
+  // Resets the masking register during execution of a function.
+  void ResetSpeculationPoison();
+  // Generates a mask from the pc passed in {kJavaScriptCallCodeStartRegister}.
+  void GenerateSpeculationPoisonFromCodeStartRegister();
+
+  // Assemble code for the specified instruction.
+  CodeGenResult AssembleInstruction(int instruction_index,
+                                    const InstructionBlock* block);
+  void AssembleGaps(Instruction* instr);
+
+  // Computes branch info from the given instruction. Returns a valid rpo
+  // number if the branch is redundant; in that case the returned rpo number
+  // points to the target basic block.
+  RpoNumber ComputeBranchInfo(BranchInfo* branch, Instruction* instr);
+
+  // Returns true if an instruction is a tail call that needs to adjust the
+  // stack pointer before execution. The stack slot index of the empty slot
+  // above the adjusted stack pointer is returned in |slot|.
+  bool GetSlotAboveSPBeforeTailCall(Instruction* instr, int* slot);
+
+  // Determines how to call helper stubs depending on the code kind.
+  StubCallMode DetermineStubCallMode() const;
+
+  CodeGenResult AssembleDeoptimizerCall(DeoptimizationExit* exit);
+
+  // ===========================================================================
+  // ============= Architecture-specific code generation methods. ==============
+  // ===========================================================================
+
+  CodeGenResult AssembleArchInstruction(Instruction* instr);
+  void AssembleArchJump(RpoNumber target);
+  void AssembleArchBranch(Instruction* instr, BranchInfo* branch);
+
+  // Generates special branch for deoptimization condition.
+  void AssembleArchDeoptBranch(Instruction* instr, BranchInfo* branch);
+
+  void AssembleArchBoolean(Instruction* instr, FlagsCondition condition);
+  void AssembleArchTrap(Instruction* instr, FlagsCondition condition);
+  void AssembleArchBinarySearchSwitchRange(Register input, RpoNumber def_block,
+                                           std::pair<int32_t, Label*>* begin,
+                                           std::pair<int32_t, Label*>* end);
+  void AssembleArchBinarySearchSwitch(Instruction* instr);
+  void AssembleArchTableSwitch(Instruction* instr);
+
+  // Generates code that checks whether the {kJavaScriptCallCodeStartRegister}
+  // contains the expected pointer to the start of the instruction stream.
+  void AssembleCodeStartRegisterCheck();
+
+  void AssembleBranchPoisoning(FlagsCondition condition, Instruction* instr);
+
+  // When entering code that is marked for deoptimization, rather than
+  // continuing with its execution, we jump to lazily compiled code. We need
+  // to do this because the code has already been deoptimized and needs to be
+  // unlinked from the JS functions referring to it.
+  void BailoutIfDeoptimized();
+
+  // Generates code to poison the stack pointer and implicit register arguments
+  // like the context register and the function register.
+  void AssembleRegisterArgumentPoisoning();
+
+  // Generates an architecture-specific, descriptor-specific prologue
+  // to set up a stack frame.
+  void AssembleConstructFrame();
+
+  // Generates an architecture-specific, descriptor-specific return sequence
+  // to tear down a stack frame.
+  void AssembleReturn(InstructionOperand* pop);
+
+  void AssembleDeconstructFrame();
+
+  // Generates code to manipulate the stack in preparation for a tail call.
+  void AssemblePrepareTailCall();
+
+  // Generates code to pop current frame if it is an arguments adaptor frame.
+  void AssemblePopArgumentsAdaptorFrame(Register args_reg, Register scratch1,
+                                        Register scratch2, Register scratch3);
+
+  enum PushTypeFlag {
+    kImmediatePush = 0x1,
+    kRegisterPush = 0x2,
+    kStackSlotPush = 0x4,
+    kScalarPush = kRegisterPush | kStackSlotPush
+  };
+
+  using PushTypeFlags = base::Flags<PushTypeFlag>;
+
+  static bool IsValidPush(InstructionOperand source, PushTypeFlags push_type);
+
+  // Generates a list of moves from an instruction that are candidates to be
+  // turned into push instructions on platforms that support them. In general,
+  // the push candidates are moves to a set of contiguous destination
+  // InstructionOperand locations on the stack that neither clobber values
+  // needed to resolve the gap nor use values generated by the gap, i.e. moves
+  // that can be hoisted together before the actual gap and assembled together.
+  static void GetPushCompatibleMoves(Instruction* instr,
+                                     PushTypeFlags push_type,
+                                     ZoneVector<MoveOperands*>* pushes);
+
+  class MoveType {
+   public:
+    enum Type {
+      kRegisterToRegister,
+      kRegisterToStack,
+      kStackToRegister,
+      kStackToStack,
+      kConstantToRegister,
+      kConstantToStack
+    };
+
+    // Detects what type of move or swap needs to be performed. Note that
+    // these functions do not take the representation (Tagged, FP, etc.) into
+    // account.
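+    // For instance (hypothetical operands), a constant moved into a stack
+    // slot is classified kConstantToStack, and a swap of two stack slots is
+    // kStackToStack, regardless of whether the values are tagged or FP.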
+
+    static Type InferMove(InstructionOperand* source,
+                          InstructionOperand* destination);
+    static Type InferSwap(InstructionOperand* source,
+                          InstructionOperand* destination);
+  };
+  // Called before a tail call |instr|'s gap moves are assembled and allows
+  // gap-specific pre-processing, e.g. adjustment of the sp for tail calls that
+  // need it before gap moves or conversion of certain gap moves into pushes.
+  void AssembleTailCallBeforeGap(Instruction* instr,
+                                 int first_unused_stack_slot);
+  // Called after a tail call |instr|'s gap moves are assembled and allows
+  // gap-specific post-processing, e.g. adjustment of the sp for tail calls that
+  // need it after gap moves.
+  void AssembleTailCallAfterGap(Instruction* instr,
+                                int first_unused_stack_slot);
+
+  void FinishCode();
+  void MaybeEmitOutOfLineConstantPool();
+
+  void IncrementStackAccessCounter(InstructionOperand* source,
+                                   InstructionOperand* destination);
+
+  // ===========================================================================
+  // ============== Architecture-specific gap resolver methods. ================
+  // ===========================================================================
+
+  // Interface used by the gap resolver to emit moves and swaps.
+  void AssembleMove(InstructionOperand* source,
+                    InstructionOperand* destination) final;
+  void AssembleSwap(InstructionOperand* source,
+                    InstructionOperand* destination) final;
+
+  // ===========================================================================
+  // =================== Jump table construction methods. ======================
+  // ===========================================================================
+
+  class JumpTable;
+  // Adds a jump table that is emitted after the actual code. Returns a label
+  // pointing to the beginning of the table. {targets} is assumed to be static
+  // or zone allocated.
+  Label* AddJumpTable(Label** targets, size_t target_count);
+  // Emits a jump table.
+  void AssembleJumpTable(Label** targets, size_t target_count);
+
+  // ===========================================================================
+  // ================== Deoptimization table construction. =====================
+  // ===========================================================================
+
+  void RecordCallPosition(Instruction* instr);
+  Handle<DeoptimizationData> GenerateDeoptimizationData();
+  int DefineDeoptimizationLiteral(DeoptimizationLiteral literal);
+  DeoptimizationEntry const& GetDeoptimizationEntry(Instruction* instr,
+                                                    size_t frame_state_offset);
+  DeoptimizationExit* BuildTranslation(Instruction* instr, int pc_offset,
+                                       size_t frame_state_offset,
+                                       OutputFrameStateCombine state_combine);
+  void BuildTranslationForFrameStateDescriptor(
+      FrameStateDescriptor* descriptor, InstructionOperandIterator* iter,
+      Translation* translation, OutputFrameStateCombine state_combine);
+  void TranslateStateValueDescriptor(StateValueDescriptor* desc,
+                                     StateValueList* nested,
+                                     Translation* translation,
+                                     InstructionOperandIterator* iter);
+  void TranslateFrameStateDescriptorOperands(FrameStateDescriptor* desc,
+                                             InstructionOperandIterator* iter,
+                                             Translation* translation);
+  void AddTranslationForOperand(Translation* translation, Instruction* instr,
+                                InstructionOperand* op, MachineType type);
+  void MarkLazyDeoptSite();
+
+  void PrepareForDeoptimizationExits(ZoneDeque<DeoptimizationExit*>* exits);
+  DeoptimizationExit* AddDeoptimizationExit(Instruction* instr,
+                                            size_t frame_state_offset);
+
+  // ===========================================================================
+
+  struct HandlerInfo {
+    Label* handler;
+    int pc_offset;
+  };
+
+  friend class OutOfLineCode;
+  friend class CodeGeneratorTester;
+
+  Zone* zone_;
+  Isolate* isolate_;
+  FrameAccessState* frame_access_state_;
+  Linkage* const linkage_;
+  InstructionSequence* const instructions_;
+  UnwindingInfoWriter unwinding_info_writer_;
+  OptimizedCompilationInfo* const info_;
+  Label* const labels_;
+  Label return_label_;
+  RpoNumber current_block_;
+  SourcePosition start_source_position_;
+  SourcePosition current_source_position_;
+  TurboAssembler tasm_;
+  GapResolver resolver_;
+  SafepointTableBuilder safepoints_;
+  ZoneVector<HandlerInfo> handlers_;
+  int next_deoptimization_id_ = 0;
+  int deopt_exit_start_offset_ = 0;
+  int non_lazy_deopt_count_ = 0;
+  ZoneDeque<DeoptimizationExit*> deoptimization_exits_;
+  ZoneDeque<DeoptimizationLiteral> deoptimization_literals_;
+  size_t inlined_function_count_ = 0;
+  TranslationBuffer translations_;
+  int handler_table_offset_ = 0;
+  int last_lazy_deopt_pc_ = 0;
+
+  // Deoptimization exits must be as small as possible, since their count grows
+  // with function size. {jump_deoptimization_entry_labels_} is an optimization
+  // to that effect, which extracts the (potentially large) instruction
+  // sequence for the final jump to the deoptimization entry into a single spot
+  // per Code object. All deopt exits can then near-call to this label. Note:
+  // not used on all architectures.
+  Label jump_deoptimization_entry_labels_[kDeoptimizeKindCount];
+
+  // The maximal combined height of all frames produced upon deoptimization, and
+  // the maximal number of pushed arguments for function calls. Applied as an
+  // offset to the first stack check of an optimized function.
+  const size_t max_unoptimized_frame_height_;
+  const size_t max_pushed_argument_count_;
+
+  // kArchCallCFunction could be reached either:
+  //   kArchCallCFunction;
+  // or:
+  //   kArchSaveCallerRegisters;
+  //   kArchCallCFunction;
+  //   kArchRestoreCallerRegisters;
+  // The boolean is used to distinguish the two cases. In the latter case, we
+  // also need to decide if FP registers need to be saved, which is controlled
+  // by fp_mode_.
+  bool caller_registers_saved_;
+  SaveFPRegsMode fp_mode_;
+
+  JumpTable* jump_tables_;
+  OutOfLineCode* ools_;
+  base::Optional<OsrHelper> osr_helper_;
+  int osr_pc_offset_;
+  int optimized_out_literal_id_;
+  SourcePositionTableBuilder source_position_table_builder_;
+  ZoneVector<trap_handler::ProtectedInstructionData> protected_instructions_;
+  CodeGenResult result_;
+  PoisoningMitigationLevel poisoning_level_;
+  ZoneVector<int> block_starts_;
+  TurbolizerCodeOffsetsInfo offsets_info_;
+  ZoneVector<TurbolizerInstructionStartInfo> instr_starts_;
+
+  const char* debug_name_ = nullptr;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_CODE_GENERATOR_H_
diff --git a/src/compiler/backend/frame-elider.cc b/src/compiler/backend/frame-elider.cc
new file mode 100644
index 0000000..293fc93
--- /dev/null
+++ b/src/compiler/backend/frame-elider.cc
@@ -0,0 +1,158 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/frame-elider.h"
+
+#include "src/base/iterator.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+FrameElider::FrameElider(InstructionSequence* code) : code_(code) {}
+
+void FrameElider::Run() {
+  MarkBlocks();
+  PropagateMarks();
+  MarkDeConstruction();
+}
+
+void FrameElider::MarkBlocks() {
+  for (InstructionBlock* block : instruction_blocks()) {
+    if (block->needs_frame()) continue;
+    for (int i = block->code_start(); i < block->code_end(); ++i) {
+      const Instruction* instr = InstructionAt(i);
+      if (instr->IsCall() || instr->IsDeoptimizeCall() ||
+          instr->arch_opcode() == ArchOpcode::kArchStackPointerGreaterThan ||
+          instr->arch_opcode() == ArchOpcode::kArchFramePointer) {
+        block->mark_needs_frame();
+        break;
+      }
+    }
+  }
+}
+
+void FrameElider::PropagateMarks() {
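+  // Alternates forward and backward passes until neither pass changes any
+  // block, i.e. until the needs_frame marks reach a fixpoint.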
+  while (PropagateInOrder() || PropagateReversed()) {
+  }
+}
+
+void FrameElider::MarkDeConstruction() {
+  for (InstructionBlock* block : instruction_blocks()) {
+    if (block->needs_frame()) {
+      // Special case: The start block needs a frame.
+      if (block->predecessors().empty()) {
+        block->mark_must_construct_frame();
+      }
+      // Find "frame -> no frame" transitions, inserting frame
+      // deconstructions.
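+      // Example (hypothetical blocks): a block that needs a frame, ends in a
+      // jump, and whose only successor does not need a frame is marked
+      // must_deconstruct_frame so the frame is torn down before leaving it.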
+      for (RpoNumber& succ : block->successors()) {
+        if (!InstructionBlockAt(succ)->needs_frame()) {
+          DCHECK_EQ(1U, block->SuccessorCount());
+          const Instruction* last =
+              InstructionAt(block->last_instruction_index());
+          if (last->IsThrow() || last->IsTailCall() ||
+              last->IsDeoptimizeCall()) {
+            // We need to keep the frame if we exit the block through any
+            // of these.
+            continue;
+          }
+          // The only cases when we need to deconstruct are ret and jump.
+          DCHECK(last->IsRet() || last->IsJump());
+          block->mark_must_deconstruct_frame();
+        }
+      }
+    } else {
+      // Find "no frame -> frame" transitions, inserting frame constructions.
+      for (RpoNumber& succ : block->successors()) {
+        if (InstructionBlockAt(succ)->needs_frame()) {
+          DCHECK_NE(1U, block->SuccessorCount());
+          InstructionBlockAt(succ)->mark_must_construct_frame();
+        }
+      }
+    }
+  }
+}
+
+bool FrameElider::PropagateInOrder() {
+  bool changed = false;
+  for (InstructionBlock* block : instruction_blocks()) {
+    changed |= PropagateIntoBlock(block);
+  }
+  return changed;
+}
+
+bool FrameElider::PropagateReversed() {
+  bool changed = false;
+  for (InstructionBlock* block : base::Reversed(instruction_blocks())) {
+    changed |= PropagateIntoBlock(block);
+  }
+  return changed;
+}
+
+bool FrameElider::PropagateIntoBlock(InstructionBlock* block) {
+  // Already marked, nothing to do...
+  if (block->needs_frame()) return false;
+
+  // Never mark the dummy end node; otherwise we might incorrectly decide to
+  // put frame deconstruction code there later.
+  if (block->successors().empty()) return false;
+
+  // Propagate towards the end ("downwards") if there is a predecessor needing
+  // a frame, but don't "bleed" from deferred code to non-deferred code.
+  for (RpoNumber& pred : block->predecessors()) {
+    if (InstructionBlockAt(pred)->needs_frame() &&
+        (!InstructionBlockAt(pred)->IsDeferred() || block->IsDeferred())) {
+      block->mark_needs_frame();
+      return true;
+    }
+  }
+
+  // Propagate towards the start ("upwards").
+  bool need_frame_successors = false;
+  if (block->SuccessorCount() == 1) {
+    // For single successors, propagate the needs_frame information.
+    need_frame_successors =
+        InstructionBlockAt(block->successors()[0])->needs_frame();
+  } else {
+    // For multiple successors, each successor must only have a single
+    // predecessor (because the graph is in edge-split form), so each successor
+    // can independently create/dismantle a frame if needed. Given this
+    // independent control, only propagate needs_frame if all non-deferred
+    // successors need a frame.
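+    // Example (hypothetical successors): with one non-deferred successor that
+    // needs a frame and one deferred successor, needs_frame propagates into
+    // this block; if any non-deferred successor did not need a frame, nothing
+    // would be propagated.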
+    for (RpoNumber& succ : block->successors()) {
+      InstructionBlock* successor_block = InstructionBlockAt(succ);
+      DCHECK_EQ(1, successor_block->PredecessorCount());
+      if (!successor_block->IsDeferred()) {
+        if (successor_block->needs_frame()) {
+          need_frame_successors = true;
+        } else {
+          return false;
+        }
+      }
+    }
+  }
+  if (need_frame_successors) {
+    block->mark_needs_frame();
+    return true;
+  } else {
+    return false;
+  }
+}
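+
+// A rough summary of the fixed point computed above: a block acquires
+// needs_frame either from any predecessor that already needs a frame (unless
+// that predecessor is deferred code and the block is not), or from its
+// successors: directly for a single successor, and only when every
+// non-deferred successor needs a frame for a multi-way branch. The dummy end
+// block is never marked.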
+
+const InstructionBlocks& FrameElider::instruction_blocks() const {
+  return code_->instruction_blocks();
+}
+
+InstructionBlock* FrameElider::InstructionBlockAt(RpoNumber rpo_number) const {
+  return code_->InstructionBlockAt(rpo_number);
+}
+
+Instruction* FrameElider::InstructionAt(int index) const {
+  return code_->InstructionAt(index);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/frame-elider.h b/src/compiler/backend/frame-elider.h
new file mode 100644
index 0000000..11dfce2
--- /dev/null
+++ b/src/compiler/backend/frame-elider.h
@@ -0,0 +1,39 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_FRAME_ELIDER_H_
+#define V8_COMPILER_BACKEND_FRAME_ELIDER_H_
+
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Determine which instruction blocks need a frame and where frames must be
+// constructed/deconstructed.
+class FrameElider {
+ public:
+  explicit FrameElider(InstructionSequence* code);
+  void Run();
+
+ private:
+  void MarkBlocks();
+  void PropagateMarks();
+  void MarkDeConstruction();
+  bool PropagateInOrder();
+  bool PropagateReversed();
+  bool PropagateIntoBlock(InstructionBlock* block);
+  const InstructionBlocks& instruction_blocks() const;
+  InstructionBlock* InstructionBlockAt(RpoNumber rpo_number) const;
+  Instruction* InstructionAt(int index) const;
+
+  InstructionSequence* const code_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_FRAME_ELIDER_H_
diff --git a/src/compiler/backend/gap-resolver.cc b/src/compiler/backend/gap-resolver.cc
new file mode 100644
index 0000000..e9aeb2f
--- /dev/null
+++ b/src/compiler/backend/gap-resolver.cc
@@ -0,0 +1,272 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/gap-resolver.h"
+
+#include <algorithm>
+#include <set>
+
+#include "src/base/enum-set.h"
+#include "src/codegen/register-configuration.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+namespace {
+
+// Splits a FP move between two location operands into the equivalent series of
+// moves between smaller sub-operands, e.g. a double move to two single moves.
+// This helps reduce the number of cycles that would normally occur under FP
+// aliasing, and makes swaps much easier to implement.
+MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
+                    ParallelMove* moves) {
+  DCHECK(!kSimpleFPAliasing);
+  // Splitting is only possible when the slot size is the same as float size.
+  DCHECK_EQ(kSystemPointerSize, kFloatSize);
+  const LocationOperand& src_loc = LocationOperand::cast(move->source());
+  const LocationOperand& dst_loc = LocationOperand::cast(move->destination());
+  MachineRepresentation dst_rep = dst_loc.representation();
+  DCHECK_NE(smaller_rep, dst_rep);
+  auto src_kind = src_loc.location_kind();
+  auto dst_kind = dst_loc.location_kind();
+
+  int aliases =
+      1 << (ElementSizeLog2Of(dst_rep) - ElementSizeLog2Of(smaller_rep));
+  int base = -1;
+  USE(base);
+  DCHECK_EQ(aliases, RegisterConfiguration::Default()->GetAliases(
+                         dst_rep, 0, smaller_rep, &base));
+
+  int src_index = -1;
+  int slot_size = (1 << ElementSizeLog2Of(smaller_rep)) / kSystemPointerSize;
+  int src_step = 1;
+  if (src_kind == LocationOperand::REGISTER) {
+    src_index = src_loc.register_code() * aliases;
+  } else {
+    src_index = src_loc.index();
+    // For operands that occupy multiple slots, the index refers to the last
+    // slot. On little-endian architectures, we start at the high slot and use a
+    // negative step so that register-to-slot moves are in the correct order.
+    src_step = -slot_size;
+  }
+  int dst_index = -1;
+  int dst_step = 1;
+  if (dst_kind == LocationOperand::REGISTER) {
+    dst_index = dst_loc.register_code() * aliases;
+  } else {
+    dst_index = dst_loc.index();
+    dst_step = -slot_size;
+  }
+
+  // Reuse 'move' for the first fragment. It is not pending.
+  move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index));
+  move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index));
+  // Add the remaining fragment moves.
+  for (int i = 1; i < aliases; ++i) {
+    src_index += src_step;
+    dst_index += dst_step;
+    moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index),
+                   AllocatedOperand(dst_kind, smaller_rep, dst_index));
+  }
+  // Return the first fragment.
+  return move;
+}
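+
+// A minimal worked example, assuming an ARM-style aliasing configuration in
+// which one float64 register overlaps two float32 registers: splitting a
+// kFloat64 move from FP register 3 to stack slot 10 with
+// smaller_rep == kFloat32 gives aliases == 2, a first register fragment index
+// of 3 * 2 == 6, and (since kSystemPointerSize == kFloatSize implies
+// slot_size == 1) two kFloat32 moves: register 6 -> slot 10 and
+// register 7 -> slot 9.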
+
+enum MoveOperandKind : uint8_t { kConstant, kGpReg, kFpReg, kStack };
+
+MoveOperandKind GetKind(const InstructionOperand& move) {
+  if (move.IsConstant()) return kConstant;
+  LocationOperand loc_op = LocationOperand::cast(move);
+  if (loc_op.location_kind() != LocationOperand::REGISTER) return kStack;
+  return IsFloatingPoint(loc_op.representation()) ? kFpReg : kGpReg;
+}
+
+}  // namespace
+
+void GapResolver::Resolve(ParallelMove* moves) {
+  base::EnumSet<MoveOperandKind, uint8_t> source_kinds;
+  base::EnumSet<MoveOperandKind, uint8_t> destination_kinds;
+
+  // Remove redundant moves, collect source kinds and destination kinds to
+  // detect simple non-overlapping moves, and collect FP move representations if
+  // aliasing is non-simple.
+  int fp_reps = 0;
+  size_t nmoves = moves->size();
+  for (size_t i = 0; i < nmoves;) {
+    MoveOperands* move = (*moves)[i];
+    if (move->IsRedundant()) {
+      nmoves--;
+      if (i < nmoves) (*moves)[i] = (*moves)[nmoves];
+      continue;
+    }
+    i++;
+    source_kinds.Add(GetKind(move->source()));
+    destination_kinds.Add(GetKind(move->destination()));
+    if (!kSimpleFPAliasing && move->destination().IsFPRegister()) {
+      fp_reps |= RepresentationBit(
+          LocationOperand::cast(move->destination()).representation());
+    }
+  }
+  if (nmoves != moves->size()) moves->resize(nmoves);
+
+  if ((source_kinds & destination_kinds).empty() || moves->size() < 2) {
+    // Fast path for non-conflicting parallel moves.
+    for (MoveOperands* move : *moves) {
+      assembler_->AssembleMove(&move->source(), &move->destination());
+    }
+    return;
+  }
+
+  if (!kSimpleFPAliasing) {
+    if (fp_reps && !base::bits::IsPowerOfTwo(fp_reps)) {
+      // Start with the smallest FP moves, so we never encounter smaller moves
+      // in the middle of a cycle of larger moves.
+      if ((fp_reps & RepresentationBit(MachineRepresentation::kFloat32)) != 0) {
+        split_rep_ = MachineRepresentation::kFloat32;
+        for (size_t i = 0; i < moves->size(); ++i) {
+          auto move = (*moves)[i];
+          if (!move->IsEliminated() && move->destination().IsFloatRegister())
+            PerformMove(moves, move);
+        }
+      }
+      if ((fp_reps & RepresentationBit(MachineRepresentation::kFloat64)) != 0) {
+        split_rep_ = MachineRepresentation::kFloat64;
+        for (size_t i = 0; i < moves->size(); ++i) {
+          auto move = (*moves)[i];
+          if (!move->IsEliminated() && move->destination().IsDoubleRegister())
+            PerformMove(moves, move);
+        }
+      }
+    }
+    split_rep_ = MachineRepresentation::kSimd128;
+  }
+
+  for (size_t i = 0; i < moves->size(); ++i) {
+    auto move = (*moves)[i];
+    if (!move->IsEliminated()) PerformMove(moves, move);
+  }
+}
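+
+// Example of the fast path above: for the parallel move
+// {gp register r1 -> stack slot 0, constant 42 -> stack slot 1} the source
+// kinds are {kGpReg, kConstant} and the destination kinds are {kStack}; the
+// sets are disjoint, so no move can clobber another move's source and all
+// moves are emitted directly, in order.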
+
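+// A minimal worked example of the cycle handling in PerformMove below: for
+// the parallel move {r1 -> r2, r2 -> r1}, performing the first move marks it
+// pending and recurses into the second, which is found to be blocked only by
+// the pending first move; the cycle is broken with a single
+// AssembleSwap(r2, r1), the first move's source is then rewritten to r2, and
+// it is eliminated as redundant on return.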
+void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
+  // Each call to this function performs a move and deletes it from the move
+  // graph.  We first recursively perform any move blocking this one.  We mark a
+  // move as "pending" on entry to PerformMove in order to detect cycles in the
+  // move graph.  We use operand swaps to resolve cycles, which means that a
+  // call to PerformMove could change any source operand in the move graph.
+  DCHECK(!move->IsPending());
+  DCHECK(!move->IsRedundant());
+
+  // Clear this move's destination to indicate a pending move.  The actual
+  // destination is saved on the side.
+  InstructionOperand source = move->source();
+  DCHECK(!source.IsInvalid());  // Or else it will look eliminated.
+  InstructionOperand destination = move->destination();
+  move->SetPending();
+
+  // We may need to split moves between FP locations differently.
+  const bool is_fp_loc_move =
+      !kSimpleFPAliasing && destination.IsFPLocationOperand();
+
+  // Perform a depth-first traversal of the move graph to resolve dependencies.
+  // Any unperformed, unpending move with a source the same as this one's
+  // destination blocks this one, so recursively perform all such moves.
+  for (size_t i = 0; i < moves->size(); ++i) {
+    auto other = (*moves)[i];
+    if (other->IsEliminated()) continue;
+    if (other->IsPending()) continue;
+    if (other->source().InterferesWith(destination)) {
+      if (is_fp_loc_move &&
+          LocationOperand::cast(other->source()).representation() >
+              split_rep_) {
+        // 'other' must also be an FP location move. Break it into fragments
+        // of the same size as 'move'. 'other' is set to one of the fragments,
+        // and the rest are appended to 'moves'.
+        other = Split(other, split_rep_, moves);
+        // 'other' may no longer block the destination.
+        if (!other->source().InterferesWith(destination)) continue;
+      }
+      // Though PerformMove can change any source operand in the move graph,
+      // this call cannot create a blocking move via a swap (this loop does not
+      // miss any).  Assume there is a non-blocking move with source A and this
+      // move is blocked on source B and there is a swap of A and B.  Then A and
+      // B must be involved in the same cycle (or they would not be swapped).
+      // Since this move's destination is B and there is only a single incoming
+      // edge to an operand, this move must also be involved in the same cycle.
+      // In that case, the blocking move will be created but will be "pending"
+      // when we return from PerformMove.
+      PerformMove(moves, other);
+    }
+  }
+
+  // This move's source may have changed due to swaps to resolve cycles and so
+  // it may now be the last move in the cycle.  If so remove it.
+  source = move->source();
+  if (source.EqualsCanonicalized(destination)) {
+    move->Eliminate();
+    return;
+  }
+
+  // We are about to resolve this move and don't need it marked as pending, so
+  // restore its destination.
+  move->set_destination(destination);
+
+  // The move may be blocked on (at most) one pending move, in which case we
+  // have a cycle.  Search for such a blocking move and perform a swap to
+  // resolve it.
+  auto blocker =
+      std::find_if(moves->begin(), moves->end(), [&](MoveOperands* move) {
+        return !move->IsEliminated() &&
+               move->source().InterferesWith(destination);
+      });
+  if (blocker == moves->end()) {
+    // The easy case: This move is not blocked.
+    assembler_->AssembleMove(&source, &destination);
+    move->Eliminate();
+    return;
+  }
+
+  // Ensure source is a register or both are stack slots, to limit swap cases.
+  if (source.IsStackSlot() || source.IsFPStackSlot()) {
+    std::swap(source, destination);
+  }
+  assembler_->AssembleSwap(&source, &destination);
+  move->Eliminate();
+
+  // Update outstanding moves whose source may now have been moved.
+  if (is_fp_loc_move) {
+    // We may have to split larger moves.
+    for (size_t i = 0; i < moves->size(); ++i) {
+      auto other = (*moves)[i];
+      if (other->IsEliminated()) continue;
+      if (source.InterferesWith(other->source())) {
+        if (LocationOperand::cast(other->source()).representation() >
+            split_rep_) {
+          other = Split(other, split_rep_, moves);
+          if (!source.InterferesWith(other->source())) continue;
+        }
+        other->set_source(destination);
+      } else if (destination.InterferesWith(other->source())) {
+        if (LocationOperand::cast(other->source()).representation() >
+            split_rep_) {
+          other = Split(other, split_rep_, moves);
+          if (!destination.InterferesWith(other->source())) continue;
+        }
+        other->set_source(source);
+      }
+    }
+  } else {
+    for (auto other : *moves) {
+      if (other->IsEliminated()) continue;
+      if (source.EqualsCanonicalized(other->source())) {
+        other->set_source(destination);
+      } else if (destination.EqualsCanonicalized(other->source())) {
+        other->set_source(source);
+      }
+    }
+  }
+}
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/gap-resolver.h b/src/compiler/backend/gap-resolver.h
new file mode 100644
index 0000000..19b6705
--- /dev/null
+++ b/src/compiler/backend/gap-resolver.h
@@ -0,0 +1,53 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_GAP_RESOLVER_H_
+#define V8_COMPILER_BACKEND_GAP_RESOLVER_H_
+
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class GapResolver final {
+ public:
+  // Interface used by the gap resolver to emit moves and swaps.
+  class Assembler {
+   public:
+    virtual ~Assembler() = default;
+
+    // Assemble move.
+    virtual void AssembleMove(InstructionOperand* source,
+                              InstructionOperand* destination) = 0;
+    // Assemble swap.
+    virtual void AssembleSwap(InstructionOperand* source,
+                              InstructionOperand* destination) = 0;
+  };
+
+  explicit GapResolver(Assembler* assembler)
+      : assembler_(assembler), split_rep_(MachineRepresentation::kSimd128) {}
+
+  // Resolve a set of parallel moves, emitting assembler instructions.
+  V8_EXPORT_PRIVATE void Resolve(ParallelMove* parallel_move);
+
+ private:
+  // Performs the given move, possibly performing other moves to unblock the
+  // destination operand.
+  void PerformMove(ParallelMove* moves, MoveOperands* move);
+
+  // Assembler used to emit moves and save registers.
+  Assembler* const assembler_;
+
+  // While resolving moves, the largest FP representation that can be moved.
+  // Any larger moves must be split into an equivalent series of moves of this
+  // representation.
+  MachineRepresentation split_rep_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_GAP_RESOLVER_H_
diff --git a/src/compiler/backend/ia32/code-generator-ia32.cc b/src/compiler/backend/ia32/code-generator-ia32.cc
new file mode 100644
index 0000000..1820e39
--- /dev/null
+++ b/src/compiler/backend/ia32/code-generator-ia32.cc
@@ -0,0 +1,5134 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/overflowing-math.h"
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/callable.h"
+#include "src/codegen/ia32/assembler-ia32.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/execution/frame-constants.h"
+#include "src/execution/frames.h"
+#include "src/heap/memory-chunk.h"
+#include "src/objects/smi.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+#define kScratchDoubleReg xmm0
+
+// Adds IA-32 specific methods for decoding operands.
+class IA32OperandConverter : public InstructionOperandConverter {
+ public:
+  IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  Operand InputOperand(size_t index, int extra = 0) {
+    return ToOperand(instr_->InputAt(index), extra);
+  }
+
+  Immediate InputImmediate(size_t index) {
+    return ToImmediate(instr_->InputAt(index));
+  }
+
+  Operand OutputOperand() { return ToOperand(instr_->Output()); }
+
+  Operand ToOperand(InstructionOperand* op, int extra = 0) {
+    if (op->IsRegister()) {
+      DCHECK_EQ(0, extra);
+      return Operand(ToRegister(op));
+    } else if (op->IsFPRegister()) {
+      DCHECK_EQ(0, extra);
+      return Operand(ToDoubleRegister(op));
+    }
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
+  }
+
+  Operand SlotToOperand(int slot, int extra = 0) {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return Operand(offset.from_stack_pointer() ? esp : ebp,
+                   offset.offset() + extra);
+  }
+
+  Immediate ToImmediate(InstructionOperand* operand) {
+    Constant constant = ToConstant(operand);
+    if (constant.type() == Constant::kInt32 &&
+        RelocInfo::IsWasmReference(constant.rmode())) {
+      return Immediate(static_cast<Address>(constant.ToInt32()),
+                       constant.rmode());
+    }
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Immediate(constant.ToInt32());
+      case Constant::kFloat32:
+        return Immediate::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Immediate::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kExternalReference:
+        return Immediate(constant.ToExternalReference());
+      case Constant::kHeapObject:
+        return Immediate(constant.ToHeapObject());
+      case Constant::kCompressedHeapObject:
+        break;
+      case Constant::kDelayedStringConstant:
+        return Immediate::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kInt64:
+        break;
+      case Constant::kRpoNumber:
+        return Immediate::CodeRelativeOffset(ToLabel(operand));
+    }
+    UNREACHABLE();
+  }
+
+  static size_t NextOffset(size_t* offset) {
+    size_t i = *offset;
+    (*offset)++;
+    return i;
+  }
+
+  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
+    STATIC_ASSERT(0 == static_cast<int>(times_1));
+    STATIC_ASSERT(1 == static_cast<int>(times_2));
+    STATIC_ASSERT(2 == static_cast<int>(times_4));
+    STATIC_ASSERT(3 == static_cast<int>(times_8));
+    int scale = static_cast<int>(mode - one);
+    DCHECK(scale >= 0 && scale < 4);
+    return static_cast<ScaleFactor>(scale);
+  }
+
+  Operand MemoryOperand(size_t* offset) {
+    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+    switch (mode) {
+      case kMode_MR: {
+        Register base = InputRegister(NextOffset(offset));
+        int32_t disp = 0;
+        return Operand(base, disp);
+      }
+      case kMode_MRI: {
+        Register base = InputRegister(NextOffset(offset));
+        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+        return Operand(base, ctant.ToInt32(), ctant.rmode());
+      }
+      case kMode_MR1:
+      case kMode_MR2:
+      case kMode_MR4:
+      case kMode_MR8: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
+        int32_t disp = 0;
+        return Operand(base, index, scale, disp);
+      }
+      case kMode_MR1I:
+      case kMode_MR2I:
+      case kMode_MR4I:
+      case kMode_MR8I: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
+        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+        return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
+      }
+      case kMode_M1:
+      case kMode_M2:
+      case kMode_M4:
+      case kMode_M8: {
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_M1, mode);
+        int32_t disp = 0;
+        return Operand(index, scale, disp);
+      }
+      case kMode_M1I:
+      case kMode_M2I:
+      case kMode_M4I:
+      case kMode_M8I: {
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
+        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+        return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
+      }
+      case kMode_MI: {
+        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
+        return Operand(ctant.ToInt32(), ctant.rmode());
+      }
+      case kMode_Root: {
+        Register base = kRootRegister;
+        int32_t disp = InputInt32(NextOffset(offset));
+        return Operand(base, disp);
+      }
+      case kMode_None:
+        UNREACHABLE();
+    }
+    UNREACHABLE();
+  }
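+
+  // For example, an instruction carrying addressing mode kMode_MR4I with
+  // inputs (base register, index register, constant 8) decodes above to
+  // Operand(base, index, times_4, 8), and *offset is advanced past the three
+  // consumed inputs so callers can continue reading any remaining inputs.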
+
+  Operand MemoryOperand(size_t first_input = 0) {
+    return MemoryOperand(&first_input);
+  }
+
+  Operand NextMemoryOperand(size_t offset = 0) {
+    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+    Register base = InputRegister(NextOffset(&offset));
+    const int32_t disp = 4;
+    if (mode == kMode_MR1) {
+      Register index = InputRegister(NextOffset(&offset));
+      ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
+      return Operand(base, index, scale, disp);
+    } else if (mode == kMode_MRI) {
+      Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
+      return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
+    } else {
+      UNREACHABLE();
+    }
+  }
+
+  void MoveInstructionOperandToRegister(Register destination,
+                                        InstructionOperand* op) {
+    if (op->IsImmediate() || op->IsConstant()) {
+      gen_->tasm()->mov(destination, ToImmediate(op));
+    } else if (op->IsRegister()) {
+      gen_->tasm()->Move(destination, ToRegister(op));
+    } else {
+      gen_->tasm()->mov(destination, ToOperand(op));
+    }
+  }
+};
+
+namespace {
+
+bool HasAddressingMode(Instruction* instr) {
+  return instr->addressing_mode() != kMode_None;
+}
+
+bool HasImmediateInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsImmediate();
+}
+
+bool HasRegisterInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsRegister();
+}
+
+class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), result_(result) {}
+
+  void Generate() final {
+    __ xorps(result_, result_);
+    __ divss(result_, result_);
+  }
+
+ private:
+  XMMRegister const result_;
+};
+
+class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), result_(result) {}
+
+  void Generate() final {
+    __ xorpd(result_, result_);
+    __ divsd(result_, result_);
+  }
+
+ private:
+  XMMRegister const result_;
+};
+
+class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
+ public:
+  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
+                             XMMRegister input, StubCallMode stub_mode)
+      : OutOfLineCode(gen),
+        result_(result),
+        input_(input),
+        stub_mode_(stub_mode),
+        isolate_(gen->isolate()),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    __ AllocateStackSpace(kDoubleSize);
+    __ movsd(MemOperand(esp, 0), input_);
+    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
+    } else if (tasm()->options().inline_offheap_trampolines) {
+      __ CallBuiltin(Builtins::kDoubleToI);
+    } else {
+      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
+    }
+    __ mov(result_, MemOperand(esp, 0));
+    __ add(esp, Immediate(kDoubleSize));
+  }
+
+ private:
+  Register const result_;
+  XMMRegister const input_;
+  StubCallMode stub_mode_;
+  Isolate* isolate_;
+  Zone* zone_;
+};
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode)
+      : OutOfLineCode(gen),
+        object_(object),
+        operand_(operand),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
+                     exit());
+    __ lea(scratch1_, operand_);
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+  }
+
+ private:
+  Register const object_;
+  Operand const operand_;
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode const stub_mode_;
+  Zone* zone_;
+};
+
+}  // namespace
+
+#define ASSEMBLE_COMPARE(asm_instr)                              \
+  do {                                                           \
+    if (HasAddressingMode(instr)) {                              \
+      size_t index = 0;                                          \
+      Operand left = i.MemoryOperand(&index);                    \
+      if (HasImmediateInput(instr, index)) {                     \
+        __ asm_instr(left, i.InputImmediate(index));             \
+      } else {                                                   \
+        __ asm_instr(left, i.InputRegister(index));              \
+      }                                                          \
+    } else {                                                     \
+      if (HasImmediateInput(instr, 1)) {                         \
+        if (HasRegisterInput(instr, 0)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+        } else {                                                 \
+          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
+        }                                                        \
+      } else {                                                   \
+        if (HasRegisterInput(instr, 1)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
+        } else {                                                 \
+          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
+        }                                                        \
+      }                                                          \
+    }                                                            \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                     \
+  do {                                                                   \
+    /* Pass two doubles as arguments on the stack. */                    \
+    __ PrepareCallCFunction(4, eax);                                     \
+    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
+    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));   \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
+    /* Return value is in st(0) on ia32. */                              \
+    /* Store it into the result register. */                             \
+    __ AllocateStackSpace(kDoubleSize);                                  \
+    __ fstp_d(Operand(esp, 0));                                          \
+    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
+    __ add(esp, Immediate(kDoubleSize));                                 \
+  } while (false)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                      \
+  do {                                                                   \
+    /* Pass one double as argument on the stack. */                      \
+    __ PrepareCallCFunction(2, eax);                                     \
+    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
+    /* Return value is in st(0) on ia32. */                              \
+    /* Store it into the result register. */                             \
+    __ AllocateStackSpace(kDoubleSize);                                  \
+    __ fstp_d(Operand(esp, 0));                                          \
+    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
+    __ add(esp, Immediate(kDoubleSize));                                 \
+  } while (false)
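+
+// For example, kIeee754Float64Log below expands ASSEMBLE_IEEE754_UNOP(log)
+// into a call to ExternalReference::ieee754_log_function() with the double
+// argument passed in two 4-byte stack slots, and the x87 st(0) result is
+// spilled through the stack (fstp_d + movsd) into the output XMM register.
+// kIeee754Float64Pow and kIeee754Float64Atan2 use the two-argument variant.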
+
+#define ASSEMBLE_BINOP(asm_instr)                             \
+  do {                                                        \
+    if (HasAddressingMode(instr)) {                           \
+      size_t index = 1;                                       \
+      Operand right = i.MemoryOperand(&index);                \
+      __ asm_instr(i.InputRegister(0), right);                \
+    } else {                                                  \
+      if (HasImmediateInput(instr, 1)) {                      \
+        __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
+      } else {                                                \
+        __ asm_instr(i.InputRegister(0), i.InputOperand(1));  \
+      }                                                       \
+    }                                                         \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
+  do {                                                          \
+    Label binop;                                                \
+    __ bind(&binop);                                            \
+    __ mov_inst(eax, i.MemoryOperand(1));                       \
+    __ Move(i.TempRegister(0), eax);                            \
+    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
+    __ lock();                                                  \
+    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
+    __ j(not_equal, &binop);                                    \
+  } while (false)
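+
+// The macro above is a standard compare-and-swap retry loop: load the old
+// value into eax, apply bin_inst to a copy held in the temp register, then
+// attempt a lock cmpxchg of the result back into memory; if another thread
+// modified the location in between, the cmpxchg fails (ZF clear) and the
+// loop retries.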
+
+#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)                \
+  do {                                                          \
+    Label binop;                                                \
+    __ bind(&binop);                                            \
+    __ mov(eax, i.MemoryOperand(2));                            \
+    __ mov(edx, i.NextMemoryOperand(2));                        \
+    __ push(ebx);                                               \
+    frame_access_state()->IncreaseSPDelta(1);                   \
+    i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
+    __ push(i.InputRegister(1));                                \
+    __ instr1(ebx, eax);                                        \
+    __ instr2(i.InputRegister(1), edx);                         \
+    __ lock();                                                  \
+    __ cmpxchg8b(i.MemoryOperand(2));                           \
+    __ pop(i.InputRegister(1));                                 \
+    __ pop(ebx);                                                \
+    frame_access_state()->IncreaseSPDelta(-1);                  \
+    __ j(not_equal, &binop);                                    \
+  } while (false)
+
+#define ASSEMBLE_MOVX(mov_instr)                            \
+  do {                                                      \
+    if (HasAddressingMode(instr)) {                         \
+      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
+    } else if (HasRegisterInput(instr, 0)) {                \
+      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
+    } else {                                                \
+      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
+    }                                                       \
+  } while (0)
+
+#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
+  do {                                                               \
+    XMMRegister src0 = i.InputSimd128Register(0);                    \
+    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
+    if (CpuFeatures::IsSupported(AVX)) {                             \
+      CpuFeatureScope avx_scope(tasm(), AVX);                        \
+      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
+    } else {                                                         \
+      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
+      __ opcode(i.OutputSimd128Register(), src1);                    \
+    }                                                                \
+  } while (false)
+
+#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
+  if (CpuFeatures::IsSupported(AVX)) {                                 \
+    CpuFeatureScope avx_scope(tasm(), AVX);                            \
+    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+                 i.InputOperand(1), imm);                              \
+  } else {                                                             \
+    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
+    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
+    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
+  }
+
+#define ASSEMBLE_SIMD_ALL_TRUE(opcode)               \
+  do {                                               \
+    Register dst = i.OutputRegister();               \
+    Operand src = i.InputOperand(0);                 \
+    Register tmp = i.TempRegister(0);                \
+    XMMRegister tmp_simd = i.TempSimd128Register(1); \
+    __ mov(tmp, Immediate(1));                       \
+    __ xor_(dst, dst);                               \
+    __ Pxor(tmp_simd, tmp_simd);                     \
+    __ opcode(tmp_simd, src);                        \
+    __ Ptest(tmp_simd, tmp_simd);                    \
+    __ cmov(zero, dst, tmp);                         \
+  } while (false)
+
+#define ASSEMBLE_SIMD_SHIFT(opcode, width)             \
+  do {                                                 \
+    XMMRegister dst = i.OutputSimd128Register();       \
+    DCHECK_EQ(dst, i.InputSimd128Register(0));         \
+    if (HasImmediateInput(instr, 1)) {                 \
+      __ opcode(dst, dst, byte{i.InputInt##width(1)}); \
+    } else {                                           \
+      XMMRegister tmp = i.TempSimd128Register(0);      \
+      Register tmp_shift = i.TempRegister(1);          \
+      constexpr int mask = (1 << width) - 1;           \
+      __ mov(tmp_shift, i.InputRegister(1));           \
+      __ and_(tmp_shift, Immediate(mask));             \
+      __ Movd(tmp, tmp_shift);                         \
+      __ opcode(dst, dst, tmp);                        \
+    }                                                  \
+  } while (false)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ mov(esp, ebp);
+  __ pop(ebp);
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ mov(ebp, MemOperand(ebp, 0));
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register, Register,
+                                                     Register) {
+  // There are not enough temp registers left on ia32 for a call instruction,
+  // so we pick some scratch registers and save/restore them manually here.
+  int scratch_count = 3;
+  Register scratch1 = esi;
+  Register scratch2 = ecx;
+  Register scratch3 = edx;
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
+         Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ j(not_equal, &done, Label::kNear);
+
+  __ push(scratch1);
+  __ push(scratch2);
+  __ push(scratch3);
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ mov(caller_args_count_reg,
+         Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3,
+                        scratch_count);
+  __ pop(scratch3);
+  __ pop(scratch2);
+  __ pop(scratch1);
+
+  __ bind(&done);
+}
+
+namespace {
+
+void AdjustStackPointerForTailCall(TurboAssembler* tasm,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
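+
+// For example, if current_sp_offset is 3 slots and new_slot_above_sp is 5,
+// stack_slot_delta is 2 and 2 * kSystemPointerSize == 8 bytes are allocated
+// on ia32; a negative delta releases stack space instead, unless shrinkage is
+// disallowed as in the final adjustment of AssembleTailCallBeforeGap below.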
+
+#ifdef DEBUG
+bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
+                                   const Instruction* instr) {
+  if (instr->OutputCount() == 2) {
+    return (converter->OutputRegister(0) == eax &&
+            converter->OutputRegister(1) == edx);
+  }
+  if (instr->OutputCount() == 1) {
+    return (converter->OutputRegister(0) == eax &&
+            converter->TempRegister(0) == edx) ||
+           (converter->OutputRegister(0) == edx &&
+            converter->TempRegister(0) == eax);
+  }
+  DCHECK_EQ(instr->OutputCount(), 0);
+  return (converter->TempRegister(0) == eax &&
+          converter->TempRegister(1) == edx);
+}
+#endif
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, flags, &pushes);
+
+  if (!pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot)) {
+    IA32OperandConverter g(this, instr);
+    for (auto move : pushes) {
+      LocationOperand destination_location(
+          LocationOperand::cast(move->destination()));
+      InstructionOperand source(move->source());
+      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                    destination_location.index());
+      if (source.IsStackSlot()) {
+        LocationOperand source_location(LocationOperand::cast(source));
+        __ push(g.SlotToOperand(source_location.index()));
+      } else if (source.IsRegister()) {
+        LocationOperand source_location(LocationOperand::cast(source));
+        __ push(source_location.GetRegister());
+      } else if (source.IsImmediate()) {
+        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
+      } else {
+        // Pushes of non-scalar data types are not supported.
+        UNIMPLEMENTED();
+      }
+      frame_access_state()->IncreaseSPDelta(1);
+      move->Eliminate();
+    }
+  }
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  __ push(eax);  // Push eax so we can use it as a scratch register.
+  __ ComputeCodeStartAddress(eax);
+  __ cmp(eax, kJavaScriptCallCodeStartRegister);
+  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
+  __ pop(eax);  // Restore eax.
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+//    1. read from memory the word that contains that bit, which can be found in
+//       the flags in the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ push(eax);  // Push eax so we can use it as a scratch register.
+  __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
+  __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
+          Immediate(1 << Code::kMarkedForDeoptimizationBit));
+  __ pop(eax);  // Restore eax.
+
+  Label skip;
+  __ j(zero, &skip, Label::kNear);
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET);
+  __ bind(&skip);
+}
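+
+// Mapping the numbered steps above onto the code: step 1 is the mov that
+// loads the referenced {CodeDataContainer} (the offset compensates for
+// kJavaScriptCallCodeStartRegister pointing at the code start rather than the
+// tagged Code object); step 2 is the test against
+// 1 << Code::kMarkedForDeoptimizationBit in its kKindSpecificFlags field; and
+// step 3 is the Jump to CompileLazyDeoptimizedCode, which j(zero, &skip)
+// bypasses when the bit is clear.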
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+  UNREACHABLE();
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  IA32OperandConverter i(this, instr);
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      InstructionOperand* op = instr->InputAt(0);
+      if (op->IsImmediate()) {
+        Handle<Code> code = i.InputCode(0);
+        __ Call(code, RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ LoadCodeObjectEntry(reg, reg);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineCall(reg);
+        } else {
+          __ call(reg);
+        }
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!HasImmediateInput(instr, 0));
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (HasImmediateInput(instr, 0)) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+          __ wasm_call(wasm_code, constant.rmode());
+        } else {
+          if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+            __ RetpolineCall(wasm_code, constant.rmode());
+          } else {
+            __ call(wasm_code, constant.rmode());
+          }
+        }
+      } else {
+        Register reg = i.InputRegister(0);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineCall(reg);
+        } else {
+          __ call(reg);
+        }
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         no_reg, no_reg, no_reg);
+      }
+      if (HasImmediateInput(instr, 0)) {
+        Handle<Code> code = i.InputCode(0);
+        __ Jump(code, RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ LoadCodeObjectEntry(reg, reg);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineJump(reg);
+        } else {
+          __ jmp(reg);
+        }
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (HasImmediateInput(instr, 0)) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        __ jmp(wasm_code, constant.rmode());
+      } else {
+        Register reg = i.InputRegister(0);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineJump(reg);
+        } else {
+          __ jmp(reg);
+        }
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!HasImmediateInput(instr, 0));
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+        __ RetpolineJump(reg);
+      } else {
+        __ jmp(reg);
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
+        __ Assert(equal, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
+      __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(ecx);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label return_location;
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        // Put the return address in a stack slot.
+        Register scratch = eax;
+        __ push(scratch);
+        __ PushPC();
+        int pc = __ pc_offset();
+        __ pop(scratch);
+        __ sub(scratch, Immediate(pc + Code::kHeaderSize - kHeapObjectTag));
+        __ add(scratch, Immediate::CodeRelativeOffset(&return_location));
+        __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset),
+               scratch);
+        __ pop(scratch);
+      }
+      if (HasImmediateInput(instr, 0)) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      __ bind(&return_location);
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement SP delta to match the change of stack
+      // pointer in CallCFunction. However, for certain architectures (e.g.
+      // ARM), there may be a stricter alignment requirement, causing the old
+      // SP to be saved on the stack. In those cases, we cannot calculate the
+      // SP delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == edx);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ int3();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchNop:
+    case kArchThrowTerminator:
+      // don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ jmp(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchFramePointer:
+      __ mov(i.OutputRegister(), ebp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ mov(i.OutputRegister(), Operand(ebp, 0));
+      } else {
+        __ mov(i.OutputRegister(), ebp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison to consider the size difference of an optimized frame versus
+      // the contained unoptimized frames.
+      Register lhs_register = esp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = i.TempRegister(0);
+        __ lea(lhs_register, Operand(esp, -1 * static_cast<int32_t>(offset)));
+      }
+
+      constexpr size_t kValueIndex = 0;
+      if (HasAddressingMode(instr)) {
+        __ cmp(lhs_register, i.MemoryOperand(kValueIndex));
+      } else {
+        __ cmp(lhs_register, i.InputRegister(kValueIndex));
+      }
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI: {
+      auto result = i.OutputRegister();
+      auto input = i.InputDoubleRegister(0);
+      auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
+          this, result, input, DetermineStubCallMode());
+      __ cvttsd2si(result, Operand(input));
+      __ cmp(result, 1);
+      __ j(overflow, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      Register value = i.InputRegister(index);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
+                                                   scratch0, scratch1, mode,
+                                                   DetermineStubCallMode());
+      __ mov(operand, value);
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask,
+                       not_zero, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base = offset.from_stack_pointer() ? esp : ebp;
+      __ lea(i.OutputRegister(), Operand(base, offset.offset()));
+      break;
+    }
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kIA32Add:
+      ASSEMBLE_BINOP(add);
+      break;
+    case kIA32And:
+      ASSEMBLE_BINOP(and_);
+      break;
+    case kIA32Cmp:
+      ASSEMBLE_COMPARE(cmp);
+      break;
+    case kIA32Cmp16:
+      ASSEMBLE_COMPARE(cmpw);
+      break;
+    case kIA32Cmp8:
+      ASSEMBLE_COMPARE(cmpb);
+      break;
+    case kIA32Test:
+      ASSEMBLE_COMPARE(test);
+      break;
+    case kIA32Test16:
+      ASSEMBLE_COMPARE(test_w);
+      break;
+    case kIA32Test8:
+      ASSEMBLE_COMPARE(test_b);
+      break;
+    case kIA32Imul:
+      if (HasImmediateInput(instr, 1)) {
+        __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
+      } else {
+        __ imul(i.OutputRegister(), i.InputOperand(1));
+      }
+      break;
+    case kIA32ImulHigh:
+      __ imul(i.InputRegister(1));
+      break;
+    case kIA32UmulHigh:
+      __ mul(i.InputRegister(1));
+      break;
+    case kIA32Idiv:
+      __ cdq();
+      __ idiv(i.InputOperand(1));
+      break;
+    case kIA32Udiv:
+      __ Move(edx, Immediate(0));
+      __ div(i.InputOperand(1));
+      break;
+    case kIA32Not:
+      __ not_(i.OutputOperand());
+      break;
+    case kIA32Neg:
+      __ neg(i.OutputOperand());
+      break;
+    case kIA32Or:
+      ASSEMBLE_BINOP(or_);
+      break;
+    case kIA32Xor:
+      ASSEMBLE_BINOP(xor_);
+      break;
+    case kIA32Sub:
+      ASSEMBLE_BINOP(sub);
+      break;
+    case kIA32Shl:
+      if (HasImmediateInput(instr, 1)) {
+        __ shl(i.OutputOperand(), i.InputInt5(1));
+      } else {
+        __ shl_cl(i.OutputOperand());
+      }
+      break;
+    case kIA32Shr:
+      if (HasImmediateInput(instr, 1)) {
+        __ shr(i.OutputOperand(), i.InputInt5(1));
+      } else {
+        __ shr_cl(i.OutputOperand());
+      }
+      break;
+    case kIA32Sar:
+      if (HasImmediateInput(instr, 1)) {
+        __ sar(i.OutputOperand(), i.InputInt5(1));
+      } else {
+        __ sar_cl(i.OutputOperand());
+      }
+      break;
+    case kIA32AddPair: {
+      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
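+      // In effect: {high:low} = {left_high:left_low} + {right_high:right_low},
+      // with `add` producing the low word plus carry and `adc` folding the
+      // carry into the high word.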
+      bool use_temp = false;
+      if ((HasRegisterInput(instr, 1) &&
+           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
+          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
+        // We cannot write to the output register directly, because it would
+        // overwrite an input for adc. We have to use the temp register.
+        use_temp = true;
+        __ Move(i.TempRegister(0), i.InputRegister(0));
+        __ add(i.TempRegister(0), i.InputRegister(2));
+      } else {
+        __ add(i.OutputRegister(0), i.InputRegister(2));
+      }
+      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
+                                         instr->InputAt(1));
+      __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
+      if (use_temp) {
+        __ Move(i.OutputRegister(0), i.TempRegister(0));
+      }
+      break;
+    }
+    case kIA32SubPair: {
+      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      bool use_temp = false;
+      if ((HasRegisterInput(instr, 1) &&
+           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
+          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
+        // We cannot write to the output register directly, because it would
+        // overwrite an input for sbb. We have to use the temp register.
+        use_temp = true;
+        __ Move(i.TempRegister(0), i.InputRegister(0));
+        __ sub(i.TempRegister(0), i.InputRegister(2));
+      } else {
+        __ sub(i.OutputRegister(0), i.InputRegister(2));
+      }
+      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
+                                         instr->InputAt(1));
+      __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
+      if (use_temp) {
+        __ Move(i.OutputRegister(0), i.TempRegister(0));
+      }
+      break;
+    }
+    case kIA32MulPair: {
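+      // 64-bit multiply from 32-bit halves: with left = {lh:ll} and
+      // right = {rh:rl}, the result modulo 2^64 has low word low32(ll*rl)
+      // and high word high32(ll*rl) + low32(ll*rh + lh*rl).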
+      __ imul(i.OutputRegister(1), i.InputOperand(0));
+      i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
+      __ imul(i.TempRegister(0), i.InputOperand(2));
+      __ add(i.OutputRegister(1), i.TempRegister(0));
+      __ mov(i.OutputRegister(0), i.InputOperand(0));
+      // Multiplies the low words and stores them in eax and edx.
+      __ mul(i.InputRegister(2));
+      __ add(i.OutputRegister(1), i.TempRegister(0));
+
+      break;
+    }
+    case kIA32ShlPair:
+      if (HasImmediateInput(instr, 2)) {
+        __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+      } else {
+        // Shift has been loaded into CL by the register allocator.
+        __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
+      }
+      break;
+    case kIA32ShrPair:
+      if (HasImmediateInput(instr, 2)) {
+        __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+      } else {
+        // Shift has been loaded into CL by the register allocator.
+        __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
+      }
+      break;
+    case kIA32SarPair:
+      if (HasImmediateInput(instr, 2)) {
+        __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
+      } else {
+        // Shift has been loaded into CL by the register allocator.
+        __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
+      }
+      break;
+    case kIA32Rol:
+      if (HasImmediateInput(instr, 1)) {
+        __ rol(i.OutputOperand(), i.InputInt5(1));
+      } else {
+        __ rol_cl(i.OutputOperand());
+      }
+      break;
+    case kIA32Ror:
+      if (HasImmediateInput(instr, 1)) {
+        __ ror(i.OutputOperand(), i.InputInt5(1));
+      } else {
+        __ ror_cl(i.OutputOperand());
+      }
+      break;
+    case kIA32Lzcnt:
+      __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kIA32Tzcnt:
+      __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kIA32Popcnt:
+      __ Popcnt(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kIA32Bswap:
+      __ bswap(i.OutputRegister());
+      break;
+    case kArchWordPoisonOnSpeculation:
+      // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+      UNREACHABLE();
+    case kIA32MFence:
+      __ mfence();
+      break;
+    case kIA32LFence:
+      __ lfence();
+      break;
+    case kSSEFloat32Cmp:
+      __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat32Add:
+      __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat32Sub:
+      __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat32Mul:
+      __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat32Div:
+      __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kSSEFloat32Sqrt:
+      __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat32Abs: {
+      // TODO(bmeurer): Use 128-bit constants.
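+      // All-ones shifted right by 33 leaves 0x7fffffff in the low word of
+      // each qword; and'ing clears the sign bit of the scalar float in
+      // lane 0.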
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psrlq(tmp, 33);
+      __ andps(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat32Neg: {
+      // TODO(bmeurer): Use 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psllq(tmp, 31);
+      __ xorps(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat32Round: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kSSEFloat64Cmp:
+      __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat64Add:
+      __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat64Sub:
+      __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat64Mul:
+      __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      break;
+    case kSSEFloat64Div:
+      __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kSSEFloat32Max: {
+      Label compare_swap, done_compare;
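+      // ucomiss raises PF for NaN operands (handled out of line); above
+      // keeps the left operand, below takes the right one, and on equality
+      // the sign bit of the left operand (movmskps bit 0) picks +0 over -0.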
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(above, &done_compare, Label::kNear);
+      __ j(below, &compare_swap, Label::kNear);
+      __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
+      __ test(i.TempRegister(0), Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+
+    case kSSEFloat64Max: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(above, &done_compare, Label::kNear);
+      __ j(below, &compare_swap, Label::kNear);
+      __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
+      __ test(i.TempRegister(0), Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat32Min: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(below, &done_compare, Label::kNear);
+      __ j(above, &compare_swap, Label::kNear);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movss(kScratchDoubleReg, i.InputOperand(1));
+        __ movmskps(i.TempRegister(0), kScratchDoubleReg);
+      }
+      __ test(i.TempRegister(0), Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat64Min: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(below, &done_compare, Label::kNear);
+      __ j(above, &compare_swap, Label::kNear);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movsd(kScratchDoubleReg, i.InputOperand(1));
+        __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
+      }
+      __ test(i.TempRegister(0), Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat64Mod: {
+      Register tmp = i.TempRegister(1);
+      __ mov(tmp, esp);
+      __ AllocateStackSpace(kDoubleSize);
+      __ and_(esp, -8);  // Align to an 8-byte boundary.
+      // Move values to st(0) and st(1).
+      __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
+      __ fld_d(Operand(esp, 0));
+      __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+      __ fld_d(Operand(esp, 0));
+      // Loop while fprem isn't done.
+      Label mod_loop;
+      __ bind(&mod_loop);
+      // This instruction traps on all kinds of inputs, but we are assuming the
+      // floating point control word is set to ignore them all.
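+      // fprem produces a partial remainder and keeps the C2 status flag set
+      // while further reduction is needed; fnstsw_ax/sahf map C2 to PF,
+      // which drives the loop-back branch.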
+      __ fprem();
+      // fnstsw_ax clobbers eax.
+      DCHECK_EQ(eax, i.TempRegister(0));
+      __ fnstsw_ax();
+      __ sahf();
+      __ j(parity_even, &mod_loop);
+      // Move output to stack and clean up.
+      __ fstp(1);
+      __ fstp_d(Operand(esp, 0));
+      __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
+      __ mov(esp, tmp);
+      break;
+    }
+    case kSSEFloat64Abs: {
+      // TODO(bmeurer): Use 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psrlq(tmp, 1);
+      __ andpd(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat64Neg: {
+      // TODO(bmeurer): Use 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psllq(tmp, 63);
+      __ xorpd(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat64Sqrt:
+      __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat64Round: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kSSEFloat32ToFloat64:
+      __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat64ToFloat32:
+      __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat32ToInt32:
+      __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat32ToUint32:
+      __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
+                   i.TempSimd128Register(0));
+      break;
+    case kSSEFloat64ToInt32:
+      __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
+      break;
+    case kSSEFloat64ToUint32:
+      __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
+                   i.TempSimd128Register(0));
+      break;
+    case kSSEInt32ToFloat32:
+      __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEUint32ToFloat32:
+      __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
+                  i.TempRegister(0));
+      break;
+    case kSSEInt32ToFloat64:
+      __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kSSEUint32ToFloat64:
+      __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
+                  i.TempRegister(0));
+      break;
+    case kSSEFloat64ExtractLowWord32:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ mov(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kSSEFloat64ExtractHighWord32:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
+      } else {
+        __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
+      }
+      break;
+    case kSSEFloat64InsertLowWord32:
+      __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
+      break;
+    case kSSEFloat64InsertHighWord32:
+      __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
+      break;
+    case kSSEFloat64LoadLowWord32:
+      __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
+      break;
+    case kAVXFloat32Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat32Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat32Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat32Div: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    }
+    case kAVXFloat64Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat64Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat64Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kAVXFloat64Div: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputOperand(1));
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    }
+    case kAVXFloat32Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psrlq(tmp, 33);
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      break;
+    }
+    case kAVXFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psllq(tmp, 31);
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      break;
+    }
+    case kAVXFloat64Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psrlq(tmp, 1);
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      break;
+    }
+    case kAVXFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ pcmpeqd(tmp, tmp);
+      __ psllq(tmp, 63);
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      break;
+    }
+    case kSSEFloat64SilenceNaN:
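+      // Subtracting +0.0 leaves ordinary values unchanged but quiets any
+      // signaling NaN, canonicalizing the stored bit pattern.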
+      __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
+      __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
+      break;
+    case kIA32Movsxbl:
+      ASSEMBLE_MOVX(movsx_b);
+      break;
+    case kIA32Movzxbl:
+      ASSEMBLE_MOVX(movzx_b);
+      break;
+    case kIA32Movb: {
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      if (HasImmediateInput(instr, index)) {
+        __ mov_b(operand, i.InputInt8(index));
+      } else {
+        __ mov_b(operand, i.InputRegister(index));
+      }
+      break;
+    }
+    case kIA32Movsxwl:
+      ASSEMBLE_MOVX(movsx_w);
+      break;
+    case kIA32Movzxwl:
+      ASSEMBLE_MOVX(movzx_w);
+      break;
+    case kIA32Movw: {
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      if (HasImmediateInput(instr, index)) {
+        __ mov_w(operand, i.InputInt16(index));
+      } else {
+        __ mov_w(operand, i.InputRegister(index));
+      }
+      break;
+    }
+    case kIA32Movl:
+      if (instr->HasOutput()) {
+        __ mov(i.OutputRegister(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        if (HasImmediateInput(instr, index)) {
+          __ mov(operand, i.InputImmediate(index));
+        } else {
+          __ mov(operand, i.InputRegister(index));
+        }
+      }
+      break;
+    case kIA32Movsd:
+      if (instr->HasOutput()) {
+        __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ movsd(operand, i.InputDoubleRegister(index));
+      }
+      break;
+    case kIA32Movss:
+      if (instr->HasOutput()) {
+        __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ movss(operand, i.InputDoubleRegister(index));
+      }
+      break;
+    case kIA32Movdqu:
+      if (instr->HasOutput()) {
+        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ Movdqu(operand, i.InputSimd128Register(index));
+      }
+      break;
+    case kIA32BitcastFI:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ mov(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kIA32BitcastIF:
+      if (HasRegisterInput(instr, 0)) {
+        __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kIA32Lea: {
+      AddressingMode mode = AddressingModeField::decode(instr->opcode());
+      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
+      // and addressing mode just happen to work out. The "addl"/"subl" forms
+      // in these cases are faster based on measurements.
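+      // For example, `lea eax, [eax + 4]` becomes `add eax, 4` and
+      // `lea eax, [eax + eax]` becomes `shl eax, 1`.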
+      if (mode == kMode_MI) {
+        __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
+      } else if (i.InputRegister(0) == i.OutputRegister()) {
+        if (mode == kMode_MRI) {
+          int32_t constant_summand = i.InputInt32(1);
+          if (constant_summand > 0) {
+            __ add(i.OutputRegister(), Immediate(constant_summand));
+          } else if (constant_summand < 0) {
+            __ sub(i.OutputRegister(),
+                   Immediate(base::NegateWithWraparound(constant_summand)));
+          }
+        } else if (mode == kMode_MR1) {
+          if (i.InputRegister(1) == i.OutputRegister()) {
+            __ shl(i.OutputRegister(), 1);
+          } else {
+            __ add(i.OutputRegister(), i.InputRegister(1));
+          }
+        } else if (mode == kMode_M2) {
+          __ shl(i.OutputRegister(), 1);
+        } else if (mode == kMode_M4) {
+          __ shl(i.OutputRegister(), 2);
+        } else if (mode == kMode_M8) {
+          __ shl(i.OutputRegister(), 3);
+        } else {
+          __ lea(i.OutputRegister(), i.MemoryOperand());
+        }
+      } else if (mode == kMode_MR1 &&
+                 i.InputRegister(1) == i.OutputRegister()) {
+        __ add(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ lea(i.OutputRegister(), i.MemoryOperand());
+      }
+      break;
+    }
+    case kIA32PushFloat32:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ AllocateStackSpace(kFloatSize);
+        __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
+        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+      } else if (HasImmediateInput(instr, 0)) {
+        __ Move(kScratchDoubleReg, i.InputFloat32(0));
+        __ AllocateStackSpace(kFloatSize);
+        __ movss(Operand(esp, 0), kScratchDoubleReg);
+        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+      } else {
+        __ movss(kScratchDoubleReg, i.InputOperand(0));
+        __ AllocateStackSpace(kFloatSize);
+        __ movss(Operand(esp, 0), kScratchDoubleReg);
+        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+      }
+      break;
+    case kIA32PushFloat64:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ AllocateStackSpace(kDoubleSize);
+        __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+      } else if (HasImmediateInput(instr, 0)) {
+        __ Move(kScratchDoubleReg, i.InputDouble(0));
+        __ AllocateStackSpace(kDoubleSize);
+        __ movsd(Operand(esp, 0), kScratchDoubleReg);
+        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+      } else {
+        __ movsd(kScratchDoubleReg, i.InputOperand(0));
+        __ AllocateStackSpace(kDoubleSize);
+        __ movsd(Operand(esp, 0), kScratchDoubleReg);
+        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+      }
+      break;
+    case kIA32PushSimd128:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ AllocateStackSpace(kSimd128Size);
+        __ movups(Operand(esp, 0), i.InputSimd128Register(0));
+      } else {
+        __ movups(kScratchDoubleReg, i.InputOperand(0));
+        __ AllocateStackSpace(kSimd128Size);
+        __ movups(Operand(esp, 0), kScratchDoubleReg);
+      }
+      frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize);
+      break;
+    case kIA32Push:
+      if (HasAddressingMode(instr)) {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ push(operand);
+        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+      } else if (instr->InputAt(0)->IsFPRegister()) {
+        __ AllocateStackSpace(kFloatSize);
+        __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
+        frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
+      } else if (HasImmediateInput(instr, 0)) {
+        __ push(i.InputImmediate(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      } else {
+        __ push(i.InputOperand(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      break;
+    case kIA32Poke: {
+      int slot = MiscField::decode(instr->opcode());
+      if (HasImmediateInput(instr, 0)) {
+        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
+      } else {
+        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
+      }
+      break;
+    }
+    case kIA32Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
+        }
+      } else {
+        __ mov(i.OutputRegister(), Operand(ebp, offset));
+      }
+      break;
+    }
+    case kSSEF64x2Splat: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      __ shufpd(dst, dst, 0x0);
+      break;
+    }
+    case kAVXF64x2Splat: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src = i.InputDoubleRegister(0);
+      __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
+      break;
+    }
+    case kSSEF64x2ExtractLane: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputDoubleRegister();
+      int8_t lane = i.InputInt8(1);
+      if (lane != 0) {
+        DCHECK_LT(lane, 2);
+        __ shufpd(dst, dst, lane);
+      }
+      break;
+    }
+    case kAVXF64x2ExtractLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputDoubleRegister();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1);
+      if (lane == 0) {
+        if (dst != src) __ vmovapd(dst, src);
+      } else {
+        DCHECK_LT(lane, 2);
+        __ vshufpd(dst, src, src, lane);
+      }
+      break;
+    }
+    case kSSEF64x2ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      int8_t lane = i.InputInt8(1);
+      DoubleRegister rep = i.InputDoubleRegister(2);
+
+      // insertps takes a mask which contains (high to low):
+      // - 2 bits specifying the source float element to copy
+      // - 2 bits specifying the destination float element to write to
+      // - 4 bits specifying which elements of the destination to zero
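+      // A double covers two float slots, so f64 lane 0 is written via float
+      // slots 0 and 1 (masks 0b00000000 / 0b01010000) and lane 1 via slots
+      // 2 and 3.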
+      DCHECK_LT(lane, 2);
+      if (lane == 0) {
+        __ insertps(dst, rep, 0b00000000);
+        __ insertps(dst, rep, 0b01010000);
+      } else {
+        __ insertps(dst, rep, 0b00100000);
+        __ insertps(dst, rep, 0b01110000);
+      }
+      break;
+    }
+    case kAVXF64x2ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1);
+      DoubleRegister rep = i.InputDoubleRegister(2);
+      DCHECK_NE(dst, rep);
+
+      DCHECK_LT(lane, 2);
+      if (lane == 0) {
+        __ vinsertps(dst, src, rep, 0b00000000);
+        __ vinsertps(dst, dst, rep, 0b01010000);
+      } else {
+        __ vinsertps(dst, src, rep, 0b00100000);
+        __ vinsertps(dst, dst, rep, 0b01110000);
+      }
+      break;
+    }
+    case kIA32F64x2Sqrt: {
+      __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32F64x2Add: {
+      __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Sub: {
+      __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Mul: {
+      __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Div: {
+      __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Min: {
+      Operand src1 = i.InputOperand(1);
+      XMMRegister dst = i.OutputSimd128Register(),
+                  src = i.InputSimd128Register(0),
+                  tmp = i.TempSimd128Register(0);
+      // The minpd instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minpd in both orders, merge the results, and adjust.
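+      // (minpd returns its second operand when either input is NaN and when
+      // both inputs are zero, so a single ordering would drop NaNs from the
+      // first operand and the sign of -0.)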
+      __ Movupd(tmp, src1);
+      __ Minpd(tmp, tmp, src);
+      __ Minpd(dst, src, src1);
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ Orpd(tmp, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ Cmpunordpd(dst, dst, tmp);
+      __ Orpd(tmp, dst);
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, tmp);
+      break;
+    }
+    case kIA32F64x2Max: {
+      Operand src1 = i.InputOperand(1);
+      XMMRegister dst = i.OutputSimd128Register(),
+                  src = i.InputSimd128Register(0),
+                  tmp = i.TempSimd128Register(0);
+      // The maxpd instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform maxpd in both orders, merge the results, and adjust.
+      __ Movupd(tmp, src1);
+      __ Maxpd(tmp, tmp, src);
+      __ Maxpd(dst, src, src1);
+      // Find discrepancies.
+      __ Xorpd(dst, tmp);
+      // Propagate NaNs, which may be non-canonical.
+      __ Orpd(tmp, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ Subpd(tmp, tmp, dst);
+      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      __ Cmpunordpd(dst, dst, tmp);
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, tmp);
+      break;
+    }
+    case kIA32F64x2Eq: {
+      __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Ne: {
+      __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Lt: {
+      __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Le: {
+      __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kIA32F64x2Pmin: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Minpd(dst, dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kIA32F64x2Pmax: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Maxpd(dst, dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kIA32F64x2Round: {
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kIA32I64x2SplatI32Pair: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pinsrd(dst, i.InputRegister(0), 0);
+      __ Pinsrd(dst, i.InputOperand(1), 1);
+      __ Pshufd(dst, dst, 0x44);
+      break;
+    }
+    case kIA32I64x2ReplaceLaneI32Pair: {
+      int8_t lane = i.InputInt8(1);
+      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), lane * 2);
+      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(3), lane * 2 + 1);
+      break;
+    }
+    case kIA32I64x2Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      __ Pxor(dst, dst);
+      __ Psubq(dst, src);
+      break;
+    }
+    case kIA32I64x2Shl: {
+      ASSEMBLE_SIMD_SHIFT(Psllq, 6);
+      break;
+    }
+    case kIA32I64x2ShrS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      XMMRegister tmp = i.TempSimd128Register(0);
+      XMMRegister tmp2 = i.TempSimd128Register(1);
+      Operand shift = i.InputOperand(1);
+
+      // Take shift value modulo 64.
+      __ and_(shift, Immediate(63));
+      __ Movd(tmp, shift);
+
+      // Set up a mask [0x80000000,0,0x80000000,0].
+      __ Pcmpeqb(tmp2, tmp2);
+      __ Psllq(tmp2, tmp2, 63);
+
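+      // There is no 64-bit arithmetic shift in SSE, so use the identity
+      //   x >>s n == ((x >>u n) ^ m) - m  with  m = 0x8000000000000000 >>u n,
+      // which sign-extends from bit (63 - n).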
+      __ Psrlq(tmp2, tmp2, tmp);
+      __ Psrlq(dst, src, tmp);
+      __ Pxor(dst, tmp2);
+      __ Psubq(dst, tmp2);
+      break;
+    }
+    case kIA32I64x2Add: {
+      __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32I64x2Sub: {
+      __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32I64x2Mul: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister left = i.InputSimd128Register(0);
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp1 = i.TempSimd128Register(0);
+      XMMRegister tmp2 = i.TempSimd128Register(1);
+
+      __ Movaps(tmp1, left);
+      __ Movaps(tmp2, right);
+
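+      // Schoolbook decomposition: with left = {lh:ll} and right = {rh:rl},
+      // left * right mod 2^64 == ((lh*rl + ll*rh) << 32) + ll*rl. The first
+      // two Pmuludq form the cross terms, the last one the low product.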
+      // Multiply high dword of each qword of left with right.
+      __ Psrlq(tmp1, 32);
+      __ Pmuludq(tmp1, tmp1, right);
+
+      // Multiply high dword of each qword of right with left.
+      __ Psrlq(tmp2, 32);
+      __ Pmuludq(tmp2, tmp2, left);
+
+      __ Paddq(tmp2, tmp2, tmp1);
+      __ Psllq(tmp2, tmp2, 32);
+
+      __ Pmuludq(dst, left, right);
+      __ Paddq(dst, dst, tmp2);
+      break;
+    }
+    case kIA32I64x2ShrU: {
+      ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
+      break;
+    }
+    case kSSEF32x4Splat: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      __ shufps(dst, dst, 0x0);
+      break;
+    }
+    case kAVXF32x4Splat: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src = i.InputFloatRegister(0);
+      __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
+      break;
+    }
+    case kSSEF32x4ExtractLane: {
+      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      XMMRegister dst = i.OutputFloatRegister();
+      int8_t lane = i.InputInt8(1);
+      if (lane != 0) {
+        DCHECK_LT(lane, 4);
+        __ shufps(dst, dst, lane);
+      }
+      break;
+    }
+    case kAVXF32x4ExtractLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputFloatRegister();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1);
+      if (lane == 0) {
+        if (dst != src) __ vmovaps(dst, src);
+      } else {
+        DCHECK_LT(lane, 4);
+        __ vshufps(dst, src, src, lane);
+      }
+      break;
+    }
+    case kSSEF32x4ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
+                  i.InputInt8(1) << 4);
+      break;
+    }
+    case kAVXF32x4ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputOperand(2), i.InputInt8(1) << 4);
+      break;
+    }
+    case kIA32F32x4SConvertI32x4: {
+      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kSSEF32x4UConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
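+      // cvtdq2ps is a signed conversion, so lanes with the top bit set would
+      // convert incorrectly. Split each lane into its low 16 bits (converted
+      // exactly) and the remaining high part (halved, converted, doubled),
+      // then add the two pieces.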
+      __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
+      __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
+      __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
+      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
+      __ psrld(dst, 1);                  // divide by 2 to fit the signed range
+      __ cvtdq2ps(dst, dst);             // convert hi exactly
+      __ addps(dst, dst);                // double hi, exactly
+      __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
+      break;
+    }
+    case kAVXF32x4UConvertI32x4: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);  // zeros
+      __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
+                  0x55);                                   // get lo 16 bits
+      __ vpsubd(dst, src, kScratchDoubleReg);              // get hi 16 bits
+      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
+      __ vpsrld(dst, dst, 1);    // divide by 2 to fit the signed range
+      __ vcvtdq2ps(dst, dst);    // convert hi exactly
+      __ vaddps(dst, dst, dst);  // double hi, exactly
+      __ vaddps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
+      break;
+    }
+    case kSSEF32x4Abs: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(i.InputSimd128Register(0), dst);
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psrld(kScratchDoubleReg, 1);
+      __ andps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXF32x4Abs: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
+      __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
+                i.InputOperand(0));
+      break;
+    }
+    case kSSEF32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pslld(kScratchDoubleReg, 31);
+      __ xorps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXF32x4Neg: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
+      __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
+                i.InputOperand(0));
+      break;
+    }
+    case kSSEF32x4Sqrt: {
+      __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kAVXF32x4Sqrt: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32F32x4RecipApprox: {
+      __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32F32x4RecipSqrtApprox: {
+      __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kSSEF32x4Add: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ addps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4AddHoriz: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE3);
+      __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4AddHoriz: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Sub: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ subps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Mul: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Div: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ divps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Div: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vdivps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Min: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The minps instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minps in both orders, merge the results, and adjust.
+      __ movaps(kScratchDoubleReg, src1);
+      __ minps(kScratchDoubleReg, dst);
+      __ minps(dst, src1);
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ orps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ cmpps(dst, kScratchDoubleReg, 3);
+      __ orps(kScratchDoubleReg, dst);
+      __ psrld(dst, 10);
+      __ andnps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXF32x4Min: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src0 = i.InputSimd128Register(0);
+      Operand src1 = i.InputOperand(1);
+      // See comment above for correction of minps.
+      __ movups(kScratchDoubleReg, src1);
+      __ vminps(kScratchDoubleReg, kScratchDoubleReg, src0);
+      __ vminps(dst, src0, src1);
+      __ vorps(dst, dst, kScratchDoubleReg);
+      __ vcmpneqps(kScratchDoubleReg, dst, dst);
+      __ vorps(dst, dst, kScratchDoubleReg);
+      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
+      __ vandnps(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kSSEF32x4Max: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The maxps instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform maxps in both orders, merge the results, and adjust.
+      __ movaps(kScratchDoubleReg, src1);
+      __ maxps(kScratchDoubleReg, dst);
+      __ maxps(dst, src1);
+      // Find discrepancies.
+      __ xorps(dst, kScratchDoubleReg);
+      // Propagate NaNs, which may be non-canonical.
+      __ orps(kScratchDoubleReg, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ subps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by clearing the payload.
+      __ cmpps(dst, kScratchDoubleReg, 3);
+      __ psrld(dst, 10);
+      __ andnps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXF32x4Max: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src0 = i.InputSimd128Register(0);
+      Operand src1 = i.InputOperand(1);
+      // See comment above for correction of maxps.
+      __ vmovups(kScratchDoubleReg, src1);
+      __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, src0);
+      __ vmaxps(dst, src0, src1);
+      __ vxorps(dst, dst, kScratchDoubleReg);
+      __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst);
+      __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst);
+      __ vcmpneqps(dst, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsrld(dst, dst, 10);
+      __ vandnps(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSEF32x4Eq: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Eq: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Ne: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Ne: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Lt: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Lt: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEF32x4Le: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXF32x4Le: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kIA32F32x4Pmin: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Minps(dst, dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kIA32F32x4Pmax: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Maxps(dst, dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kIA32F32x4Round: {
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kIA32I32x4Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Movd(dst, i.InputOperand(0));
+      __ Pshufd(dst, dst, 0x0);
+      break;
+    }
+    case kIA32I32x4ExtractLane: {
+      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
+      break;
+    }
+    case kSSEI32x4ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kAVXI32x4ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kSSEI32x4SConvertF32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
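+      // cvttps2dq returns 0x80000000 for NaN and out-of-range lanes; the
+      // masking below forces NaN lanes to 0 and flips positive-overflow
+      // lanes from 0x80000000 to 0x7FFFFFFF.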
+      // NaN->0
+      __ movaps(kScratchDoubleReg, dst);
+      __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
+      __ pand(dst, kScratchDoubleReg);
+      // Set top bit if >= 0 (but not -0.0!)
+      __ pxor(kScratchDoubleReg, dst);
+      // Convert
+      __ cvttps2dq(dst, dst);
+      // Set top bit if >=0 is now < 0
+      __ pand(kScratchDoubleReg, dst);
+      __ psrad(kScratchDoubleReg, 31);
+      // Set positive overflow lanes to 0x7FFFFFFF
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXI32x4SConvertF32x4: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      // NaN->0
+      __ vcmpeqps(kScratchDoubleReg, src, src);
+      __ vpand(dst, src, kScratchDoubleReg);
+      // Set top bit if >= 0 (but not -0.0!)
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
+      // Convert
+      __ vcvttps2dq(dst, dst);
+      // Set top bit if >=0 is now < 0
+      __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
+      __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
+      // Set positive overflow lanes to 0x7FFFFFFF
+      __ vpxor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kIA32I32x4SConvertI16x8Low: {
+      __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32I32x4SConvertI16x8High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputOperand(0), 8);
+      __ Pmovsxwd(dst, dst);
+      break;
+    }
+    case kIA32I32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      if (src.is_reg(dst)) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignd(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubd(dst, src);
+      }
+      break;
+    }
+    case kIA32I32x4Shl: {
+      ASSEMBLE_SIMD_SHIFT(Pslld, 5);
+      break;
+    }
+    case kIA32I32x4ShrS: {
+      ASSEMBLE_SIMD_SHIFT(Psrad, 5);
+      break;
+    }
+    case kSSEI32x4Add: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4AddHoriz: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4AddHoriz: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4Sub: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4Mul: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4MinS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4MinS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4MaxS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4MaxS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4Eq: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4Eq: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4Ne: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kAVXI32x4Ne: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+               kScratchDoubleReg);
+      break;
+    }
+    case kSSEI32x4GtS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4GtS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4GeS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminsd(dst, src);
+      __ pcmpeqd(dst, src);
+      break;
+    }
+    case kAVXI32x4GeS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminsd(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kSSEI32x4UConvertF32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister tmp = i.TempSimd128Register(0);
+      // NaN -> 0, negative -> 0
+      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ maxps(dst, kScratchDoubleReg);
+      // scratch: float representation of max_signed
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
+      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
+      // tmp: convert (src-max_signed).
+      // Positive overflow lanes -> 0x7FFFFFFF
+      // Negative lanes -> 0
+      __ movaps(tmp, dst);
+      __ subps(tmp, kScratchDoubleReg);
+      __ cmpleps(kScratchDoubleReg, tmp);
+      __ cvttps2dq(tmp, tmp);
+      __ pxor(tmp, kScratchDoubleReg);
+      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ pmaxsd(tmp, kScratchDoubleReg);
+      // Convert. Overflow lanes above max_signed will be 0x80000000.
+      __ cvttps2dq(dst, dst);
+      // Add (src-max_signed) for overflow lanes.
+      __ paddd(dst, tmp);
+      break;
+    }
+    case kAVXI32x4UConvertF32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister tmp = i.TempSimd128Register(0);
+      // NaN -> 0, negative -> 0
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vmaxps(dst, dst, kScratchDoubleReg);
+      // scratch: float representation of max_signed
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
+      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
+      // tmp: convert (src-max_signed).
+      // Positive overflow lanes -> 0x7FFFFFFF
+      // Negative lanes -> 0
+      __ vsubps(tmp, dst, kScratchDoubleReg);
+      __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
+      __ vcvttps2dq(tmp, tmp);
+      __ vpxor(tmp, tmp, kScratchDoubleReg);
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
+      // Convert. Overflow lanes above max_signed will be 0x80000000.
+      __ vcvttps2dq(dst, dst);
+      // Add (src-max_signed) for overflow lanes.
+      __ vpaddd(dst, dst, tmp);
+      break;
+    }
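+    // The I32x4UConvertF32x4 cases above build an unsigned float->int32
+    // conversion from the signed cvttps2dq: lanes below 2^31 convert
+    // directly, lanes in [2^31, 2^32) are converted as (src - 2^31) and added
+    // to the 0x80000000 produced for dst, and lanes >= 2^32 end up as
+    // 0x80000000 + 0x7FFFFFFF = 0xFFFFFFFF. The scratch constant 0x4f000000
+    // is the float encoding of 2^31 (0x7fffffff rounds up to 2^31 when
+    // converted to float). Illustrative lane: src = 3.0e9f
+    //   cvttps2dq(src)                -> 0x80000000 (out of signed range)
+    //   tmp = src - 2^31 = 852516352.0 -> converts to 852516352
+    //   dst + tmp = 2147483648 + 852516352 = 3000000000 = 0xB2D05E00.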
+    case kIA32I32x4UConvertI16x8Low: {
+      __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32I32x4UConvertI16x8High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputOperand(0), 8);
+      __ Pmovzxwd(dst, dst);
+      break;
+    }
+    case kIA32I32x4ShrU: {
+      ASSEMBLE_SIMD_SHIFT(Psrld, 5);
+      break;
+    }
+    case kSSEI32x4MinU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4MinU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4MaxU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI32x4MaxU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI32x4GtU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pmaxud(dst, src);
+      __ pcmpeqd(dst, src);
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXI32x4GtU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpmaxud(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSEI32x4GeU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminud(dst, src);
+      __ pcmpeqd(dst, src);
+      break;
+    }
+    case kAVXI32x4GeU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminud(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kIA32I32x4Abs: {
+      __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kIA32I32x4BitMask: {
+      __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
+      break;
+    }
+    case kIA32I32x4DotI16x8S: {
+      __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kIA32I16x8Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Movd(dst, i.InputOperand(0));
+      __ Pshuflw(dst, dst, 0x0);
+      __ Pshufd(dst, dst, 0x0);
+      break;
+    }
+    case kIA32I16x8ExtractLaneU: {
+      Register dst = i.OutputRegister();
+      __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
+      break;
+    }
+    case kIA32I16x8ExtractLaneS: {
+      Register dst = i.OutputRegister();
+      __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
+      __ movsx_w(dst, dst);
+      break;
+    }
+    case kSSEI16x8ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kAVXI16x8ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kIA32I16x8SConvertI8x16Low: {
+      __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32I16x8SConvertI8x16High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputOperand(0), 8);
+      __ Pmovsxbw(dst, dst);
+      break;
+    }
+    case kIA32I16x8Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      if (src.is_reg(dst)) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignw(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubw(dst, src);
+      }
+      break;
+    }
+    case kIA32I16x8Shl: {
+      ASSEMBLE_SIMD_SHIFT(Psllw, 4);
+      break;
+    }
+    case kIA32I16x8ShrS: {
+      ASSEMBLE_SIMD_SHIFT(Psraw, 4);
+      break;
+    }
+    case kSSEI16x8SConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kAVXI16x8SConvertI32x4: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8Add: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8AddSatS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8AddSatS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8AddHoriz: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8AddHoriz: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8Sub: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8SubSatS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8SubSatS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8Mul: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8MinS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8MinS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8MaxS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8MaxS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8Eq: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8Eq: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8Ne: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
+      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kAVXI16x8Ne: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+               kScratchDoubleReg);
+      break;
+    }
+    case kSSEI16x8GtS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8GtS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8GeS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminsw(dst, src);
+      __ pcmpeqw(dst, src);
+      break;
+    }
+    case kAVXI16x8GeS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminsw(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kIA32I16x8UConvertI8x16Low: {
+      __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
+      break;
+    }
+    case kIA32I16x8UConvertI8x16High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputOperand(0), 8);
+      __ Pmovzxbw(dst, dst);
+      break;
+    }
+    case kIA32I16x8ShrU: {
+      ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
+      break;
+    }
+    case kSSEI16x8UConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kAVXI16x8UConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ vpackusdw(dst, dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kSSEI16x8AddSatU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8AddSatU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8SubSatU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8SubSatU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8MinU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8MinU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8MaxU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI16x8MaxU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI16x8GtU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pmaxuw(dst, src);
+      __ pcmpeqw(dst, src);
+      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXI16x8GtU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpmaxuw(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqw(dst, kScratchDoubleReg, src2);
+      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSEI16x8GeU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminuw(dst, src);
+      __ pcmpeqw(dst, src);
+      break;
+    }
+    case kAVXI16x8GeU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminuw(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kIA32I16x8RoundingAverageU: {
+      __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32I16x8Abs: {
+      __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kIA32I16x8BitMask: {
+      Register dst = i.OutputRegister();
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Packsswb(tmp, i.InputSimd128Register(0));
+      __ Pmovmskb(dst, tmp);
+      __ shr(dst, 8);
+      break;
+    }
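+    // kIA32I16x8BitMask above: x86 has pmovmskb / movmskps / movmskpd but no
+    // 16-bit variant, so the sign bits of the eight words are first squeezed
+    // into byte sign bits. packsswb(tmp, src) writes the signed-saturated
+    // words of src into the upper 8 bytes of tmp (saturation preserves each
+    // word's sign bit), pmovmskb then collects all 16 byte sign bits, and the
+    // shr by 8 drops the low 8 bits, which correspond to tmp's own
+    // (unspecified) low words.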
+    case kIA32I8x16Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Movd(dst, i.InputOperand(0));
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pshufb(dst, kScratchDoubleReg);
+      break;
+    }
+    case kIA32I8x16ExtractLaneU: {
+      Register dst = i.OutputRegister();
+      __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
+      break;
+    }
+    case kIA32I8x16ExtractLaneS: {
+      Register dst = i.OutputRegister();
+      __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
+      __ movsx_b(dst, dst);
+      break;
+    }
+    case kSSEI8x16ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kAVXI8x16ReplaceLane: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(2), i.InputInt8(1));
+      break;
+    }
+    case kSSEI8x16SConvertI16x8: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16SConvertI16x8: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputOperand(1));
+      break;
+    }
+    case kIA32I8x16Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      if (src.is_reg(dst)) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignb(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubb(dst, src);
+      }
+      break;
+    }
+    case kIA32I8x16Shl: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      Register tmp = i.ToRegister(instr->TempAt(0));
+      XMMRegister tmp_simd = i.TempSimd128Register(1);
+
+      if (HasImmediateInput(instr, 1)) {
+        // Perform 16-bit shift, then mask away low bits.
+        uint8_t shift = i.InputInt3(1);
+        __ Psllw(dst, dst, byte{shift});
+
+        uint8_t bmask = static_cast<uint8_t>(0xff << shift);
+        uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+        __ mov(tmp, mask);
+        __ Movd(tmp_simd, tmp);
+        __ Pshufd(tmp_simd, tmp_simd, 0);
+        __ Pand(dst, tmp_simd);
+      } else {
+        // Take shift value modulo 8.
+        __ mov(tmp, i.InputRegister(1));
+        __ and_(tmp, 7);
+        // Mask off the unwanted bits before word-shifting.
+        __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+        __ add(tmp, Immediate(8));
+        __ Movd(tmp_simd, tmp);
+        __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
+        __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
+        __ Pand(dst, kScratchDoubleReg);
+        // TODO(zhin): sub here to avoid asking for another temporary register;
+        // examine codegen for the other i8x16 shifts, which use fewer
+        // instructions.
+        __ sub(tmp, Immediate(8));
+        __ Movd(tmp_simd, tmp);
+        __ Psllw(dst, dst, tmp_simd);
+      }
+      break;
+    }
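+    // kIA32I8x16Shl above: SSE/AVX have no per-byte shift, so the byte shift
+    // is synthesized from a 16-bit shift plus a mask. For an immediate shift
+    // n, psllw lets the low byte of each word spill its top n bits into the
+    // neighbouring byte, and ANDing every byte with (0xff << n) clears
+    // exactly those spilled bits. E.g. n = 3: mask = 0xF8F8F8F8; a byte 0x96
+    // shifts to 0xB0, which the 0xF8 mask leaves unchanged, while the three
+    // bits spilled into the next byte are cleared. The register-shift path
+    // instead clears the top n bits of every byte first (0xFFFF >> (n + 8)
+    // per word, packed back to bytes) so the later word shift cannot
+    // contaminate the neighbouring byte.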
+    case kIA32I8x16ShrS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (HasImmediateInput(instr, 1)) {
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        uint8_t shift = i.InputInt3(1) + 8;
+        __ Psraw(kScratchDoubleReg, shift);
+        __ Psraw(dst, shift);
+        __ Packsswb(dst, kScratchDoubleReg);
+      } else {
+        Register tmp = i.ToRegister(instr->TempAt(0));
+        XMMRegister tmp_simd = i.TempSimd128Register(1);
+        // Unpack the bytes into words, do arithmetic shifts, and repack.
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        __ mov(tmp, i.InputRegister(1));
+        // Take shift value modulo 8.
+        __ and_(tmp, 7);
+        __ add(tmp, Immediate(8));
+        __ Movd(tmp_simd, tmp);
+        __ Psraw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
+        __ Psraw(dst, dst, tmp_simd);
+        __ Packsswb(dst, kScratchDoubleReg);
+      }
+      break;
+    }
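+    // kIA32I8x16ShrS above: there is no per-byte arithmetic shift either, so
+    // each byte is widened to a word, shifted, and repacked. punpcklbw /
+    // punpckhbw place every source byte b in the high byte of a word, psraw
+    // by (n + 8) then yields the sign-extended value of (b >> n) -- the low
+    // byte of each word is shifted out entirely, so its contents don't
+    // matter -- and packsswb repacks without ever saturating, because an
+    // arithmetic shift cannot leave the int8 range. E.g. b = 0x90 (-112),
+    // n = 3: the word 0x90xx >> 11 (arithmetic) is 0xFFF2, which packs back
+    // to 0xF2 = -14 = -112 >> 3.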
+    case kSSEI8x16Add: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16Add: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16AddSatS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16AddSatS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16Sub: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16Sub: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16SubSatS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16SubSatS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16Mul: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
+
+      // I16x8 view of I8x16
+      // left = AAaa AAaa ... AAaa AAaa
+      // right= BBbb BBbb ... BBbb BBbb
+
+      // t = 00AA 00AA ... 00AA 00AA
+      // s = 00BB 00BB ... 00BB 00BB
+      __ movaps(tmp, dst);
+      __ movaps(kScratchDoubleReg, right);
+      __ psrlw(tmp, 8);
+      __ psrlw(kScratchDoubleReg, 8);
+      // dst = left * 256
+      __ psllw(dst, 8);
+
+      // t = I16x8Mul(t, s)
+      //    => __PP __PP ...  __PP  __PP
+      __ pmullw(tmp, kScratchDoubleReg);
+      // dst = I16x8Mul(left * 256, right)
+      //    => pp__ pp__ ...  pp__  pp__
+      __ pmullw(dst, right);
+
+      // t = I16x8Shl(t, 8)
+      //    => PP00 PP00 ...  PP00  PP00
+      __ psllw(tmp, 8);
+
+      // dst = I16x8Shr(dst, 8)
+      //    => 00pp 00pp ...  00pp  00pp
+      __ psrlw(dst, 8);
+
+      // dst = I16x8Or(dst, t)
+      //    => PPpp PPpp ...  PPpp  PPpp
+      __ por(dst, tmp);
+      break;
+    }
+    case kAVXI8x16Mul: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister left = i.InputSimd128Register(0);
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
+
+      // I16x8 view of I8x16
+      // left = AAaa AAaa ... AAaa AAaa
+      // right= BBbb BBbb ... BBbb BBbb
+
+      // t = 00AA 00AA ... 00AA 00AA
+      // s = 00BB 00BB ... 00BB 00BB
+      __ vpsrlw(tmp, left, 8);
+      __ vpsrlw(kScratchDoubleReg, right, 8);
+
+      // t = I16x8Mul(t, s)
+      //    => __PP __PP ...  __PP  __PP
+      __ vpmullw(tmp, tmp, kScratchDoubleReg);
+
+      // s = left * 256
+      __ vpsllw(kScratchDoubleReg, left, 8);
+
+      // dst = I16x8Mul(left * 256, right)
+      //    => pp__ pp__ ...  pp__  pp__
+      __ vpmullw(dst, kScratchDoubleReg, right);
+
+      // dst = I16x8Shr(dst, 8)
+      //    => 00pp 00pp ...  00pp  00pp
+      __ vpsrlw(dst, dst, 8);
+
+      // t = I16x8Shl(t, 8)
+      //    => PP00 PP00 ...  PP00  PP00
+      __ vpsllw(tmp, tmp, 8);
+
+      // dst = I16x8Or(dst, t)
+      //    => PPpp PPpp ...  PPpp  PPpp
+      __ vpor(dst, dst, tmp);
+      break;
+    }
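+    // The I8x16Mul cases above: there is no pmullb, so the sixteen byte
+    // products are computed as two interleaved 16-bit multiplies. The odd
+    // bytes are isolated with psrlw 8 and multiplied with pmullw (the low 8
+    // product bits end up in the low byte of each word and are shifted back
+    // up by 8); the even bytes are pre-shifted into the high byte, so after
+    // pmullw the low 8 product bits sit in the high byte and a psrlw 8 moves
+    // them back down. ORing the two halves yields every byte product modulo
+    // 256. E.g. even bytes a = 0x12, b = 0x34: the word 0x1200 times 0xBB34
+    // is 0xA800 mod 2^16, and after psrlw 8 the byte is 0xA8 = (18 * 52) &
+    // 0xFF.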
+    case kSSEI8x16MinS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16MinS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16MaxS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16MaxS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16Eq: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16Eq: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16Ne: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
+      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kAVXI8x16Ne: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+               kScratchDoubleReg);
+      break;
+    }
+    case kSSEI8x16GtS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16GtS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16GeS: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminsb(dst, src);
+      __ pcmpeqb(dst, src);
+      break;
+    }
+    case kAVXI8x16GeS: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminsb(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kSSEI8x16UConvertI16x8: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ packuswb(dst, i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16UConvertI16x8: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ vpackuswb(dst, dst, i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16AddSatU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16AddSatU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16SubSatU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16SubSatU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputOperand(1));
+      break;
+    }
+    case kIA32I8x16ShrU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      Register tmp = i.ToRegister(instr->TempAt(0));
+      XMMRegister tmp_simd = i.TempSimd128Register(1);
+
+      if (HasImmediateInput(instr, 1)) {
+        // Perform 16-bit shift, then mask away high bits.
+        uint8_t shift = i.InputInt3(1);
+        __ Psrlw(dst, dst, byte{shift});
+
+        uint8_t bmask = 0xff >> shift;
+        uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+        __ mov(tmp, mask);
+        __ Movd(tmp_simd, tmp);
+        __ Pshufd(tmp_simd, tmp_simd, 0);
+        __ Pand(dst, tmp_simd);
+      } else {
+        // Unpack the bytes into words, do logical shifts, and repack.
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        __ mov(tmp, i.InputRegister(1));
+        // Take shift value modulo 8.
+        __ and_(tmp, 7);
+        __ add(tmp, Immediate(8));
+        __ Movd(tmp_simd, tmp);
+        __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
+        __ Psrlw(dst, dst, tmp_simd);
+        __ Packuswb(dst, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kSSEI8x16MinU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ pminub(dst, i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16MinU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16MaxU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXI8x16MaxU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputOperand(1));
+      break;
+    }
+    case kSSEI8x16GtU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pmaxub(dst, src);
+      __ pcmpeqb(dst, src);
+      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXI8x16GtU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpmaxub(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqb(dst, kScratchDoubleReg, src2);
+      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSEI8x16GeU: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(1);
+      __ pminub(dst, src);
+      __ pcmpeqb(dst, src);
+      break;
+    }
+    case kAVXI8x16GeU: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister src1 = i.InputSimd128Register(0);
+      Operand src2 = i.InputOperand(1);
+      __ vpminub(kScratchDoubleReg, src1, src2);
+      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
+      break;
+    }
+    case kIA32I8x16RoundingAverageU: {
+      __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kIA32I8x16Abs: {
+      __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kIA32I8x16BitMask: {
+      __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
+      break;
+    }
+    case kIA32S128Const: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Register tmp = i.TempRegister(0);
+      uint64_t low_qword = make_uint64(i.InputUint32(1), i.InputUint32(0));
+      __ Move(dst, low_qword);
+      __ Move(tmp, Immediate(i.InputUint32(2)));
+      __ Pinsrd(dst, tmp, 2);
+      __ Move(tmp, Immediate(i.InputUint32(3)));
+      __ Pinsrd(dst, tmp, 3);
+      break;
+    }
+    case kIA32S128Zero: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pxor(dst, dst);
+      break;
+    }
+    case kIA32S128AllOnes: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pcmpeqd(dst, dst);
+      break;
+    }
+    case kSSES128Not: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXS128Not: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
+      break;
+    }
+    case kSSES128And: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pand(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXS128And: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kSSES128Or: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ por(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXS128Or: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputOperand(1));
+      break;
+    }
+    case kSSES128Xor: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
+      break;
+    }
+    case kAVXS128Xor: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputOperand(1));
+      break;
+    }
+    case kSSES128Select: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      // The select mask (input 0) is already in dst.
+      XMMRegister dst = i.OutputSimd128Register();
+      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+      __ andps(dst, kScratchDoubleReg);
+      __ xorps(dst, i.InputSimd128Register(2));
+      break;
+    }
+    case kAVXS128Select: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
+                i.InputOperand(1));
+      __ vandps(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(0));
+      __ vxorps(dst, kScratchDoubleReg, i.InputSimd128Register(2));
+      break;
+    }
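+    // The S128Select cases above use the bitwise select identity
+    // dst = ((v1 ^ v2) & mask) ^ v2: where a mask bit is 1 this is
+    // (v1 ^ v2) ^ v2 = v1, and where it is 0 it is 0 ^ v2 = v2, so it matches
+    // the naive (mask & v1) | (~mask & v2) without having to materialize
+    // ~mask.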
+    case kIA32S128AndNot: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The inputs have been inverted by the instruction selector, so we can
+      // call andnps here without further modification.
+      XMMRegister src1 = i.InputSimd128Register(1);
+      __ Andnps(dst, src1);
+      break;
+    }
+    case kIA32I8x16Swizzle: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister mask = i.TempSimd128Register(0);
+
+      // Out-of-range indices should return 0. Adding 112 (0x70) with unsigned
+      // saturation leaves valid indices (0..15) below 0x80, but gives any
+      // index > 15 a set top bit, which makes pshufb zero that lane.
+      __ Move(mask, uint32_t{0x70707070});
+      __ Pshufd(mask, mask, 0x0);
+      __ Paddusb(mask, i.InputSimd128Register(1));
+      __ Pshufb(dst, mask);
+      break;
+    }
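+    // kIA32I8x16Swizzle above relies on two pshufb properties: only the low
+    // four bits of each control byte select a lane, and a set top bit forces
+    // the output byte to zero. paddusb with 0x70 keeps valid indices 0..15 in
+    // the range 0x70..0x7F (top bit clear, low nibble unchanged), while any
+    // index >= 16 lands at or above 0x80 (saturating at 0xFF), so those lanes
+    // read as zero. E.g. index 3 -> 0x73 selects byte 3; index 20 -> 0x84 and
+    // index 200 -> 0xFF both produce 0.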
+    case kIA32I8x16Shuffle: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src0 = i.InputOperand(0);
+      Register tmp = i.TempRegister(0);
+      // Prepare a 16-byte-aligned stack buffer for the shuffle control mask.
+      __ mov(tmp, esp);
+      __ and_(esp, -16);
+      if (instr->InputCount() == 5) {  // only one input operand
+        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+        for (int j = 4; j > 0; j--) {
+          uint32_t mask = i.InputUint32(j);
+          __ push(Immediate(mask));
+        }
+        __ Pshufb(dst, Operand(esp, 0));
+      } else {  // two input operands
+        DCHECK_EQ(6, instr->InputCount());
+        __ movups(kScratchDoubleReg, src0);
+        for (int j = 5; j > 1; j--) {
+          uint32_t lanes = i.InputUint32(j);
+          uint32_t mask = 0;
+          for (int k = 0; k < 32; k += 8) {
+            uint8_t lane = lanes >> k;
+            mask |= (lane < kSimd128Size ? lane : 0x80) << k;
+          }
+          __ push(Immediate(mask));
+        }
+        __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
+        Operand src1 = i.InputOperand(1);
+        if (!src1.is_reg(dst)) __ movups(dst, src1);
+        for (int j = 5; j > 1; j--) {
+          uint32_t lanes = i.InputUint32(j);
+          uint32_t mask = 0;
+          for (int k = 0; k < 32; k += 8) {
+            uint8_t lane = lanes >> k;
+            mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
+          }
+          __ push(Immediate(mask));
+        }
+        __ Pshufb(dst, Operand(esp, 0));
+        __ por(dst, kScratchDoubleReg);
+      }
+      __ mov(esp, tmp);
+      break;
+    }
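+    // kIA32I8x16Shuffle above handles the general two-input byte shuffle by
+    // building two pshufb control masks on the stack: indices < 16 select
+    // from the first input (other lanes are replaced by 0x80, which pshufb
+    // turns into zero), while indices >= 16, reduced to (lane & 0xF), select
+    // from the second input. Each input is shuffled with its own mask and the
+    // results are ORed, so every output byte comes from exactly one input.
+    // esp is rounded down with and_(esp, -16) because the masks are read as a
+    // 16-byte memory operand and the legacy SSE encoding of pshufb requires
+    // that operand to be 16-byte aligned; the original esp is kept in tmp and
+    // restored afterwards.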
+    case kIA32S128Load8Splat: {
+      __ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kIA32S128Load16Splat: {
+      __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
+      __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
+                 uint8_t{0});
+      __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
+      break;
+    }
+    case kIA32S128Load32Splat: {
+      __ Vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load64Splat: {
+      __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load8x8S: {
+      __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load8x8U: {
+      __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load16x4S: {
+      __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load16x4U: {
+      __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load32x2S: {
+      __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S128Load32x2U: {
+      __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kIA32S32x4Swizzle: {
+      DCHECK_EQ(2, instr->InputCount());
+      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
+      break;
+    }
+    case kIA32S32x4Shuffle: {
+      DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
+      int8_t shuffle = i.InputInt8(2);
+      DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
+      __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
+      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
+      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
+      break;
+    }
+    case kIA32S16x8Blend:
+      ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
+      break;
+    case kIA32S16x8HalfShuffle1: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
+      __ Pshufhw(dst, dst, i.InputInt8(2));
+      break;
+    }
+    case kIA32S16x8HalfShuffle2: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
+      __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
+      __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
+      __ Pshufhw(dst, dst, i.InputInt8(3));
+      __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
+      break;
+    }
+    case kIA32S8x16Alignr:
+      ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
+      break;
+    case kIA32S16x8Dup: {
+      XMMRegister dst = i.OutputSimd128Register();
+      Operand src = i.InputOperand(0);
+      int8_t lane = i.InputInt8(1) & 0x7;
+      int8_t lane4 = lane & 0x3;
+      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      if (lane < 4) {
+        __ Pshuflw(dst, src, half_dup);
+        __ Pshufd(dst, dst, 0);
+      } else {
+        __ Pshufhw(dst, src, half_dup);
+        __ Pshufd(dst, dst, 0xaa);
+      }
+      break;
+    }
+    case kIA32S8x16Dup: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      int8_t lane = i.InputInt8(1) & 0xf;
+      if (CpuFeatures::IsSupported(AVX)) {
+        CpuFeatureScope avx_scope(tasm(), AVX);
+        if (lane < 8) {
+          __ vpunpcklbw(dst, src, src);
+        } else {
+          __ vpunpckhbw(dst, src, src);
+        }
+      } else {
+        DCHECK_EQ(dst, src);
+        if (lane < 8) {
+          __ punpcklbw(dst, dst);
+        } else {
+          __ punpckhbw(dst, dst);
+        }
+      }
+      lane &= 0x7;
+      int8_t lane4 = lane & 0x3;
+      int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      if (lane < 4) {
+        __ Pshuflw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, 0);
+      } else {
+        __ Pshufhw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, 0xaa);
+      }
+      break;
+    }
+    case kIA32S64x2UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
+      break;
+    case kIA32S32x4UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
+      break;
+    case kIA32S16x8UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
+      break;
+    case kIA32S8x16UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
+      break;
+    case kIA32S64x2UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
+      break;
+    case kIA32S32x4UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
+      break;
+    case kIA32S16x8UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
+      break;
+    case kIA32S8x16UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
+      break;
+    case kSSES16x8UnzipHigh: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        __ movups(kScratchDoubleReg, i.InputOperand(1));
+        __ psrld(kScratchDoubleReg, 16);
+        src2 = kScratchDoubleReg;
+      }
+      __ psrld(dst, 16);
+      __ packusdw(dst, src2);
+      break;
+    }
+    case kAVXS16x8UnzipHigh: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      if (instr->InputCount() == 2) {
+        __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
+        src2 = kScratchDoubleReg;
+      }
+      __ vpsrld(dst, i.InputSimd128Register(0), 16);
+      __ vpackusdw(dst, dst, src2);
+      break;
+    }
+    case kSSES16x8UnzipLow: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
+      if (instr->InputCount() == 2) {
+        __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
+        src2 = kScratchDoubleReg;
+      }
+      __ pblendw(dst, kScratchDoubleReg, 0xaa);
+      __ packusdw(dst, src2);
+      break;
+    }
+    case kAVXS16x8UnzipLow: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      if (instr->InputCount() == 2) {
+        __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
+                    0x55);
+        src2 = kScratchDoubleReg;
+      }
+      __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
+      __ vpackusdw(dst, dst, src2);
+      break;
+    }
+    case kSSES8x16UnzipHigh: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        __ movups(kScratchDoubleReg, i.InputOperand(1));
+        __ psrlw(kScratchDoubleReg, 8);
+        src2 = kScratchDoubleReg;
+      }
+      __ psrlw(dst, 8);
+      __ packuswb(dst, src2);
+      break;
+    }
+    case kAVXS8x16UnzipHigh: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      if (instr->InputCount() == 2) {
+        __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+        src2 = kScratchDoubleReg;
+      }
+      __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+      __ vpackuswb(dst, dst, src2);
+      break;
+    }
+    case kSSES8x16UnzipLow: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        __ movups(kScratchDoubleReg, i.InputOperand(1));
+        __ psllw(kScratchDoubleReg, 8);
+        __ psrlw(kScratchDoubleReg, 8);
+        src2 = kScratchDoubleReg;
+      }
+      __ psllw(dst, 8);
+      __ psrlw(dst, 8);
+      __ packuswb(dst, src2);
+      break;
+    }
+    case kAVXS8x16UnzipLow: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      if (instr->InputCount() == 2) {
+        __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+        __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
+        src2 = kScratchDoubleReg;
+      }
+      __ vpsllw(dst, i.InputSimd128Register(0), 8);
+      __ vpsrlw(dst, dst, 8);
+      __ vpackuswb(dst, dst, src2);
+      break;
+    }
+    case kSSES8x16TransposeLow: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ psllw(dst, 8);
+      if (instr->InputCount() == 1) {
+        __ movups(kScratchDoubleReg, dst);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        __ movups(kScratchDoubleReg, i.InputOperand(1));
+        __ psllw(kScratchDoubleReg, 8);
+      }
+      __ psrlw(dst, 8);
+      __ por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXS8x16TransposeLow: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      if (instr->InputCount() == 1) {
+        __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
+        __ vpsrlw(dst, kScratchDoubleReg, 8);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+        __ vpsllw(dst, i.InputSimd128Register(0), 8);
+        __ vpsrlw(dst, dst, 8);
+      }
+      __ vpor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSES8x16TransposeHigh: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ psrlw(dst, 8);
+      if (instr->InputCount() == 1) {
+        __ movups(kScratchDoubleReg, dst);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        __ movups(kScratchDoubleReg, i.InputOperand(1));
+        __ psrlw(kScratchDoubleReg, 8);
+      }
+      __ psllw(kScratchDoubleReg, 8);
+      __ por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXS8x16TransposeHigh: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      if (instr->InputCount() == 1) {
+        __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+        __ vpsllw(kScratchDoubleReg, dst, 8);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
+        __ vpsrlw(dst, i.InputSimd128Register(0), 8);
+        __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
+      }
+      __ vpor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kSSES8x8Reverse:
+    case kSSES8x4Reverse:
+    case kSSES8x2Reverse: {
+      DCHECK_EQ(1, instr->InputCount());
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (arch_opcode != kSSES8x2Reverse) {
+        // First shuffle words into position.
+        int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
+        __ pshuflw(dst, dst, shuffle_mask);
+        __ pshufhw(dst, dst, shuffle_mask);
+      }
+      __ movaps(kScratchDoubleReg, dst);
+      __ psrlw(kScratchDoubleReg, 8);
+      __ psllw(dst, 8);
+      __ por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kAVXS8x2Reverse:
+    case kAVXS8x4Reverse:
+    case kAVXS8x8Reverse: {
+      DCHECK_EQ(1, instr->InputCount());
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = dst;
+      if (arch_opcode != kAVXS8x2Reverse) {
+        // First shuffle words into position.
+        int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
+        __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
+        __ vpshufhw(dst, dst, shuffle_mask);
+      } else {
+        src = i.InputSimd128Register(0);
+      }
+      // Reverse the bytes within each 16-bit lane.
+      __ vpsrlw(kScratchDoubleReg, src, 8);
+      __ vpsllw(dst, src, 8);
+      __ vpor(dst, dst, kScratchDoubleReg);
+      break;
+    }
+    case kIA32V32x4AnyTrue:
+    case kIA32V16x8AnyTrue:
+    case kIA32V8x16AnyTrue: {
+      Register dst = i.OutputRegister();
+      XMMRegister src = i.InputSimd128Register(0);
+      Register tmp = i.TempRegister(0);
+      __ xor_(tmp, tmp);
+      __ mov(dst, Immediate(1));
+      __ Ptest(src, src);
+      __ cmov(zero, dst, tmp);
+      break;
+    }
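+    // The AnyTrue cases above are branch-free: ptest(src, src) sets ZF only
+    // if the whole register is zero, dst is preloaded with 1 and tmp with 0,
+    // and cmov(zero, ...) overwrites dst with 0 in the all-zero case. tmp is
+    // zeroed with xor before the ptest because xor clobbers the flags.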
+    // Need to split up all the different lane structures because the
+    // comparison instruction used matters: e.g. comparing 0xff00 against
+    // zero, pcmpeqb returns 0x00ff while pcmpeqw returns 0x0000, so ptest
+    // sets ZF to 0 and 1 respectively.
+    case kIA32V32x4AllTrue:
+      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
+      break;
+    case kIA32V16x8AllTrue:
+      ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
+      break;
+    case kIA32V8x16AllTrue: {
+      ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
+      break;
+    }
+    case kIA32Word32AtomicPairLoad: {
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ movq(tmp, i.MemoryOperand());
+      __ Pextrd(i.OutputRegister(0), tmp, 0);
+      __ Pextrd(i.OutputRegister(1), tmp, 1);
+      break;
+    }
+    case kIA32Word32AtomicPairStore: {
+      Label store;
+      __ bind(&store);
+      __ mov(i.TempRegister(0), i.MemoryOperand(2));
+      __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
+      __ push(ebx);
+      frame_access_state()->IncreaseSPDelta(1);
+      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+      __ lock();
+      __ cmpxchg8b(i.MemoryOperand(2));
+      __ pop(ebx);
+      frame_access_state()->IncreaseSPDelta(-1);
+      __ j(not_equal, &store);
+      break;
+    }
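+    // xchg with a memory operand is implicitly locked, so no lock prefix is
+    // needed; the narrow variants then sign- or zero-extend the old value in
+    // place to form the 32-bit result.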
+    case kWord32AtomicExchangeInt8: {
+      __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
+      __ movsx_b(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeUint8: {
+      __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzx_b(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeInt16: {
+      __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
+      __ movsx_w(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeUint16: {
+      __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzx_w(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeWord32: {
+      __ xchg(i.InputRegister(0), i.MemoryOperand(1));
+      break;
+    }
+    case kIA32Word32AtomicPairExchange: {
+      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
+      Label exchange;
+      __ bind(&exchange);
+      __ mov(eax, i.MemoryOperand(2));
+      __ mov(edx, i.NextMemoryOperand(2));
+      __ push(ebx);
+      frame_access_state()->IncreaseSPDelta(1);
+      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+      __ lock();
+      __ cmpxchg8b(i.MemoryOperand(2));
+      __ pop(ebx);
+      frame_access_state()->IncreaseSPDelta(-1);
+      __ j(not_equal, &exchange);
+      break;
+    }
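+    // cmpxchg always leaves the old memory value in eax, so the narrow
+    // compare-exchange variants only need to sign- or zero-extend eax.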
+    case kWord32AtomicCompareExchangeInt8: {
+      __ lock();
+      __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
+      __ movsx_b(eax, eax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeUint8: {
+      __ lock();
+      __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzx_b(eax, eax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeInt16: {
+      __ lock();
+      __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
+      __ movsx_w(eax, eax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeUint16: {
+      __ lock();
+      __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzx_w(eax, eax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeWord32: {
+      __ lock();
+      __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
+      break;
+    }
+    case kIA32Word32AtomicPairCompareExchange: {
+      __ push(ebx);
+      frame_access_state()->IncreaseSPDelta(1);
+      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
+      __ lock();
+      __ cmpxchg8b(i.MemoryOperand(4));
+      __ pop(ebx);
+      frame_access_state()->IncreaseSPDelta(-1);
+      break;
+    }
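+// The macro below stamps out the Int8/Uint8/Int16/Uint16/Word32 flavours of
+// each 32-bit atomic binop. ASSEMBLE_ATOMIC_BINOP emits a retry loop built
+// from the given mov and cmpxchg at the matching operand width, and the
+// narrow cases then extend the old value left in eax.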
+#define ATOMIC_BINOP_CASE(op, inst)                \
+  case kWord32Atomic##op##Int8: {                  \
+    ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
+    __ movsx_b(eax, eax);                          \
+    break;                                         \
+  }                                                \
+  case kWord32Atomic##op##Uint8: {                 \
+    ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
+    __ movzx_b(eax, eax);                          \
+    break;                                         \
+  }                                                \
+  case kWord32Atomic##op##Int16: {                 \
+    ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
+    __ movsx_w(eax, eax);                          \
+    break;                                         \
+  }                                                \
+  case kWord32Atomic##op##Uint16: {                \
+    ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
+    __ movzx_w(eax, eax);                          \
+    break;                                         \
+  }                                                \
+  case kWord32Atomic##op##Word32: {                \
+    ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg);     \
+    break;                                         \
+  }
+      ATOMIC_BINOP_CASE(Add, add)
+      ATOMIC_BINOP_CASE(Sub, sub)
+      ATOMIC_BINOP_CASE(And, and_)
+      ATOMIC_BINOP_CASE(Or, or_)
+      ATOMIC_BINOP_CASE(Xor, xor_)
+#undef ATOMIC_BINOP_CASE
+#define ATOMIC_BINOP_CASE(op, instr1, instr2)         \
+  case kIA32Word32AtomicPair##op: {                   \
+    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
+    ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)          \
+    break;                                            \
+  }
+      ATOMIC_BINOP_CASE(Add, add, adc)
+      ATOMIC_BINOP_CASE(And, and_, and_)
+      ATOMIC_BINOP_CASE(Or, or_, or_)
+      ATOMIC_BINOP_CASE(Xor, xor_, xor_)
+#undef ATOMIC_BINOP_CASE
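+    // Sub does not fit the pair-binop macro above: the 64-bit input is first
+    // negated in place (neg / adc / neg), added to the old value with add/adc,
+    // and the result is then installed with a cmpxchg8b retry loop.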
+    case kIA32Word32AtomicPairSub: {
+      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
+      Label binop;
+      __ bind(&binop);
+      // Move memory operand into edx:eax
+      __ mov(eax, i.MemoryOperand(2));
+      __ mov(edx, i.NextMemoryOperand(2));
+      // Save input registers temporarily on the stack.
+      __ push(ebx);
+      frame_access_state()->IncreaseSPDelta(1);
+      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
+      __ push(i.InputRegister(1));
+      // Negate input in place
+      __ neg(ebx);
+      __ adc(i.InputRegister(1), 0);
+      __ neg(i.InputRegister(1));
+      // Add memory operand, negated input.
+      __ add(ebx, eax);
+      __ adc(i.InputRegister(1), edx);
+      __ lock();
+      __ cmpxchg8b(i.MemoryOperand(2));
+      // Restore input registers
+      __ pop(i.InputRegister(1));
+      __ pop(ebx);
+      frame_access_state()->IncreaseSPDelta(-1);
+      __ j(not_equal, &binop);
+      break;
+    }
+    case kWord32AtomicLoadInt8:
+    case kWord32AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kWord32AtomicLoadUint16:
+    case kWord32AtomicLoadWord32:
+    case kWord32AtomicStoreWord8:
+    case kWord32AtomicStoreWord16:
+    case kWord32AtomicStoreWord32:
+      UNREACHABLE();  // Won't be generated by instruction selector.
+      break;
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+static Condition FlagsConditionToCondition(FlagsCondition condition) {
+  switch (condition) {
+    case kUnorderedEqual:
+    case kEqual:
+      return equal;
+    case kUnorderedNotEqual:
+    case kNotEqual:
+      return not_equal;
+    case kSignedLessThan:
+      return less;
+    case kSignedGreaterThanOrEqual:
+      return greater_equal;
+    case kSignedLessThanOrEqual:
+      return less_equal;
+    case kSignedGreaterThan:
+      return greater;
+    case kUnsignedLessThan:
+      return below;
+    case kUnsignedGreaterThanOrEqual:
+      return above_equal;
+    case kUnsignedLessThanOrEqual:
+      return below_equal;
+    case kUnsignedGreaterThan:
+      return above;
+    case kOverflow:
+      return overflow;
+    case kNotOverflow:
+      return no_overflow;
+    default:
+      UNREACHABLE();
+  }
+}
+
+// Assembles a branch after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Label::Distance flabel_distance =
+      branch->fallthru ? Label::kNear : Label::kFar;
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
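+  // Unordered (NaN) floating-point comparisons set the parity flag, so the
+  // unordered case is routed to the false label for kUnorderedEqual and to the
+  // true label for kUnorderedNotEqual before the main condition is tested.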
+  if (branch->condition == kUnorderedEqual) {
+    __ j(parity_even, flabel, flabel_distance);
+  } else if (branch->condition == kUnorderedNotEqual) {
+    __ j(parity_even, tlabel);
+  }
+  __ j(FlagsConditionToCondition(branch->condition), tlabel);
+
+  // Add a jump if not falling through to the next block.
+  if (!branch->fallthru) __ jmp(flabel);
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+    void Generate() final {
+      IA32OperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        __ PrepareCallCFunction(0, esi);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        size_t pop_size =
+            call_descriptor->StackParameterCount() * kSystemPointerSize;
+        // Use ecx as a scratch register; we return immediately anyway.
+        __ Ret(static_cast<int>(pop_size), ecx);
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+      }
+    }
+
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Label end;
+  if (condition == kUnorderedEqual) {
+    __ j(parity_even, &end, Label::kNear);
+  } else if (condition == kUnorderedNotEqual) {
+    __ j(parity_even, tlabel);
+  }
+  __ j(FlagsConditionToCondition(condition), tlabel);
+  __ bind(&end);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  IA32OperandConverter i(this, instr);
+  Label done;
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  Label check;
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
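+  // For unordered comparisons the parity flag distinguishes NaN operands: the
+  // ordered case jumps to the generic materialization below, while the
+  // unordered case produces its answer directly (0 for kUnorderedEqual, 1 for
+  // kUnorderedNotEqual).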
+  if (condition == kUnorderedEqual) {
+    __ j(parity_odd, &check, Label::kNear);
+    __ Move(reg, Immediate(0));
+    __ jmp(&done, Label::kNear);
+  } else if (condition == kUnorderedNotEqual) {
+    __ j(parity_odd, &check, Label::kNear);
+    __ mov(reg, Immediate(1));
+    __ jmp(&done, Label::kNear);
+  }
+  Condition cc = FlagsConditionToCondition(condition);
+
+  __ bind(&check);
+  if (reg.is_byte_register()) {
+    // setcc for byte registers (al, bl, cl, dl).
+    __ setcc(cc, reg);
+    __ movzx_b(reg, reg);
+  } else {
+    // Emit a branch to set a register to either 1 or 0.
+    Label set;
+    __ j(cc, &set, Label::kNear);
+    __ Move(reg, Immediate(0));
+    __ jmp(&done, Label::kNear);
+    __ bind(&set);
+    __ mov(reg, Immediate(1));
+  }
+  __ bind(&done);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  IA32OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  IA32OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  size_t const case_count = instr->InputCount() - 2;
+  Label** cases = zone()->NewArray<Label*>(case_count);
+  for (size_t index = 0; index < case_count; ++index) {
+    cases[index] = GetLabel(i.InputRpo(index + 2));
+  }
+  Label* const table = AddJumpTable(cases, case_count);
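+  // Out-of-range inputs branch to the default block (RPO input 1); in-range
+  // inputs jump indirectly through the embedded, pointer-size-scaled table.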
+  __ cmp(input, Immediate(case_count));
+  __ j(above_equal, GetLabel(i.InputRpo(1)));
+  __ jmp(Operand::JumpTable(input, times_system_pointer_size, table));
+}
+
+// The calling convention for JSFunctions on IA32 passes arguments on the
+// stack and the JSFunction and context in EDI and ESI, respectively, thus
+// the steps of the call look as follows:
+
+// --{ before the call instruction }--------------------------------------------
+//                                                         |  caller frame |
+//                                                         ^ esp           ^ ebp
+
+// --{ push arguments and setup ESI, EDI }--------------------------------------
+//                                       | args + receiver |  caller frame |
+//                                       ^ esp                             ^ ebp
+//                 [edi = JSFunction, esi = context]
+
+// --{ call [edi + kCodeEntryOffset] }------------------------------------------
+//                                 | RET | args + receiver |  caller frame |
+//                                 ^ esp                                   ^ ebp
+
+// =={ prologue of called function }============================================
+// --{ push ebp }---------------------------------------------------------------
+//                            | FP | RET | args + receiver |  caller frame |
+//                            ^ esp                                        ^ ebp
+
+// --{ mov ebp, esp }-----------------------------------------------------------
+//                            | FP | RET | args + receiver |  caller frame |
+//                            ^ ebp,esp
+
+// --{ push esi }---------------------------------------------------------------
+//                      | CTX | FP | RET | args + receiver |  caller frame |
+//                      ^esp  ^ ebp
+
+// --{ push edi }---------------------------------------------------------------
+//                | FNC | CTX | FP | RET | args + receiver |  caller frame |
+//                ^esp        ^ ebp
+
+// --{ subi esp, #N }-----------------------------------------------------------
+// | callee frame | FNC | CTX | FP | RET | args + receiver |  caller frame |
+// ^esp                       ^ ebp
+
+// =={ body of called function }================================================
+
+// =={ epilogue of called function }============================================
+// --{ mov esp, ebp }-----------------------------------------------------------
+//                            | FP | RET | args + receiver |  caller frame |
+//                            ^ esp,ebp
+
+// --{ pop ebp }-----------------------------------------------------------
+// |                               | RET | args + receiver |  caller frame |
+//                                 ^ esp                                   ^ ebp
+
+// --{ ret #A+1 }-----------------------------------------------------------
+// |                                                       |  caller frame |
+//                                                         ^ esp           ^ ebp
+
+// Runtime function calls are accomplished by doing a stub call to the
+// CEntry (a real code object). On IA32 this passes arguments on the
+// stack, the number of arguments in EAX, the address of the runtime function
+// in EBX, and the context in ESI.
+
+// --{ before the call instruction }--------------------------------------------
+//                                                         |  caller frame |
+//                                                         ^ esp           ^ ebp
+
+// --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
+//                                       | args + receiver |  caller frame |
+//                                       ^ esp                             ^ ebp
+//              [eax = #args, ebx = runtime function, esi = context]
+
+// --{ call #CEntry }-----------------------------------------------------------
+//                                 | RET | args + receiver |  caller frame |
+//                                 ^ esp                                   ^ ebp
+
+// =={ body of runtime function }===============================================
+
+// --{ runtime returns }--------------------------------------------------------
+//                                                         |  caller frame |
+//                                                         ^ esp           ^ ebp
+
+// Other custom linkages (e.g. for calling directly into and out of C++) may
+// need to save callee-saved registers on the stack, which is done in the
+// function prologue of generated code.
+
+// --{ before the call instruction }--------------------------------------------
+//                                                         |  caller frame |
+//                                                         ^ esp           ^ ebp
+
+// --{ set up arguments in registers on stack }---------------------------------
+//                                                  | args |  caller frame |
+//                                                  ^ esp                  ^ ebp
+//                  [r0 = arg0, r1 = arg1, ...]
+
+// --{ call code }--------------------------------------------------------------
+//                                            | RET | args |  caller frame |
+//                                            ^ esp                        ^ ebp
+
+// =={ prologue of called function }============================================
+// --{ push ebp }---------------------------------------------------------------
+//                                       | FP | RET | args |  caller frame |
+//                                       ^ esp                             ^ ebp
+
+// --{ mov ebp, esp }-----------------------------------------------------------
+//                                       | FP | RET | args |  caller frame |
+//                                       ^ ebp,esp
+
+// --{ save registers }---------------------------------------------------------
+//                                | regs | FP | RET | args |  caller frame |
+//                                ^ esp  ^ ebp
+
+// --{ subi esp, #N }-----------------------------------------------------------
+//                 | callee frame | regs | FP | RET | args |  caller frame |
+//                 ^esp                  ^ ebp
+
+// =={ body of called function }================================================
+
+// =={ epilogue of called function }============================================
+// --{ restore registers }------------------------------------------------------
+//                                | regs | FP | RET | args |  caller frame |
+//                                ^ esp  ^ ebp
+
+// --{ mov esp, ebp }-----------------------------------------------------------
+//                                       | FP | RET | args |  caller frame |
+//                                       ^ esp,ebp
+
+// --{ pop ebp }----------------------------------------------------------------
+//                                            | RET | args |  caller frame |
+//                                            ^ esp                        ^ ebp
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {  // Save callee-saved registers.
+    DCHECK(!info()->is_osr());
+    int pushed = 0;
+    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+      if (!((1 << i) & saves)) continue;
+      ++pushed;
+    }
+    frame->AllocateSavedCalleeRegisterSlots(pushed);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      __ push(ebp);
+      __ mov(ebp, esp);
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
+        // Reserve stack space for saving the c_entry_fp later.
+        __ AllocateStackSpace(kSystemPointerSize);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      __ StubPrologue(info()->GetOutputStackFrameType());
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ mov(kJSFunctionRegister,
+               Operand(kWasmInstanceRegister,
+                       Tuple2::kValue2Offset - kHeapObjectTag));
+        __ mov(kWasmInstanceRegister,
+               Operand(kWasmInstanceRegister,
+                       Tuple2::kValue1Offset - kHeapObjectTag));
+        __ push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ AllocateStackSpace(kSystemPointerSize);
+        }
+      }
+    }
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (required_slots > 0) {
+    DCHECK(frame_access_state()->has_frame());
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
+        Register scratch = esi;
+        __ push(scratch);
+        __ mov(scratch,
+               FieldOperand(kWasmInstanceRegister,
+                            WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ mov(scratch, Operand(scratch, 0));
+        __ add(scratch, Immediate(required_slots * kSystemPointerSize));
+        __ cmp(esp, scratch);
+        __ pop(scratch);
+        __ j(above_equal, &done, Label::kNear);
+      }
+
+      __ wasm_call(wasm::WasmCode::kWasmStackOverflow,
+                   RelocInfo::WASM_STUB_CALL);
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+      __ bind(&done);
+    }
+
+    // Skip callee-saved and return slots, which are created below.
+    required_slots -= base::bits::CountPopulation(saves);
+    required_slots -= frame()->GetReturnSlotCount();
+    if (required_slots > 0) {
+      __ AllocateStackSpace(required_slots * kSystemPointerSize);
+    }
+  }
+
+  if (saves != 0) {  // Save callee-saved registers.
+    DCHECK(!info()->is_osr());
+    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+      if (((1 << i) & saves)) __ push(Register::from_code(i));
+    }
+  }
+
+  // Allocate return slots (located after callee-saved).
+  if (frame()->GetReturnSlotCount() > 0) {
+    __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  // Restore registers.
+  if (saves != 0) {
+    const int returns = frame()->GetReturnSlotCount();
+    if (returns != 0) {
+      __ add(esp, Immediate(returns * kSystemPointerSize));
+    }
+    for (int i = 0; i < Register::kNumRegisters; i++) {
+      if (!((1 << i) & saves)) continue;
+      __ pop(Register::from_code(i));
+    }
+  }
+
+  // We might need ecx and edx for scratch.
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
+  IA32OperandConverter g(this, nullptr);
+  int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} is
+  // zero. Check RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ cmp(g.ToRegister(additional_pop_count), Immediate(0));
+      __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
+    }
+  }
+
+  Register argc_reg = ecx;
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, it means it is a builtin with
+  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
+  // itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // For now, canonicalize JSFunction return sites as long as they always
+    // have the same number of return args.
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ jmp(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count.
+      __ mov(argc_reg, Operand(ebp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    int parameter_count_without_receiver =
+        parameter_count - 1;  // Exclude the receiver to simplify the
+                              // computation. We'll account for it at the end.
+    Label mismatch_return;
+    Register scratch_reg = edx;
+    DCHECK_NE(argc_reg, scratch_reg);
+    __ cmp(argc_reg, Immediate(parameter_count_without_receiver));
+    __ j(greater, &mismatch_return, Label::kNear);
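+    // Falling through means the dynamic argument count does not exceed the
+    // declared one, so popping {parameter_count} slots (which already includes
+    // the receiver) covers all arguments.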
+    __ Ret(parameter_count * kSystemPointerSize, scratch_reg);
+    __ bind(&mismatch_return);
+    __ PopReturnAddressTo(scratch_reg);
+    __ lea(esp, Operand(esp, argc_reg, times_system_pointer_size,
+                        kSystemPointerSize));  // Also pop the receiver.
+    // We use a return instead of a jump for better return address prediction.
+    __ PushReturnAddressFrom(scratch_reg);
+    __ Ret();
+  } else if (additional_pop_count->IsImmediate()) {
+    Register scratch_reg = ecx;
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
+    CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
+    __ Ret(static_cast<int>(pop_size), scratch_reg);
+  } else {
+    Register pop_reg = g.ToRegister(additional_pop_count);
+    Register scratch_reg = pop_reg == ecx ? edx : ecx;
+    int pop_size = static_cast<int>(parameter_count * kSystemPointerSize);
+    __ PopReturnAddressTo(scratch_reg);
+    __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size,
+                        static_cast<int>(pop_size)));
+    __ PushReturnAddressFrom(scratch_reg);
+    __ Ret();
+  }
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  IA32OperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.
+  switch (MoveType::InferMove(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ mov(g.ToRegister(destination), g.ToRegister(source));
+      } else {
+        DCHECK(source->IsFPRegister());
+        __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      Operand dst = g.ToOperand(destination);
+      if (source->IsRegister()) {
+        __ mov(dst, g.ToRegister(source));
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat32) {
+          __ movss(dst, src);
+        } else if (rep == MachineRepresentation::kFloat64) {
+          __ movsd(dst, src);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+          __ movups(dst, src);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToRegister: {
+      Operand src = g.ToOperand(source);
+      if (source->IsStackSlot()) {
+        __ mov(g.ToRegister(destination), src);
+      } else {
+        DCHECK(source->IsFPStackSlot());
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat32) {
+          __ movss(dst, src);
+        } else if (rep == MachineRepresentation::kFloat64) {
+          __ movsd(dst, src);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+          __ movups(dst, src);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      Operand src = g.ToOperand(source);
+      Operand dst = g.ToOperand(destination);
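+      // General-purpose stack-to-stack moves go through push/pop so no scratch
+      // GPR is needed; FP and SIMD values are staged through kScratchDoubleReg
+      // instead.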
+      if (source->IsStackSlot()) {
+        __ push(src);
+        __ pop(dst);
+      } else {
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat32) {
+          __ movss(kScratchDoubleReg, src);
+          __ movss(dst, kScratchDoubleReg);
+        } else if (rep == MachineRepresentation::kFloat64) {
+          __ movsd(kScratchDoubleReg, src);
+          __ movsd(dst, kScratchDoubleReg);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+          __ movups(kScratchDoubleReg, src);
+          __ movups(dst, kScratchDoubleReg);
+        }
+      }
+      return;
+    }
+    case MoveType::kConstantToRegister: {
+      Constant src = g.ToConstant(source);
+      if (destination->IsRegister()) {
+        Register dst = g.ToRegister(destination);
+        if (src.type() == Constant::kHeapObject) {
+          __ Move(dst, src.ToHeapObject());
+        } else {
+          __ Move(dst, g.ToImmediate(source));
+        }
+      } else {
+        DCHECK(destination->IsFPRegister());
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        if (src.type() == Constant::kFloat32) {
+          // TODO(turbofan): Can we do better here?
+          __ Move(dst, src.ToFloat32AsInt());
+        } else {
+          DCHECK_EQ(src.type(), Constant::kFloat64);
+          __ Move(dst, src.ToFloat64().AsUint64());
+        }
+      }
+      return;
+    }
+    case MoveType::kConstantToStack: {
+      Constant src = g.ToConstant(source);
+      Operand dst = g.ToOperand(destination);
+      if (destination->IsStackSlot()) {
+        __ Move(dst, g.ToImmediate(source));
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        if (src.type() == Constant::kFloat32) {
+          __ Move(dst, Immediate(src.ToFloat32AsInt()));
+        } else {
+          DCHECK_EQ(src.type(), Constant::kFloat64);
+          uint64_t constant_value = src.ToFloat64().AsUint64();
+          uint32_t lower = static_cast<uint32_t>(constant_value);
+          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
+          Operand dst0 = dst;
+          Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
+          __ Move(dst0, Immediate(lower));
+          __ Move(dst1, Immediate(upper));
+        }
+      }
+      return;
+    }
+  }
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  IA32OperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  switch (MoveType::InferSwap(source, destination)) {
+    case MoveType::kRegisterToRegister: {
+      if (source->IsRegister()) {
+        Register src = g.ToRegister(source);
+        Register dst = g.ToRegister(destination);
+        __ push(src);
+        __ mov(src, dst);
+        __ pop(dst);
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        __ movaps(kScratchDoubleReg, src);
+        __ movaps(src, dst);
+        __ movaps(dst, kScratchDoubleReg);
+      }
+      return;
+    }
+    case MoveType::kRegisterToStack: {
+      if (source->IsRegister()) {
+        Register src = g.ToRegister(source);
+        __ push(src);
+        frame_access_state()->IncreaseSPDelta(1);
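+        // Stack-slot operands may be esp-relative, so the SP delta is adjusted
+        // while the pushed value is live and the destination operand is
+        // recomputed before the pop.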
+        Operand dst = g.ToOperand(destination);
+        __ mov(src, dst);
+        frame_access_state()->IncreaseSPDelta(-1);
+        dst = g.ToOperand(destination);
+        __ pop(dst);
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        Operand dst = g.ToOperand(destination);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat32) {
+          __ movss(kScratchDoubleReg, dst);
+          __ movss(dst, src);
+          __ movaps(src, kScratchDoubleReg);
+        } else if (rep == MachineRepresentation::kFloat64) {
+          __ movsd(kScratchDoubleReg, dst);
+          __ movsd(dst, src);
+          __ movaps(src, kScratchDoubleReg);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+          __ movups(kScratchDoubleReg, dst);
+          __ movups(dst, src);
+          __ movups(src, kScratchDoubleReg);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      if (source->IsStackSlot()) {
+        Operand dst1 = g.ToOperand(destination);
+        __ push(dst1);
+        frame_access_state()->IncreaseSPDelta(1);
+        Operand src1 = g.ToOperand(source);
+        __ push(src1);
+        Operand dst2 = g.ToOperand(destination);
+        __ pop(dst2);
+        frame_access_state()->IncreaseSPDelta(-1);
+        Operand src2 = g.ToOperand(source);
+        __ pop(src2);
+      } else {
+        DCHECK(source->IsFPStackSlot());
+        Operand src0 = g.ToOperand(source);
+        Operand dst0 = g.ToOperand(destination);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat32) {
+          __ movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
+          __ push(src0);  // Then use stack to copy src to destination.
+          __ pop(dst0);
+          __ movss(src0, kScratchDoubleReg);
+        } else if (rep == MachineRepresentation::kFloat64) {
+          __ movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
+          __ push(src0);  // Then use stack to copy src to destination.
+          __ pop(dst0);
+          __ push(g.ToOperand(source, kSystemPointerSize));
+          __ pop(g.ToOperand(destination, kSystemPointerSize));
+          __ movsd(src0, kScratchDoubleReg);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+          __ movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
+          __ push(src0);  // Then use stack to copy src to destination.
+          __ pop(dst0);
+          __ push(g.ToOperand(source, kSystemPointerSize));
+          __ pop(g.ToOperand(destination, kSystemPointerSize));
+          __ push(g.ToOperand(source, 2 * kSystemPointerSize));
+          __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
+          __ push(g.ToOperand(source, 3 * kSystemPointerSize));
+          __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
+          __ movups(src0, kScratchDoubleReg);
+        }
+      }
+      return;
+    }
+    default:
+      UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  for (size_t index = 0; index < target_count; ++index) {
+    __ dd(targets[index]);
+  }
+}
+
+#undef __
+#undef kScratchDoubleReg
+#undef ASSEMBLE_COMPARE
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+#undef ASSEMBLE_BINOP
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_I64ATOMIC_BINOP
+#undef ASSEMBLE_MOVX
+#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
+#undef ASSEMBLE_SIMD_IMM_SHUFFLE
+#undef ASSEMBLE_SIMD_ALL_TRUE
+#undef ASSEMBLE_SIMD_SHIFT
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ia32/instruction-codes-ia32.h b/src/compiler/backend/ia32/instruction-codes-ia32.h
new file mode 100644
index 0000000..a564864
--- /dev/null
+++ b/src/compiler/backend/ia32/instruction-codes-ia32.h
@@ -0,0 +1,458 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_IA32_INSTRUCTION_CODES_IA32_H_
+#define V8_COMPILER_BACKEND_IA32_INSTRUCTION_CODES_IA32_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// IA32-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V) \
+  V(IA32Add)                       \
+  V(IA32And)                       \
+  V(IA32Cmp)                       \
+  V(IA32Cmp16)                     \
+  V(IA32Cmp8)                      \
+  V(IA32Test)                      \
+  V(IA32Test16)                    \
+  V(IA32Test8)                     \
+  V(IA32Or)                        \
+  V(IA32Xor)                       \
+  V(IA32Sub)                       \
+  V(IA32Imul)                      \
+  V(IA32ImulHigh)                  \
+  V(IA32UmulHigh)                  \
+  V(IA32Idiv)                      \
+  V(IA32Udiv)                      \
+  V(IA32Not)                       \
+  V(IA32Neg)                       \
+  V(IA32Shl)                       \
+  V(IA32Shr)                       \
+  V(IA32Sar)                       \
+  V(IA32AddPair)                   \
+  V(IA32SubPair)                   \
+  V(IA32MulPair)                   \
+  V(IA32ShlPair)                   \
+  V(IA32ShrPair)                   \
+  V(IA32SarPair)                   \
+  V(IA32Rol)                       \
+  V(IA32Ror)                       \
+  V(IA32Lzcnt)                     \
+  V(IA32Tzcnt)                     \
+  V(IA32Popcnt)                    \
+  V(IA32Bswap)                     \
+  V(IA32MFence)                    \
+  V(IA32LFence)                    \
+  V(SSEFloat32Cmp)                 \
+  V(SSEFloat32Add)                 \
+  V(SSEFloat32Sub)                 \
+  V(SSEFloat32Mul)                 \
+  V(SSEFloat32Div)                 \
+  V(SSEFloat32Abs)                 \
+  V(SSEFloat32Neg)                 \
+  V(SSEFloat32Sqrt)                \
+  V(SSEFloat32Round)               \
+  V(SSEFloat64Cmp)                 \
+  V(SSEFloat64Add)                 \
+  V(SSEFloat64Sub)                 \
+  V(SSEFloat64Mul)                 \
+  V(SSEFloat64Div)                 \
+  V(SSEFloat64Mod)                 \
+  V(SSEFloat32Max)                 \
+  V(SSEFloat64Max)                 \
+  V(SSEFloat32Min)                 \
+  V(SSEFloat64Min)                 \
+  V(SSEFloat64Abs)                 \
+  V(SSEFloat64Neg)                 \
+  V(SSEFloat64Sqrt)                \
+  V(SSEFloat64Round)               \
+  V(SSEFloat32ToFloat64)           \
+  V(SSEFloat64ToFloat32)           \
+  V(SSEFloat32ToInt32)             \
+  V(SSEFloat32ToUint32)            \
+  V(SSEFloat64ToInt32)             \
+  V(SSEFloat64ToUint32)            \
+  V(SSEInt32ToFloat32)             \
+  V(SSEUint32ToFloat32)            \
+  V(SSEInt32ToFloat64)             \
+  V(SSEUint32ToFloat64)            \
+  V(SSEFloat64ExtractLowWord32)    \
+  V(SSEFloat64ExtractHighWord32)   \
+  V(SSEFloat64InsertLowWord32)     \
+  V(SSEFloat64InsertHighWord32)    \
+  V(SSEFloat64LoadLowWord32)       \
+  V(SSEFloat64SilenceNaN)          \
+  V(AVXFloat32Add)                 \
+  V(AVXFloat32Sub)                 \
+  V(AVXFloat32Mul)                 \
+  V(AVXFloat32Div)                 \
+  V(AVXFloat64Add)                 \
+  V(AVXFloat64Sub)                 \
+  V(AVXFloat64Mul)                 \
+  V(AVXFloat64Div)                 \
+  V(AVXFloat64Abs)                 \
+  V(AVXFloat64Neg)                 \
+  V(AVXFloat32Abs)                 \
+  V(AVXFloat32Neg)                 \
+  V(IA32Movsxbl)                   \
+  V(IA32Movzxbl)                   \
+  V(IA32Movb)                      \
+  V(IA32Movsxwl)                   \
+  V(IA32Movzxwl)                   \
+  V(IA32Movw)                      \
+  V(IA32Movl)                      \
+  V(IA32Movss)                     \
+  V(IA32Movsd)                     \
+  V(IA32Movdqu)                    \
+  V(IA32BitcastFI)                 \
+  V(IA32BitcastIF)                 \
+  V(IA32Lea)                       \
+  V(IA32Push)                      \
+  V(IA32PushFloat32)               \
+  V(IA32PushFloat64)               \
+  V(IA32PushSimd128)               \
+  V(IA32Poke)                      \
+  V(IA32Peek)                      \
+  V(SSEF64x2Splat)                 \
+  V(AVXF64x2Splat)                 \
+  V(SSEF64x2ExtractLane)           \
+  V(AVXF64x2ExtractLane)           \
+  V(SSEF64x2ReplaceLane)           \
+  V(AVXF64x2ReplaceLane)           \
+  V(IA32F64x2Sqrt)                 \
+  V(IA32F64x2Add)                  \
+  V(IA32F64x2Sub)                  \
+  V(IA32F64x2Mul)                  \
+  V(IA32F64x2Div)                  \
+  V(IA32F64x2Min)                  \
+  V(IA32F64x2Max)                  \
+  V(IA32F64x2Eq)                   \
+  V(IA32F64x2Ne)                   \
+  V(IA32F64x2Lt)                   \
+  V(IA32F64x2Le)                   \
+  V(IA32F64x2Pmin)                 \
+  V(IA32F64x2Pmax)                 \
+  V(IA32F64x2Round)                \
+  V(IA32I64x2SplatI32Pair)         \
+  V(IA32I64x2ReplaceLaneI32Pair)   \
+  V(IA32I64x2Neg)                  \
+  V(IA32I64x2Shl)                  \
+  V(IA32I64x2ShrS)                 \
+  V(IA32I64x2Add)                  \
+  V(IA32I64x2Sub)                  \
+  V(IA32I64x2Mul)                  \
+  V(IA32I64x2ShrU)                 \
+  V(SSEF32x4Splat)                 \
+  V(AVXF32x4Splat)                 \
+  V(SSEF32x4ExtractLane)           \
+  V(AVXF32x4ExtractLane)           \
+  V(SSEF32x4ReplaceLane)           \
+  V(AVXF32x4ReplaceLane)           \
+  V(IA32F32x4SConvertI32x4)        \
+  V(SSEF32x4UConvertI32x4)         \
+  V(AVXF32x4UConvertI32x4)         \
+  V(SSEF32x4Abs)                   \
+  V(AVXF32x4Abs)                   \
+  V(SSEF32x4Neg)                   \
+  V(AVXF32x4Neg)                   \
+  V(SSEF32x4Sqrt)                  \
+  V(AVXF32x4Sqrt)                  \
+  V(IA32F32x4RecipApprox)          \
+  V(IA32F32x4RecipSqrtApprox)      \
+  V(SSEF32x4Add)                   \
+  V(AVXF32x4Add)                   \
+  V(SSEF32x4AddHoriz)              \
+  V(AVXF32x4AddHoriz)              \
+  V(SSEF32x4Sub)                   \
+  V(AVXF32x4Sub)                   \
+  V(SSEF32x4Mul)                   \
+  V(AVXF32x4Mul)                   \
+  V(SSEF32x4Div)                   \
+  V(AVXF32x4Div)                   \
+  V(SSEF32x4Min)                   \
+  V(AVXF32x4Min)                   \
+  V(SSEF32x4Max)                   \
+  V(AVXF32x4Max)                   \
+  V(SSEF32x4Eq)                    \
+  V(AVXF32x4Eq)                    \
+  V(SSEF32x4Ne)                    \
+  V(AVXF32x4Ne)                    \
+  V(SSEF32x4Lt)                    \
+  V(AVXF32x4Lt)                    \
+  V(SSEF32x4Le)                    \
+  V(AVXF32x4Le)                    \
+  V(IA32F32x4Pmin)                 \
+  V(IA32F32x4Pmax)                 \
+  V(IA32F32x4Round)                \
+  V(IA32I32x4Splat)                \
+  V(IA32I32x4ExtractLane)          \
+  V(SSEI32x4ReplaceLane)           \
+  V(AVXI32x4ReplaceLane)           \
+  V(SSEI32x4SConvertF32x4)         \
+  V(AVXI32x4SConvertF32x4)         \
+  V(IA32I32x4SConvertI16x8Low)     \
+  V(IA32I32x4SConvertI16x8High)    \
+  V(IA32I32x4Neg)                  \
+  V(IA32I32x4Shl)                  \
+  V(IA32I32x4ShrS)                 \
+  V(SSEI32x4Add)                   \
+  V(AVXI32x4Add)                   \
+  V(SSEI32x4AddHoriz)              \
+  V(AVXI32x4AddHoriz)              \
+  V(SSEI32x4Sub)                   \
+  V(AVXI32x4Sub)                   \
+  V(SSEI32x4Mul)                   \
+  V(AVXI32x4Mul)                   \
+  V(SSEI32x4MinS)                  \
+  V(AVXI32x4MinS)                  \
+  V(SSEI32x4MaxS)                  \
+  V(AVXI32x4MaxS)                  \
+  V(SSEI32x4Eq)                    \
+  V(AVXI32x4Eq)                    \
+  V(SSEI32x4Ne)                    \
+  V(AVXI32x4Ne)                    \
+  V(SSEI32x4GtS)                   \
+  V(AVXI32x4GtS)                   \
+  V(SSEI32x4GeS)                   \
+  V(AVXI32x4GeS)                   \
+  V(SSEI32x4UConvertF32x4)         \
+  V(AVXI32x4UConvertF32x4)         \
+  V(IA32I32x4UConvertI16x8Low)     \
+  V(IA32I32x4UConvertI16x8High)    \
+  V(IA32I32x4ShrU)                 \
+  V(SSEI32x4MinU)                  \
+  V(AVXI32x4MinU)                  \
+  V(SSEI32x4MaxU)                  \
+  V(AVXI32x4MaxU)                  \
+  V(SSEI32x4GtU)                   \
+  V(AVXI32x4GtU)                   \
+  V(SSEI32x4GeU)                   \
+  V(AVXI32x4GeU)                   \
+  V(IA32I32x4Abs)                  \
+  V(IA32I32x4BitMask)              \
+  V(IA32I32x4DotI16x8S)            \
+  V(IA32I16x8Splat)                \
+  V(IA32I16x8ExtractLaneU)         \
+  V(IA32I16x8ExtractLaneS)         \
+  V(SSEI16x8ReplaceLane)           \
+  V(AVXI16x8ReplaceLane)           \
+  V(IA32I16x8SConvertI8x16Low)     \
+  V(IA32I16x8SConvertI8x16High)    \
+  V(IA32I16x8Neg)                  \
+  V(IA32I16x8Shl)                  \
+  V(IA32I16x8ShrS)                 \
+  V(SSEI16x8SConvertI32x4)         \
+  V(AVXI16x8SConvertI32x4)         \
+  V(SSEI16x8Add)                   \
+  V(AVXI16x8Add)                   \
+  V(SSEI16x8AddSatS)               \
+  V(AVXI16x8AddSatS)               \
+  V(SSEI16x8AddHoriz)              \
+  V(AVXI16x8AddHoriz)              \
+  V(SSEI16x8Sub)                   \
+  V(AVXI16x8Sub)                   \
+  V(SSEI16x8SubSatS)               \
+  V(AVXI16x8SubSatS)               \
+  V(SSEI16x8Mul)                   \
+  V(AVXI16x8Mul)                   \
+  V(SSEI16x8MinS)                  \
+  V(AVXI16x8MinS)                  \
+  V(SSEI16x8MaxS)                  \
+  V(AVXI16x8MaxS)                  \
+  V(SSEI16x8Eq)                    \
+  V(AVXI16x8Eq)                    \
+  V(SSEI16x8Ne)                    \
+  V(AVXI16x8Ne)                    \
+  V(SSEI16x8GtS)                   \
+  V(AVXI16x8GtS)                   \
+  V(SSEI16x8GeS)                   \
+  V(AVXI16x8GeS)                   \
+  V(IA32I16x8UConvertI8x16Low)     \
+  V(IA32I16x8UConvertI8x16High)    \
+  V(IA32I16x8ShrU)                 \
+  V(SSEI16x8UConvertI32x4)         \
+  V(AVXI16x8UConvertI32x4)         \
+  V(SSEI16x8AddSatU)               \
+  V(AVXI16x8AddSatU)               \
+  V(SSEI16x8SubSatU)               \
+  V(AVXI16x8SubSatU)               \
+  V(SSEI16x8MinU)                  \
+  V(AVXI16x8MinU)                  \
+  V(SSEI16x8MaxU)                  \
+  V(AVXI16x8MaxU)                  \
+  V(SSEI16x8GtU)                   \
+  V(AVXI16x8GtU)                   \
+  V(SSEI16x8GeU)                   \
+  V(AVXI16x8GeU)                   \
+  V(IA32I16x8RoundingAverageU)     \
+  V(IA32I16x8Abs)                  \
+  V(IA32I16x8BitMask)              \
+  V(IA32I8x16Splat)                \
+  V(IA32I8x16ExtractLaneU)         \
+  V(IA32I8x16ExtractLaneS)         \
+  V(SSEI8x16ReplaceLane)           \
+  V(AVXI8x16ReplaceLane)           \
+  V(SSEI8x16SConvertI16x8)         \
+  V(AVXI8x16SConvertI16x8)         \
+  V(IA32I8x16Neg)                  \
+  V(IA32I8x16Shl)                  \
+  V(IA32I8x16ShrS)                 \
+  V(SSEI8x16Add)                   \
+  V(AVXI8x16Add)                   \
+  V(SSEI8x16AddSatS)               \
+  V(AVXI8x16AddSatS)               \
+  V(SSEI8x16Sub)                   \
+  V(AVXI8x16Sub)                   \
+  V(SSEI8x16SubSatS)               \
+  V(AVXI8x16SubSatS)               \
+  V(SSEI8x16Mul)                   \
+  V(AVXI8x16Mul)                   \
+  V(SSEI8x16MinS)                  \
+  V(AVXI8x16MinS)                  \
+  V(SSEI8x16MaxS)                  \
+  V(AVXI8x16MaxS)                  \
+  V(SSEI8x16Eq)                    \
+  V(AVXI8x16Eq)                    \
+  V(SSEI8x16Ne)                    \
+  V(AVXI8x16Ne)                    \
+  V(SSEI8x16GtS)                   \
+  V(AVXI8x16GtS)                   \
+  V(SSEI8x16GeS)                   \
+  V(AVXI8x16GeS)                   \
+  V(SSEI8x16UConvertI16x8)         \
+  V(AVXI8x16UConvertI16x8)         \
+  V(SSEI8x16AddSatU)               \
+  V(AVXI8x16AddSatU)               \
+  V(SSEI8x16SubSatU)               \
+  V(AVXI8x16SubSatU)               \
+  V(IA32I8x16ShrU)                 \
+  V(SSEI8x16MinU)                  \
+  V(AVXI8x16MinU)                  \
+  V(SSEI8x16MaxU)                  \
+  V(AVXI8x16MaxU)                  \
+  V(SSEI8x16GtU)                   \
+  V(AVXI8x16GtU)                   \
+  V(SSEI8x16GeU)                   \
+  V(AVXI8x16GeU)                   \
+  V(IA32I8x16RoundingAverageU)     \
+  V(IA32I8x16Abs)                  \
+  V(IA32I8x16BitMask)              \
+  V(IA32S128Const)                 \
+  V(IA32S128Zero)                  \
+  V(IA32S128AllOnes)               \
+  V(SSES128Not)                    \
+  V(AVXS128Not)                    \
+  V(SSES128And)                    \
+  V(AVXS128And)                    \
+  V(SSES128Or)                     \
+  V(AVXS128Or)                     \
+  V(SSES128Xor)                    \
+  V(AVXS128Xor)                    \
+  V(SSES128Select)                 \
+  V(AVXS128Select)                 \
+  V(IA32S128AndNot)                \
+  V(IA32I8x16Swizzle)              \
+  V(IA32I8x16Shuffle)              \
+  V(IA32S128Load8Splat)            \
+  V(IA32S128Load16Splat)           \
+  V(IA32S128Load32Splat)           \
+  V(IA32S128Load64Splat)           \
+  V(IA32S128Load8x8S)              \
+  V(IA32S128Load8x8U)              \
+  V(IA32S128Load16x4S)             \
+  V(IA32S128Load16x4U)             \
+  V(IA32S128Load32x2S)             \
+  V(IA32S128Load32x2U)             \
+  V(IA32S32x4Swizzle)              \
+  V(IA32S32x4Shuffle)              \
+  V(IA32S16x8Blend)                \
+  V(IA32S16x8HalfShuffle1)         \
+  V(IA32S16x8HalfShuffle2)         \
+  V(IA32S8x16Alignr)               \
+  V(IA32S16x8Dup)                  \
+  V(IA32S8x16Dup)                  \
+  V(SSES16x8UnzipHigh)             \
+  V(AVXS16x8UnzipHigh)             \
+  V(SSES16x8UnzipLow)              \
+  V(AVXS16x8UnzipLow)              \
+  V(SSES8x16UnzipHigh)             \
+  V(AVXS8x16UnzipHigh)             \
+  V(SSES8x16UnzipLow)              \
+  V(AVXS8x16UnzipLow)              \
+  V(IA32S64x2UnpackHigh)           \
+  V(IA32S32x4UnpackHigh)           \
+  V(IA32S16x8UnpackHigh)           \
+  V(IA32S8x16UnpackHigh)           \
+  V(IA32S64x2UnpackLow)            \
+  V(IA32S32x4UnpackLow)            \
+  V(IA32S16x8UnpackLow)            \
+  V(IA32S8x16UnpackLow)            \
+  V(SSES8x16TransposeLow)          \
+  V(AVXS8x16TransposeLow)          \
+  V(SSES8x16TransposeHigh)         \
+  V(AVXS8x16TransposeHigh)         \
+  V(SSES8x8Reverse)                \
+  V(AVXS8x8Reverse)                \
+  V(SSES8x4Reverse)                \
+  V(AVXS8x4Reverse)                \
+  V(SSES8x2Reverse)                \
+  V(AVXS8x2Reverse)                \
+  V(IA32V32x4AnyTrue)              \
+  V(IA32V32x4AllTrue)              \
+  V(IA32V16x8AnyTrue)              \
+  V(IA32V16x8AllTrue)              \
+  V(IA32V8x16AnyTrue)              \
+  V(IA32V8x16AllTrue)              \
+  V(IA32Word32AtomicPairLoad)      \
+  V(IA32Word32AtomicPairStore)     \
+  V(IA32Word32AtomicPairAdd)       \
+  V(IA32Word32AtomicPairSub)       \
+  V(IA32Word32AtomicPairAnd)       \
+  V(IA32Word32AtomicPairOr)        \
+  V(IA32Word32AtomicPairXor)       \
+  V(IA32Word32AtomicPairExchange)  \
+  V(IA32Word32AtomicPairCompareExchange)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// M = memory operand
+// R = base register
+// N = index register * N for N in {1, 2, 4, 8}
+// I = immediate displacement (int32_t)
+
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MR)   /* [%r1            ] */      \
+  V(MRI)  /* [%r1         + K] */      \
+  V(MR1)  /* [%r1 + %r2*1    ] */      \
+  V(MR2)  /* [%r1 + %r2*2    ] */      \
+  V(MR4)  /* [%r1 + %r2*4    ] */      \
+  V(MR8)  /* [%r1 + %r2*8    ] */      \
+  V(MR1I) /* [%r1 + %r2*1 + K] */      \
+  V(MR2I) /* [%r1 + %r2*2 + K] */      \
+  V(MR4I) /* [%r1 + %r2*4 + K] */      \
+  V(MR8I) /* [%r1 + %r2*8 + K] */      \
+  V(M1)   /* [      %r2*1    ] */      \
+  V(M2)   /* [      %r2*2    ] */      \
+  V(M4)   /* [      %r2*4    ] */      \
+  V(M8)   /* [      %r2*8    ] */      \
+  V(M1I)  /* [      %r2*1 + K] */      \
+  V(M2I)  /* [      %r2*2 + K] */      \
+  V(M4I)  /* [      %r2*4 + K] */      \
+  V(M8I)  /* [      %r2*8 + K] */      \
+  V(MI)   /* [              K] */      \
+  V(Root) /* [%root       + K] */
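+
+// For example, the MR1I shape corresponds to an operand such as
+// [eax + ebx*1 + 0x10]: a base register plus an index register scaled by 1
+// plus a constant displacement.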
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_IA32_INSTRUCTION_CODES_IA32_H_
diff --git a/src/compiler/backend/ia32/instruction-scheduler-ia32.cc b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
new file mode 100644
index 0000000..c8f3b19
--- /dev/null
+++ b/src/compiler/backend/ia32/instruction-scheduler-ia32.cc
@@ -0,0 +1,508 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kIA32Add:
+    case kIA32And:
+    case kIA32Cmp:
+    case kIA32Cmp16:
+    case kIA32Cmp8:
+    case kIA32Test:
+    case kIA32Test16:
+    case kIA32Test8:
+    case kIA32Or:
+    case kIA32Xor:
+    case kIA32Sub:
+    case kIA32Imul:
+    case kIA32ImulHigh:
+    case kIA32UmulHigh:
+    case kIA32Not:
+    case kIA32Neg:
+    case kIA32Shl:
+    case kIA32Shr:
+    case kIA32Sar:
+    case kIA32AddPair:
+    case kIA32SubPair:
+    case kIA32MulPair:
+    case kIA32ShlPair:
+    case kIA32ShrPair:
+    case kIA32SarPair:
+    case kIA32Rol:
+    case kIA32Ror:
+    case kIA32Lzcnt:
+    case kIA32Tzcnt:
+    case kIA32Popcnt:
+    case kIA32Bswap:
+    case kIA32Lea:
+    case kSSEFloat32Cmp:
+    case kSSEFloat32Add:
+    case kSSEFloat32Sub:
+    case kSSEFloat32Mul:
+    case kSSEFloat32Div:
+    case kSSEFloat32Abs:
+    case kSSEFloat32Neg:
+    case kSSEFloat32Sqrt:
+    case kSSEFloat32Round:
+    case kSSEFloat64Cmp:
+    case kSSEFloat64Add:
+    case kSSEFloat64Sub:
+    case kSSEFloat64Mul:
+    case kSSEFloat64Div:
+    case kSSEFloat64Mod:
+    case kSSEFloat32Max:
+    case kSSEFloat64Max:
+    case kSSEFloat32Min:
+    case kSSEFloat64Min:
+    case kSSEFloat64Abs:
+    case kSSEFloat64Neg:
+    case kSSEFloat64Sqrt:
+    case kSSEFloat64Round:
+    case kSSEFloat32ToFloat64:
+    case kSSEFloat64ToFloat32:
+    case kSSEFloat32ToInt32:
+    case kSSEFloat32ToUint32:
+    case kSSEFloat64ToInt32:
+    case kSSEFloat64ToUint32:
+    case kSSEInt32ToFloat32:
+    case kSSEUint32ToFloat32:
+    case kSSEInt32ToFloat64:
+    case kSSEUint32ToFloat64:
+    case kSSEFloat64ExtractLowWord32:
+    case kSSEFloat64ExtractHighWord32:
+    case kSSEFloat64InsertLowWord32:
+    case kSSEFloat64InsertHighWord32:
+    case kSSEFloat64LoadLowWord32:
+    case kSSEFloat64SilenceNaN:
+    case kAVXFloat32Add:
+    case kAVXFloat32Sub:
+    case kAVXFloat32Mul:
+    case kAVXFloat32Div:
+    case kAVXFloat64Add:
+    case kAVXFloat64Sub:
+    case kAVXFloat64Mul:
+    case kAVXFloat64Div:
+    case kAVXFloat64Abs:
+    case kAVXFloat64Neg:
+    case kAVXFloat32Abs:
+    case kAVXFloat32Neg:
+    case kIA32BitcastFI:
+    case kIA32BitcastIF:
+    case kSSEF64x2Splat:
+    case kAVXF64x2Splat:
+    case kSSEF64x2ExtractLane:
+    case kAVXF64x2ExtractLane:
+    case kSSEF64x2ReplaceLane:
+    case kAVXF64x2ReplaceLane:
+    case kIA32F64x2Sqrt:
+    case kIA32F64x2Add:
+    case kIA32F64x2Sub:
+    case kIA32F64x2Mul:
+    case kIA32F64x2Div:
+    case kIA32F64x2Min:
+    case kIA32F64x2Max:
+    case kIA32F64x2Eq:
+    case kIA32F64x2Ne:
+    case kIA32F64x2Lt:
+    case kIA32F64x2Le:
+    case kIA32F64x2Pmin:
+    case kIA32F64x2Pmax:
+    case kIA32F64x2Round:
+    case kIA32I64x2SplatI32Pair:
+    case kIA32I64x2ReplaceLaneI32Pair:
+    case kIA32I64x2Neg:
+    case kIA32I64x2Shl:
+    case kIA32I64x2ShrS:
+    case kIA32I64x2Add:
+    case kIA32I64x2Sub:
+    case kIA32I64x2Mul:
+    case kIA32I64x2ShrU:
+    case kSSEF32x4Splat:
+    case kAVXF32x4Splat:
+    case kSSEF32x4ExtractLane:
+    case kAVXF32x4ExtractLane:
+    case kSSEF32x4ReplaceLane:
+    case kAVXF32x4ReplaceLane:
+    case kIA32F32x4SConvertI32x4:
+    case kSSEF32x4UConvertI32x4:
+    case kAVXF32x4UConvertI32x4:
+    case kSSEF32x4Abs:
+    case kAVXF32x4Abs:
+    case kSSEF32x4Neg:
+    case kAVXF32x4Neg:
+    case kSSEF32x4Sqrt:
+    case kAVXF32x4Sqrt:
+    case kIA32F32x4RecipApprox:
+    case kIA32F32x4RecipSqrtApprox:
+    case kSSEF32x4Add:
+    case kAVXF32x4Add:
+    case kSSEF32x4AddHoriz:
+    case kAVXF32x4AddHoriz:
+    case kSSEF32x4Sub:
+    case kAVXF32x4Sub:
+    case kSSEF32x4Mul:
+    case kAVXF32x4Mul:
+    case kSSEF32x4Div:
+    case kAVXF32x4Div:
+    case kSSEF32x4Min:
+    case kAVXF32x4Min:
+    case kSSEF32x4Max:
+    case kAVXF32x4Max:
+    case kSSEF32x4Eq:
+    case kAVXF32x4Eq:
+    case kSSEF32x4Ne:
+    case kAVXF32x4Ne:
+    case kSSEF32x4Lt:
+    case kAVXF32x4Lt:
+    case kSSEF32x4Le:
+    case kAVXF32x4Le:
+    case kIA32F32x4Pmin:
+    case kIA32F32x4Pmax:
+    case kIA32F32x4Round:
+    case kIA32I32x4Splat:
+    case kIA32I32x4ExtractLane:
+    case kSSEI32x4ReplaceLane:
+    case kAVXI32x4ReplaceLane:
+    case kSSEI32x4SConvertF32x4:
+    case kAVXI32x4SConvertF32x4:
+    case kIA32I32x4SConvertI16x8Low:
+    case kIA32I32x4SConvertI16x8High:
+    case kIA32I32x4Neg:
+    case kIA32I32x4Shl:
+    case kIA32I32x4ShrS:
+    case kSSEI32x4Add:
+    case kAVXI32x4Add:
+    case kSSEI32x4AddHoriz:
+    case kAVXI32x4AddHoriz:
+    case kSSEI32x4Sub:
+    case kAVXI32x4Sub:
+    case kSSEI32x4Mul:
+    case kAVXI32x4Mul:
+    case kSSEI32x4MinS:
+    case kAVXI32x4MinS:
+    case kSSEI32x4MaxS:
+    case kAVXI32x4MaxS:
+    case kSSEI32x4Eq:
+    case kAVXI32x4Eq:
+    case kSSEI32x4Ne:
+    case kAVXI32x4Ne:
+    case kSSEI32x4GtS:
+    case kAVXI32x4GtS:
+    case kSSEI32x4GeS:
+    case kAVXI32x4GeS:
+    case kSSEI32x4UConvertF32x4:
+    case kAVXI32x4UConvertF32x4:
+    case kIA32I32x4UConvertI16x8Low:
+    case kIA32I32x4UConvertI16x8High:
+    case kIA32I32x4ShrU:
+    case kSSEI32x4MinU:
+    case kAVXI32x4MinU:
+    case kSSEI32x4MaxU:
+    case kAVXI32x4MaxU:
+    case kSSEI32x4GtU:
+    case kAVXI32x4GtU:
+    case kSSEI32x4GeU:
+    case kAVXI32x4GeU:
+    case kIA32I32x4Abs:
+    case kIA32I32x4BitMask:
+    case kIA32I32x4DotI16x8S:
+    case kIA32I16x8Splat:
+    case kIA32I16x8ExtractLaneU:
+    case kIA32I16x8ExtractLaneS:
+    case kSSEI16x8ReplaceLane:
+    case kAVXI16x8ReplaceLane:
+    case kIA32I16x8SConvertI8x16Low:
+    case kIA32I16x8SConvertI8x16High:
+    case kIA32I16x8Neg:
+    case kIA32I16x8Shl:
+    case kIA32I16x8ShrS:
+    case kSSEI16x8SConvertI32x4:
+    case kAVXI16x8SConvertI32x4:
+    case kSSEI16x8Add:
+    case kAVXI16x8Add:
+    case kSSEI16x8AddSatS:
+    case kAVXI16x8AddSatS:
+    case kSSEI16x8AddHoriz:
+    case kAVXI16x8AddHoriz:
+    case kSSEI16x8Sub:
+    case kAVXI16x8Sub:
+    case kSSEI16x8SubSatS:
+    case kAVXI16x8SubSatS:
+    case kSSEI16x8Mul:
+    case kAVXI16x8Mul:
+    case kSSEI16x8MinS:
+    case kAVXI16x8MinS:
+    case kSSEI16x8MaxS:
+    case kAVXI16x8MaxS:
+    case kSSEI16x8Eq:
+    case kAVXI16x8Eq:
+    case kSSEI16x8Ne:
+    case kAVXI16x8Ne:
+    case kSSEI16x8GtS:
+    case kAVXI16x8GtS:
+    case kSSEI16x8GeS:
+    case kAVXI16x8GeS:
+    case kIA32I16x8UConvertI8x16Low:
+    case kIA32I16x8UConvertI8x16High:
+    case kIA32I16x8ShrU:
+    case kSSEI16x8UConvertI32x4:
+    case kAVXI16x8UConvertI32x4:
+    case kSSEI16x8AddSatU:
+    case kAVXI16x8AddSatU:
+    case kSSEI16x8SubSatU:
+    case kAVXI16x8SubSatU:
+    case kSSEI16x8MinU:
+    case kAVXI16x8MinU:
+    case kSSEI16x8MaxU:
+    case kAVXI16x8MaxU:
+    case kSSEI16x8GtU:
+    case kAVXI16x8GtU:
+    case kSSEI16x8GeU:
+    case kAVXI16x8GeU:
+    case kIA32I16x8RoundingAverageU:
+    case kIA32I16x8Abs:
+    case kIA32I16x8BitMask:
+    case kIA32I8x16Splat:
+    case kIA32I8x16ExtractLaneU:
+    case kIA32I8x16ExtractLaneS:
+    case kSSEI8x16ReplaceLane:
+    case kAVXI8x16ReplaceLane:
+    case kSSEI8x16SConvertI16x8:
+    case kAVXI8x16SConvertI16x8:
+    case kIA32I8x16Neg:
+    case kIA32I8x16Shl:
+    case kIA32I8x16ShrS:
+    case kSSEI8x16Add:
+    case kAVXI8x16Add:
+    case kSSEI8x16AddSatS:
+    case kAVXI8x16AddSatS:
+    case kSSEI8x16Sub:
+    case kAVXI8x16Sub:
+    case kSSEI8x16SubSatS:
+    case kAVXI8x16SubSatS:
+    case kSSEI8x16Mul:
+    case kAVXI8x16Mul:
+    case kSSEI8x16MinS:
+    case kAVXI8x16MinS:
+    case kSSEI8x16MaxS:
+    case kAVXI8x16MaxS:
+    case kSSEI8x16Eq:
+    case kAVXI8x16Eq:
+    case kSSEI8x16Ne:
+    case kAVXI8x16Ne:
+    case kSSEI8x16GtS:
+    case kAVXI8x16GtS:
+    case kSSEI8x16GeS:
+    case kAVXI8x16GeS:
+    case kSSEI8x16UConvertI16x8:
+    case kAVXI8x16UConvertI16x8:
+    case kSSEI8x16AddSatU:
+    case kAVXI8x16AddSatU:
+    case kSSEI8x16SubSatU:
+    case kAVXI8x16SubSatU:
+    case kIA32I8x16ShrU:
+    case kSSEI8x16MinU:
+    case kAVXI8x16MinU:
+    case kSSEI8x16MaxU:
+    case kAVXI8x16MaxU:
+    case kSSEI8x16GtU:
+    case kAVXI8x16GtU:
+    case kSSEI8x16GeU:
+    case kAVXI8x16GeU:
+    case kIA32I8x16RoundingAverageU:
+    case kIA32I8x16Abs:
+    case kIA32I8x16BitMask:
+    case kIA32S128Const:
+    case kIA32S128Zero:
+    case kIA32S128AllOnes:
+    case kSSES128Not:
+    case kAVXS128Not:
+    case kSSES128And:
+    case kAVXS128And:
+    case kSSES128Or:
+    case kAVXS128Or:
+    case kSSES128Xor:
+    case kAVXS128Xor:
+    case kSSES128Select:
+    case kAVXS128Select:
+    case kIA32S128AndNot:
+    case kIA32I8x16Swizzle:
+    case kIA32I8x16Shuffle:
+    case kIA32S32x4Swizzle:
+    case kIA32S32x4Shuffle:
+    case kIA32S16x8Blend:
+    case kIA32S16x8HalfShuffle1:
+    case kIA32S16x8HalfShuffle2:
+    case kIA32S8x16Alignr:
+    case kIA32S16x8Dup:
+    case kIA32S8x16Dup:
+    case kSSES16x8UnzipHigh:
+    case kAVXS16x8UnzipHigh:
+    case kSSES16x8UnzipLow:
+    case kAVXS16x8UnzipLow:
+    case kSSES8x16UnzipHigh:
+    case kAVXS8x16UnzipHigh:
+    case kSSES8x16UnzipLow:
+    case kAVXS8x16UnzipLow:
+    case kIA32S64x2UnpackHigh:
+    case kIA32S32x4UnpackHigh:
+    case kIA32S16x8UnpackHigh:
+    case kIA32S8x16UnpackHigh:
+    case kIA32S64x2UnpackLow:
+    case kIA32S32x4UnpackLow:
+    case kIA32S16x8UnpackLow:
+    case kIA32S8x16UnpackLow:
+    case kSSES8x16TransposeLow:
+    case kAVXS8x16TransposeLow:
+    case kSSES8x16TransposeHigh:
+    case kAVXS8x16TransposeHigh:
+    case kSSES8x8Reverse:
+    case kAVXS8x8Reverse:
+    case kSSES8x4Reverse:
+    case kAVXS8x4Reverse:
+    case kSSES8x2Reverse:
+    case kAVXS8x2Reverse:
+    case kIA32V32x4AnyTrue:
+    case kIA32V32x4AllTrue:
+    case kIA32V16x8AnyTrue:
+    case kIA32V16x8AllTrue:
+    case kIA32V8x16AnyTrue:
+    case kIA32V8x16AllTrue:
+      return (instr->addressing_mode() == kMode_None)
+                 ? kNoOpcodeFlags
+                 : kIsLoadOperation | kHasSideEffect;
+
+    case kIA32Idiv:
+    case kIA32Udiv:
+      return (instr->addressing_mode() == kMode_None)
+                 ? kMayNeedDeoptOrTrapCheck
+                 : kMayNeedDeoptOrTrapCheck | kIsLoadOperation | kHasSideEffect;
+
+    case kIA32Movsxbl:
+    case kIA32Movzxbl:
+    case kIA32Movb:
+    case kIA32Movsxwl:
+    case kIA32Movzxwl:
+    case kIA32Movw:
+    case kIA32Movl:
+    case kIA32Movss:
+    case kIA32Movsd:
+    case kIA32Movdqu:
+    // Moves are used for memory load/store operations.
+    case kIA32S128Load8Splat:
+    case kIA32S128Load16Splat:
+    case kIA32S128Load32Splat:
+    case kIA32S128Load64Splat:
+    case kIA32S128Load8x8S:
+    case kIA32S128Load8x8U:
+    case kIA32S128Load16x4S:
+    case kIA32S128Load16x4U:
+    case kIA32S128Load32x2S:
+    case kIA32S128Load32x2U:
+      return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
+
+    case kIA32Peek:
+      return kIsLoadOperation;
+
+    case kIA32Push:
+    case kIA32PushFloat32:
+    case kIA32PushFloat64:
+    case kIA32PushSimd128:
+    case kIA32Poke:
+    case kIA32MFence:
+    case kIA32LFence:
+      return kHasSideEffect;
+
+    case kIA32Word32AtomicPairLoad:
+      return kIsLoadOperation;
+
+    case kIA32Word32AtomicPairStore:
+    case kIA32Word32AtomicPairAdd:
+    case kIA32Word32AtomicPairSub:
+    case kIA32Word32AtomicPairAnd:
+    case kIA32Word32AtomicPairOr:
+    case kIA32Word32AtomicPairXor:
+    case kIA32Word32AtomicPairExchange:
+    case kIA32Word32AtomicPairCompareExchange:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
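+
+// Example (illustrative): a register-only kIA32Add reports kNoOpcodeFlags and
+// may be reordered subject only to data dependencies, while the same opcode
+// with a memory addressing mode reports kIsLoadOperation | kHasSideEffect and
+// stays ordered relative to other side-effecting instructions. A hypothetical
+// consumer of these flags might check:
+//
+//   int flags = GetTargetInstructionFlags(instr);
+//   bool movable = (flags & (kIsLoadOperation | kHasSideEffect)) == 0;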
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // Basic latency modeling for ia32 instructions. The values were determined
+  // empirically.
+  switch (instr->arch_opcode()) {
+    case kSSEFloat64Mul:
+      return 5;
+    case kIA32Imul:
+    case kIA32ImulHigh:
+      return 5;
+    case kSSEFloat32Cmp:
+    case kSSEFloat64Cmp:
+      return 9;
+    case kSSEFloat32Add:
+    case kSSEFloat32Sub:
+    case kSSEFloat32Abs:
+    case kSSEFloat32Neg:
+    case kSSEFloat64Add:
+    case kSSEFloat64Sub:
+    case kSSEFloat64Max:
+    case kSSEFloat64Min:
+    case kSSEFloat64Abs:
+    case kSSEFloat64Neg:
+      return 5;
+    case kSSEFloat32Mul:
+      return 4;
+    case kSSEFloat32ToFloat64:
+    case kSSEFloat64ToFloat32:
+      return 6;
+    case kSSEFloat32Round:
+    case kSSEFloat64Round:
+    case kSSEFloat32ToInt32:
+    case kSSEFloat64ToInt32:
+      return 8;
+    case kSSEFloat32ToUint32:
+      return 21;
+    case kSSEFloat64ToUint32:
+      return 15;
+    case kIA32Idiv:
+      return 33;
+    case kIA32Udiv:
+      return 26;
+    case kSSEFloat32Div:
+      return 35;
+    case kSSEFloat64Div:
+      return 63;
+    case kSSEFloat32Sqrt:
+    case kSSEFloat64Sqrt:
+      return 25;
+    case kSSEFloat64Mod:
+      return 50;
+    case kArchTruncateDoubleToI:
+      return 9;
+    default:
+      return 1;
+  }
+}
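+
+// Example (illustrative): under this model a dependent chain
+// kSSEFloat64Mul -> kSSEFloat64Add contributes 5 + 5 = 10 cycles of critical
+// path, while any opcode not listed above defaults to a latency of 1. The
+// scheduler can use these numbers to prefer issuing long-latency instructions
+// as early as their inputs allow.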
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ia32/instruction-selector-ia32.cc b/src/compiler/backend/ia32/instruction-selector-ia32.cc
new file mode 100644
index 0000000..c16584a
--- /dev/null
+++ b/src/compiler/backend/ia32/instruction-selector-ia32.cc
@@ -0,0 +1,2932 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/iterator.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Adds IA32-specific methods for generating operands.
+class IA32OperandGenerator final : public OperandGenerator {
+ public:
+  explicit IA32OperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseByteRegister(Node* node) {
+    // TODO(titzer): encode byte register use constraints.
+    return UseFixed(node, edx);
+  }
+
+  InstructionOperand DefineAsByteRegister(Node* node) {
+    // TODO(titzer): encode byte register def constraints.
+    return DefineAsRegister(node);
+  }
+
+  bool CanBeMemoryOperand(InstructionCode opcode, Node* node, Node* input,
+                          int effect_level) {
+    if (input->opcode() != IrOpcode::kLoad ||
+        !selector()->CanCover(node, input)) {
+      return false;
+    }
+    if (effect_level != selector()->GetEffectLevel(input)) {
+      return false;
+    }
+    MachineRepresentation rep =
+        LoadRepresentationOf(input->op()).representation();
+    switch (opcode) {
+      case kIA32And:
+      case kIA32Or:
+      case kIA32Xor:
+      case kIA32Add:
+      case kIA32Sub:
+      case kIA32Cmp:
+      case kIA32Test:
+        return rep == MachineRepresentation::kWord32 || IsAnyTagged(rep);
+      case kIA32Cmp16:
+      case kIA32Test16:
+        return rep == MachineRepresentation::kWord16;
+      case kIA32Cmp8:
+      case kIA32Test8:
+        return rep == MachineRepresentation::kWord8;
+      default:
+        break;
+    }
+    return false;
+  }
+
+  bool CanBeImmediate(Node* node) {
+    switch (node->opcode()) {
+      case IrOpcode::kInt32Constant:
+      case IrOpcode::kNumberConstant:
+      case IrOpcode::kExternalConstant:
+      case IrOpcode::kRelocatableInt32Constant:
+      case IrOpcode::kRelocatableInt64Constant:
+        return true;
+      case IrOpcode::kHeapConstant: {
+// TODO(bmeurer): We must not dereference handles concurrently. If we
+// really have to do this here, then we need to find a way to put this
+// information on the HeapConstant node already.
+#if 0
+        // Constants in young generation cannot be used as immediates in V8
+        // because the GC does not scan code objects when collecting the young
+        // generation.
+        Handle<HeapObject> value = HeapConstantOf(node->op());
+        return !Heap::InYoungGeneration(*value);
+#else
+        return false;
+#endif
+      }
+      default:
+        return false;
+    }
+  }
+
+  AddressingMode GenerateMemoryOperandInputs(
+      Node* index, int scale, Node* base, int32_t displacement,
+      DisplacementMode displacement_mode, InstructionOperand inputs[],
+      size_t* input_count, RegisterMode register_mode = kRegister) {
+    AddressingMode mode = kMode_MRI;
+    if (displacement_mode == kNegativeDisplacement) {
+      displacement = -displacement;
+    }
+    if (base != nullptr) {
+      if (base->opcode() == IrOpcode::kInt32Constant) {
+        displacement += OpParameter<int32_t>(base->op());
+        base = nullptr;
+      }
+    }
+    if (base != nullptr) {
+      inputs[(*input_count)++] = UseRegisterWithMode(base, register_mode);
+      if (index != nullptr) {
+        DCHECK(scale >= 0 && scale <= 3);
+        inputs[(*input_count)++] = UseRegisterWithMode(index, register_mode);
+        if (displacement != 0) {
+          inputs[(*input_count)++] = TempImmediate(displacement);
+          static const AddressingMode kMRnI_modes[] = {kMode_MR1I, kMode_MR2I,
+                                                       kMode_MR4I, kMode_MR8I};
+          mode = kMRnI_modes[scale];
+        } else {
+          static const AddressingMode kMRn_modes[] = {kMode_MR1, kMode_MR2,
+                                                      kMode_MR4, kMode_MR8};
+          mode = kMRn_modes[scale];
+        }
+      } else {
+        if (displacement == 0) {
+          mode = kMode_MR;
+        } else {
+          inputs[(*input_count)++] = TempImmediate(displacement);
+          mode = kMode_MRI;
+        }
+      }
+    } else {
+      DCHECK(scale >= 0 && scale <= 3);
+      if (index != nullptr) {
+        inputs[(*input_count)++] = UseRegisterWithMode(index, register_mode);
+        if (displacement != 0) {
+          inputs[(*input_count)++] = TempImmediate(displacement);
+          static const AddressingMode kMnI_modes[] = {kMode_MRI, kMode_M2I,
+                                                      kMode_M4I, kMode_M8I};
+          mode = kMnI_modes[scale];
+        } else {
+          static const AddressingMode kMn_modes[] = {kMode_MR, kMode_M2,
+                                                     kMode_M4, kMode_M8};
+          mode = kMn_modes[scale];
+        }
+      } else {
+        inputs[(*input_count)++] = TempImmediate(displacement);
+        return kMode_MI;
+      }
+    }
+    return mode;
+  }
+
+  AddressingMode GenerateMemoryOperandInputs(
+      Node* index, int scale, Node* base, Node* displacement_node,
+      DisplacementMode displacement_mode, InstructionOperand inputs[],
+      size_t* input_count, RegisterMode register_mode = kRegister) {
+    int32_t displacement = (displacement_node == nullptr)
+                               ? 0
+                               : OpParameter<int32_t>(displacement_node->op());
+    return GenerateMemoryOperandInputs(index, scale, base, displacement,
+                                       displacement_mode, inputs, input_count,
+                                       register_mode);
+  }
+
+  AddressingMode GetEffectiveAddressMemoryOperand(
+      Node* node, InstructionOperand inputs[], size_t* input_count,
+      RegisterMode register_mode = kRegister) {
+    {
+      LoadMatcher<ExternalReferenceMatcher> m(node);
+      if (m.index().HasResolvedValue() && m.object().HasResolvedValue() &&
+          selector()->CanAddressRelativeToRootsRegister(
+              m.object().ResolvedValue())) {
+        ptrdiff_t const delta =
+            m.index().ResolvedValue() +
+            TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+                selector()->isolate(), m.object().ResolvedValue());
+        if (is_int32(delta)) {
+          inputs[(*input_count)++] = TempImmediate(static_cast<int32_t>(delta));
+          return kMode_Root;
+        }
+      }
+    }
+
+    BaseWithIndexAndDisplacement32Matcher m(node, AddressOption::kAllowAll);
+    DCHECK(m.matches());
+    if ((m.displacement() == nullptr || CanBeImmediate(m.displacement()))) {
+      return GenerateMemoryOperandInputs(
+          m.index(), m.scale(), m.base(), m.displacement(),
+          m.displacement_mode(), inputs, input_count, register_mode);
+    } else {
+      inputs[(*input_count)++] =
+          UseRegisterWithMode(node->InputAt(0), register_mode);
+      inputs[(*input_count)++] =
+          UseRegisterWithMode(node->InputAt(1), register_mode);
+      return kMode_MR1;
+    }
+  }
+
+  InstructionOperand GetEffectiveIndexOperand(Node* index,
+                                              AddressingMode* mode) {
+    if (CanBeImmediate(index)) {
+      *mode = kMode_MRI;
+      return UseImmediate(index);
+    } else {
+      *mode = kMode_MR1;
+      return UseUniqueRegister(index);
+    }
+  }
+
+  bool CanBeBetterLeftOperand(Node* node) const {
+    return !selector()->IsLive(node);
+  }
+};
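+
+// Usage sketch (illustrative, node shapes hypothetical): for an address that
+// matches base + index*4 + 8, GetEffectiveAddressMemoryOperand() appends
+// (base, index, Immediate(8)) to |inputs| and returns kMode_MR4I; for a plain
+// base-register address it appends just (base) and returns kMode_MR. A caller
+// typically does:
+//
+//   InstructionOperand inputs[3];
+//   size_t input_count = 0;
+//   AddressingMode mode =
+//       g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+//   InstructionCode code = opcode | AddressingModeField::encode(mode);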
+
+namespace {
+
+void VisitRO(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  Node* input = node->InputAt(0);
+  // We have to use a byte register as input to movsxb.
+  InstructionOperand input_op =
+      opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input);
+  selector->Emit(opcode, g.DefineAsRegister(node), input_op);
+}
+
+void VisitROWithTemp(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister()};
+  selector->Emit(opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
+                 arraysize(temps), temps);
+}
+
+void VisitROWithTempSimd(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)), arraysize(temps),
+                 temps);
+}
+
+void VisitRR(InstructionSelector* selector, Node* node,
+             InstructionCode opcode) {
+  IA32OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRROFloat(InstructionSelector* selector, Node* node,
+                   ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.Use(node->InputAt(1));
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
+  }
+}
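+
+// Why the two paths differ (illustrative): AVX provides non-destructive
+// three-operand forms, e.g.
+//
+//   vaddss xmm0, xmm1, xmm2   ; xmm0 = xmm1 + xmm2
+//
+// so the result may live in any register (DefineAsRegister). The SSE forms
+// are two-operand and destructive, e.g.
+//
+//   addss xmm1, xmm2          ; xmm1 = xmm1 + xmm2
+//
+// so the output must be allocated to the same register as the first input
+// (DefineSameAsFirst).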
+
+void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
+                    ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
+                   arraysize(temps), temps);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node),
+                   g.UseUniqueRegister(input), arraysize(temps), temps);
+  }
+}
+
+void VisitRRSimd(InstructionSelector* selector, Node* node,
+                 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0);
+  }
+}
+
+// TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
+// a register as we don't have memory alignment yet. For AVX, memory operands
+// are fine, but can have performance issues if not aligned to 16/32 bytes
+// (based on load size), see SDM Vol 1, chapter 14.9
+void VisitRROSimd(InstructionSelector* selector, Node* node,
+                  ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
+                   g.Use(node->InputAt(1)));
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
+                   g.UseRegister(node->InputAt(1)));
+  }
+}
+
+void VisitRRISimd(InstructionSelector* selector, Node* node,
+                  ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 =
+      g.UseImmediate(OpParameter<int32_t>(node->op()));
+  // 8x16 uses movsx_b on dest to extract a byte, which only works
+  // if dest is a byte register.
+  InstructionOperand dest = opcode == kIA32I8x16ExtractLaneS
+                                ? g.DefineAsFixed(node, eax)
+                                : g.DefineAsRegister(node);
+  selector->Emit(opcode, dest, operand0, operand1);
+}
+
+void VisitRRISimd(InstructionSelector* selector, Node* node,
+                  ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 =
+      g.UseImmediate(OpParameter<int32_t>(node->op()));
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
+  }
+}
+
+void VisitRROSimdShift(InstructionSelector* selector, Node* node,
+                       ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  if (g.CanBeImmediate(node->InputAt(1))) {
+    selector->Emit(opcode, g.DefineSameAsFirst(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseImmediate(node->InputAt(1)));
+  } else {
+    InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+    InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
+    InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
+    selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
+                   arraysize(temps), temps);
+  }
+}
+
+void VisitRROI8x16SimdShift(InstructionSelector* selector, Node* node,
+                            ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
+  InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
+  selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
+                 arraysize(temps), temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), edx));
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  InstructionCode opcode;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kIA32S128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kIA32S128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kIA32S128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kIA32S128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kIA32S128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kIA32S128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kIA32S128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kIA32S128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kIA32S128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kIA32S128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kIA32Movss;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kIA32Movsd;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  // IA32 supports unaligned loads.
+  DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
+  // Trap handler is not supported on IA32.
+  DCHECK_NE(params.kind, MemoryAccessKind::kProtected);
+
+  IA32OperandGenerator g(this);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  InstructionCode code = opcode | AddressingModeField::encode(mode);
+  Emit(code, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kIA32Movss;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kIA32Movsd;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsSigned() ? kIA32Movsxbl : kIA32Movzxbl;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kIA32Movsxwl : kIA32Movzxwl;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord32:
+      opcode = kIA32Movl;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kIA32Movdqu;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kWord64:             // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  IA32OperandGenerator g(this);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  InstructionCode code = opcode | AddressingModeField::encode(mode);
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    code |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+  Emit(code, 1, outputs, input_count, inputs);
+}
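+
+// Note on the encoding (illustrative): the addressing mode shares the 32-bit
+// InstructionCode with the arch opcode, so a kIA32Movl load with mode
+// kMode_MRI is emitted as
+//
+//   InstructionCode code = kIA32Movl | AddressingModeField::encode(kMode_MRI);
+//
+// and the code generator later recovers the mode with
+// AddressingModeField::decode(instr->opcode()).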
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedPointer(rep));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[] = {
+        g.UseUniqueRegister(base),
+        g.GetEffectiveIndexOperand(index, &addressing_mode),
+        g.UseUniqueRegister(value)};
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, arraysize(inputs), inputs, temp_count, temps);
+  } else {
+    ArchOpcode opcode;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kIA32Movss;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kIA32Movsd;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kIA32Movb;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kIA32Movw;
+        break;
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:         // Fall through.
+      case MachineRepresentation::kWord32:
+        opcode = kIA32Movl;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kIA32Movdqu;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:         // Fall through.
+      case MachineRepresentation::kWord64:             // Fall through.
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    InstructionOperand val;
+    if (g.CanBeImmediate(value)) {
+      val = g.UseImmediate(value);
+    } else if (rep == MachineRepresentation::kWord8 ||
+               rep == MachineRepresentation::kBit) {
+      val = g.UseByteRegister(value);
+    } else {
+      val = g.UseRegister(value);
+    }
+
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    AddressingMode addressing_mode =
+        g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+    InstructionCode code =
+        opcode | AddressingModeField::encode(addressing_mode);
+    inputs[input_count++] = val;
+    Emit(code, 0, static_cast<InstructionOperand*>(nullptr), input_count,
+         inputs);
+  }
+}
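+
+// Example (illustrative, registers and offsets hypothetical): a store of a
+// tagged pointer with write_barrier_kind != kNoWriteBarrier takes the first
+// branch and emits kArchStoreWithWriteBarrier, keeping base, index and value
+// in unique registers so they do not alias the scratch registers used by the
+// barrier. A plain kWord32 store of a small constant takes the second branch
+// and becomes, e.g.,
+//
+//   mov dword ptr [eax + 8], 42   ; kIA32Movl, kMode_MRI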
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+// Architecture supports unaligned access, therefore VisitLoad is used instead
+void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
+
+// Architecture supports unaligned access, therefore VisitStore is used instead
+void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
+
+namespace {
+
+// Shared routine for multiple binary operations.
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, FlagsContinuation* cont) {
+  IA32OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+  InstructionOperand inputs[6];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  // TODO(turbofan): match complex addressing modes.
+  if (left == right) {
+    // If both inputs refer to the same operand, enforce allocating a register
+    // for both of them to ensure that we don't end up generating code like
+    // this:
+    //
+    //   mov eax, [ebp-0x10]
+    //   add eax, [ebp-0x10]
+    //   jo label
+    InstructionOperand const input = g.UseRegister(left);
+    inputs[input_count++] = input;
+    inputs[input_count++] = input;
+  } else if (g.CanBeImmediate(right)) {
+    inputs[input_count++] = g.UseRegister(left);
+    inputs[input_count++] = g.UseImmediate(right);
+  } else {
+    int effect_level = selector->GetEffectLevel(node, cont);
+    if (node->op()->HasProperty(Operator::kCommutative) &&
+        g.CanBeBetterLeftOperand(right) &&
+        (!g.CanBeBetterLeftOperand(left) ||
+         !g.CanBeMemoryOperand(opcode, node, right, effect_level))) {
+      std::swap(left, right);
+    }
+    if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
+      inputs[input_count++] = g.UseRegister(left);
+      AddressingMode addressing_mode =
+          g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
+      opcode |= AddressingModeField::encode(addressing_mode);
+    } else {
+      inputs[input_count++] = g.UseRegister(left);
+      inputs[input_count++] = g.Use(right);
+    }
+  }
+
+  outputs[output_count++] = g.DefineSameAsFirst(node);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_EQ(1u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+// Shared routine for multiple binary operations.
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode) {
+  FlagsContinuation cont;
+  VisitBinop(selector, node, opcode, &cont);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  VisitBinop(this, node, kIA32And);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  VisitBinop(this, node, kIA32Or);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  IA32OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    Emit(kIA32Not, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()));
+  } else {
+    VisitBinop(this, node, kIA32Xor);
+  }
+}
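+
+// Peephole example (illustrative): x ^ -1 is bitwise NOT, so instead of the
+// generic binop path (which would emit an xor with an immediate operand) the
+// selector emits a single kIA32Not, e.g. "not eax".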
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  int effect_level = GetEffectLevel(node, cont);
+
+  IA32OperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  if (g.CanBeMemoryOperand(kIA32Cmp, node, value, effect_level)) {
+    DCHECK_EQ(IrOpcode::kLoad, value->opcode());
+
+    // GetEffectiveAddressMemoryOperand can create at most 3 inputs.
+    static constexpr int kMaxInputCount = 3;
+
+    size_t input_count = 0;
+    InstructionOperand inputs[kMaxInputCount];
+    AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
+        value, inputs, &input_count, register_mode);
+    opcode |= AddressingModeField::encode(addressing_mode);
+    DCHECK_LE(input_count, kMaxInputCount);
+
+    EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                         temp_count, temps, cont);
+  } else {
+    InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+    static constexpr int input_count = arraysize(inputs);
+    EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                         temp_count, temps, cont);
+  }
+}
+
+// Shared routine for multiple shift operations.
+static inline void VisitShift(InstructionSelector* selector, Node* node,
+                              ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  if (g.CanBeImmediate(right)) {
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseImmediate(right));
+  } else {
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseFixed(right, ecx));
+  }
+}
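+
+// Example (illustrative): on ia32 a variable shift count must live in cl, so
+// "x << y" becomes
+//
+//   ; y is fixed to ecx by UseFixed(right, ecx)
+//   shl eax, cl
+//
+// whereas a constant count is folded into the instruction, e.g. "shl eax, 5".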
+
+namespace {
+
+void VisitMulHigh(InstructionSelector* selector, Node* node,
+                  ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister(eax)};
+  selector->Emit(
+      opcode, g.DefineAsFixed(node, edx), g.UseFixed(node->InputAt(0), eax),
+      g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void VisitDiv(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister(edx)};
+  selector->Emit(opcode, g.DefineAsFixed(node, eax),
+                 g.UseFixed(node->InputAt(0), eax),
+                 g.UseUnique(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister(eax)};
+  selector->Emit(opcode, g.DefineAsFixed(node, edx),
+                 g.UseFixed(node->InputAt(0), eax),
+                 g.UseUnique(node->InputAt(1)), arraysize(temps), temps);
+}
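+
+// Example (illustrative): ia32 idiv/div take the dividend in edx:eax and
+// produce the quotient in eax and the remainder in edx, hence the fixed
+// register constraints above. Int32Div roughly lowers to
+//
+//   mov eax, <dividend>
+//   cdq                 ; sign-extend eax into edx
+//   idiv <divisor>      ; eax = quotient, edx = remainder
+//
+// VisitDiv defines the result as eax and clobbers edx via a temp; VisitMod
+// does the reverse.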
+
+void EmitLea(InstructionSelector* selector, Node* result, Node* index,
+             int scale, Node* base, Node* displacement,
+             DisplacementMode displacement_mode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GenerateMemoryOperandInputs(index, scale, base, displacement,
+                                    displacement_mode, inputs, &input_count);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(result);
+
+  InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
+
+  selector->Emit(opcode, 1, outputs, input_count, inputs);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  Int32ScaleMatcher m(node, true);
+  if (m.matches()) {
+    Node* index = node->InputAt(0);
+    Node* base = m.power_of_two_plus_one() ? index : nullptr;
+    EmitLea(this, node, index, m.scale(), base, nullptr, kPositiveDisplacement);
+    return;
+  }
+  VisitShift(this, node, kIA32Shl);
+}
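+
+// Example (illustrative): Int32ScaleMatcher recognizes shifts by 0..3 and
+// multiplies by 1, 2, 4, 8 (plus 3, 5, 9 via the "power of two plus one"
+// form, used by VisitInt32Mul below), so
+//
+//   x << 3   =>  lea eax, [x*8]       ; kIA32Lea, kMode_M8
+//   x * 9    =>  lea eax, [x + x*8]   ; kIA32Lea, kMode_MR8
+//
+// lea leaves EFLAGS untouched and lets the result go in any register
+// (DefineAsRegister) instead of being tied to the input as with kIA32Shl or
+// kIA32Imul.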
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  VisitShift(this, node, kIA32Shr);
+}
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  VisitShift(this, node, kIA32Sar);
+}
+
+void InstructionSelector::VisitInt32PairAdd(Node* node) {
+  IA32OperandGenerator g(this);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the temp
+    // register.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)),
+        g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
+                                    g.DefineAsRegister(projection1)};
+
+    InstructionOperand temps[] = {g.TempRegister()};
+
+    Emit(kIA32AddPair, 2, outputs, 4, inputs, 1, temps);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32 bit
+    // instruction.
+    Emit(kIA32Add, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.Use(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairSub(Node* node) {
+  IA32OperandGenerator g(this);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the temp
+    // register.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)),
+        g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
+                                    g.DefineAsRegister(projection1)};
+
+    InstructionOperand temps[] = {g.TempRegister()};
+
+    Emit(kIA32SubPair, 2, outputs, 4, inputs, 1, temps);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32 bit
+    // instruction.
+    Emit(kIA32Sub, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.Use(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairMul(Node* node) {
+  IA32OperandGenerator g(this);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // InputAt(3) explicitly shares ecx with OutputRegister(1) to save one
+    // register and one mov instruction.
+    InstructionOperand inputs[] = {
+        g.UseUnique(node->InputAt(0)),
+        g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
+        g.UseUniqueRegister(node->InputAt(2)),
+        g.UseFixed(node->InputAt(3), ecx)};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsFixed(node, eax),
+        g.DefineAsFixed(NodeProperties::FindProjection(node, 1), ecx)};
+
+    InstructionOperand temps[] = {g.TempRegister(edx)};
+
+    Emit(kIA32MulPair, 2, outputs, 4, inputs, 1, temps);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32 bit
+    // instruction.
+    Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.Use(node->InputAt(2)));
+  }
+}
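+
+// Arithmetic behind kIA32MulPair (illustrative): writing the 64-bit inputs as
+// (a_high:a_low) and (b_high:b_low), the low 64 bits of the product are
+//
+//   a_low*b_low + ((a_low*b_high + a_high*b_low) << 32)
+//
+// i.e. one widening 32x32->64 multiply plus two 32-bit multiplies for the
+// cross terms. ia32 "mul" places its widening result in edx:eax, hence the
+// fixed eax/edx constraints above.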
+
+void VisitWord32PairShift(InstructionSelector* selector, InstructionCode opcode,
+                          Node* node) {
+  IA32OperandGenerator g(selector);
+
+  Node* shift = node->InputAt(2);
+  InstructionOperand shift_operand;
+  if (g.CanBeImmediate(shift)) {
+    shift_operand = g.UseImmediate(shift);
+  } else {
+    shift_operand = g.UseFixed(shift, ecx);
+  }
+  InstructionOperand inputs[] = {g.UseFixed(node->InputAt(0), eax),
+                                 g.UseFixed(node->InputAt(1), edx),
+                                 shift_operand};
+
+  InstructionOperand outputs[2];
+  InstructionOperand temps[1];
+  int32_t output_count = 0;
+  int32_t temp_count = 0;
+  outputs[output_count++] = g.DefineAsFixed(node, eax);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, edx);
+  } else {
+    temps[temp_count++] = g.TempRegister(edx);
+  }
+
+  selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
+}
+
+void InstructionSelector::VisitWord32PairShl(Node* node) {
+  VisitWord32PairShift(this, kIA32ShlPair, node);
+}
+
+void InstructionSelector::VisitWord32PairShr(Node* node) {
+  VisitWord32PairShift(this, kIA32ShrPair, node);
+}
+
+void InstructionSelector::VisitWord32PairSar(Node* node) {
+  VisitWord32PairShift(this, kIA32SarPair, node);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) {
+  VisitShift(this, node, kIA32Rol);
+}
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitShift(this, node, kIA32Ror);
+}
+
+#define RO_OP_LIST(V)                                       \
+  V(Word32Clz, kIA32Lzcnt)                                  \
+  V(Word32Ctz, kIA32Tzcnt)                                  \
+  V(Word32Popcnt, kIA32Popcnt)                              \
+  V(ChangeFloat32ToFloat64, kSSEFloat32ToFloat64)           \
+  V(RoundInt32ToFloat32, kSSEInt32ToFloat32)                \
+  V(ChangeInt32ToFloat64, kSSEInt32ToFloat64)               \
+  V(TruncateFloat32ToInt32, kSSEFloat32ToInt32)             \
+  V(ChangeFloat64ToInt32, kSSEFloat64ToInt32)               \
+  V(TruncateFloat64ToFloat32, kSSEFloat64ToFloat32)         \
+  V(RoundFloat64ToInt32, kSSEFloat64ToInt32)                \
+  V(BitcastFloat32ToInt32, kIA32BitcastFI)                  \
+  V(BitcastInt32ToFloat32, kIA32BitcastIF)                  \
+  V(Float32Sqrt, kSSEFloat32Sqrt)                           \
+  V(Float64Sqrt, kSSEFloat64Sqrt)                           \
+  V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32)   \
+  V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \
+  V(SignExtendWord8ToInt32, kIA32Movsxbl)                   \
+  V(SignExtendWord16ToInt32, kIA32Movsxwl)                  \
+  V(F64x2Sqrt, kIA32F64x2Sqrt)
+
+#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64)
+
+#define RO_WITH_TEMP_SIMD_OP_LIST(V)              \
+  V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \
+  V(ChangeFloat64ToUint32, kSSEFloat64ToUint32)   \
+  V(TruncateFloat64ToUint32, kSSEFloat64ToUint32)
+
+#define RR_OP_LIST(V)                                                         \
+  V(TruncateFloat64ToWord32, kArchTruncateDoubleToI)                          \
+  V(Float32RoundDown, kSSEFloat32Round | MiscField::encode(kRoundDown))       \
+  V(Float64RoundDown, kSSEFloat64Round | MiscField::encode(kRoundDown))       \
+  V(Float32RoundUp, kSSEFloat32Round | MiscField::encode(kRoundUp))           \
+  V(Float64RoundUp, kSSEFloat64Round | MiscField::encode(kRoundUp))           \
+  V(Float32RoundTruncate, kSSEFloat32Round | MiscField::encode(kRoundToZero)) \
+  V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
+  V(Float32RoundTiesEven,                                                     \
+    kSSEFloat32Round | MiscField::encode(kRoundToNearest))                    \
+  V(Float64RoundTiesEven,                                                     \
+    kSSEFloat64Round | MiscField::encode(kRoundToNearest))                    \
+  V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp))                 \
+  V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown))              \
+  V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero))            \
+  V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest))    \
+  V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp))                 \
+  V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown))              \
+  V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero))            \
+  V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
+
+#define RRO_FLOAT_OP_LIST(V)                    \
+  V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \
+  V(Float64Add, kAVXFloat64Add, kSSEFloat64Add) \
+  V(Float32Sub, kAVXFloat32Sub, kSSEFloat32Sub) \
+  V(Float64Sub, kAVXFloat64Sub, kSSEFloat64Sub) \
+  V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
+  V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
+  V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
+  V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
+  V(F64x2Add, kIA32F64x2Add, kIA32F64x2Add)     \
+  V(F64x2Sub, kIA32F64x2Sub, kIA32F64x2Sub)     \
+  V(F64x2Mul, kIA32F64x2Mul, kIA32F64x2Mul)     \
+  V(F64x2Div, kIA32F64x2Div, kIA32F64x2Div)     \
+  V(F64x2Eq, kIA32F64x2Eq, kIA32F64x2Eq)        \
+  V(F64x2Ne, kIA32F64x2Ne, kIA32F64x2Ne)        \
+  V(F64x2Lt, kIA32F64x2Lt, kIA32F64x2Lt)        \
+  V(F64x2Le, kIA32F64x2Le, kIA32F64x2Le)
+
+#define FLOAT_UNOP_LIST(V)                      \
+  V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
+  V(Float64Abs, kAVXFloat64Abs, kSSEFloat64Abs) \
+  V(Float32Neg, kAVXFloat32Neg, kSSEFloat32Neg) \
+  V(Float64Neg, kAVXFloat64Neg, kSSEFloat64Neg) \
+  V(F64x2Abs, kAVXFloat64Abs, kSSEFloat64Abs)   \
+  V(F64x2Neg, kAVXFloat64Neg, kSSEFloat64Neg)
+
+#define RO_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRO(this, node, opcode);                      \
+  }
+RO_OP_LIST(RO_VISITOR)
+#undef RO_VISITOR
+#undef RO_OP_LIST
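+
+// Expansion example (illustrative): the entry V(Word32Clz, kIA32Lzcnt) above
+// generates
+//
+//   void InstructionSelector::VisitWord32Clz(Node* node) {
+//     VisitRO(this, node, kIA32Lzcnt);
+//   }
+//
+// and the remaining *_OP_LIST / *_VISITOR pairs below expand analogously.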
+
+#define RO_WITH_TEMP_VISITOR(Name, opcode)            \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitROWithTemp(this, node, opcode);              \
+  }
+RO_WITH_TEMP_OP_LIST(RO_WITH_TEMP_VISITOR)
+#undef RO_WITH_TEMP_VISITOR
+#undef RO_WITH_TEMP_OP_LIST
+
+#define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode)       \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitROWithTempSimd(this, node, opcode);          \
+  }
+RO_WITH_TEMP_SIMD_OP_LIST(RO_WITH_TEMP_SIMD_VISITOR)
+#undef RO_WITH_TEMP_SIMD_VISITOR
+#undef RO_WITH_TEMP_SIMD_OP_LIST
+
+#define RR_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, node, opcode);                      \
+  }
+RR_OP_LIST(RR_VISITOR)
+#undef RR_VISITOR
+#undef RR_OP_LIST
+
+#define RRO_FLOAT_VISITOR(Name, avx, sse)             \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRROFloat(this, node, avx, sse);              \
+  }
+RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR)
+#undef RRO_FLOAT_VISITOR
+#undef RRO_FLOAT_OP_LIST
+
+#define FLOAT_UNOP_VISITOR(Name, avx, sse)                  \
+  void InstructionSelector::Visit##Name(Node* node) {       \
+    VisitFloatUnop(this, node, node->InputAt(0), avx, sse); \
+  }
+FLOAT_UNOP_LIST(FLOAT_UNOP_VISITOR)
+#undef FLOAT_UNOP_VISITOR
+#undef FLOAT_UNOP_LIST
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  IA32OperandGenerator g(this);
+
+  // Try to match the Add to a lea pattern
+  BaseWithIndexAndDisplacement32Matcher m(node);
+  if (m.matches() &&
+      (m.displacement() == nullptr || g.CanBeImmediate(m.displacement()))) {
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    AddressingMode mode = g.GenerateMemoryOperandInputs(
+        m.index(), m.scale(), m.base(), m.displacement(), m.displacement_mode(),
+        inputs, &input_count);
+
+    DCHECK_NE(0u, input_count);
+    DCHECK_GE(arraysize(inputs), input_count);
+
+    InstructionOperand outputs[1];
+    outputs[0] = g.DefineAsRegister(node);
+
+    InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
+    Emit(opcode, 1, outputs, input_count, inputs);
+    return;
+  }
+
+  // No lea pattern match, use add
+  VisitBinop(this, node, kIA32Add);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  IA32OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().Is(0)) {
+    Emit(kIA32Neg, g.DefineSameAsFirst(node), g.Use(m.right().node()));
+  } else {
+    VisitBinop(this, node, kIA32Sub);
+  }
+}
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  Int32ScaleMatcher m(node, true);
+  if (m.matches()) {
+    Node* index = node->InputAt(0);
+    Node* base = m.power_of_two_plus_one() ? index : nullptr;
+    EmitLea(this, node, index, m.scale(), base, nullptr, kPositiveDisplacement);
+    return;
+  }
+  IA32OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (g.CanBeImmediate(right)) {
+    Emit(kIA32Imul, g.DefineAsRegister(node), g.Use(left),
+         g.UseImmediate(right));
+  } else {
+    if (g.CanBeBetterLeftOperand(right)) {
+      std::swap(left, right);
+    }
+    Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(left),
+         g.Use(right));
+  }
+}
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  VisitMulHigh(this, node, kIA32ImulHigh);
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  VisitMulHigh(this, node, kIA32UmulHigh);
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  VisitDiv(this, node, kIA32Idiv);
+}
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  VisitDiv(this, node, kIA32Udiv);
+}
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  VisitMod(this, node, kIA32Idiv);
+}
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  VisitMod(this, node, kIA32Udiv);
+}
+
+void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
+  IA32OperandGenerator g(this);
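+  // SSE only provides a signed int32 -> float conversion (cvtsi2ss), so the
+  // code generator needs a scratch register to handle the unsigned case.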
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kSSEUint32ToFloat32, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  IA32OperandGenerator g(this);
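+  // Float64Mod is lowered to an x87 fprem loop; eax is reserved for fnstsw,
+  // which is used to poll the FPU status word inside that loop.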
+  InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister()};
+  Emit(kSSEFloat64Mod, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat32Max(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kSSEFloat32Max, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat64Max(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kSSEFloat64Max, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat32Min(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kSSEFloat32Min, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat64Min(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kSSEFloat64Min, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  IA32OperandGenerator g(this);
+  Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  IA32OperandGenerator g(this);
+  Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  IA32OperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    InstructionOperand temps[] = {g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr, temp_count, temps);
+
+    // Poke any stack arguments.
+    for (size_t n = 0; n < arguments->size(); ++n) {
+      PushParameter input = (*arguments)[n];
+      if (input.node) {
+        int const slot = static_cast<int>(n);
+        InstructionOperand value = g.CanBeImmediate(node)
+                                       ? g.UseImmediate(input.node)
+                                       : g.UseRegister(input.node);
+        Emit(kIA32Poke | MiscField::encode(slot), g.NoOutput(), value);
+      }
+    }
+  } else {
+    // Push any stack arguments.
+    int effect_level = GetEffectLevel(node);
+    for (PushParameter input : base::Reversed(*arguments)) {
+      // Skip any alignment holes in pushed nodes.
+      if (input.node == nullptr) continue;
+      if (g.CanBeMemoryOperand(kIA32Push, node, input.node, effect_level)) {
+        InstructionOperand outputs[1];
+        InstructionOperand inputs[4];
+        size_t input_count = 0;
+        InstructionCode opcode = kIA32Push;
+        AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+            input.node, inputs, &input_count);
+        opcode |= AddressingModeField::encode(mode);
+        Emit(opcode, 0, outputs, input_count, inputs);
+      } else {
+        InstructionOperand value =
+            g.CanBeImmediate(input.node)
+                ? g.UseImmediate(input.node)
+                : IsSupported(ATOM) ||
+                          sequence()->IsFP(GetVirtualRegister(input.node))
+                      ? g.UseRegister(input.node)
+                      : g.Use(input.node);
+        if (input.location.GetType() == MachineType::Float32()) {
+          Emit(kIA32PushFloat32, g.NoOutput(), value);
+        } else if (input.location.GetType() == MachineType::Float64()) {
+          Emit(kIA32PushFloat64, g.NoOutput(), value);
+        } else if (input.location.GetType() == MachineType::Simd128()) {
+          Emit(kIA32PushSimd128, g.NoOutput(), value);
+        } else {
+          Emit(kIA32Push, g.NoOutput(), value);
+        }
+      }
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  IA32OperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      Emit(kIA32Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return true; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 0; }
+
+namespace {
+
+void VisitCompareWithMemoryOperand(InstructionSelector* selector,
+                                   InstructionCode opcode, Node* left,
+                                   InstructionOperand right,
+                                   FlagsContinuation* cont) {
+  DCHECK_EQ(IrOpcode::kLoad, left->opcode());
+  IA32OperandGenerator g(selector);
+  size_t input_count = 0;
+  InstructionOperand inputs[4];
+  AddressingMode addressing_mode =
+      g.GetEffectiveAddressMemoryOperand(left, inputs, &input_count);
+  opcode |= AddressingModeField::encode(addressing_mode);
+  inputs[input_count++] = right;
+
+  selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
+}
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  Node* left, Node* right, FlagsContinuation* cont,
+                  bool commutative) {
+  IA32OperandGenerator g(selector);
+  if (commutative && g.CanBeBetterLeftOperand(right)) {
+    std::swap(left, right);
+  }
+  VisitCompare(selector, opcode, g.UseRegister(left), g.Use(right), cont);
+}
+
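+// Returns the MachineType to assume for {node} when it is compared against
+// {hint_node}: if {hint_node} is a load and {node} is a constant that fits the
+// loaded representation, the load's type is used so the compare can be
+// narrowed.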
+MachineType MachineTypeForNarrow(Node* node, Node* hint_node) {
+  if (hint_node->opcode() == IrOpcode::kLoad) {
+    MachineType hint = LoadRepresentationOf(hint_node->op());
+    if (node->opcode() == IrOpcode::kInt32Constant ||
+        node->opcode() == IrOpcode::kInt64Constant) {
+      int64_t constant = node->opcode() == IrOpcode::kInt32Constant
+                             ? OpParameter<int32_t>(node->op())
+                             : OpParameter<int64_t>(node->op());
+      if (hint == MachineType::Int8()) {
+        if (constant >= std::numeric_limits<int8_t>::min() &&
+            constant <= std::numeric_limits<int8_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Uint8()) {
+        if (constant >= std::numeric_limits<uint8_t>::min() &&
+            constant <= std::numeric_limits<uint8_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Int16()) {
+        if (constant >= std::numeric_limits<int16_t>::min() &&
+            constant <= std::numeric_limits<int16_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Uint16()) {
+        if (constant >= std::numeric_limits<uint16_t>::min() &&
+            constant <= std::numeric_limits<uint16_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Int32()) {
+        return hint;
+      } else if (hint == MachineType::Uint32()) {
+        if (constant >= 0) return hint;
+      }
+    }
+  }
+  return node->opcode() == IrOpcode::kLoad ? LoadRepresentationOf(node->op())
+                                           : MachineType::None();
+}
+
+// Tries to match the size of the given opcode to that of the operands, if
+// possible.
+InstructionCode TryNarrowOpcodeSize(InstructionCode opcode, Node* left,
+                                    Node* right, FlagsContinuation* cont) {
+  // TODO(epertoso): we can probably get some size information out of phi nodes.
+  // If the load representations don't match, both operands will be
+  // zero/sign-extended to 32 bits.
+  MachineType left_type = MachineTypeForNarrow(left, right);
+  MachineType right_type = MachineTypeForNarrow(right, left);
+  if (left_type == right_type) {
+    switch (left_type.representation()) {
+      case MachineRepresentation::kBit:
+      case MachineRepresentation::kWord8: {
+        if (opcode == kIA32Test) return kIA32Test8;
+        if (opcode == kIA32Cmp) {
+          if (left_type.semantic() == MachineSemantic::kUint32) {
+            cont->OverwriteUnsignedIfSigned();
+          } else {
+            CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
+          }
+          return kIA32Cmp8;
+        }
+        break;
+      }
+      case MachineRepresentation::kWord16:
+        if (opcode == kIA32Test) return kIA32Test16;
+        if (opcode == kIA32Cmp) {
+          if (left_type.semantic() == MachineSemantic::kUint32) {
+            cont->OverwriteUnsignedIfSigned();
+          } else {
+            CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
+          }
+          return kIA32Cmp16;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  return opcode;
+}
+
+// Shared routine for multiple float32 compare operations (inputs commuted).
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Node* const left = node->InputAt(0);
+  Node* const right = node->InputAt(1);
+  VisitCompare(selector, kSSEFloat32Cmp, right, left, cont, false);
+}
+
+// Shared routine for multiple float64 compare operations (inputs commuted).
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Node* const left = node->InputAt(0);
+  Node* const right = node->InputAt(1);
+  VisitCompare(selector, kSSEFloat64Cmp, right, left, cont, false);
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont) {
+  IA32OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  InstructionCode narrowed_opcode =
+      TryNarrowOpcodeSize(opcode, left, right, cont);
+
+  int effect_level = selector->GetEffectLevel(node, cont);
+
+  // If one of the two inputs is an immediate, make sure it's on the right, or
+  // if one of the two inputs is a memory operand, make sure it's on the left.
+  if ((!g.CanBeImmediate(right) && g.CanBeImmediate(left)) ||
+      (g.CanBeMemoryOperand(narrowed_opcode, node, right, effect_level) &&
+       !g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level))) {
+    if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
+    std::swap(left, right);
+  }
+
+  // Match immediates on right side of comparison.
+  if (g.CanBeImmediate(right)) {
+    if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
+      return VisitCompareWithMemoryOperand(selector, narrowed_opcode, left,
+                                           g.UseImmediate(right), cont);
+    }
+    return VisitCompare(selector, opcode, g.Use(left), g.UseImmediate(right),
+                        cont);
+  }
+
+  // Match memory operands on left side of comparison.
+  if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
+    bool needs_byte_register =
+        narrowed_opcode == kIA32Test8 || narrowed_opcode == kIA32Cmp8;
+    return VisitCompareWithMemoryOperand(
+        selector, narrowed_opcode, left,
+        needs_byte_register ? g.UseByteRegister(right) : g.UseRegister(right),
+        cont);
+  }
+
+  return VisitCompare(selector, opcode, left, right, cont,
+                      node->op()->HasProperty(Operator::kCommutative));
+}
+
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      FlagsContinuation* cont) {
+  VisitWordCompare(selector, node, kIA32Cmp, cont);
+}
+
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode, MachineRepresentation rep) {
+  IA32OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  AddressingMode addressing_mode;
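+  // The value register doubles as the output, since xchg overwrites it; for a
+  // byte-sized exchange it must also be byte-addressable, hence the fixed edx.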
+  InstructionOperand value_operand = (rep == MachineRepresentation::kWord8)
+                                         ? g.UseFixed(value, edx)
+                                         : g.UseUniqueRegister(value);
+  InstructionOperand inputs[] = {
+      value_operand, g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {
+      (rep == MachineRepresentation::kWord8)
+          // Using DefineSameAsFirst requires the register to be unallocated.
+          ? g.DefineAsFixed(node, edx)
+          : g.DefineSameAsFirst(node)};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, arraysize(inputs), inputs);
+}
+
+void VisitAtomicBinOp(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode, MachineRepresentation rep) {
+  AddressingMode addressing_mode;
+  IA32OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegister(value), g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
+  InstructionOperand temp[] = {(rep == MachineRepresentation::kWord8)
+                                   ? g.UseByteRegister(node)
+                                   : g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temp), temp);
+}
+
+void VisitPairAtomicBinOp(InstructionSelector* selector, Node* node,
+                          ArchOpcode opcode) {
+  IA32OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  // For Word64 operations, the value input is split into a high node and a
+  // low node in the int64-lowering phase.
+  Node* value_high = node->InputAt(3);
+
+  // Wasm lives in 32-bit address space, so we do not need to worry about
+  // base/index lowering. This will need to be fixed for Wasm64.
+  AddressingMode addressing_mode;
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
+      g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[2];
+  size_t temp_count = 0;
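+  // The pair binop is implemented as a cmpxchg8b loop, which always writes
+  // edx:eax; reserve those registers as temps when the corresponding
+  // projection is unused.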
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, eax);
+  } else {
+    temps[temp_count++] = g.TempRegister(eax);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, edx);
+  } else {
+    temps[temp_count++] = g.TempRegister(edx);
+  }
+  selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
+                 temp_count, temps);
+}
+
+}  // namespace
+
+// Shared routine for word comparison with zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of
+          // the actual value, or was already defined, which means it is
+          // scheduled *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kIA32Add, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kIA32Sub, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kIA32Imul, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Sub:
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kWord32And:
+        return VisitWordCompare(this, value, kIA32Test, cont);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // The continuation could not be combined with a compare; emit a compare
+  // against zero.
+  IA32OperandGenerator g(this);
+  VisitCompare(this, kIA32Cmp, g.Use(value), g.TempImmediate(0), cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  IA32OperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
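+    // Use a jump table when its weighted space/time cost does not exceed that
+    // of a binary search and the value range is small enough to index.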
+    if (sw.case_count() > 4 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kIA32Lea | AddressingModeField::encode(kMode_MRI), index_operand,
+             value_operand, g.TempImmediate(-sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kIA32Add, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kIA32Add, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kIA32Sub, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kIA32Sub, &cont);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kIA32Imul, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kIA32Imul, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Float64Matcher mleft(left);
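+  // If the high word of the constant left-hand side is zero, a plain movd of
+  // the low word produces the same result, since movd zeroes the upper bits.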
+  if (mleft.HasResolvedValue() &&
+      (bit_cast<uint64_t>(mleft.ResolvedValue()) >> 32) == 0u) {
+    Emit(kSSEFloat64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
+    return;
+  }
+  Emit(kSSEFloat64InsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.Use(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kSSEFloat64InsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.Use(right));
+}
+
+void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kSSEFloat64SilenceNaN, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kIA32MFence, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
+         load_rep.representation() == MachineRepresentation::kWord16 ||
+         load_rep.representation() == MachineRepresentation::kWord32);
+  USE(load_rep);
+  VisitLoad(node);
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  IA32OperandGenerator g(this);
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
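+  // On ia32 a sequentially consistent store is implemented as an atomic
+  // exchange, which provides the required full memory barrier.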
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicExchangeInt8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicExchangeInt16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicExchangeWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode, rep);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  IA32OperandGenerator g(this);
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode, type.representation());
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  AddressingMode addressing_mode;
+  InstructionOperand new_val_operand =
+      (type.representation() == MachineRepresentation::kWord8)
+          ? g.UseByteRegister(new_value)
+          : g.UseUniqueRegister(new_value);
+  InstructionOperand inputs[] = {
+      g.UseFixed(old_value, eax), new_val_operand, g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, arraysize(inputs), inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinOp(this, node, opcode, type.representation());
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord32AtomicPairLoad(Node* node) {
+  IA32OperandGenerator g(this);
+  AddressingMode mode;
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection0 && projection1) {
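+    // Both halves are used: do one atomic 8-byte movq into an XMM scratch
+    // register and split it into the two 32-bit outputs.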
+    InstructionOperand inputs[] = {g.UseUniqueRegister(base),
+                                   g.GetEffectiveIndexOperand(index, &mode)};
+    InstructionCode code =
+        kIA32Word32AtomicPairLoad | AddressingModeField::encode(mode);
+    InstructionOperand temps[] = {g.TempDoubleRegister()};
+    InstructionOperand outputs[] = {g.DefineAsRegister(projection0),
+                                    g.DefineAsRegister(projection1)};
+    Emit(code, 2, outputs, 2, inputs, 1, temps);
+  } else if (projection0 || projection1) {
+    // Only one word is needed, so it's enough to load just that.
+    ArchOpcode opcode = kIA32Movl;
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(projection0 ? projection0 : projection1)};
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    // TODO(ahaas): Introduce an enum for {scale} instead of an integer.
+    // {scale = 0} means *1 in the generated code.
+    int scale = 0;
+    AddressingMode mode = g.GenerateMemoryOperandInputs(
+        index, scale, base, projection0 ? 0 : 4, kPositiveDisplacement, inputs,
+        &input_count);
+    InstructionCode code = opcode | AddressingModeField::encode(mode);
+    Emit(code, 1, outputs, input_count, inputs);
+  }
+}
+
+void InstructionSelector::VisitWord32AtomicPairStore(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+
+  AddressingMode addressing_mode;
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
+      g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  // Allocate temp registers because the store is performed with an atomic
+  // exchange whose result ends up in edx:eax; those registers must be saved
+  // and restored around the instruction.
+  InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister(edx)};
+  const int num_temps = arraysize(temps);
+  InstructionCode code =
+      kIA32Word32AtomicPairStore | AddressingModeField::encode(addressing_mode);
+  Emit(code, 0, nullptr, arraysize(inputs), inputs, num_temps, temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAdd(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAdd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairSub(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairSub);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAnd(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAnd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairOr(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairOr);
+}
+
+void InstructionSelector::VisitWord32AtomicPairXor(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairXor);
+}
+
+void InstructionSelector::VisitWord32AtomicPairExchange(Node* node) {
+  VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairExchange);
+}
+
+void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
+  IA32OperandGenerator g(this);
+  Node* index = node->InputAt(1);
+  AddressingMode addressing_mode;
+
+  InstructionOperand inputs[] = {
+      // High, Low values of old value
+      g.UseFixed(node->InputAt(2), eax), g.UseFixed(node->InputAt(3), edx),
+      // High, Low values of new value
+      g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(4)),
+      g.UseFixed(node->InputAt(5), ecx),
+      // InputAt(0) => base
+      g.UseUniqueRegister(node->InputAt(0)),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  InstructionCode code = kIA32Word32AtomicPairCompareExchange |
+                         AddressingModeField::encode(addressing_mode);
+
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[2];
+  size_t temp_count = 0;
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, eax);
+  } else {
+    temps[temp_count++] = g.TempRegister(eax);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, edx);
+  } else {
+    temps[temp_count++] = g.TempRegister(edx);
+  }
+  Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
+       temps);
+}
+
+#define SIMD_INT_TYPES(V) \
+  V(I32x4)                \
+  V(I16x8)                \
+  V(I8x16)
+
+#define SIMD_BINOP_LIST(V) \
+  V(F32x4Add)              \
+  V(F32x4AddHoriz)         \
+  V(F32x4Sub)              \
+  V(F32x4Mul)              \
+  V(F32x4Div)              \
+  V(F32x4Min)              \
+  V(F32x4Max)              \
+  V(F32x4Eq)               \
+  V(F32x4Ne)               \
+  V(F32x4Lt)               \
+  V(F32x4Le)               \
+  V(I32x4Add)              \
+  V(I32x4AddHoriz)         \
+  V(I32x4Sub)              \
+  V(I32x4Mul)              \
+  V(I32x4MinS)             \
+  V(I32x4MaxS)             \
+  V(I32x4Eq)               \
+  V(I32x4Ne)               \
+  V(I32x4GtS)              \
+  V(I32x4GeS)              \
+  V(I32x4MinU)             \
+  V(I32x4MaxU)             \
+  V(I32x4GtU)              \
+  V(I32x4GeU)              \
+  V(I16x8SConvertI32x4)    \
+  V(I16x8Add)              \
+  V(I16x8AddSatS)          \
+  V(I16x8AddHoriz)         \
+  V(I16x8Sub)              \
+  V(I16x8SubSatS)          \
+  V(I16x8Mul)              \
+  V(I16x8MinS)             \
+  V(I16x8MaxS)             \
+  V(I16x8Eq)               \
+  V(I16x8Ne)               \
+  V(I16x8GtS)              \
+  V(I16x8GeS)              \
+  V(I16x8AddSatU)          \
+  V(I16x8SubSatU)          \
+  V(I16x8MinU)             \
+  V(I16x8MaxU)             \
+  V(I16x8GtU)              \
+  V(I16x8GeU)              \
+  V(I8x16SConvertI16x8)    \
+  V(I8x16Add)              \
+  V(I8x16AddSatS)          \
+  V(I8x16Sub)              \
+  V(I8x16SubSatS)          \
+  V(I8x16MinS)             \
+  V(I8x16MaxS)             \
+  V(I8x16Eq)               \
+  V(I8x16Ne)               \
+  V(I8x16GtS)              \
+  V(I8x16GeS)              \
+  V(I8x16AddSatU)          \
+  V(I8x16SubSatU)          \
+  V(I8x16MinU)             \
+  V(I8x16MaxU)             \
+  V(I8x16GtU)              \
+  V(I8x16GeU)              \
+  V(S128And)               \
+  V(S128Or)                \
+  V(S128Xor)
+
+#define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
+  V(I64x2Add)                              \
+  V(I64x2Sub)                              \
+  V(I32x4DotI16x8S)                        \
+  V(I16x8RoundingAverageU)                 \
+  V(I8x16RoundingAverageU)
+
+#define SIMD_UNOP_LIST(V)   \
+  V(F32x4SConvertI32x4)     \
+  V(F32x4RecipApprox)       \
+  V(F32x4RecipSqrtApprox)   \
+  V(I32x4SConvertI16x8Low)  \
+  V(I32x4SConvertI16x8High) \
+  V(I32x4Neg)               \
+  V(I32x4UConvertI16x8Low)  \
+  V(I32x4UConvertI16x8High) \
+  V(I32x4Abs)               \
+  V(I32x4BitMask)           \
+  V(I16x8SConvertI8x16Low)  \
+  V(I16x8SConvertI8x16High) \
+  V(I16x8Neg)               \
+  V(I16x8UConvertI8x16Low)  \
+  V(I16x8UConvertI8x16High) \
+  V(I16x8Abs)               \
+  V(I8x16Neg)               \
+  V(I8x16Abs)               \
+  V(I8x16BitMask)
+
+#define SIMD_UNOP_PREFIX_LIST(V) \
+  V(F32x4Abs)                    \
+  V(F32x4Neg)                    \
+  V(F32x4Sqrt)                   \
+  V(S128Not)
+
+#define SIMD_ANYTRUE_LIST(V) \
+  V(V32x4AnyTrue)            \
+  V(V16x8AnyTrue)            \
+  V(V8x16AnyTrue)
+
+#define SIMD_ALLTRUE_LIST(V) \
+  V(V32x4AllTrue)            \
+  V(V16x8AllTrue)            \
+  V(V8x16AllTrue)
+
+#define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
+  V(I64x2Shl)                                \
+  V(I64x2ShrU)                               \
+  V(I32x4Shl)                                \
+  V(I32x4ShrS)                               \
+  V(I32x4ShrU)                               \
+  V(I16x8Shl)                                \
+  V(I16x8ShrS)                               \
+  V(I16x8ShrU)
+
+void InstructionSelector::VisitS128Const(Node* node) {
+  IA32OperandGenerator g(this);
+  static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
+  uint32_t val[kUint32Immediates];
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If the constant is all zeros or all ones, avoid emitting code for the
+  // generic constant path.
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
+                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kIA32S128Zero, dst);
+  } else if (all_ones) {
+    Emit(kIA32S128AllOnes, dst);
+  } else {
+    InstructionOperand inputs[kUint32Immediates];
+    for (int i = 0; i < kUint32Immediates; ++i) {
+      inputs[i] = g.UseImmediate(val[i]);
+    }
+    InstructionOperand temp(g.TempRegister());
+    Emit(kIA32S128Const, 1, &dst, kUint32Immediates, inputs, 1, &temp);
+  }
+}
+
+void InstructionSelector::VisitF64x2Min(Node* node) {
+  IA32OperandGenerator g(this);
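+  // minpd alone does not implement the wasm semantics for NaN and signed
+  // zero, so the minimum is computed in both operand orders and the results
+  // are merged via the scratch simd register.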
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseUnique(node->InputAt(1));
+
+  if (IsSupported(AVX)) {
+    Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1,
+         arraysize(temps), temps);
+  } else {
+    Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1,
+         arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitF64x2Max(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseUnique(node->InputAt(1));
+  if (IsSupported(AVX)) {
+    Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1,
+         arraysize(temps), temps);
+  } else {
+    Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1,
+         arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitF64x2Splat(Node* node) {
+  VisitRRSimd(this, node, kAVXF64x2Splat, kSSEF64x2Splat);
+}
+
+void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
+  VisitRRISimd(this, node, kAVXF64x2ExtractLane, kSSEF64x2ExtractLane);
+}
+
+void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
+  IA32OperandGenerator g(this);
+  Int32Matcher match_left(node->InputAt(0));
+  Int32Matcher match_right(node->InputAt(1));
+  if (match_left.Is(0) && match_right.Is(0)) {
+    Emit(kIA32S128Zero, g.DefineAsRegister(node));
+  } else {
+    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+    InstructionOperand operand1 = g.Use(node->InputAt(1));
+    Emit(kIA32I64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
+  }
+}
+
+void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand operand = g.UseRegister(node->InputAt(0));
+  InstructionOperand lane = g.UseImmediate(OpParameter<int32_t>(node->op()));
+  InstructionOperand low = g.Use(node->InputAt(1));
+  InstructionOperand high = g.Use(node->InputAt(2));
+  Emit(kIA32I64x2ReplaceLaneI32Pair, g.DefineSameAsFirst(node), operand, lane,
+       low, high);
+}
+
+void InstructionSelector::VisitI64x2Neg(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand operand0 = g.UseUnique(node->InputAt(0));
+  Emit(kIA32I64x2Neg, g.DefineAsRegister(node), operand0);
+}
+
+void InstructionSelector::VisitI64x2ShrS(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  if (IsSupported(AVX)) {
+    Emit(kIA32I64x2ShrS, g.DefineAsRegister(node),
+         g.UseUniqueRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+         arraysize(temps), temps);
+  } else {
+    Emit(kIA32I64x2ShrS, g.DefineSameAsFirst(node),
+         g.UseUniqueRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
+         arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitI64x2Mul(Node* node) {
+  IA32OperandGenerator g(this);
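+  // SSE has no packed 64x64-bit multiply, so the product is synthesized from
+  // 32x32->64-bit pmuludq multiplies, which needs two scratch simd registers.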
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  if (IsSupported(AVX)) {
+    Emit(kIA32I64x2Mul, g.DefineAsRegister(node),
+         g.UseUniqueRegister(node->InputAt(0)),
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+  } else {
+    Emit(kIA32I64x2Mul, g.DefineSameAsFirst(node),
+         g.UseUniqueRegister(node->InputAt(0)),
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitF32x4Splat(Node* node) {
+  VisitRRSimd(this, node, kAVXF32x4Splat, kSSEF32x4Splat);
+}
+
+void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
+  VisitRRISimd(this, node, kAVXF32x4ExtractLane, kSSEF32x4ExtractLane);
+}
+
+void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
+  VisitRRSimd(this, node, kAVXF32x4UConvertI32x4, kSSEF32x4UConvertI32x4);
+}
+
+void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
+  VisitRRSimd(this, node, kAVXI32x4SConvertF32x4, kSSEI32x4SConvertF32x4);
+}
+
+void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  InstructionCode opcode =
+      IsSupported(AVX) ? kAVXI32x4UConvertF32x4 : kSSEI32x4UConvertF32x4;
+  Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI8x16Mul(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  if (IsSupported(AVX)) {
+    Emit(kAVXI8x16Mul, g.DefineAsRegister(node), operand0, operand1,
+         arraysize(temps), temps);
+  } else {
+    Emit(kSSEI8x16Mul, g.DefineSameAsFirst(node), operand0, operand1,
+         arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  IA32OperandGenerator g(this);
+  Emit(kIA32S128Zero, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand operand2 = g.UseRegister(node->InputAt(2));
+  if (IsSupported(AVX)) {
+    // AVX supports unaligned memory operands, so Use here is okay.
+    Emit(kAVXS128Select, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
+         g.Use(node->InputAt(1)), operand2);
+  } else {
+    Emit(kSSES128Select, g.DefineSameAsFirst(node),
+         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+         operand2);
+  }
+}
+
+void InstructionSelector::VisitS128AndNot(Node* node) {
+  IA32OperandGenerator g(this);
+  // andnps a b computes ~a & b, but we want a & ~b, so flip the inputs.
+  Emit(kIA32S128AndNot, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)));
+}
+
+#define VISIT_SIMD_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    Int32Matcher int32_matcher(node->InputAt(0));            \
+    if (int32_matcher.Is(0)) {                               \
+      IA32OperandGenerator g(this);                          \
+      Emit(kIA32S128Zero, g.DefineAsRegister(node));         \
+    } else {                                                 \
+      VisitRO(this, node, kIA32##Type##Splat);               \
+    }                                                        \
+  }
+SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
+#undef VISIT_SIMD_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    VisitRRISimd(this, node, kIA32##Type##ExtractLane##Sign);            \
+  }
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define VISIT_SIMD_REPLACE_LANE(Type)                                    \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) {       \
+    IA32OperandGenerator g(this);                                        \
+    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));       \
+    InstructionOperand operand1 =                                        \
+        g.UseImmediate(OpParameter<int32_t>(node->op()));                \
+    InstructionOperand operand2 = g.Use(node->InputAt(1));               \
+    if (IsSupported(AVX)) {                                              \
+      Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0,  \
+           operand1, operand2);                                          \
+    } else {                                                             \
+      Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
+           operand1, operand2);                                          \
+    }                                                                    \
+  }
+SIMD_INT_TYPES(VISIT_SIMD_REPLACE_LANE)
+VISIT_SIMD_REPLACE_LANE(F32x4)
+#undef VISIT_SIMD_REPLACE_LANE
+#undef SIMD_INT_TYPES
+
+// The difference between this and VISIT_SIMD_REPLACE_LANE is that this forces
+// operand2 to be UseRegister, because the codegen relies on insertps using
+// registers.
+// TODO(v8:9764) Remove this UseRegister requirement
+#define VISIT_SIMD_REPLACE_LANE_USE_REG(Type)                            \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) {       \
+    IA32OperandGenerator g(this);                                        \
+    InstructionOperand operand0 = g.UseRegister(node->InputAt(0));       \
+    InstructionOperand operand1 =                                        \
+        g.UseImmediate(OpParameter<int32_t>(node->op()));                \
+    InstructionOperand operand2 = g.UseUniqueRegister(node->InputAt(1)); \
+    if (IsSupported(AVX)) {                                              \
+      Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0,  \
+           operand1, operand2);                                          \
+    } else {                                                             \
+      Emit(kSSE##Type##ReplaceLane, g.DefineSameAsFirst(node), operand0, \
+           operand1, operand2);                                          \
+    }                                                                    \
+  }
+VISIT_SIMD_REPLACE_LANE_USE_REG(F64x2)
+#undef VISIT_SIMD_REPLACE_LANE_USE_REG
+
+#define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode)        \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    VisitRROSimdShift(this, node, kIA32##Opcode);       \
+  }
+SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
+#undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
+#undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX
+
+// TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory
+// alignment yet. For AVX, memory operands are fine, but can have performance
+// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
+// chapter 14.9
+#define VISIT_SIMD_UNOP(Opcode)                         \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    IA32OperandGenerator g(this);                       \
+    Emit(kIA32##Opcode, g.DefineAsRegister(node),       \
+         g.UseRegister(node->InputAt(0)));              \
+  }
+SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
+#undef VISIT_SIMD_UNOP
+#undef SIMD_UNOP_LIST
+
+// TODO(v8:9198): SSE instructions that read 16 bytes from memory require the
+// operand to be 16-byte aligned. AVX instructions relax this requirement, but
+// might have reduced performance if the memory crosses a cache line. Since we
+// have a limited number of xmm registers, allowing memory operands here can
+// still be worthwhile to relieve register pressure.
+#define VISIT_SIMD_UNOP_PREFIX(Opcode)                                       \
+  void InstructionSelector::Visit##Opcode(Node* node) {                      \
+    IA32OperandGenerator g(this);                                            \
+    if (IsSupported(AVX)) {                                                  \
+      Emit(kAVX##Opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0))); \
+    } else {                                                                 \
+      Emit(kSSE##Opcode, g.DefineSameAsFirst(node),                          \
+           g.UseRegister(node->InputAt(0)));                                 \
+    }                                                                        \
+  }
+SIMD_UNOP_PREFIX_LIST(VISIT_SIMD_UNOP_PREFIX)
+#undef VISIT_SIMD_UNOP_PREFIX
+#undef SIMD_UNOP_PREFIX_LIST
+
+#define VISIT_SIMD_ANYTRUE(Opcode)                                  \
+  void InstructionSelector::Visit##Opcode(Node* node) {             \
+    IA32OperandGenerator g(this);                                   \
+    InstructionOperand temps[] = {g.TempRegister()};                \
+    Emit(kIA32##Opcode, g.DefineAsRegister(node),                   \
+         g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
+#undef VISIT_SIMD_ANYTRUE
+#undef SIMD_ANYTRUE_LIST
+
+#define VISIT_SIMD_ALLTRUE(Opcode)                                            \
+  void InstructionSelector::Visit##Opcode(Node* node) {                       \
+    IA32OperandGenerator g(this);                                             \
+    InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
+    Emit(kIA32##Opcode, g.DefineAsRegister(node),                             \
+         g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);     \
+  }
+SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
+#undef VISIT_SIMD_ALLTRUE
+#undef SIMD_ALLTRUE_LIST
+
+#define VISIT_SIMD_BINOP(Opcode)                          \
+  void InstructionSelector::Visit##Opcode(Node* node) {   \
+    VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
+  }
+SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
+#undef VISIT_SIMD_BINOP
+#undef SIMD_BINOP_LIST
+
+#define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode)            \
+  void InstructionSelector::Visit##Opcode(Node* node) {     \
+    VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
+  }
+SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
+#undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
+#undef SIMD_BINOP_UNIFIED_SSE_AVX_LIST
+
+// TODO(v8:9198): SSE requires operand1 to be a register as we don't have memory
+// alignment yet. For AVX, memory operands are fine, but can have performance
+// issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
+// chapter 14.9
+void VisitPack(InstructionSelector* selector, Node* node, ArchOpcode avx_opcode,
+               ArchOpcode sse_opcode) {
+  IA32OperandGenerator g(selector);
+  InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
+  InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineSameAsFirst(node), operand0, operand1);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
+  }
+}
+
+void InstructionSelector::VisitI16x8UConvertI32x4(Node* node) {
+  VisitPack(this, node, kAVXI16x8UConvertI32x4, kSSEI16x8UConvertI32x4);
+}
+
+void InstructionSelector::VisitI16x8BitMask(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kIA32I16x8BitMask, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
+  VisitPack(this, node, kAVXI8x16UConvertI16x8, kSSEI8x16UConvertI16x8);
+}
+
+void InstructionSelector::VisitI8x16Shl(Node* node) {
+  IA32OperandGenerator g(this);
+  if (g.CanBeImmediate(node->InputAt(1))) {
+    InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
+    this->Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node),
+               g.UseRegister(node->InputAt(0)),
+               g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
+  } else {
+    VisitRROI8x16SimdShift(this, node, kIA32I8x16Shl);
+  }
+}
+
+void InstructionSelector::VisitI8x16ShrS(Node* node) {
+  IA32OperandGenerator g(this);
+  if (g.CanBeImmediate(node->InputAt(1))) {
+    this->Emit(kIA32I8x16ShrS, g.DefineSameAsFirst(node),
+               g.UseRegister(node->InputAt(0)),
+               g.UseImmediate(node->InputAt(1)));
+  } else {
+    VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrS);
+  }
+}
+
+void InstructionSelector::VisitI8x16ShrU(Node* node) {
+  IA32OperandGenerator g(this);
+  if (g.CanBeImmediate(node->InputAt(1))) {
+    InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
+    this->Emit(kIA32I8x16ShrU, g.DefineSameAsFirst(node),
+               g.UseRegister(node->InputAt(0)),
+               g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
+  } else {
+    VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrU);
+  }
+}
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+namespace {
+
+// Returns true if shuffle can be decomposed into two 16x4 half shuffles
+// followed by a 16x8 blend.
+// E.g. [3 2 1 0 15 14 13 12].
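+// For that example, lanes 0-3 stay within the low half and lanes 4-7 within
+// the high half (checked via the & 0x4 test below), and the resulting
+// blend_mask is 0xF0 because lanes 4-7 select from the second input
+// (indices > 7).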
+bool TryMatch16x8HalfShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
+  *blend_mask = 0;
+  for (int i = 0; i < 8; i++) {
+    if ((shuffle16x8[i] & 0x4) != (i & 0x4)) return false;
+    *blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
+  }
+  return true;
+}
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+  ArchOpcode avx_opcode;
+  bool src0_needs_reg;
+  bool src1_needs_reg;
+};
+
+// Shuffles that map to architecture-specific instruction sequences. These are
+// matched very early, so we shouldn't include shuffles that match better in
+// later tests, like 32x4 and 16x8 shuffles. In general, these patterns should
+// map to either a single instruction, or be finer grained, such as zip/unzip or
+// transpose patterns.
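+// The first eight entries below are the 64/32/16/8-bit-lane unpack patterns,
+// which use the same opcode for SSE and AVX; the remaining unzip, transpose
+// and reverse patterns carry separate kSSE*/kAVX* opcodes.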
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
+     kIA32S64x2UnpackLow,
+     kIA32S64x2UnpackLow,
+     true,
+     false},
+    {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
+     kIA32S64x2UnpackHigh,
+     kIA32S64x2UnpackHigh,
+     true,
+     false},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kIA32S32x4UnpackLow,
+     kIA32S32x4UnpackLow,
+     true,
+     false},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kIA32S32x4UnpackHigh,
+     kIA32S32x4UnpackHigh,
+     true,
+     false},
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kIA32S16x8UnpackLow,
+     kIA32S16x8UnpackLow,
+     true,
+     false},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kIA32S16x8UnpackHigh,
+     kIA32S16x8UnpackHigh,
+     true,
+     false},
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kIA32S8x16UnpackLow,
+     kIA32S8x16UnpackLow,
+     true,
+     false},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kIA32S8x16UnpackHigh,
+     kIA32S8x16UnpackHigh,
+     true,
+     false},
+
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kSSES16x8UnzipLow,
+     kAVXS16x8UnzipLow,
+     true,
+     false},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kSSES16x8UnzipHigh,
+     kAVXS16x8UnzipHigh,
+     true,
+     true},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kSSES8x16UnzipLow,
+     kAVXS8x16UnzipLow,
+     true,
+     true},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kSSES8x16UnzipHigh,
+     kAVXS8x16UnzipHigh,
+     true,
+     true},
+
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kSSES8x16TransposeLow,
+     kAVXS8x16TransposeLow,
+     true,
+     true},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kSSES8x16TransposeHigh,
+     kAVXS8x16TransposeHigh,
+     true,
+     true},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
+     kSSES8x8Reverse,
+     kAVXS8x8Reverse,
+     true,
+     true},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
+     kSSES8x4Reverse,
+     kAVXS8x4Reverse,
+     true,
+     true},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
+     kSSES8x2Reverse,
+     kAVXS8x2Reverse,
+     true,
+     true}};
+
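+// Matches {shuffle} against the table above. For swizzles (single input) the
+// comparison masks indices to the 0-15 range, so a table entry that names
+// both inputs still matches the equivalent single-input pattern.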
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         const ShuffleEntry** arch_shuffle) {
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
+  for (size_t i = 0; i < num_entries; ++i) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; ++j) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *arch_shuffle = &entry;
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+
+  int imm_count = 0;
+  static const int kMaxImms = 6;
+  uint32_t imms[kMaxImms];
+  int temp_count = 0;
+  static const int kMaxTemps = 2;
+  InstructionOperand temps[kMaxTemps];
+
+  IA32OperandGenerator g(this);
+  bool use_avx = CpuFeatures::IsSupported(AVX);
+  // AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
+  bool no_same_as_first = use_avx || is_swizzle;
+  // We generally need UseRegister for input0, Use for input1.
+  // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet, so
+  // we retain this logic (continue setting these in the various shuffle match
+  // clauses) but ignore it when selecting registers or slots.
+  bool src0_needs_reg = true;
+  bool src1_needs_reg = false;
+  ArchOpcode opcode = kIA32I8x16Shuffle;  // general shuffle is the default
+
+  uint8_t offset;
+  uint8_t shuffle32x4[4];
+  uint8_t shuffle16x8[8];
+  int index;
+  const ShuffleEntry* arch_shuffle;
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    // Swap inputs from the normal order for (v)palignr.
+    SwapShuffleInputs(node);
+    is_swizzle = false;  // It's simpler to just handle the general case.
+    no_same_as_first = use_avx;  // SSE requires same-as-first.
+    opcode = kIA32S8x16Alignr;
+    // palignr takes a single imm8 offset.
+    imms[imm_count++] = offset;
+  } else if (TryMatchArchShuffle(shuffle, arch_shuffles,
+                                 arraysize(arch_shuffles), is_swizzle,
+                                 &arch_shuffle)) {
+    opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
+    src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
+    // SSE can't take advantage of both operands in registers and needs
+    // same-as-first.
+    src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
+    no_same_as_first = use_avx;
+  } else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
+    if (is_swizzle) {
+      if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
+        // Bypass normal shuffle code generation in this case.
+        EmitIdentity(node);
+        return;
+      } else {
+        // pshufd takes a single imm8 shuffle mask.
+        opcode = kIA32S32x4Swizzle;
+        no_same_as_first = true;
+        // TODO(v8:9198): This doesn't strictly require a register, but we
+        // force swizzles to always use registers until the generation of
+        // incorrect memory operands can be fixed.
+        src0_needs_reg = true;
+        imms[imm_count++] = shuffle_mask;
+      }
+    } else {
+      // 2 operand shuffle
+      // A blend is more efficient than a general 32x4 shuffle; try it first.
+      if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
+        opcode = kIA32S16x8Blend;
+        uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
+        imms[imm_count++] = blend_mask;
+      } else {
+        opcode = kIA32S32x4Shuffle;
+        no_same_as_first = true;
+        // TODO(v8:9198): src0 and src1 are used by pshufd in codegen, which
+        // requires memory operands to be 16-byte aligned; since we cannot
+        // guarantee that yet, force the use of registers here.
+        src0_needs_reg = true;
+        src1_needs_reg = true;
+        imms[imm_count++] = shuffle_mask;
+        int8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
+        imms[imm_count++] = blend_mask;
+      }
+    }
+  } else if (wasm::SimdShuffle::TryMatch16x8Shuffle(shuffle, shuffle16x8)) {
+    uint8_t blend_mask;
+    if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
+      opcode = kIA32S16x8Blend;
+      blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
+      imms[imm_count++] = blend_mask;
+    } else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
+      opcode = kIA32S16x8Dup;
+      src0_needs_reg = false;
+      imms[imm_count++] = index;
+    } else if (TryMatch16x8HalfShuffle(shuffle16x8, &blend_mask)) {
+      opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
+      // Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
+      no_same_as_first = true;
+      src0_needs_reg = false;
+      uint8_t mask_lo = wasm::SimdShuffle::PackShuffle4(shuffle16x8);
+      uint8_t mask_hi = wasm::SimdShuffle::PackShuffle4(shuffle16x8 + 4);
+      imms[imm_count++] = mask_lo;
+      imms[imm_count++] = mask_hi;
+      if (!is_swizzle) imms[imm_count++] = blend_mask;
+    }
+  } else if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
+    opcode = kIA32S8x16Dup;
+    no_same_as_first = use_avx;
+    src0_needs_reg = true;
+    imms[imm_count++] = index;
+  }
+  if (opcode == kIA32I8x16Shuffle) {
+    // Use same-as-first for general swizzle, but not shuffle.
+    no_same_as_first = !is_swizzle;
+    src0_needs_reg = !no_same_as_first;
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 4);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 8);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 12);
+    temps[temp_count++] = g.TempRegister();
+  }
+
+  // Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
+  // move instruction in the CodeGenerator.
+  Node* input0 = node->InputAt(0);
+  InstructionOperand dst =
+      no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
+  // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
+  InstructionOperand src0 = g.UseRegister(input0);
+  USE(src0_needs_reg);
+
+  int input_count = 0;
+  InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
+  inputs[input_count++] = src0;
+  if (!is_swizzle) {
+    Node* input1 = node->InputAt(1);
+    // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
+    inputs[input_count++] = g.UseRegister(input1);
+    USE(src1_needs_reg);
+  }
+  for (int i = 0; i < imm_count; ++i) {
+    inputs[input_count++] = g.UseImmediate(imms[i]);
+  }
+  Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
+}
+
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  IA32OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kIA32I8x16Swizzle, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+namespace {
+void VisitPminOrPmax(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  // Due to the way minps/minpd work, we want the dst to be the same as the
+  // second input: b = pmin(a, b) directly maps to minps b, a.
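+  // (Wasm defines pmin(a, b) as b < a ? b : a, which is exactly what
+  // minps/minpd compute when b is the destination operand.)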
+  IA32OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(1)),
+                 g.UseRegister(node->InputAt(0)));
+}
+}  // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitPminOrPmax(this, node, kIA32F32x4Pmin);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitPminOrPmax(this, node, kIA32F32x4Pmax);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitPminOrPmax(this, node, kIA32F64x2Pmin);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitPminOrPmax(this, node, kIA32F64x2Pmax);
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  MachineOperatorBuilder::Flags flags =
+      MachineOperatorBuilder::kWord32ShiftIsSafe |
+      MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord32Rol;
+  if (CpuFeatures::IsSupported(POPCNT)) {
+    flags |= MachineOperatorBuilder::kWord32Popcnt;
+  }
+  if (CpuFeatures::IsSupported(SSE4_1)) {
+    flags |= MachineOperatorBuilder::kFloat32RoundDown |
+             MachineOperatorBuilder::kFloat64RoundDown |
+             MachineOperatorBuilder::kFloat32RoundUp |
+             MachineOperatorBuilder::kFloat64RoundUp |
+             MachineOperatorBuilder::kFloat32RoundTruncate |
+             MachineOperatorBuilder::kFloat64RoundTruncate |
+             MachineOperatorBuilder::kFloat32RoundTiesEven |
+             MachineOperatorBuilder::kFloat64RoundTiesEven;
+  }
+  return flags;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/instruction-codes.h b/src/compiler/backend/instruction-codes.h
new file mode 100644
index 0000000..f9e68ce
--- /dev/null
+++ b/src/compiler/backend/instruction-codes.h
@@ -0,0 +1,288 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_INSTRUCTION_CODES_H_
+#define V8_COMPILER_BACKEND_INSTRUCTION_CODES_H_
+
+#include <iosfwd>
+
+#if V8_TARGET_ARCH_ARM
+#include "src/compiler/backend/arm/instruction-codes-arm.h"
+#elif V8_TARGET_ARCH_ARM64
+#include "src/compiler/backend/arm64/instruction-codes-arm64.h"
+#elif V8_TARGET_ARCH_IA32
+#include "src/compiler/backend/ia32/instruction-codes-ia32.h"
+#elif V8_TARGET_ARCH_MIPS
+#include "src/compiler/backend/mips/instruction-codes-mips.h"
+#elif V8_TARGET_ARCH_MIPS64
+#include "src/compiler/backend/mips64/instruction-codes-mips64.h"
+#elif V8_TARGET_ARCH_X64
+#include "src/compiler/backend/x64/instruction-codes-x64.h"
+#elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64
+#include "src/compiler/backend/ppc/instruction-codes-ppc.h"
+#elif V8_TARGET_ARCH_S390
+#include "src/compiler/backend/s390/instruction-codes-s390.h"
+#else
+#define TARGET_ARCH_OPCODE_LIST(V)
+#define TARGET_ADDRESSING_MODE_LIST(V)
+#endif
+#include "src/base/bit-field.h"
+#include "src/compiler/write-barrier-kind.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Modes for ArchStoreWithWriteBarrier below.
+enum class RecordWriteMode {
+  kValueIsMap,
+  kValueIsPointer,
+  kValueIsEphemeronKey,
+  kValueIsAny,
+};
+
+inline RecordWriteMode WriteBarrierKindToRecordWriteMode(
+    WriteBarrierKind write_barrier_kind) {
+  switch (write_barrier_kind) {
+    case kMapWriteBarrier:
+      return RecordWriteMode::kValueIsMap;
+    case kPointerWriteBarrier:
+      return RecordWriteMode::kValueIsPointer;
+    case kEphemeronKeyWriteBarrier:
+      return RecordWriteMode::kValueIsEphemeronKey;
+    case kFullWriteBarrier:
+      return RecordWriteMode::kValueIsAny;
+    case kNoWriteBarrier:
+    // Should not be passed as argument.
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+// Target-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define COMMON_ARCH_OPCODE_LIST(V)                                         \
+  /* Tail call opcodes are grouped together to make IsTailCall fast */     \
+  /* and Arch call opcodes are grouped together to make */                 \
+  /* IsCallWithDescriptorFlags fast */                                     \
+  V(ArchTailCallCodeObjectFromJSFunction)                                  \
+  V(ArchTailCallCodeObject)                                                \
+  V(ArchTailCallAddress)                                                   \
+  V(ArchTailCallWasm)                                                      \
+  /* Update IsTailCall if further TailCall opcodes are added */            \
+                                                                           \
+  V(ArchCallCodeObject)                                                    \
+  V(ArchCallJSFunction)                                                    \
+  V(ArchCallWasmFunction)                                                  \
+  V(ArchCallBuiltinPointer)                                                \
+  /* Update IsCallWithDescriptorFlags if further Call opcodes are added */ \
+                                                                           \
+  V(ArchPrepareCallCFunction)                                              \
+  V(ArchSaveCallerRegisters)                                               \
+  V(ArchRestoreCallerRegisters)                                            \
+  V(ArchCallCFunction)                                                     \
+  V(ArchPrepareTailCall)                                                   \
+  V(ArchJmp)                                                               \
+  V(ArchBinarySearchSwitch)                                                \
+  V(ArchTableSwitch)                                                       \
+  V(ArchNop)                                                               \
+  V(ArchAbortCSAAssert)                                                    \
+  V(ArchDebugBreak)                                                        \
+  V(ArchComment)                                                           \
+  V(ArchThrowTerminator)                                                   \
+  V(ArchDeoptimize)                                                        \
+  V(ArchRet)                                                               \
+  V(ArchFramePointer)                                                      \
+  V(ArchParentFramePointer)                                                \
+  V(ArchTruncateDoubleToI)                                                 \
+  V(ArchStoreWithWriteBarrier)                                             \
+  V(ArchStackSlot)                                                         \
+  V(ArchWordPoisonOnSpeculation)                                           \
+  V(ArchStackPointerGreaterThan)                                           \
+  V(ArchStackCheckOffset)                                                  \
+  V(Word32AtomicLoadInt8)                                                  \
+  V(Word32AtomicLoadUint8)                                                 \
+  V(Word32AtomicLoadInt16)                                                 \
+  V(Word32AtomicLoadUint16)                                                \
+  V(Word32AtomicLoadWord32)                                                \
+  V(Word32AtomicStoreWord8)                                                \
+  V(Word32AtomicStoreWord16)                                               \
+  V(Word32AtomicStoreWord32)                                               \
+  V(Word32AtomicExchangeInt8)                                              \
+  V(Word32AtomicExchangeUint8)                                             \
+  V(Word32AtomicExchangeInt16)                                             \
+  V(Word32AtomicExchangeUint16)                                            \
+  V(Word32AtomicExchangeWord32)                                            \
+  V(Word32AtomicCompareExchangeInt8)                                       \
+  V(Word32AtomicCompareExchangeUint8)                                      \
+  V(Word32AtomicCompareExchangeInt16)                                      \
+  V(Word32AtomicCompareExchangeUint16)                                     \
+  V(Word32AtomicCompareExchangeWord32)                                     \
+  V(Word32AtomicAddInt8)                                                   \
+  V(Word32AtomicAddUint8)                                                  \
+  V(Word32AtomicAddInt16)                                                  \
+  V(Word32AtomicAddUint16)                                                 \
+  V(Word32AtomicAddWord32)                                                 \
+  V(Word32AtomicSubInt8)                                                   \
+  V(Word32AtomicSubUint8)                                                  \
+  V(Word32AtomicSubInt16)                                                  \
+  V(Word32AtomicSubUint16)                                                 \
+  V(Word32AtomicSubWord32)                                                 \
+  V(Word32AtomicAndInt8)                                                   \
+  V(Word32AtomicAndUint8)                                                  \
+  V(Word32AtomicAndInt16)                                                  \
+  V(Word32AtomicAndUint16)                                                 \
+  V(Word32AtomicAndWord32)                                                 \
+  V(Word32AtomicOrInt8)                                                    \
+  V(Word32AtomicOrUint8)                                                   \
+  V(Word32AtomicOrInt16)                                                   \
+  V(Word32AtomicOrUint16)                                                  \
+  V(Word32AtomicOrWord32)                                                  \
+  V(Word32AtomicXorInt8)                                                   \
+  V(Word32AtomicXorUint8)                                                  \
+  V(Word32AtomicXorInt16)                                                  \
+  V(Word32AtomicXorUint16)                                                 \
+  V(Word32AtomicXorWord32)                                                 \
+  V(Ieee754Float64Acos)                                                    \
+  V(Ieee754Float64Acosh)                                                   \
+  V(Ieee754Float64Asin)                                                    \
+  V(Ieee754Float64Asinh)                                                   \
+  V(Ieee754Float64Atan)                                                    \
+  V(Ieee754Float64Atanh)                                                   \
+  V(Ieee754Float64Atan2)                                                   \
+  V(Ieee754Float64Cbrt)                                                    \
+  V(Ieee754Float64Cos)                                                     \
+  V(Ieee754Float64Cosh)                                                    \
+  V(Ieee754Float64Exp)                                                     \
+  V(Ieee754Float64Expm1)                                                   \
+  V(Ieee754Float64Log)                                                     \
+  V(Ieee754Float64Log1p)                                                   \
+  V(Ieee754Float64Log10)                                                   \
+  V(Ieee754Float64Log2)                                                    \
+  V(Ieee754Float64Pow)                                                     \
+  V(Ieee754Float64Sin)                                                     \
+  V(Ieee754Float64Sinh)                                                    \
+  V(Ieee754Float64Tan)                                                     \
+  V(Ieee754Float64Tanh)
+
+#define ARCH_OPCODE_LIST(V)  \
+  COMMON_ARCH_OPCODE_LIST(V) \
+  TARGET_ARCH_OPCODE_LIST(V)
+
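+// The COUNT_ARCH_OPCODE trick below expands the opcode list into
+// "-1 +1 +1 ...", so kLastArchOpcode ends up as the zero-based index of the
+// last opcode; the static_assert on ArchOpcodeField relies on this.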
+enum ArchOpcode {
+#define DECLARE_ARCH_OPCODE(Name) k##Name,
+  ARCH_OPCODE_LIST(DECLARE_ARCH_OPCODE)
+#undef DECLARE_ARCH_OPCODE
+#define COUNT_ARCH_OPCODE(Name) +1
+      kLastArchOpcode = -1 ARCH_OPCODE_LIST(COUNT_ARCH_OPCODE)
+#undef COUNT_ARCH_OPCODE
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
+                                           const ArchOpcode& ao);
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+#define ADDRESSING_MODE_LIST(V) \
+  V(None)                       \
+  TARGET_ADDRESSING_MODE_LIST(V)
+
+enum AddressingMode {
+#define DECLARE_ADDRESSING_MODE(Name) kMode_##Name,
+  ADDRESSING_MODE_LIST(DECLARE_ADDRESSING_MODE)
+#undef DECLARE_ADDRESSING_MODE
+#define COUNT_ADDRESSING_MODE(Name) +1
+      kLastAddressingMode = -1 ADDRESSING_MODE_LIST(COUNT_ADDRESSING_MODE)
+#undef COUNT_ADDRESSING_MODE
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
+                                           const AddressingMode& am);
+
+// The mode of the flags continuation (see below).
+enum FlagsMode {
+  kFlags_none = 0,
+  kFlags_branch = 1,
+  kFlags_branch_and_poison = 2,
+  kFlags_deoptimize = 3,
+  kFlags_deoptimize_and_poison = 4,
+  kFlags_set = 5,
+  kFlags_trap = 6
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
+                                           const FlagsMode& fm);
+
+// The condition of flags continuation (see below).
+enum FlagsCondition {
+  kEqual,
+  kNotEqual,
+  kSignedLessThan,
+  kSignedGreaterThanOrEqual,
+  kSignedLessThanOrEqual,
+  kSignedGreaterThan,
+  kUnsignedLessThan,
+  kUnsignedGreaterThanOrEqual,
+  kUnsignedLessThanOrEqual,
+  kUnsignedGreaterThan,
+  kFloatLessThanOrUnordered,
+  kFloatGreaterThanOrEqual,
+  kFloatLessThanOrEqual,
+  kFloatGreaterThanOrUnordered,
+  kFloatLessThan,
+  kFloatGreaterThanOrEqualOrUnordered,
+  kFloatLessThanOrEqualOrUnordered,
+  kFloatGreaterThan,
+  kUnorderedEqual,
+  kUnorderedNotEqual,
+  kOverflow,
+  kNotOverflow,
+  kPositiveOrZero,
+  kNegative
+};
+
+static constexpr FlagsCondition kStackPointerGreaterThanCondition =
+    kUnsignedGreaterThan;
+
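+// The conditions above are declared in negation pairs (kEqual/kNotEqual,
+// kSignedLessThan/kSignedGreaterThanOrEqual, ...), so negating one is just a
+// flip of the least-significant bit.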
+inline FlagsCondition NegateFlagsCondition(FlagsCondition condition) {
+  return static_cast<FlagsCondition>(condition ^ 1);
+}
+
+FlagsCondition CommuteFlagsCondition(FlagsCondition condition);
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
+                                           const FlagsCondition& fc);
+
+enum MemoryAccessMode {
+  kMemoryAccessDirect = 0,
+  kMemoryAccessProtected = 1,
+  kMemoryAccessPoisoned = 2
+};
+
+// The InstructionCode is an opaque, target-specific integer that encodes
+// what code to emit for an instruction in the code generator. It is not
+// interesting to the register allocator, as the inputs and flags on the
+// instructions specify everything of interest.
+using InstructionCode = uint32_t;
+
+// Helpers for encoding / decoding InstructionCode into the fields needed
+// for code generation. We encode the instruction, addressing mode, and flags
+// continuation into a single InstructionCode which is stored as part of
+// the instruction.
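+// Layout of the 32-bit InstructionCode: bits 0-8 arch opcode, 9-13 addressing
+// mode, 14-16 flags mode, 17-21 flags condition, 22-31 misc payload.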
+using ArchOpcodeField = base::BitField<ArchOpcode, 0, 9>;
+static_assert(ArchOpcodeField::is_valid(kLastArchOpcode),
+              "All opcodes must fit in the 9-bit ArchOpcodeField.");
+using AddressingModeField = base::BitField<AddressingMode, 9, 5>;
+using FlagsModeField = base::BitField<FlagsMode, 14, 3>;
+using FlagsConditionField = base::BitField<FlagsCondition, 17, 5>;
+using MiscField = base::BitField<int, 22, 10>;
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_INSTRUCTION_CODES_H_
diff --git a/src/compiler/backend/instruction-scheduler.cc b/src/compiler/backend/instruction-scheduler.cc
new file mode 100644
index 0000000..2819505
--- /dev/null
+++ b/src/compiler/backend/instruction-scheduler.cc
@@ -0,0 +1,411 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+#include "src/base/iterator.h"
+#include "src/base/optional.h"
+#include "src/base/utils/random-number-generator.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+void InstructionScheduler::SchedulingQueueBase::AddNode(
+    ScheduleGraphNode* node) {
+  // We keep the ready list sorted by total latency so that we can quickly find
+  // the next best candidate to schedule.
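+  // The list is kept in descending order of total latency, so the node with
+  // the longest remaining critical path sits closest to the front.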
+  auto it = nodes_.begin();
+  while ((it != nodes_.end()) &&
+         ((*it)->total_latency() >= node->total_latency())) {
+    ++it;
+  }
+  nodes_.insert(it, node);
+}
+
+InstructionScheduler::ScheduleGraphNode*
+InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
+  DCHECK(!IsEmpty());
+  auto candidate = nodes_.end();
+  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
+    // We only consider instructions that have all their operands ready.
+    if (cycle >= (*iterator)->start_cycle()) {
+      candidate = iterator;
+      break;
+    }
+  }
+
+  if (candidate != nodes_.end()) {
+    ScheduleGraphNode* result = *candidate;
+    nodes_.erase(candidate);
+    return result;
+  }
+
+  return nullptr;
+}
+
+InstructionScheduler::ScheduleGraphNode*
+InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
+  DCHECK(!IsEmpty());
+  // Choose a random element from the ready list.
+  auto candidate = nodes_.begin();
+  std::advance(candidate, random_number_generator()->NextInt(
+                              static_cast<int>(nodes_.size())));
+  ScheduleGraphNode* result = *candidate;
+  nodes_.erase(candidate);
+  return result;
+}
+
+InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
+                                                           Instruction* instr)
+    : instr_(instr),
+      successors_(zone),
+      unscheduled_predecessors_count_(0),
+      latency_(GetInstructionLatency(instr)),
+      total_latency_(-1),
+      start_cycle_(-1) {}
+
+void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
+    ScheduleGraphNode* node) {
+  successors_.push_back(node);
+  node->unscheduled_predecessors_count_++;
+}
+
+InstructionScheduler::InstructionScheduler(Zone* zone,
+                                           InstructionSequence* sequence)
+    : zone_(zone),
+      sequence_(sequence),
+      graph_(zone),
+      last_side_effect_instr_(nullptr),
+      pending_loads_(zone),
+      last_live_in_reg_marker_(nullptr),
+      last_deopt_or_trap_(nullptr),
+      operands_map_(zone) {
+  if (FLAG_turbo_stress_instruction_scheduling) {
+    random_number_generator_ =
+        base::Optional<base::RandomNumberGenerator>(FLAG_random_seed);
+  }
+}
+
+void InstructionScheduler::StartBlock(RpoNumber rpo) {
+  DCHECK(graph_.empty());
+  DCHECK_NULL(last_side_effect_instr_);
+  DCHECK(pending_loads_.empty());
+  DCHECK_NULL(last_live_in_reg_marker_);
+  DCHECK_NULL(last_deopt_or_trap_);
+  DCHECK(operands_map_.empty());
+  sequence()->StartBlock(rpo);
+}
+
+void InstructionScheduler::EndBlock(RpoNumber rpo) {
+  if (FLAG_turbo_stress_instruction_scheduling) {
+    Schedule<StressSchedulerQueue>();
+  } else {
+    Schedule<CriticalPathFirstQueue>();
+  }
+  sequence()->EndBlock(rpo);
+}
+
+void InstructionScheduler::AddTerminator(Instruction* instr) {
+  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
+  // Make sure that basic block terminators are not moved by adding them
+  // as a successor of every instruction.
+  for (ScheduleGraphNode* node : graph_) {
+    node->AddSuccessor(new_node);
+  }
+  graph_.push_back(new_node);
+}
+
+void InstructionScheduler::AddInstruction(Instruction* instr) {
+  if (IsBarrier(instr)) {
+    if (FLAG_turbo_stress_instruction_scheduling) {
+      Schedule<StressSchedulerQueue>();
+    } else {
+      Schedule<CriticalPathFirstQueue>();
+    }
+    sequence()->AddInstruction(instr);
+    return;
+  }
+
+  ScheduleGraphNode* new_node = zone()->New<ScheduleGraphNode>(zone(), instr);
+
+  // We should not have branches in the middle of a block.
+  DCHECK_NE(instr->flags_mode(), kFlags_branch);
+  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);
+
+  if (IsFixedRegisterParameter(instr)) {
+    if (last_live_in_reg_marker_ != nullptr) {
+      last_live_in_reg_marker_->AddSuccessor(new_node);
+    }
+    last_live_in_reg_marker_ = new_node;
+  } else {
+    if (last_live_in_reg_marker_ != nullptr) {
+      last_live_in_reg_marker_->AddSuccessor(new_node);
+    }
+
+    // Make sure that instructions are not scheduled before the last
+    // deoptimization or trap point when they depend on it.
+    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
+      last_deopt_or_trap_->AddSuccessor(new_node);
+    }
+
+    // Instructions with side effects and memory operations can't be
+    // reordered with respect to each other.
+    if (HasSideEffect(instr)) {
+      if (last_side_effect_instr_ != nullptr) {
+        last_side_effect_instr_->AddSuccessor(new_node);
+      }
+      for (ScheduleGraphNode* load : pending_loads_) {
+        load->AddSuccessor(new_node);
+      }
+      pending_loads_.clear();
+      last_side_effect_instr_ = new_node;
+    } else if (IsLoadOperation(instr)) {
+      // Load operations can't be reordered with side-effecting instructions,
+      // but independent loads can be reordered with respect to each other.
+      if (last_side_effect_instr_ != nullptr) {
+        last_side_effect_instr_->AddSuccessor(new_node);
+      }
+      pending_loads_.push_back(new_node);
+    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
+      // Ensure that deopts or traps are not reordered with respect to
+      // side-effect instructions.
+      if (last_side_effect_instr_ != nullptr) {
+        last_side_effect_instr_->AddSuccessor(new_node);
+      }
+      last_deopt_or_trap_ = new_node;
+    }
+
+    // Look for operand dependencies.
+    for (size_t i = 0; i < instr->InputCount(); ++i) {
+      const InstructionOperand* input = instr->InputAt(i);
+      if (input->IsUnallocated()) {
+        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
+        auto it = operands_map_.find(vreg);
+        if (it != operands_map_.end()) {
+          it->second->AddSuccessor(new_node);
+        }
+      }
+    }
+
+    // Record the virtual registers defined by this instruction.
+    for (size_t i = 0; i < instr->OutputCount(); ++i) {
+      const InstructionOperand* output = instr->OutputAt(i);
+      if (output->IsUnallocated()) {
+        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
+            new_node;
+      } else if (output->IsConstant()) {
+        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
+            new_node;
+      }
+    }
+  }
+
+  graph_.push_back(new_node);
+}
+
+template <typename QueueType>
+void InstructionScheduler::Schedule() {
+  QueueType ready_list(this);
+
+  // Compute total latencies so that we can schedule the critical path first.
+  ComputeTotalLatencies();
+
+  // Add nodes which don't have dependencies to the ready list.
+  for (ScheduleGraphNode* node : graph_) {
+    if (!node->HasUnscheduledPredecessor()) {
+      ready_list.AddNode(node);
+    }
+  }
+
+  // Go through the ready list and schedule the instructions.
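+  // {cycle} advances by one per iteration even when no candidate is ready,
+  // which models waiting for an in-flight result to become available.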
+  int cycle = 0;
+  while (!ready_list.IsEmpty()) {
+    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);
+
+    if (candidate != nullptr) {
+      sequence()->AddInstruction(candidate->instruction());
+
+      for (ScheduleGraphNode* successor : candidate->successors()) {
+        successor->DropUnscheduledPredecessor();
+        successor->set_start_cycle(
+            std::max(successor->start_cycle(), cycle + candidate->latency()));
+
+        if (!successor->HasUnscheduledPredecessor()) {
+          ready_list.AddNode(successor);
+        }
+      }
+    }
+
+    cycle++;
+  }
+
+  // Reset own state.
+  graph_.clear();
+  operands_map_.clear();
+  pending_loads_.clear();
+  last_deopt_or_trap_ = nullptr;
+  last_live_in_reg_marker_ = nullptr;
+  last_side_effect_instr_ = nullptr;
+}
+
+int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kArchNop:
+    case kArchStackCheckOffset:
+    case kArchFramePointer:
+    case kArchParentFramePointer:
+    case kArchStackSlot:  // Despite its name this opcode will produce a
+                          // reference to a frame slot, so it is not affected
+                          // by the arm64 dual stack issues mentioned below.
+    case kArchComment:
+    case kArchDeoptimize:
+    case kArchJmp:
+    case kArchBinarySearchSwitch:
+    case kArchRet:
+    case kArchTableSwitch:
+    case kArchThrowTerminator:
+      return kNoOpcodeFlags;
+
+    case kArchTruncateDoubleToI:
+    case kIeee754Float64Acos:
+    case kIeee754Float64Acosh:
+    case kIeee754Float64Asin:
+    case kIeee754Float64Asinh:
+    case kIeee754Float64Atan:
+    case kIeee754Float64Atanh:
+    case kIeee754Float64Atan2:
+    case kIeee754Float64Cbrt:
+    case kIeee754Float64Cos:
+    case kIeee754Float64Cosh:
+    case kIeee754Float64Exp:
+    case kIeee754Float64Expm1:
+    case kIeee754Float64Log:
+    case kIeee754Float64Log1p:
+    case kIeee754Float64Log10:
+    case kIeee754Float64Log2:
+    case kIeee754Float64Pow:
+    case kIeee754Float64Sin:
+    case kIeee754Float64Sinh:
+    case kIeee754Float64Tan:
+    case kIeee754Float64Tanh:
+      return kNoOpcodeFlags;
+
+    case kArchStackPointerGreaterThan:
+      // The ArchStackPointerGreaterThan instruction loads the current stack
+      // pointer value and must not be reordered with instructions with side
+      // effects.
+      return kIsLoadOperation;
+
+    case kArchWordPoisonOnSpeculation:
+      // While poisoning operations have no side effect, they must not be
+      // reordered relative to branches.
+      return kHasSideEffect;
+
+    case kArchPrepareCallCFunction:
+    case kArchPrepareTailCall:
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject:
+    case kArchTailCallAddress:
+    case kArchTailCallWasm:
+    case kArchAbortCSAAssert:
+      return kHasSideEffect;
+
+    case kArchDebugBreak:
+      return kIsBarrier;
+
+    case kArchSaveCallerRegisters:
+    case kArchRestoreCallerRegisters:
+      return kIsBarrier;
+
+    case kArchCallCFunction:
+    case kArchCallCodeObject:
+    case kArchCallJSFunction:
+    case kArchCallWasmFunction:
+    case kArchCallBuiltinPointer:
+      // Calls can cause GC and GC may relocate objects. If a pure instruction
+      // operates on a tagged pointer that was cast to a word then it may be
+      // incorrect to move the instruction across the call. Hence we mark all
+      // (non-tail-)calls as barriers.
+      return kIsBarrier;
+
+    case kArchStoreWithWriteBarrier:
+      return kHasSideEffect;
+
+    case kWord32AtomicLoadInt8:
+    case kWord32AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kWord32AtomicLoadUint16:
+    case kWord32AtomicLoadWord32:
+      return kIsLoadOperation;
+
+    case kWord32AtomicStoreWord8:
+    case kWord32AtomicStoreWord16:
+    case kWord32AtomicStoreWord32:
+      return kHasSideEffect;
+
+    case kWord32AtomicExchangeInt8:
+    case kWord32AtomicExchangeUint8:
+    case kWord32AtomicExchangeInt16:
+    case kWord32AtomicExchangeUint16:
+    case kWord32AtomicExchangeWord32:
+    case kWord32AtomicCompareExchangeInt8:
+    case kWord32AtomicCompareExchangeUint8:
+    case kWord32AtomicCompareExchangeInt16:
+    case kWord32AtomicCompareExchangeUint16:
+    case kWord32AtomicCompareExchangeWord32:
+    case kWord32AtomicAddInt8:
+    case kWord32AtomicAddUint8:
+    case kWord32AtomicAddInt16:
+    case kWord32AtomicAddUint16:
+    case kWord32AtomicAddWord32:
+    case kWord32AtomicSubInt8:
+    case kWord32AtomicSubUint8:
+    case kWord32AtomicSubInt16:
+    case kWord32AtomicSubUint16:
+    case kWord32AtomicSubWord32:
+    case kWord32AtomicAndInt8:
+    case kWord32AtomicAndUint8:
+    case kWord32AtomicAndInt16:
+    case kWord32AtomicAndUint16:
+    case kWord32AtomicAndWord32:
+    case kWord32AtomicOrInt8:
+    case kWord32AtomicOrUint8:
+    case kWord32AtomicOrInt16:
+    case kWord32AtomicOrUint16:
+    case kWord32AtomicOrWord32:
+    case kWord32AtomicXorInt8:
+    case kWord32AtomicXorUint8:
+    case kWord32AtomicXorInt16:
+    case kWord32AtomicXorUint16:
+    case kWord32AtomicXorWord32:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      TARGET_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      return GetTargetInstructionFlags(instr);
+  }
+
+  UNREACHABLE();
+}
+
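+// A node's total latency is its own latency plus the largest total latency
+// of its successors. Successors always appear later in {graph_}, so a
+// reverse walk sees every successor before its predecessors.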
+void InstructionScheduler::ComputeTotalLatencies() {
+  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
+    int max_latency = 0;
+
+    for (ScheduleGraphNode* successor : node->successors()) {
+      DCHECK_NE(-1, successor->total_latency());
+      if (successor->total_latency() > max_latency) {
+        max_latency = successor->total_latency();
+      }
+    }
+
+    node->set_total_latency(max_latency + node->latency());
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/instruction-scheduler.h b/src/compiler/backend/instruction-scheduler.h
new file mode 100644
index 0000000..a7c1cc5
--- /dev/null
+++ b/src/compiler/backend/instruction-scheduler.h
@@ -0,0 +1,251 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_INSTRUCTION_SCHEDULER_H_
+#define V8_COMPILER_BACKEND_INSTRUCTION_SCHEDULER_H_
+
+#include "src/base/optional.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+
+namespace base {
+class RandomNumberGenerator;
+}  // namespace base
+
+namespace internal {
+namespace compiler {
+
+// A set of flags describing properties of the instructions so that the
+// scheduler is aware of dependencies between instructions.
+enum ArchOpcodeFlags {
+  kNoOpcodeFlags = 0,
+  kHasSideEffect = 1,    // The instruction has some side effects (memory
+                         // store, function call...)
+  kIsLoadOperation = 2,  // The instruction is a memory load.
+  kMayNeedDeoptOrTrapCheck = 4,  // The instruction may be associated with a
+                                 // deopt or trap check which must be run
+                                 // before the instruction, e.g. div on Intel
+                                 // platforms, which raises an exception when
+                                 // the divisor is zero.
+  kIsBarrier = 8,  // The instruction can cause GC or it reads/writes registers
+                   // that are not explicitly given. Nothing can be reordered
+                   // across such an instruction.
+};
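+// These flags are combined into the value returned by GetInstructionFlags()
+// and tested by helpers such as IsBarrier(), HasSideEffect() and
+// IsLoadOperation() below.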
+
+class InstructionScheduler final : public ZoneObject {
+ public:
+  V8_EXPORT_PRIVATE InstructionScheduler(Zone* zone,
+                                         InstructionSequence* sequence);
+
+  V8_EXPORT_PRIVATE void StartBlock(RpoNumber rpo);
+  V8_EXPORT_PRIVATE void EndBlock(RpoNumber rpo);
+
+  V8_EXPORT_PRIVATE void AddInstruction(Instruction* instr);
+  V8_EXPORT_PRIVATE void AddTerminator(Instruction* instr);
+
+  static bool SchedulerSupported();
+
+ private:
+  // A scheduling graph node.
+  // Represents an instruction and its dependencies.
+  class ScheduleGraphNode : public ZoneObject {
+   public:
+    ScheduleGraphNode(Zone* zone, Instruction* instr);
+
+    // Mark the instruction represented by 'node' as a dependency of this one.
+    // The current instruction will be registered as an unscheduled predecessor
+    // of 'node' (i.e. it must be scheduled before 'node').
+    void AddSuccessor(ScheduleGraphNode* node);
+
+    // Check if all the predecessors of this instruction have been scheduled.
+    bool HasUnscheduledPredecessor() {
+      return unscheduled_predecessors_count_ != 0;
+    }
+
+    // Record that we have scheduled one of the predecessors of this node.
+    void DropUnscheduledPredecessor() {
+      DCHECK_LT(0, unscheduled_predecessors_count_);
+      unscheduled_predecessors_count_--;
+    }
+
+    Instruction* instruction() { return instr_; }
+    ZoneDeque<ScheduleGraphNode*>& successors() { return successors_; }
+    int latency() const { return latency_; }
+
+    int total_latency() const { return total_latency_; }
+    void set_total_latency(int latency) { total_latency_ = latency; }
+
+    int start_cycle() const { return start_cycle_; }
+    void set_start_cycle(int start_cycle) { start_cycle_ = start_cycle; }
+
+   private:
+    Instruction* instr_;
+    ZoneDeque<ScheduleGraphNode*> successors_;
+
+    // Number of unscheduled predecessors for this node.
+    int unscheduled_predecessors_count_;
+
+    // Estimate of the instruction latency (the number of cycles it takes for
+    // the instruction to complete).
+    int latency_;
+
+    // The sum of all the latencies on the path from this node to the end of
+    // the graph (i.e. a node with no successor).
+    int total_latency_;
+
+    // The scheduler keeps a nominal cycle count to keep track of when the
+    // result of an instruction is available. This field is updated by the
+    // scheduler to indicate when the value of all the operands of this
+    // instruction will be available.
+    int start_cycle_;
+  };
+
+  // Keeps track of all nodes ready to be scheduled (i.e. all their
+  // dependencies have been scheduled). Note that this class is intended to be
+  // extended by concrete implementations of the scheduling queue, which
+  // define the policy for popping nodes from the queue.
+  class SchedulingQueueBase {
+   public:
+    explicit SchedulingQueueBase(InstructionScheduler* scheduler)
+        : scheduler_(scheduler), nodes_(scheduler->zone()) {}
+
+    void AddNode(ScheduleGraphNode* node);
+
+    bool IsEmpty() const { return nodes_.empty(); }
+
+   protected:
+    InstructionScheduler* scheduler_;
+    ZoneLinkedList<ScheduleGraphNode*> nodes_;
+  };
+
+  // A scheduling queue which prioritizes nodes on the critical path (we look
+  // for the instruction with the highest latency on the path to the end of
+  // the graph).
+  class CriticalPathFirstQueue : public SchedulingQueueBase {
+   public:
+    explicit CriticalPathFirstQueue(InstructionScheduler* scheduler)
+        : SchedulingQueueBase(scheduler) {}
+
+    // Look for the best candidate to schedule, remove it from the queue and
+    // return it.
+    ScheduleGraphNode* PopBestCandidate(int cycle);
+  };
+
+  // A queue which pops a random node from the ready list to perform stress
+  // tests on the scheduler.
+  class StressSchedulerQueue : public SchedulingQueueBase {
+   public:
+    explicit StressSchedulerQueue(InstructionScheduler* scheduler)
+        : SchedulingQueueBase(scheduler) {}
+
+    ScheduleGraphNode* PopBestCandidate(int cycle);
+
+   private:
+    base::RandomNumberGenerator* random_number_generator() {
+      return scheduler_->random_number_generator();
+    }
+  };
+
+  // Perform scheduling for the current block specifying the queue type to
+  // use to determine the next best candidate.
+  template <typename QueueType>
+  void Schedule();
+
+  // Return the scheduling properties of the given instruction.
+  V8_EXPORT_PRIVATE int GetInstructionFlags(const Instruction* instr) const;
+  int GetTargetInstructionFlags(const Instruction* instr) const;
+
+  bool IsBarrier(const Instruction* instr) const {
+    return (GetInstructionFlags(instr) & kIsBarrier) != 0;
+  }
+
+  // Check whether the given instruction has side effects (e.g. function call,
+  // memory store).
+  bool HasSideEffect(const Instruction* instr) const {
+    return (GetInstructionFlags(instr) & kHasSideEffect) != 0;
+  }
+
+  // Return true if the instruction is a memory load.
+  bool IsLoadOperation(const Instruction* instr) const {
+    return (GetInstructionFlags(instr) & kIsLoadOperation) != 0;
+  }
+
+  // The scheduler will not move the following instructions before the last
+  // deopt/trap check:
+  //  * loads (this is conservative)
+  //  * instructions with side effect
+  //  * other deopts/traps
+  // Any other instruction can be moved, apart from those that raise exceptions
+  // on specific inputs - these are filtered out by the deopt/trap check.
+  bool MayNeedDeoptOrTrapCheck(const Instruction* instr) const {
+    return (GetInstructionFlags(instr) & kMayNeedDeoptOrTrapCheck) != 0;
+  }
+
+  // Return true if the instruction cannot be moved before the last deopt or
+  // trap point we encountered.
+  bool DependsOnDeoptOrTrap(const Instruction* instr) const {
+    return MayNeedDeoptOrTrapCheck(instr) || instr->IsDeoptimizeCall() ||
+           instr->IsTrap() || HasSideEffect(instr) || IsLoadOperation(instr);
+  }
+
+  // Identify nops used as a definition point for live-in registers at
+  // function entry.
+  bool IsFixedRegisterParameter(const Instruction* instr) const {
+    return (instr->arch_opcode() == kArchNop) && (instr->OutputCount() == 1) &&
+           (instr->OutputAt(0)->IsUnallocated()) &&
+           (UnallocatedOperand::cast(instr->OutputAt(0))
+                ->HasFixedRegisterPolicy() ||
+            UnallocatedOperand::cast(instr->OutputAt(0))
+                ->HasFixedFPRegisterPolicy());
+  }
+
+  void ComputeTotalLatencies();
+
+  static int GetInstructionLatency(const Instruction* instr);
+
+  Zone* zone() { return zone_; }
+  InstructionSequence* sequence() { return sequence_; }
+  base::RandomNumberGenerator* random_number_generator() {
+    return &random_number_generator_.value();
+  }
+
+  Zone* zone_;
+  InstructionSequence* sequence_;
+  ZoneVector<ScheduleGraphNode*> graph_;
+
+  friend class InstructionSchedulerTester;
+
+  // Last side effect instruction encountered while building the graph.
+  ScheduleGraphNode* last_side_effect_instr_;
+
+  // Set of load instructions encountered since the last side effect instruction
+  // which will be added as predecessors of the next instruction with side
+  // effects.
+  ZoneVector<ScheduleGraphNode*> pending_loads_;
+
+  // Live-in register markers are nop instructions which are emitted at the
+  // beginning of a basic block so that the register allocator will find a
+  // defining instruction for live-in values. They must not be moved.
+  // All these nops are chained together and added as a predecessor of every
+  // other instruction in the basic block.
+  ScheduleGraphNode* last_live_in_reg_marker_;
+
+  // Last deoptimization or trap instruction encountered while building the
+  // graph.
+  ScheduleGraphNode* last_deopt_or_trap_;
+
+  // Keep track of definition points for virtual registers. This is used to
+  // record operand dependencies in the scheduling graph.
+  ZoneMap<int32_t, ScheduleGraphNode*> operands_map_;
+
+  base::Optional<base::RandomNumberGenerator> random_number_generator_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_INSTRUCTION_SCHEDULER_H_
diff --git a/src/compiler/backend/instruction-selector-impl.h b/src/compiler/backend/instruction-selector-impl.h
new file mode 100644
index 0000000..7e1f183
--- /dev/null
+++ b/src/compiler/backend/instruction-selector-impl.h
@@ -0,0 +1,447 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_IMPL_H_
+#define V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_IMPL_H_
+
+#include "src/codegen/macro-assembler.h"
+#include "src/compiler/backend/instruction-selector.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/common-operator.h"
+#include "src/compiler/linkage.h"
+#include "src/compiler/schedule.h"
+#include "src/objects/tagged-index.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+struct CaseInfo {
+  int32_t value;  // The case value.
+  int32_t order;  // The order for lowering to comparisons (less means earlier).
+  BasicBlock* branch;  // The basic block corresponding to the case value.
+};
+
+inline bool operator<(const CaseInfo& l, const CaseInfo& r) {
+  return l.order < r.order;
+}
+
+// Helper class containing data about a table or lookup switch.
+class SwitchInfo {
+ public:
+  SwitchInfo(ZoneVector<CaseInfo> const& cases, int32_t min_value,
+             int32_t max_value, BasicBlock* default_branch)
+      : cases_(cases),
+        min_value_(min_value),
+        max_value_(max_value),
+        default_branch_(default_branch) {
+    if (cases.size() != 0) {
+      DCHECK_LE(min_value, max_value);
+      // Note that {value_range} can be 0 if {min_value} is -2^31 and
+      // {max_value} is 2^31-1, so don't assume that it's non-zero below.
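+      // Example: min_value = -1 and max_value = 1 give value_range_ = 3 via
+      // the unsigned wrap-around arithmetic below.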
+      value_range_ =
+          1u + bit_cast<uint32_t>(max_value) - bit_cast<uint32_t>(min_value);
+    } else {
+      value_range_ = 0;
+    }
+  }
+
+  std::vector<CaseInfo> CasesSortedByValue() const {
+    std::vector<CaseInfo> result(cases_.begin(), cases_.end());
+    std::stable_sort(result.begin(), result.end(),
+                     [](CaseInfo a, CaseInfo b) { return a.value < b.value; });
+    return result;
+  }
+  const ZoneVector<CaseInfo>& CasesUnsorted() const { return cases_; }
+  int32_t min_value() const { return min_value_; }
+  int32_t max_value() const { return max_value_; }
+  size_t value_range() const { return value_range_; }
+  size_t case_count() const { return cases_.size(); }
+  BasicBlock* default_branch() const { return default_branch_; }
+
+ private:
+  const ZoneVector<CaseInfo>& cases_;
+  int32_t min_value_;   // minimum value of {cases_}
+  int32_t max_value_;   // maximum value of {cases_}
+  size_t value_range_;  // |max_value - min_value| + 1
+  BasicBlock* default_branch_;
+};
+
+// A helper class for the instruction selector that simplifies construction of
+// Operands. This class implements a base for architecture-specific helpers.
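+// Define* methods build the operand that will receive a node's result, while
+// Use* methods build operands that read a node's value, under varying
+// constraints on where the register allocator may place it.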
+class OperandGenerator {
+ public:
+  explicit OperandGenerator(InstructionSelector* selector)
+      : selector_(selector) {}
+
+  InstructionOperand NoOutput() {
+    return InstructionOperand();  // Generates an invalid operand.
+  }
+
+  InstructionOperand DefineAsRegister(Node* node) {
+    return Define(node,
+                  UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                                     GetVReg(node)));
+  }
+
+  InstructionOperand DefineSameAsFirst(Node* node) {
+    return Define(node,
+                  UnallocatedOperand(UnallocatedOperand::SAME_AS_FIRST_INPUT,
+                                     GetVReg(node)));
+  }
+
+  InstructionOperand DefineAsFixed(Node* node, Register reg) {
+    return Define(node, UnallocatedOperand(UnallocatedOperand::FIXED_REGISTER,
+                                           reg.code(), GetVReg(node)));
+  }
+
+  template <typename FPRegType>
+  InstructionOperand DefineAsFixed(Node* node, FPRegType reg) {
+    return Define(node,
+                  UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER,
+                                     reg.code(), GetVReg(node)));
+  }
+
+  InstructionOperand DefineAsConstant(Node* node) {
+    selector()->MarkAsDefined(node);
+    int virtual_register = GetVReg(node);
+    sequence()->AddConstant(virtual_register, ToConstant(node));
+    return ConstantOperand(virtual_register);
+  }
+
+  InstructionOperand DefineAsLocation(Node* node, LinkageLocation location) {
+    return Define(node, ToUnallocatedOperand(location, GetVReg(node)));
+  }
+
+  InstructionOperand DefineAsDualLocation(Node* node,
+                                          LinkageLocation primary_location,
+                                          LinkageLocation secondary_location) {
+    return Define(node,
+                  ToDualLocationUnallocatedOperand(
+                      primary_location, secondary_location, GetVReg(node)));
+  }
+
+  InstructionOperand Use(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::NONE,
+                                        UnallocatedOperand::USED_AT_START,
+                                        GetVReg(node)));
+  }
+
+  InstructionOperand UseAnyAtEnd(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::REGISTER_OR_SLOT,
+                                        UnallocatedOperand::USED_AT_END,
+                                        GetVReg(node)));
+  }
+
+  InstructionOperand UseAny(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::REGISTER_OR_SLOT,
+                                        UnallocatedOperand::USED_AT_START,
+                                        GetVReg(node)));
+  }
+
+  InstructionOperand UseRegisterOrSlotOrConstant(Node* node) {
+    return Use(node, UnallocatedOperand(
+                         UnallocatedOperand::REGISTER_OR_SLOT_OR_CONSTANT,
+                         UnallocatedOperand::USED_AT_START, GetVReg(node)));
+  }
+
+  InstructionOperand UseUniqueRegisterOrSlotOrConstant(Node* node) {
+    return Use(node, UnallocatedOperand(
+                         UnallocatedOperand::REGISTER_OR_SLOT_OR_CONSTANT,
+                         GetVReg(node)));
+  }
+
+  InstructionOperand UseRegister(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                                        UnallocatedOperand::USED_AT_START,
+                                        GetVReg(node)));
+  }
+
+  InstructionOperand UseUniqueSlot(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::MUST_HAVE_SLOT,
+                                        GetVReg(node)));
+  }
+
+  // Use register or operand for the node. If a register is chosen, it won't
+  // alias any temporary or output registers.
+  InstructionOperand UseUnique(Node* node) {
+    return Use(node,
+               UnallocatedOperand(UnallocatedOperand::NONE, GetVReg(node)));
+  }
+
+  // Use a unique register for the node that does not alias any temporary or
+  // output registers.
+  InstructionOperand UseUniqueRegister(Node* node) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                                        GetVReg(node)));
+  }
+
+  InstructionOperand UseFixed(Node* node, Register reg) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::FIXED_REGISTER,
+                                        reg.code(), GetVReg(node)));
+  }
+
+  template <typename FPRegType>
+  InstructionOperand UseFixed(Node* node, FPRegType reg) {
+    return Use(node, UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER,
+                                        reg.code(), GetVReg(node)));
+  }
+
+  InstructionOperand UseImmediate(int immediate) {
+    return sequence()->AddImmediate(Constant(immediate));
+  }
+
+  InstructionOperand UseImmediate(Node* node) {
+    return sequence()->AddImmediate(ToConstant(node));
+  }
+
+  InstructionOperand UseNegatedImmediate(Node* node) {
+    return sequence()->AddImmediate(ToNegatedConstant(node));
+  }
+
+  InstructionOperand UseLocation(Node* node, LinkageLocation location) {
+    return Use(node, ToUnallocatedOperand(location, GetVReg(node)));
+  }
+
+  // Used to force gap moves from the from_location to the to_location
+  // immediately before an instruction.
+  InstructionOperand UsePointerLocation(LinkageLocation to_location,
+                                        LinkageLocation from_location) {
+    UnallocatedOperand casted_from_operand =
+        UnallocatedOperand::cast(TempLocation(from_location));
+    selector_->Emit(kArchNop, casted_from_operand);
+    return ToUnallocatedOperand(to_location,
+                                casted_from_operand.virtual_register());
+  }
+
+  InstructionOperand TempRegister() {
+    return UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                              UnallocatedOperand::USED_AT_START,
+                              sequence()->NextVirtualRegister());
+  }
+
+  int AllocateVirtualRegister() { return sequence()->NextVirtualRegister(); }
+
+  InstructionOperand DefineSameAsFirstForVreg(int vreg) {
+    return UnallocatedOperand(UnallocatedOperand::SAME_AS_FIRST_INPUT, vreg);
+  }
+
+  InstructionOperand DefineAsRegistertForVreg(int vreg) {
+    return UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER, vreg);
+  }
+
+  InstructionOperand UseRegisterForVreg(int vreg) {
+    return UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                              UnallocatedOperand::USED_AT_START, vreg);
+  }
+
+  // The kind of register generated for memory operands. kRegister is alive
+  // until the start of the operation, kUniqueRegister until the end.
+  enum RegisterMode {
+    kRegister,
+    kUniqueRegister,
+  };
+
+  InstructionOperand UseRegisterWithMode(Node* node,
+                                         RegisterMode register_mode) {
+    return register_mode == kRegister ? UseRegister(node)
+                                      : UseUniqueRegister(node);
+  }
+
+  InstructionOperand TempDoubleRegister() {
+    UnallocatedOperand op = UnallocatedOperand(
+        UnallocatedOperand::MUST_HAVE_REGISTER,
+        UnallocatedOperand::USED_AT_START, sequence()->NextVirtualRegister());
+    sequence()->MarkAsRepresentation(MachineRepresentation::kFloat64,
+                                     op.virtual_register());
+    return op;
+  }
+
+  InstructionOperand TempSimd128Register() {
+    UnallocatedOperand op = UnallocatedOperand(
+        UnallocatedOperand::MUST_HAVE_REGISTER,
+        UnallocatedOperand::USED_AT_START, sequence()->NextVirtualRegister());
+    sequence()->MarkAsRepresentation(MachineRepresentation::kSimd128,
+                                     op.virtual_register());
+    return op;
+  }
+
+  InstructionOperand TempRegister(Register reg) {
+    return UnallocatedOperand(UnallocatedOperand::FIXED_REGISTER, reg.code(),
+                              InstructionOperand::kInvalidVirtualRegister);
+  }
+
+  template <typename FPRegType>
+  InstructionOperand TempFpRegister(FPRegType reg) {
+    UnallocatedOperand op =
+        UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER, reg.code(),
+                           sequence()->NextVirtualRegister());
+    sequence()->MarkAsRepresentation(MachineRepresentation::kSimd128,
+                                     op.virtual_register());
+    return op;
+  }
+
+  InstructionOperand TempImmediate(int32_t imm) {
+    return sequence()->AddImmediate(Constant(imm));
+  }
+
+  InstructionOperand TempLocation(LinkageLocation location) {
+    return ToUnallocatedOperand(location, sequence()->NextVirtualRegister());
+  }
+
+  InstructionOperand Label(BasicBlock* block) {
+    return sequence()->AddImmediate(
+        Constant(RpoNumber::FromInt(block->rpo_number())));
+  }
+
+ protected:
+  InstructionSelector* selector() const { return selector_; }
+  InstructionSequence* sequence() const { return selector()->sequence(); }
+  Zone* zone() const { return selector()->instruction_zone(); }
+
+ private:
+  int GetVReg(Node* node) const { return selector_->GetVirtualRegister(node); }
+
+  static Constant ToConstant(const Node* node) {
+    switch (node->opcode()) {
+      case IrOpcode::kInt32Constant:
+        return Constant(OpParameter<int32_t>(node->op()));
+      case IrOpcode::kInt64Constant:
+        return Constant(OpParameter<int64_t>(node->op()));
+      case IrOpcode::kTaggedIndexConstant: {
+        // Unencoded index value.
+        intptr_t value =
+            static_cast<intptr_t>(OpParameter<int32_t>(node->op()));
+        DCHECK(TaggedIndex::IsValid(value));
+        // Generate it as a 32/64-bit constant in tagged form.
+        Address tagged_index = TaggedIndex::FromIntptr(value).ptr();
+        if (kSystemPointerSize == kInt32Size) {
+          return Constant(static_cast<int32_t>(tagged_index));
+        } else {
+          return Constant(static_cast<int64_t>(tagged_index));
+        }
+      }
+      case IrOpcode::kFloat32Constant:
+        return Constant(OpParameter<float>(node->op()));
+      case IrOpcode::kRelocatableInt32Constant:
+      case IrOpcode::kRelocatableInt64Constant:
+        return Constant(OpParameter<RelocatablePtrConstantInfo>(node->op()));
+      case IrOpcode::kFloat64Constant:
+      case IrOpcode::kNumberConstant:
+        return Constant(OpParameter<double>(node->op()));
+      case IrOpcode::kExternalConstant:
+        return Constant(OpParameter<ExternalReference>(node->op()));
+      case IrOpcode::kComment: {
+        // We cannot use {intptr_t} here, since the Constant constructor would
+        // be ambiguous on some architectures.
+        using ptrsize_int_t =
+            std::conditional<kSystemPointerSize == 8, int64_t, int32_t>::type;
+        return Constant(reinterpret_cast<ptrsize_int_t>(
+            OpParameter<const char*>(node->op())));
+      }
+      case IrOpcode::kHeapConstant:
+        return Constant(HeapConstantOf(node->op()));
+      case IrOpcode::kCompressedHeapConstant:
+        return Constant(HeapConstantOf(node->op()), true);
+      case IrOpcode::kDelayedStringConstant:
+        return Constant(StringConstantBaseOf(node->op()));
+      case IrOpcode::kDeadValue: {
+        switch (DeadValueRepresentationOf(node->op())) {
+          case MachineRepresentation::kBit:
+          case MachineRepresentation::kWord32:
+          case MachineRepresentation::kTagged:
+          case MachineRepresentation::kTaggedSigned:
+          case MachineRepresentation::kTaggedPointer:
+          case MachineRepresentation::kCompressed:
+          case MachineRepresentation::kCompressedPointer:
+            return Constant(static_cast<int32_t>(0));
+          case MachineRepresentation::kWord64:
+            return Constant(static_cast<int64_t>(0));
+          case MachineRepresentation::kFloat64:
+            return Constant(static_cast<double>(0));
+          case MachineRepresentation::kFloat32:
+            return Constant(static_cast<float>(0));
+          default:
+            UNREACHABLE();
+        }
+        break;
+      }
+      default:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  static Constant ToNegatedConstant(const Node* node) {
+    switch (node->opcode()) {
+      case IrOpcode::kInt32Constant:
+        return Constant(-OpParameter<int32_t>(node->op()));
+      case IrOpcode::kInt64Constant:
+        return Constant(-OpParameter<int64_t>(node->op()));
+      default:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  UnallocatedOperand Define(Node* node, UnallocatedOperand operand) {
+    DCHECK_NOT_NULL(node);
+    DCHECK_EQ(operand.virtual_register(), GetVReg(node));
+    selector()->MarkAsDefined(node);
+    return operand;
+  }
+
+  UnallocatedOperand Use(Node* node, UnallocatedOperand operand) {
+    DCHECK_NOT_NULL(node);
+    DCHECK_EQ(operand.virtual_register(), GetVReg(node));
+    selector()->MarkAsUsed(node);
+    return operand;
+  }
+
+  UnallocatedOperand ToDualLocationUnallocatedOperand(
+      LinkageLocation primary_location, LinkageLocation secondary_location,
+      int virtual_register) {
+    // We only support the primary location being a register and the secondary
+    // one a slot.
+    DCHECK(primary_location.IsRegister() &&
+           secondary_location.IsCalleeFrameSlot());
+    int reg_id = primary_location.AsRegister();
+    int slot_id = secondary_location.AsCalleeFrameSlot();
+    return UnallocatedOperand(reg_id, slot_id, virtual_register);
+  }
+
+  UnallocatedOperand ToUnallocatedOperand(LinkageLocation location,
+                                          int virtual_register) {
+    if (location.IsAnyRegister()) {
+      // any machine register.
+      return UnallocatedOperand(UnallocatedOperand::MUST_HAVE_REGISTER,
+                                virtual_register);
+    }
+    if (location.IsCallerFrameSlot()) {
+      // a location on the caller frame.
+      return UnallocatedOperand(UnallocatedOperand::FIXED_SLOT,
+                                location.AsCallerFrameSlot(), virtual_register);
+    }
+    if (location.IsCalleeFrameSlot()) {
+      // a spill location on this (callee) frame.
+      return UnallocatedOperand(UnallocatedOperand::FIXED_SLOT,
+                                location.AsCalleeFrameSlot(), virtual_register);
+    }
+    // a fixed register.
+    if (IsFloatingPoint(location.GetType().representation())) {
+      return UnallocatedOperand(UnallocatedOperand::FIXED_FP_REGISTER,
+                                location.AsRegister(), virtual_register);
+    }
+    return UnallocatedOperand(UnallocatedOperand::FIXED_REGISTER,
+                              location.AsRegister(), virtual_register);
+  }
+
+  InstructionSelector* selector_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_IMPL_H_
diff --git a/src/compiler/backend/instruction-selector.cc b/src/compiler/backend/instruction-selector.cc
new file mode 100644
index 0000000..b62cc83
--- /dev/null
+++ b/src/compiler/backend/instruction-selector.cc
@@ -0,0 +1,3357 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-selector.h"
+
+#include <limits>
+
+#include "src/base/iterator.h"
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/tick-counter.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/compiler-source-position-table.h"
+#include "src/compiler/js-heap-broker.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/compiler/pipeline.h"
+#include "src/compiler/schedule.h"
+#include "src/compiler/state-values-utils.h"
+#include "src/deoptimizer/deoptimizer.h"
+#include "src/wasm/simd-shuffle.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+InstructionSelector::InstructionSelector(
+    Zone* zone, size_t node_count, Linkage* linkage,
+    InstructionSequence* sequence, Schedule* schedule,
+    SourcePositionTable* source_positions, Frame* frame,
+    EnableSwitchJumpTable enable_switch_jump_table, TickCounter* tick_counter,
+    JSHeapBroker* broker, size_t* max_unoptimized_frame_height,
+    size_t* max_pushed_argument_count, SourcePositionMode source_position_mode,
+    Features features, EnableScheduling enable_scheduling,
+    EnableRootsRelativeAddressing enable_roots_relative_addressing,
+    PoisoningMitigationLevel poisoning_level, EnableTraceTurboJson trace_turbo)
+    : zone_(zone),
+      linkage_(linkage),
+      sequence_(sequence),
+      source_positions_(source_positions),
+      source_position_mode_(source_position_mode),
+      features_(features),
+      schedule_(schedule),
+      current_block_(nullptr),
+      instructions_(zone),
+      continuation_inputs_(sequence->zone()),
+      continuation_outputs_(sequence->zone()),
+      continuation_temps_(sequence->zone()),
+      defined_(node_count, false, zone),
+      used_(node_count, false, zone),
+      effect_level_(node_count, 0, zone),
+      virtual_registers_(node_count,
+                         InstructionOperand::kInvalidVirtualRegister, zone),
+      virtual_register_rename_(zone),
+      scheduler_(nullptr),
+      enable_scheduling_(enable_scheduling),
+      enable_roots_relative_addressing_(enable_roots_relative_addressing),
+      enable_switch_jump_table_(enable_switch_jump_table),
+      poisoning_level_(poisoning_level),
+      frame_(frame),
+      instruction_selection_failed_(false),
+      instr_origins_(sequence->zone()),
+      trace_turbo_(trace_turbo),
+      tick_counter_(tick_counter),
+      broker_(broker),
+      max_unoptimized_frame_height_(max_unoptimized_frame_height),
+      max_pushed_argument_count_(max_pushed_argument_count)
+#if V8_TARGET_ARCH_64_BIT
+      ,
+      phi_states_(node_count, Upper32BitsState::kNotYetChecked, zone)
+#endif
+{
+  DCHECK_EQ(*max_unoptimized_frame_height, 0);  // Caller-initialized.
+
+  instructions_.reserve(node_count);
+  continuation_inputs_.reserve(5);
+  continuation_outputs_.reserve(2);
+
+  if (trace_turbo_ == kEnableTraceTurboJson) {
+    instr_origins_.assign(node_count, {-1, 0});
+  }
+}
+
+bool InstructionSelector::SelectInstructions() {
+  // Mark the inputs of all phis in loop headers as used.
+  BasicBlockVector* blocks = schedule()->rpo_order();
+  for (auto const block : *blocks) {
+    if (!block->IsLoopHeader()) continue;
+    DCHECK_LE(2u, block->PredecessorCount());
+    for (Node* const phi : *block) {
+      if (phi->opcode() != IrOpcode::kPhi) continue;
+
+      // Mark all inputs as used.
+      for (Node* const input : phi->inputs()) {
+        MarkAsUsed(input);
+      }
+    }
+  }
+
+  // Visit each basic block in post order.
+  for (auto i = blocks->rbegin(); i != blocks->rend(); ++i) {
+    VisitBlock(*i);
+    if (instruction_selection_failed()) return false;
+  }
+
+  // Schedule the selected instructions.
+  if (UseInstructionScheduling()) {
+    scheduler_ = zone()->New<InstructionScheduler>(zone(), sequence());
+  }
+
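+  // Within {instructions_}, each block's instructions are stored in reverse
+  // program order, with the block's terminator at {code_end}. Re-add them to
+  // the sequence from the highest index downwards to restore program order.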
+  for (auto const block : *blocks) {
+    InstructionBlock* instruction_block =
+        sequence()->InstructionBlockAt(RpoNumber::FromInt(block->rpo_number()));
+    for (size_t i = 0; i < instruction_block->phis().size(); i++) {
+      UpdateRenamesInPhi(instruction_block->PhiAt(i));
+    }
+    size_t end = instruction_block->code_end();
+    size_t start = instruction_block->code_start();
+    DCHECK_LE(end, start);
+    StartBlock(RpoNumber::FromInt(block->rpo_number()));
+    if (end != start) {
+      while (start-- > end + 1) {
+        UpdateRenames(instructions_[start]);
+        AddInstruction(instructions_[start]);
+      }
+      UpdateRenames(instructions_[end]);
+      AddTerminator(instructions_[end]);
+    }
+    EndBlock(RpoNumber::FromInt(block->rpo_number()));
+  }
+#if DEBUG
+  sequence()->ValidateSSA();
+#endif
+  return true;
+}
+
+void InstructionSelector::StartBlock(RpoNumber rpo) {
+  if (UseInstructionScheduling()) {
+    DCHECK_NOT_NULL(scheduler_);
+    scheduler_->StartBlock(rpo);
+  } else {
+    sequence()->StartBlock(rpo);
+  }
+}
+
+void InstructionSelector::EndBlock(RpoNumber rpo) {
+  if (UseInstructionScheduling()) {
+    DCHECK_NOT_NULL(scheduler_);
+    scheduler_->EndBlock(rpo);
+  } else {
+    sequence()->EndBlock(rpo);
+  }
+}
+
+void InstructionSelector::AddTerminator(Instruction* instr) {
+  if (UseInstructionScheduling()) {
+    DCHECK_NOT_NULL(scheduler_);
+    scheduler_->AddTerminator(instr);
+  } else {
+    sequence()->AddInstruction(instr);
+  }
+}
+
+void InstructionSelector::AddInstruction(Instruction* instr) {
+  if (UseInstructionScheduling()) {
+    DCHECK_NOT_NULL(scheduler_);
+    scheduler_->AddInstruction(instr);
+  } else {
+    sequence()->AddInstruction(instr);
+  }
+}
+
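+// The fixed-arity Emit() overloads below forward to the general Emit() that
+// takes explicit operand arrays.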
+Instruction* InstructionSelector::Emit(InstructionCode opcode,
+                                       InstructionOperand output,
+                                       size_t temp_count,
+                                       InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  return Emit(opcode, output_count, &output, 0, nullptr, temp_count, temps);
+}
+
+Instruction* InstructionSelector::Emit(InstructionCode opcode,
+                                       InstructionOperand output,
+                                       InstructionOperand a, size_t temp_count,
+                                       InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  return Emit(opcode, output_count, &output, 1, &a, temp_count, temps);
+}
+
+Instruction* InstructionSelector::Emit(InstructionCode opcode,
+                                       InstructionOperand output,
+                                       InstructionOperand a,
+                                       InstructionOperand b, size_t temp_count,
+                                       InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  InstructionOperand inputs[] = {a, b};
+  size_t input_count = arraysize(inputs);
+  return Emit(opcode, output_count, &output, input_count, inputs, temp_count,
+              temps);
+}
+
+Instruction* InstructionSelector::Emit(InstructionCode opcode,
+                                       InstructionOperand output,
+                                       InstructionOperand a,
+                                       InstructionOperand b,
+                                       InstructionOperand c, size_t temp_count,
+                                       InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  InstructionOperand inputs[] = {a, b, c};
+  size_t input_count = arraysize(inputs);
+  return Emit(opcode, output_count, &output, input_count, inputs, temp_count,
+              temps);
+}
+
+Instruction* InstructionSelector::Emit(
+    InstructionCode opcode, InstructionOperand output, InstructionOperand a,
+    InstructionOperand b, InstructionOperand c, InstructionOperand d,
+    size_t temp_count, InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  InstructionOperand inputs[] = {a, b, c, d};
+  size_t input_count = arraysize(inputs);
+  return Emit(opcode, output_count, &output, input_count, inputs, temp_count,
+              temps);
+}
+
+Instruction* InstructionSelector::Emit(
+    InstructionCode opcode, InstructionOperand output, InstructionOperand a,
+    InstructionOperand b, InstructionOperand c, InstructionOperand d,
+    InstructionOperand e, size_t temp_count, InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  InstructionOperand inputs[] = {a, b, c, d, e};
+  size_t input_count = arraysize(inputs);
+  return Emit(opcode, output_count, &output, input_count, inputs, temp_count,
+              temps);
+}
+
+Instruction* InstructionSelector::Emit(
+    InstructionCode opcode, InstructionOperand output, InstructionOperand a,
+    InstructionOperand b, InstructionOperand c, InstructionOperand d,
+    InstructionOperand e, InstructionOperand f, size_t temp_count,
+    InstructionOperand* temps) {
+  size_t output_count = output.IsInvalid() ? 0 : 1;
+  InstructionOperand inputs[] = {a, b, c, d, e, f};
+  size_t input_count = arraysize(inputs);
+  return Emit(opcode, output_count, &output, input_count, inputs, temp_count,
+              temps);
+}
+
+Instruction* InstructionSelector::Emit(
+    InstructionCode opcode, size_t output_count, InstructionOperand* outputs,
+    size_t input_count, InstructionOperand* inputs, size_t temp_count,
+    InstructionOperand* temps) {
+  if (output_count >= Instruction::kMaxOutputCount ||
+      input_count >= Instruction::kMaxInputCount ||
+      temp_count >= Instruction::kMaxTempCount) {
+    set_instruction_selection_failed();
+    return nullptr;
+  }
+
+  Instruction* instr =
+      Instruction::New(instruction_zone(), opcode, output_count, outputs,
+                       input_count, inputs, temp_count, temps);
+  return Emit(instr);
+}
+
+Instruction* InstructionSelector::Emit(Instruction* instr) {
+  instructions_.push_back(instr);
+  return instr;
+}
+
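+// Returns true if {node} can be covered by (i.e. folded into) the instruction
+// selected for {user}; the numbered conditions below spell out when this is
+// safe.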
+bool InstructionSelector::CanCover(Node* user, Node* node) const {
+  // 1. Both {user} and {node} must be in the same basic block.
+  if (schedule()->block(node) != schedule()->block(user)) {
+    return false;
+  }
+  // 2. Pure {node}s must be owned by the {user}.
+  if (node->op()->HasProperty(Operator::kPure)) {
+    return node->OwnedBy(user);
+  }
+  // 3. Impure {node}s must match the effect level of {user}.
+  if (GetEffectLevel(node) != GetEffectLevel(user)) {
+    return false;
+  }
+  // 4. Only {node} must have value edges pointing to {user}.
+  for (Edge const edge : node->use_edges()) {
+    if (edge.from() != user && NodeProperties::IsValueEdge(edge)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool InstructionSelector::CanCoverTransitively(Node* user, Node* node,
+                                               Node* node_input) const {
+  if (CanCover(user, node) && CanCover(node, node_input)) {
+    // If {node} is pure, transitivity might not hold.
+    if (node->op()->HasProperty(Operator::kPure)) {
+      // If {node_input} is pure, the effect levels do not matter.
+      if (node_input->op()->HasProperty(Operator::kPure)) return true;
+      // Otherwise, {user} and {node_input} must have the same effect level.
+      return GetEffectLevel(user) == GetEffectLevel(node_input);
+    }
+    return true;
+  }
+  return false;
+}
+
+bool InstructionSelector::IsOnlyUserOfNodeInSameBlock(Node* user,
+                                                      Node* node) const {
+  BasicBlock* bb_user = schedule()->block(user);
+  BasicBlock* bb_node = schedule()->block(node);
+  if (bb_user != bb_node) return false;
+  for (Edge const edge : node->use_edges()) {
+    Node* from = edge.from();
+    if ((from != user) && (schedule()->block(from) == bb_user)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+void InstructionSelector::UpdateRenames(Instruction* instruction) {
+  for (size_t i = 0; i < instruction->InputCount(); i++) {
+    TryRename(instruction->InputAt(i));
+  }
+}
+
+void InstructionSelector::UpdateRenamesInPhi(PhiInstruction* phi) {
+  for (size_t i = 0; i < phi->operands().size(); i++) {
+    int vreg = phi->operands()[i];
+    int renamed = GetRename(vreg);
+    if (vreg != renamed) {
+      phi->RenameInput(i, renamed);
+    }
+  }
+}
+
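+// Follows the rename chain for {virtual_register} and returns the most recent
+// virtual register it has been renamed to, or the register itself if it has
+// never been renamed.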
+int InstructionSelector::GetRename(int virtual_register) {
+  int rename = virtual_register;
+  while (true) {
+    if (static_cast<size_t>(rename) >= virtual_register_rename_.size()) break;
+    int next = virtual_register_rename_[rename];
+    if (next == InstructionOperand::kInvalidVirtualRegister) {
+      break;
+    }
+    rename = next;
+  }
+  return rename;
+}
+
+void InstructionSelector::TryRename(InstructionOperand* op) {
+  if (!op->IsUnallocated()) return;
+  UnallocatedOperand* unalloc = UnallocatedOperand::cast(op);
+  int vreg = unalloc->virtual_register();
+  int rename = GetRename(vreg);
+  if (rename != vreg) {
+    *unalloc = UnallocatedOperand(*unalloc, rename);
+  }
+}
+
+void InstructionSelector::SetRename(const Node* node, const Node* rename) {
+  int vreg = GetVirtualRegister(node);
+  if (static_cast<size_t>(vreg) >= virtual_register_rename_.size()) {
+    int invalid = InstructionOperand::kInvalidVirtualRegister;
+    virtual_register_rename_.resize(vreg + 1, invalid);
+  }
+  virtual_register_rename_[vreg] = GetVirtualRegister(rename);
+}
+
+int InstructionSelector::GetVirtualRegister(const Node* node) {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, virtual_registers_.size());
+  int virtual_register = virtual_registers_[id];
+  if (virtual_register == InstructionOperand::kInvalidVirtualRegister) {
+    virtual_register = sequence()->NextVirtualRegister();
+    virtual_registers_[id] = virtual_register;
+  }
+  return virtual_register;
+}
+
+const std::map<NodeId, int> InstructionSelector::GetVirtualRegistersForTesting()
+    const {
+  std::map<NodeId, int> virtual_registers;
+  for (size_t n = 0; n < virtual_registers_.size(); ++n) {
+    if (virtual_registers_[n] != InstructionOperand::kInvalidVirtualRegister) {
+      NodeId const id = static_cast<NodeId>(n);
+      virtual_registers.insert(std::make_pair(id, virtual_registers_[n]));
+    }
+  }
+  return virtual_registers;
+}
+
+bool InstructionSelector::IsDefined(Node* node) const {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, defined_.size());
+  return defined_[id];
+}
+
+void InstructionSelector::MarkAsDefined(Node* node) {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, defined_.size());
+  defined_[id] = true;
+}
+
+bool InstructionSelector::IsUsed(Node* node) const {
+  DCHECK_NOT_NULL(node);
+  // TODO(bmeurer): This is a terrible monster hack, but we have to make sure
+  // that the Retain is actually emitted, otherwise the GC will mess up.
+  if (node->opcode() == IrOpcode::kRetain) return true;
+  if (!node->op()->HasProperty(Operator::kEliminatable)) return true;
+  size_t const id = node->id();
+  DCHECK_LT(id, used_.size());
+  return used_[id];
+}
+
+void InstructionSelector::MarkAsUsed(Node* node) {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, used_.size());
+  used_[id] = true;
+}
+
+int InstructionSelector::GetEffectLevel(Node* node) const {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, effect_level_.size());
+  return effect_level_[id];
+}
+
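+// When {cont} is a branch, returns the effect level of the branch node itself
+// (the control input of the true block's predecessor) rather than that of
+// {node}.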
+int InstructionSelector::GetEffectLevel(Node* node,
+                                        FlagsContinuation* cont) const {
+  return cont->IsBranch()
+             ? GetEffectLevel(
+                   cont->true_block()->PredecessorAt(0)->control_input())
+             : GetEffectLevel(node);
+}
+
+void InstructionSelector::SetEffectLevel(Node* node, int effect_level) {
+  DCHECK_NOT_NULL(node);
+  size_t const id = node->id();
+  DCHECK_LT(id, effect_level_.size());
+  effect_level_[id] = effect_level;
+}
+
+bool InstructionSelector::CanAddressRelativeToRootsRegister(
+    const ExternalReference& reference) const {
+  // There are three things to consider here:
+  // 1. CanUseRootsRegister: Is kRootRegister initialized?
+  const bool root_register_is_available_and_initialized = CanUseRootsRegister();
+  if (!root_register_is_available_and_initialized) return false;
+
+  // 2. enable_roots_relative_addressing_: Can we address everything on the heap
+  //    through the root register, i.e. are root-relative addresses to arbitrary
+  //    addresses guaranteed not to change between code generation and
+  //    execution?
+  const bool all_root_relative_offsets_are_constant =
+      (enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing);
+  if (all_root_relative_offsets_are_constant) return true;
+
+  // 3. IsAddressableThroughRootRegister: Is the target address guaranteed to
+  //    have a fixed root-relative offset? If so, we can ignore 2.
+  const bool this_root_relative_offset_is_constant =
+      TurboAssemblerBase::IsAddressableThroughRootRegister(isolate(),
+                                                           reference);
+  return this_root_relative_offset_is_constant;
+}
+
+bool InstructionSelector::CanUseRootsRegister() const {
+  return linkage()->GetIncomingDescriptor()->flags() &
+         CallDescriptor::kCanUseRoots;
+}
+
+void InstructionSelector::MarkAsRepresentation(MachineRepresentation rep,
+                                               const InstructionOperand& op) {
+  UnallocatedOperand unalloc = UnallocatedOperand::cast(op);
+  sequence()->MarkAsRepresentation(rep, unalloc.virtual_register());
+}
+
+void InstructionSelector::MarkAsRepresentation(MachineRepresentation rep,
+                                               Node* node) {
+  sequence()->MarkAsRepresentation(rep, GetVirtualRegister(node));
+}
+
+namespace {
+
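+// Translates a frame-state input into the operand used to describe it in the
+// deoptimization data: an immediate for constants (and the impossible-value
+// marker), an invalid operand for optimized-out values, and a stack slot or
+// "any" operand for everything else.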
+InstructionOperand OperandForDeopt(Isolate* isolate, OperandGenerator* g,
+                                   Node* input, FrameStateInputKind kind,
+                                   MachineRepresentation rep) {
+  if (rep == MachineRepresentation::kNone) {
+    return g->TempImmediate(FrameStateDescriptor::kImpossibleValue);
+  }
+
+  switch (input->opcode()) {
+    case IrOpcode::kInt32Constant:
+    case IrOpcode::kInt64Constant:
+    case IrOpcode::kNumberConstant:
+    case IrOpcode::kFloat32Constant:
+    case IrOpcode::kFloat64Constant:
+    case IrOpcode::kDelayedStringConstant:
+      return g->UseImmediate(input);
+    case IrOpcode::kCompressedHeapConstant:
+    case IrOpcode::kHeapConstant: {
+      if (!CanBeTaggedOrCompressedPointer(rep)) {
+        // If we have inconsistent static and dynamic types, e.g. if we
+        // smi-check a string, we can get here with a heap object that
+        // says it is a smi. In that case, we return an invalid instruction
+        // operand, which will be interpreted as an optimized-out value.
+
+        // TODO(jarin) Ideally, we should turn the current instruction
+        // into an abort (we should never execute it).
+        return InstructionOperand();
+      }
+
+      Handle<HeapObject> constant = HeapConstantOf(input->op());
+      RootIndex root_index;
+      if (isolate->roots_table().IsRootHandle(constant, &root_index) &&
+          root_index == RootIndex::kOptimizedOut) {
+        // For an optimized-out object we return an invalid instruction
+        // operand, so that we take the fast path for optimized-out values.
+        return InstructionOperand();
+      }
+
+      return g->UseImmediate(input);
+    }
+    case IrOpcode::kArgumentsElementsState:
+    case IrOpcode::kArgumentsLengthState:
+    case IrOpcode::kObjectState:
+    case IrOpcode::kTypedObjectState:
+      UNREACHABLE();
+    default:
+      switch (kind) {
+        case FrameStateInputKind::kStackSlot:
+          return g->UseUniqueSlot(input);
+        case FrameStateInputKind::kAny:
+          // Currently deopts "wrap" other operations, so the deopt's inputs
+          // are potentially needed until the end of the deoptimising code.
+          return g->UseAnyAtEnd(input);
+      }
+  }
+  UNREACHABLE();
+}
+
+}  // namespace
+
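+// Assigns ids to captured objects in deopt frame states so that objects with
+// the same identity (TypedObjectState/ObjectId nodes) are translated only
+// once; later occurrences refer back to the first by id.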
+class StateObjectDeduplicator {
+ public:
+  explicit StateObjectDeduplicator(Zone* zone) : objects_(zone) {}
+  static const size_t kNotDuplicated = SIZE_MAX;
+
+  size_t GetObjectId(Node* node) {
+    DCHECK(node->opcode() == IrOpcode::kTypedObjectState ||
+           node->opcode() == IrOpcode::kObjectId ||
+           node->opcode() == IrOpcode::kArgumentsElementsState);
+    for (size_t i = 0; i < objects_.size(); ++i) {
+      if (objects_[i] == node) return i;
+      // ObjectId nodes are the Turbofan way to express objects with the same
+      // identity in the deopt info. So they should always be mapped to
+      // previously appearing TypedObjectState nodes.
+      if (HasObjectId(objects_[i]) && HasObjectId(node) &&
+          ObjectIdOf(objects_[i]->op()) == ObjectIdOf(node->op())) {
+        return i;
+      }
+    }
+    DCHECK(node->opcode() == IrOpcode::kTypedObjectState ||
+           node->opcode() == IrOpcode::kArgumentsElementsState);
+    return kNotDuplicated;
+  }
+
+  size_t InsertObject(Node* node) {
+    DCHECK(node->opcode() == IrOpcode::kTypedObjectState ||
+           node->opcode() == IrOpcode::kObjectId ||
+           node->opcode() == IrOpcode::kArgumentsElementsState);
+    size_t id = objects_.size();
+    objects_.push_back(node);
+    return id;
+  }
+
+ private:
+  static bool HasObjectId(Node* node) {
+    return node->opcode() == IrOpcode::kTypedObjectState ||
+           node->opcode() == IrOpcode::kObjectId;
+  }
+
+  ZoneVector<Node*> objects_;
+};
+
+// Returns the number of instruction operands added to inputs.
+size_t InstructionSelector::AddOperandToStateValueDescriptor(
+    StateValueList* values, InstructionOperandVector* inputs,
+    OperandGenerator* g, StateObjectDeduplicator* deduplicator, Node* input,
+    MachineType type, FrameStateInputKind kind, Zone* zone) {
+  DCHECK_NOT_NULL(input);
+  switch (input->opcode()) {
+    case IrOpcode::kArgumentsElementsState: {
+      values->PushArgumentsElements(ArgumentsStateTypeOf(input->op()));
+      // The elements backing store of an arguments object participates in the
+      // duplicate object counting, but can itself never appear duplicated.
+      DCHECK_EQ(StateObjectDeduplicator::kNotDuplicated,
+                deduplicator->GetObjectId(input));
+      deduplicator->InsertObject(input);
+      return 0;
+    }
+    case IrOpcode::kArgumentsLengthState: {
+      values->PushArgumentsLength();
+      return 0;
+    }
+    case IrOpcode::kObjectState:
+      UNREACHABLE();
+    case IrOpcode::kTypedObjectState:
+    case IrOpcode::kObjectId: {
+      size_t id = deduplicator->GetObjectId(input);
+      if (id == StateObjectDeduplicator::kNotDuplicated) {
+        DCHECK_EQ(IrOpcode::kTypedObjectState, input->opcode());
+        size_t entries = 0;
+        id = deduplicator->InsertObject(input);
+        StateValueList* nested = values->PushRecursiveField(zone, id);
+        int const input_count = input->op()->ValueInputCount();
+        ZoneVector<MachineType> const* types = MachineTypesOf(input->op());
+        for (int i = 0; i < input_count; ++i) {
+          entries += AddOperandToStateValueDescriptor(
+              nested, inputs, g, deduplicator, input->InputAt(i), types->at(i),
+              kind, zone);
+        }
+        return entries;
+      } else {
+        // Deoptimizer counts duplicate objects for the running id, so we have
+        // to push the input again.
+        deduplicator->InsertObject(input);
+        values->PushDuplicate(id);
+        return 0;
+      }
+    }
+    default: {
+      InstructionOperand op =
+          OperandForDeopt(isolate(), g, input, kind, type.representation());
+      if (op.kind() == InstructionOperand::INVALID) {
+        // Invalid operand means the value is impossible or optimized-out.
+        values->PushOptimizedOut();
+        return 0;
+      } else {
+        inputs->push_back(op);
+        values->PushPlain(type);
+        return 1;
+      }
+    }
+  }
+}
+
+size_t InstructionSelector::AddInputsToFrameStateDescriptor(
+    StateValueList* values, InstructionOperandVector* inputs,
+    OperandGenerator* g, StateObjectDeduplicator* deduplicator, Node* node,
+    FrameStateInputKind kind, Zone* zone) {
+  size_t entries = 0;
+  StateValuesAccess::iterator it = StateValuesAccess(node).begin();
+  // Take advantage of the sparse nature of StateValuesAccess to skip over
+  // multiple empty nodes at once, pushing repeated OptimizedOuts all in one
+  // go.
+  while (!it.done()) {
+    values->PushOptimizedOut(it.AdvanceTillNotEmpty());
+    if (it.done()) break;
+    StateValuesAccess::TypedNode input_node = *it;
+    entries += AddOperandToStateValueDescriptor(values, inputs, g, deduplicator,
+                                                input_node.node,
+                                                input_node.type, kind, zone);
+    ++it;
+  }
+  return entries;
+}
+
+// Returns the number of instruction operands added to inputs.
+size_t InstructionSelector::AddInputsToFrameStateDescriptor(
+    FrameStateDescriptor* descriptor, Node* state, OperandGenerator* g,
+    StateObjectDeduplicator* deduplicator, InstructionOperandVector* inputs,
+    FrameStateInputKind kind, Zone* zone) {
+  DCHECK_EQ(IrOpcode::kFrameState, state->op()->opcode());
+
+  size_t entries = 0;
+  size_t initial_size = inputs->size();
+  USE(initial_size);  // initial_size is only used for debug.
+
+  if (descriptor->outer_state()) {
+    entries += AddInputsToFrameStateDescriptor(
+        descriptor->outer_state(), state->InputAt(kFrameStateOuterStateInput),
+        g, deduplicator, inputs, kind, zone);
+  }
+
+  Node* parameters = state->InputAt(kFrameStateParametersInput);
+  Node* locals = state->InputAt(kFrameStateLocalsInput);
+  Node* stack = state->InputAt(kFrameStateStackInput);
+  Node* context = state->InputAt(kFrameStateContextInput);
+  Node* function = state->InputAt(kFrameStateFunctionInput);
+
+  DCHECK_EQ(descriptor->parameters_count(),
+            StateValuesAccess(parameters).size());
+  DCHECK_EQ(descriptor->locals_count(), StateValuesAccess(locals).size());
+  DCHECK_EQ(descriptor->stack_count(), StateValuesAccess(stack).size());
+
+  StateValueList* values_descriptor = descriptor->GetStateValueDescriptors();
+
+  DCHECK_EQ(values_descriptor->size(), 0u);
+  values_descriptor->ReserveSize(descriptor->GetSize());
+
+  DCHECK_NOT_NULL(function);
+  entries += AddOperandToStateValueDescriptor(
+      values_descriptor, inputs, g, deduplicator, function,
+      MachineType::AnyTagged(), FrameStateInputKind::kStackSlot, zone);
+
+  entries += AddInputsToFrameStateDescriptor(
+      values_descriptor, inputs, g, deduplicator, parameters, kind, zone);
+
+  if (descriptor->HasContext()) {
+    DCHECK_NOT_NULL(context);
+    entries += AddOperandToStateValueDescriptor(
+        values_descriptor, inputs, g, deduplicator, context,
+        MachineType::AnyTagged(), FrameStateInputKind::kStackSlot, zone);
+  }
+
+  entries += AddInputsToFrameStateDescriptor(values_descriptor, inputs, g,
+                                             deduplicator, locals, kind, zone);
+  entries += AddInputsToFrameStateDescriptor(values_descriptor, inputs, g,
+                                             deduplicator, stack, kind, zone);
+  DCHECK_EQ(initial_size + entries, inputs->size());
+  return entries;
+}
+
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, FlagsContinuation* cont) {
+  return EmitWithContinuation(opcode, 0, nullptr, 0, nullptr, cont);
+}
+
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, InstructionOperand a, FlagsContinuation* cont) {
+  return EmitWithContinuation(opcode, 0, nullptr, 1, &a, cont);
+}
+
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, InstructionOperand a, InstructionOperand b,
+    FlagsContinuation* cont) {
+  InstructionOperand inputs[] = {a, b};
+  return EmitWithContinuation(opcode, 0, nullptr, arraysize(inputs), inputs,
+                              cont);
+}
+
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, InstructionOperand a, InstructionOperand b,
+    InstructionOperand c, FlagsContinuation* cont) {
+  InstructionOperand inputs[] = {a, b, c};
+  return EmitWithContinuation(opcode, 0, nullptr, arraysize(inputs), inputs,
+                              cont);
+}
+
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, size_t output_count, InstructionOperand* outputs,
+    size_t input_count, InstructionOperand* inputs, FlagsContinuation* cont) {
+  return EmitWithContinuation(opcode, output_count, outputs, input_count,
+                              inputs, 0, nullptr, cont);
+}
+
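+// The general form: copies the given operands into the continuation buffers
+// and appends the extra inputs and outputs implied by the continuation kind
+// (branch labels, deoptimization arguments, a set result, or a trap id).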
+Instruction* InstructionSelector::EmitWithContinuation(
+    InstructionCode opcode, size_t output_count, InstructionOperand* outputs,
+    size_t input_count, InstructionOperand* inputs, size_t temp_count,
+    InstructionOperand* temps, FlagsContinuation* cont) {
+  OperandGenerator g(this);
+
+  opcode = cont->Encode(opcode);
+
+  continuation_inputs_.resize(0);
+  for (size_t i = 0; i < input_count; i++) {
+    continuation_inputs_.push_back(inputs[i]);
+  }
+
+  continuation_outputs_.resize(0);
+  for (size_t i = 0; i < output_count; i++) {
+    continuation_outputs_.push_back(outputs[i]);
+  }
+
+  continuation_temps_.resize(0);
+  for (size_t i = 0; i < temp_count; i++) {
+    continuation_temps_.push_back(temps[i]);
+  }
+
+  if (cont->IsBranch()) {
+    continuation_inputs_.push_back(g.Label(cont->true_block()));
+    continuation_inputs_.push_back(g.Label(cont->false_block()));
+  } else if (cont->IsDeoptimize()) {
+    opcode |= MiscField::encode(static_cast<int>(input_count));
+    AppendDeoptimizeArguments(&continuation_inputs_, cont->kind(),
+                              cont->reason(), cont->feedback(),
+                              cont->frame_state());
+  } else if (cont->IsSet()) {
+    continuation_outputs_.push_back(g.DefineAsRegister(cont->result()));
+  } else if (cont->IsTrap()) {
+    int trap_id = static_cast<int>(cont->trap_id());
+    continuation_inputs_.push_back(g.UseImmediate(trap_id));
+  } else {
+    DCHECK(cont->IsNone());
+  }
+
+  size_t const emit_inputs_size = continuation_inputs_.size();
+  auto* emit_inputs =
+      emit_inputs_size ? &continuation_inputs_.front() : nullptr;
+  size_t const emit_outputs_size = continuation_outputs_.size();
+  auto* emit_outputs =
+      emit_outputs_size ? &continuation_outputs_.front() : nullptr;
+  size_t const emit_temps_size = continuation_temps_.size();
+  auto* emit_temps = emit_temps_size ? &continuation_temps_.front() : nullptr;
+  return Emit(opcode, emit_outputs_size, emit_outputs, emit_inputs_size,
+              emit_inputs, emit_temps_size, emit_temps);
+}
+
+void InstructionSelector::AppendDeoptimizeArguments(
+    InstructionOperandVector* args, DeoptimizeKind kind,
+    DeoptimizeReason reason, FeedbackSource const& feedback,
+    Node* frame_state) {
+  OperandGenerator g(this);
+  FrameStateDescriptor* const descriptor = GetFrameStateDescriptor(frame_state);
+  DCHECK_NE(DeoptimizeKind::kLazy, kind);
+  int const state_id =
+      sequence()->AddDeoptimizationEntry(descriptor, kind, reason, feedback);
+  args->push_back(g.TempImmediate(state_id));
+  StateObjectDeduplicator deduplicator(instruction_zone());
+  AddInputsToFrameStateDescriptor(descriptor, frame_state, &g, &deduplicator,
+                                  args, FrameStateInputKind::kAny,
+                                  instruction_zone());
+}
+
+// An internal helper class for generating the operands to calls.
+// TODO(bmeurer): Get rid of the CallBuffer business and make
+// InstructionSelector::VisitCall platform independent instead.
+struct CallBuffer {
+  CallBuffer(Zone* zone, const CallDescriptor* call_descriptor,
+             FrameStateDescriptor* frame_state)
+      : descriptor(call_descriptor),
+        frame_state_descriptor(frame_state),
+        output_nodes(zone),
+        outputs(zone),
+        instruction_args(zone),
+        pushed_nodes(zone) {
+    output_nodes.reserve(call_descriptor->ReturnCount());
+    outputs.reserve(call_descriptor->ReturnCount());
+    pushed_nodes.reserve(input_count());
+    instruction_args.reserve(input_count() + frame_state_value_count());
+  }
+
+  const CallDescriptor* descriptor;
+  FrameStateDescriptor* frame_state_descriptor;
+  ZoneVector<PushParameter> output_nodes;
+  InstructionOperandVector outputs;
+  InstructionOperandVector instruction_args;
+  ZoneVector<PushParameter> pushed_nodes;
+
+  size_t input_count() const { return descriptor->InputCount(); }
+
+  size_t frame_state_count() const { return descriptor->FrameStateCount(); }
+
+  size_t frame_state_value_count() const {
+    return (frame_state_descriptor == nullptr)
+               ? 0
+               : (frame_state_descriptor->GetTotalSize() +
+                  1);  // Include deopt id.
+  }
+};
+
+// TODO(bmeurer): Get rid of the CallBuffer business and make
+// InstructionSelector::VisitCall platform independent instead.
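+// Fills {buffer} with the call's outputs, the call target, any frame state
+// inputs, and the remaining arguments, splitting the arguments between those
+// pushed on the stack ({pushed_nodes}) and those passed directly to the call
+// instruction ({instruction_args}).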
+void InstructionSelector::InitializeCallBuffer(Node* call, CallBuffer* buffer,
+                                               CallBufferFlags flags,
+                                               bool is_tail_call,
+                                               int stack_param_delta) {
+  OperandGenerator g(this);
+  size_t ret_count = buffer->descriptor->ReturnCount();
+  DCHECK_LE(call->op()->ValueOutputCount(), ret_count);
+  DCHECK_EQ(
+      call->op()->ValueInputCount(),
+      static_cast<int>(buffer->input_count() + buffer->frame_state_count()));
+
+  if (ret_count > 0) {
+    // Collect the projections that represent multiple outputs from this call.
+    if (ret_count == 1) {
+      PushParameter result = {call, buffer->descriptor->GetReturnLocation(0)};
+      buffer->output_nodes.push_back(result);
+    } else {
+      buffer->output_nodes.resize(ret_count);
+      int stack_count = 0;
+      for (size_t i = 0; i < ret_count; ++i) {
+        LinkageLocation location = buffer->descriptor->GetReturnLocation(i);
+        buffer->output_nodes[i] = PushParameter(nullptr, location);
+        if (location.IsCallerFrameSlot()) {
+          stack_count += location.GetSizeInPointers();
+        }
+      }
+      for (Edge const edge : call->use_edges()) {
+        if (!NodeProperties::IsValueEdge(edge)) continue;
+        Node* node = edge.from();
+        DCHECK_EQ(IrOpcode::kProjection, node->opcode());
+        size_t const index = ProjectionIndexOf(node->op());
+
+        DCHECK_LT(index, buffer->output_nodes.size());
+        DCHECK(!buffer->output_nodes[index].node);
+        buffer->output_nodes[index].node = node;
+      }
+      frame_->EnsureReturnSlots(stack_count);
+    }
+
+    // Filter out the outputs that aren't live because no projection uses them.
+    size_t outputs_needed_by_framestate =
+        buffer->frame_state_descriptor == nullptr
+            ? 0
+            : buffer->frame_state_descriptor->state_combine()
+                  .ConsumedOutputCount();
+    for (size_t i = 0; i < buffer->output_nodes.size(); i++) {
+      bool output_is_live = buffer->output_nodes[i].node != nullptr ||
+                            i < outputs_needed_by_framestate;
+      if (output_is_live) {
+        LinkageLocation location = buffer->output_nodes[i].location;
+        MachineRepresentation rep = location.GetType().representation();
+
+        Node* output = buffer->output_nodes[i].node;
+        InstructionOperand op = output == nullptr
+                                    ? g.TempLocation(location)
+                                    : g.DefineAsLocation(output, location);
+        MarkAsRepresentation(rep, op);
+
+        if (!UnallocatedOperand::cast(op).HasFixedSlotPolicy()) {
+          buffer->outputs.push_back(op);
+          buffer->output_nodes[i].node = nullptr;
+        }
+      }
+    }
+  }
+
+  // The first argument is always the callee code.
+  Node* callee = call->InputAt(0);
+  bool call_code_immediate = (flags & kCallCodeImmediate) != 0;
+  bool call_address_immediate = (flags & kCallAddressImmediate) != 0;
+  bool call_use_fixed_target_reg = (flags & kCallFixedTargetRegister) != 0;
+  switch (buffer->descriptor->kind()) {
+    case CallDescriptor::kCallCodeObject:
+      // TODO(jgruber, v8:7449): The below is a hack to support tail-calls from
+      // JS-linkage callers with a register code target. The problem is that the
+      // code target register may be clobbered before the final jmp by
+      // AssemblePopArgumentsAdaptorFrame. As a more permanent fix we could
+      // entirely remove support for tail-calls from JS-linkage callers.
+      buffer->instruction_args.push_back(
+          (call_code_immediate && callee->opcode() == IrOpcode::kHeapConstant)
+              ? g.UseImmediate(callee)
+              : call_use_fixed_target_reg
+                    ? g.UseFixed(callee, kJavaScriptCallCodeStartRegister)
+                    : is_tail_call ? g.UseUniqueRegister(callee)
+                                   : g.UseRegister(callee));
+      break;
+    case CallDescriptor::kCallAddress:
+      buffer->instruction_args.push_back(
+          (call_address_immediate &&
+           callee->opcode() == IrOpcode::kExternalConstant)
+              ? g.UseImmediate(callee)
+              : call_use_fixed_target_reg
+                    ? g.UseFixed(callee, kJavaScriptCallCodeStartRegister)
+                    : g.UseRegister(callee));
+      break;
+    case CallDescriptor::kCallWasmCapiFunction:
+    case CallDescriptor::kCallWasmFunction:
+    case CallDescriptor::kCallWasmImportWrapper:
+      buffer->instruction_args.push_back(
+          (call_address_immediate &&
+           (callee->opcode() == IrOpcode::kRelocatableInt64Constant ||
+            callee->opcode() == IrOpcode::kRelocatableInt32Constant))
+              ? g.UseImmediate(callee)
+              : call_use_fixed_target_reg
+                    ? g.UseFixed(callee, kJavaScriptCallCodeStartRegister)
+                    : g.UseRegister(callee));
+      break;
+    case CallDescriptor::kCallBuiltinPointer:
+      // The common case for builtin pointers is to have the target in a
+      // register. If we have a constant, we use a register anyway to simplify
+      // related code.
+      buffer->instruction_args.push_back(
+          call_use_fixed_target_reg
+              ? g.UseFixed(callee, kJavaScriptCallCodeStartRegister)
+              : g.UseRegister(callee));
+      break;
+    case CallDescriptor::kCallJSFunction:
+      buffer->instruction_args.push_back(
+          g.UseLocation(callee, buffer->descriptor->GetInputLocation(0)));
+      break;
+  }
+  DCHECK_EQ(1u, buffer->instruction_args.size());
+
+  // Argument 1 is used for the poison-alias index (encoded in a word-sized
+  // immediate). This is the index of the operand that aliases with the poison
+  // register, or -1 if there is no aliasing.
+  buffer->instruction_args.push_back(g.TempImmediate(-1));
+  const size_t poison_alias_index = 1;
+  DCHECK_EQ(buffer->instruction_args.size() - 1, poison_alias_index);
+
+  // If the call needs a frame state, we insert the state information as
+  // follows (n is the number of value inputs to the frame state):
+  // arg 2               : deoptimization id.
+  // arg 3 - arg (n + 2) : value inputs to the frame state.
+  size_t frame_state_entries = 0;
+  USE(frame_state_entries);  // frame_state_entries is only used for debug.
+  if (buffer->frame_state_descriptor != nullptr) {
+    Node* frame_state =
+        call->InputAt(static_cast<int>(buffer->descriptor->InputCount()));
+
+    // If it was a syntactic tail call we need to drop the current frame and
+    // all the frames on top of it that are either an arguments adaptor frame
+    // or a tail caller frame.
+    if (is_tail_call) {
+      frame_state = NodeProperties::GetFrameStateInput(frame_state);
+      buffer->frame_state_descriptor =
+          buffer->frame_state_descriptor->outer_state();
+      while (buffer->frame_state_descriptor != nullptr &&
+             buffer->frame_state_descriptor->type() ==
+                 FrameStateType::kArgumentsAdaptor) {
+        frame_state = NodeProperties::GetFrameStateInput(frame_state);
+        buffer->frame_state_descriptor =
+            buffer->frame_state_descriptor->outer_state();
+      }
+    }
+
+    int const state_id = sequence()->AddDeoptimizationEntry(
+        buffer->frame_state_descriptor, DeoptimizeKind::kLazy,
+        DeoptimizeReason::kUnknown, FeedbackSource());
+    buffer->instruction_args.push_back(g.TempImmediate(state_id));
+
+    StateObjectDeduplicator deduplicator(instruction_zone());
+
+    frame_state_entries =
+        1 + AddInputsToFrameStateDescriptor(
+                buffer->frame_state_descriptor, frame_state, &g, &deduplicator,
+                &buffer->instruction_args, FrameStateInputKind::kStackSlot,
+                instruction_zone());
+
+    DCHECK_EQ(2 + frame_state_entries, buffer->instruction_args.size());
+  }
+
+  size_t input_count = static_cast<size_t>(buffer->input_count());
+
+  // Split the arguments into pushed_nodes and instruction_args. Pushed
+  // arguments require an explicit push instruction before the call and do
+  // not appear as arguments to the call. Everything else ends up
+  // as an InstructionOperand argument to the call.
+  auto iter(call->inputs().begin());
+  size_t pushed_count = 0;
+  bool call_tail = (flags & kCallTail) != 0;
+  for (size_t index = 0; index < input_count; ++iter, ++index) {
+    DCHECK(iter != call->inputs().end());
+    DCHECK_NE(IrOpcode::kFrameState, (*iter)->op()->opcode());
+    if (index == 0) continue;  // The first argument (callee) is already done.
+
+    LinkageLocation location = buffer->descriptor->GetInputLocation(index);
+    if (call_tail) {
+      location = LinkageLocation::ConvertToTailCallerLocation(
+          location, stack_param_delta);
+    }
+    InstructionOperand op = g.UseLocation(*iter, location);
+    UnallocatedOperand unallocated = UnallocatedOperand::cast(op);
+    if (unallocated.HasFixedSlotPolicy() && !call_tail) {
+      int stack_index = buffer->descriptor->GetStackIndexFromSlot(
+          unallocated.fixed_slot_index());
+      // This can insert empty slots before stack_index and will insert enough
+      // slots after stack_index to store the parameter.
+      if (static_cast<size_t>(stack_index) >= buffer->pushed_nodes.size()) {
+        int num_slots = std::max(
+            1, (ElementSizeInBytes(location.GetType().representation()) /
+                kSystemPointerSize));
+        buffer->pushed_nodes.resize(stack_index + num_slots);
+      }
+      PushParameter param = {*iter, location};
+      buffer->pushed_nodes[stack_index] = param;
+      pushed_count++;
+    } else {
+      // If we do load poisoning and the linkage uses the poisoning register,
+      // then we request the input in a memory location, and during code
+      // generation, we move the input to the register.
+      if (poisoning_level_ != PoisoningMitigationLevel::kDontPoison &&
+          unallocated.HasFixedRegisterPolicy()) {
+        int reg = unallocated.fixed_register_index();
+        if (Register::from_code(reg) == kSpeculationPoisonRegister) {
+          buffer->instruction_args[poison_alias_index] = g.TempImmediate(
+              static_cast<int32_t>(buffer->instruction_args.size()));
+          op = g.UseRegisterOrSlotOrConstant(*iter);
+        }
+      }
+      buffer->instruction_args.push_back(op);
+    }
+  }
+  DCHECK_EQ(input_count, buffer->instruction_args.size() + pushed_count -
+                             frame_state_entries - 1);
+  if (V8_TARGET_ARCH_STORES_RETURN_ADDRESS_ON_STACK && call_tail &&
+      stack_param_delta != 0) {
+    // For tail calls that change the size of their parameter list and keep
+    // their return address on the stack, move the return address to just above
+    // the parameters.
+    LinkageLocation saved_return_location =
+        LinkageLocation::ForSavedCallerReturnAddress();
+    InstructionOperand return_address =
+        g.UsePointerLocation(LinkageLocation::ConvertToTailCallerLocation(
+                                 saved_return_location, stack_param_delta),
+                             saved_return_location);
+    buffer->instruction_args.push_back(return_address);
+  }
+}
+
+bool InstructionSelector::IsSourcePositionUsed(Node* node) {
+  return (source_position_mode_ == kAllSourcePositions ||
+          node->opcode() == IrOpcode::kCall ||
+          node->opcode() == IrOpcode::kTrapIf ||
+          node->opcode() == IrOpcode::kTrapUnless ||
+          node->opcode() == IrOpcode::kProtectedLoad ||
+          node->opcode() == IrOpcode::kProtectedStore);
+}
+
+void InstructionSelector::VisitBlock(BasicBlock* block) {
+  DCHECK(!current_block_);
+  current_block_ = block;
+  auto current_num_instructions = [&] {
+    DCHECK_GE(kMaxInt, instructions_.size());
+    return static_cast<int>(instructions_.size());
+  };
+  int current_block_end = current_num_instructions();
+
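+  // Compute effect levels for all nodes in the block: nodes with observable
+  // side effects (stores, calls, atomics, memory barriers) bump the level, so
+  // that later covering checks can tell whether a load may still be folded
+  // into one of its uses.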
+  int effect_level = 0;
+  for (Node* const node : *block) {
+    SetEffectLevel(node, effect_level);
+    if (node->opcode() == IrOpcode::kStore ||
+        node->opcode() == IrOpcode::kUnalignedStore ||
+        node->opcode() == IrOpcode::kCall ||
+        node->opcode() == IrOpcode::kProtectedLoad ||
+        node->opcode() == IrOpcode::kProtectedStore ||
+        node->opcode() == IrOpcode::kLoadTransform ||
+#define ADD_EFFECT_FOR_ATOMIC_OP(Opcode) \
+  node->opcode() == IrOpcode::k##Opcode ||
+        MACHINE_ATOMIC_OP_LIST(ADD_EFFECT_FOR_ATOMIC_OP)
+#undef ADD_EFFECT_FOR_ATOMIC_OP
+                node->opcode() == IrOpcode::kMemoryBarrier) {
+      ++effect_level;
+    }
+  }
+
+  // We visit the control first, then the nodes in the block, so the block's
+  // control input should be on the same effect level as the last node.
+  if (block->control_input() != nullptr) {
+    SetEffectLevel(block->control_input(), effect_level);
+  }
+
+  auto FinishEmittedInstructions = [&](Node* node, int instruction_start) {
+    if (instruction_selection_failed()) return false;
+    if (current_num_instructions() == instruction_start) return true;
+    std::reverse(instructions_.begin() + instruction_start,
+                 instructions_.end());
+    if (!node) return true;
+    if (!source_positions_) return true;
+    SourcePosition source_position = source_positions_->GetSourcePosition(node);
+    if (source_position.IsKnown() && IsSourcePositionUsed(node)) {
+      sequence()->SetSourcePosition(instructions_[instruction_start],
+                                    source_position);
+    }
+    return true;
+  };
+
+  // Generate code for the block control "top down", but schedule the code
+  // "bottom up".
+  VisitControl(block);
+  if (!FinishEmittedInstructions(block->control_input(), current_block_end))
+    return;
+
+  // Visit code in reverse control flow order, because architecture-specific
+  // matching may cover more than one node at a time.
+  for (auto node : base::Reversed(*block)) {
+    int current_node_end = current_num_instructions();
+    // Skip nodes that are unused or already defined.
+    if (IsUsed(node) && !IsDefined(node)) {
+      // Generate code for this node "top down", but schedule the code "bottom
+      // up".
+      VisitNode(node);
+      if (!FinishEmittedInstructions(node, current_node_end)) return;
+    }
+    if (trace_turbo_ == kEnableTraceTurboJson) {
+      instr_origins_[node->id()] = {current_num_instructions(),
+                                    current_node_end};
+    }
+  }
+
+  // We're done with the block.
+  InstructionBlock* instruction_block =
+      sequence()->InstructionBlockAt(RpoNumber::FromInt(block->rpo_number()));
+  if (current_num_instructions() == current_block_end) {
+    // Avoid empty block: insert a {kArchNop} instruction.
+    Emit(Instruction::New(sequence()->zone(), kArchNop));
+  }
+  instruction_block->set_code_start(current_num_instructions());
+  instruction_block->set_code_end(current_block_end);
+  current_block_ = nullptr;
+}
+
+void InstructionSelector::VisitControl(BasicBlock* block) {
+#ifdef DEBUG
+  // SSA deconstruction requires targets of branches not to have phis.
+  // Edge split form guarantees this property, but is more strict.
+  if (block->SuccessorCount() > 1) {
+    for (BasicBlock* const successor : block->successors()) {
+      for (Node* const node : *successor) {
+        if (IrOpcode::IsPhiOpcode(node->opcode())) {
+          std::ostringstream str;
+          str << "You might have specified merged variables for a label with "
+              << "only one predecessor." << std::endl
+              << "# Current Block: " << *successor << std::endl
+              << "#          Node: " << *node;
+          FATAL("%s", str.str().c_str());
+        }
+      }
+    }
+  }
+#endif
+
+  Node* input = block->control_input();
+  int instruction_end = static_cast<int>(instructions_.size());
+  switch (block->control()) {
+    case BasicBlock::kGoto:
+      VisitGoto(block->SuccessorAt(0));
+      break;
+    case BasicBlock::kCall: {
+      DCHECK_EQ(IrOpcode::kCall, input->opcode());
+      BasicBlock* success = block->SuccessorAt(0);
+      BasicBlock* exception = block->SuccessorAt(1);
+      VisitCall(input, exception);
+      VisitGoto(success);
+      break;
+    }
+    case BasicBlock::kTailCall: {
+      DCHECK_EQ(IrOpcode::kTailCall, input->opcode());
+      VisitTailCall(input);
+      break;
+    }
+    case BasicBlock::kBranch: {
+      DCHECK_EQ(IrOpcode::kBranch, input->opcode());
+      BasicBlock* tbranch = block->SuccessorAt(0);
+      BasicBlock* fbranch = block->SuccessorAt(1);
+      if (tbranch == fbranch) {
+        VisitGoto(tbranch);
+      } else {
+        VisitBranch(input, tbranch, fbranch);
+      }
+      break;
+    }
+    case BasicBlock::kSwitch: {
+      DCHECK_EQ(IrOpcode::kSwitch, input->opcode());
+      // Last successor must be {IfDefault}.
+      BasicBlock* default_branch = block->successors().back();
+      DCHECK_EQ(IrOpcode::kIfDefault, default_branch->front()->opcode());
+      // All other successors must be {IfValue}s.
+      int32_t min_value = std::numeric_limits<int32_t>::max();
+      int32_t max_value = std::numeric_limits<int32_t>::min();
+      size_t case_count = block->SuccessorCount() - 1;
+      ZoneVector<CaseInfo> cases(case_count, zone());
+      for (size_t i = 0; i < case_count; ++i) {
+        BasicBlock* branch = block->SuccessorAt(i);
+        const IfValueParameters& p = IfValueParametersOf(branch->front()->op());
+        cases[i] = CaseInfo{p.value(), p.comparison_order(), branch};
+        if (min_value > p.value()) min_value = p.value();
+        if (max_value < p.value()) max_value = p.value();
+      }
+      SwitchInfo sw(cases, min_value, max_value, default_branch);
+      VisitSwitch(input, sw);
+      break;
+    }
+    case BasicBlock::kReturn: {
+      DCHECK_EQ(IrOpcode::kReturn, input->opcode());
+      VisitReturn(input);
+      break;
+    }
+    case BasicBlock::kDeoptimize: {
+      DeoptimizeParameters p = DeoptimizeParametersOf(input->op());
+      Node* value = input->InputAt(0);
+      VisitDeoptimize(p.kind(), p.reason(), p.feedback(), value);
+      break;
+    }
+    case BasicBlock::kThrow:
+      DCHECK_EQ(IrOpcode::kThrow, input->opcode());
+      VisitThrow(input);
+      break;
+    case BasicBlock::kNone: {
+      // Exit block doesn't have control.
+      DCHECK_NULL(input);
+      break;
+    }
+    default:
+      UNREACHABLE();
+  }
+  if (trace_turbo_ == kEnableTraceTurboJson && input) {
+    int instruction_start = static_cast<int>(instructions_.size());
+    instr_origins_[input->id()] = {instruction_start, instruction_end};
+  }
+}
+
+void InstructionSelector::MarkPairProjectionsAsWord32(Node* node) {
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  if (projection0) {
+    MarkAsWord32(projection0);
+  }
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    MarkAsWord32(projection1);
+  }
+}
+
+void InstructionSelector::VisitNode(Node* node) {
+  tick_counter_->TickAndMaybeEnterSafepoint();
+  DCHECK_NOT_NULL(schedule()->block(node));  // should only use scheduled nodes.
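+  // The MarkAs* calls below record the machine representation of each node's
+  // result in the InstructionSequence; the register allocator uses this to
+  // pick the appropriate register class.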
+  switch (node->opcode()) {
+    case IrOpcode::kStart:
+    case IrOpcode::kLoop:
+    case IrOpcode::kEnd:
+    case IrOpcode::kBranch:
+    case IrOpcode::kIfTrue:
+    case IrOpcode::kIfFalse:
+    case IrOpcode::kIfSuccess:
+    case IrOpcode::kSwitch:
+    case IrOpcode::kIfValue:
+    case IrOpcode::kIfDefault:
+    case IrOpcode::kEffectPhi:
+    case IrOpcode::kMerge:
+    case IrOpcode::kTerminate:
+    case IrOpcode::kBeginRegion:
+      // No code needed for these graph artifacts.
+      return;
+    case IrOpcode::kIfException:
+      return MarkAsTagged(node), VisitIfException(node);
+    case IrOpcode::kFinishRegion:
+      return MarkAsTagged(node), VisitFinishRegion(node);
+    case IrOpcode::kParameter: {
+      // Parameters should always be scheduled to the first block.
+      DCHECK_EQ(schedule()->block(node)->rpo_number(), 0);
+      MachineType type =
+          linkage()->GetParameterType(ParameterIndexOf(node->op()));
+      MarkAsRepresentation(type.representation(), node);
+      return VisitParameter(node);
+    }
+    case IrOpcode::kOsrValue:
+      return MarkAsTagged(node), VisitOsrValue(node);
+    case IrOpcode::kPhi: {
+      MachineRepresentation rep = PhiRepresentationOf(node->op());
+      if (rep == MachineRepresentation::kNone) return;
+      MarkAsRepresentation(rep, node);
+      return VisitPhi(node);
+    }
+    case IrOpcode::kProjection:
+      return VisitProjection(node);
+    case IrOpcode::kInt32Constant:
+    case IrOpcode::kInt64Constant:
+    case IrOpcode::kTaggedIndexConstant:
+    case IrOpcode::kExternalConstant:
+    case IrOpcode::kRelocatableInt32Constant:
+    case IrOpcode::kRelocatableInt64Constant:
+      return VisitConstant(node);
+    case IrOpcode::kFloat32Constant:
+      return MarkAsFloat32(node), VisitConstant(node);
+    case IrOpcode::kFloat64Constant:
+      return MarkAsFloat64(node), VisitConstant(node);
+    case IrOpcode::kHeapConstant:
+      return MarkAsTagged(node), VisitConstant(node);
+    case IrOpcode::kCompressedHeapConstant:
+      return MarkAsCompressed(node), VisitConstant(node);
+    case IrOpcode::kNumberConstant: {
+      double value = OpParameter<double>(node->op());
+      if (!IsSmiDouble(value)) MarkAsTagged(node);
+      return VisitConstant(node);
+    }
+    case IrOpcode::kDelayedStringConstant:
+      return MarkAsTagged(node), VisitConstant(node);
+    case IrOpcode::kCall:
+      return VisitCall(node);
+    case IrOpcode::kDeoptimizeIf:
+      return VisitDeoptimizeIf(node);
+    case IrOpcode::kDeoptimizeUnless:
+      return VisitDeoptimizeUnless(node);
+    case IrOpcode::kTrapIf:
+      return VisitTrapIf(node, TrapIdOf(node->op()));
+    case IrOpcode::kTrapUnless:
+      return VisitTrapUnless(node, TrapIdOf(node->op()));
+    case IrOpcode::kFrameState:
+    case IrOpcode::kStateValues:
+    case IrOpcode::kObjectState:
+      return;
+    case IrOpcode::kAbortCSAAssert:
+      VisitAbortCSAAssert(node);
+      return;
+    case IrOpcode::kDebugBreak:
+      VisitDebugBreak(node);
+      return;
+    case IrOpcode::kUnreachable:
+      VisitUnreachable(node);
+      return;
+    case IrOpcode::kStaticAssert:
+      VisitStaticAssert(node);
+      return;
+    case IrOpcode::kDeadValue:
+      VisitDeadValue(node);
+      return;
+    case IrOpcode::kComment:
+      VisitComment(node);
+      return;
+    case IrOpcode::kRetain:
+      VisitRetain(node);
+      return;
+    case IrOpcode::kLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitLoad(node);
+    }
+    case IrOpcode::kLoadTransform: {
+      MarkAsRepresentation(MachineRepresentation::kSimd128, node);
+      return VisitLoadTransform(node);
+    }
+    case IrOpcode::kLoadLane: {
+      MarkAsRepresentation(MachineRepresentation::kSimd128, node);
+      return VisitLoadLane(node);
+    }
+    case IrOpcode::kPoisonedLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitPoisonedLoad(node);
+    }
+    case IrOpcode::kStore:
+      return VisitStore(node);
+    case IrOpcode::kProtectedStore:
+      return VisitProtectedStore(node);
+    case IrOpcode::kStoreLane: {
+      MarkAsRepresentation(MachineRepresentation::kSimd128, node);
+      return VisitStoreLane(node);
+    }
+    case IrOpcode::kWord32And:
+      return MarkAsWord32(node), VisitWord32And(node);
+    case IrOpcode::kWord32Or:
+      return MarkAsWord32(node), VisitWord32Or(node);
+    case IrOpcode::kWord32Xor:
+      return MarkAsWord32(node), VisitWord32Xor(node);
+    case IrOpcode::kWord32Shl:
+      return MarkAsWord32(node), VisitWord32Shl(node);
+    case IrOpcode::kWord32Shr:
+      return MarkAsWord32(node), VisitWord32Shr(node);
+    case IrOpcode::kWord32Sar:
+      return MarkAsWord32(node), VisitWord32Sar(node);
+    case IrOpcode::kWord32Rol:
+      return MarkAsWord32(node), VisitWord32Rol(node);
+    case IrOpcode::kWord32Ror:
+      return MarkAsWord32(node), VisitWord32Ror(node);
+    case IrOpcode::kWord32Equal:
+      return VisitWord32Equal(node);
+    case IrOpcode::kWord32Clz:
+      return MarkAsWord32(node), VisitWord32Clz(node);
+    case IrOpcode::kWord32Ctz:
+      return MarkAsWord32(node), VisitWord32Ctz(node);
+    case IrOpcode::kWord32ReverseBits:
+      return MarkAsWord32(node), VisitWord32ReverseBits(node);
+    case IrOpcode::kWord32ReverseBytes:
+      return MarkAsWord32(node), VisitWord32ReverseBytes(node);
+    case IrOpcode::kInt32AbsWithOverflow:
+      return MarkAsWord32(node), VisitInt32AbsWithOverflow(node);
+    case IrOpcode::kWord32Popcnt:
+      return MarkAsWord32(node), VisitWord32Popcnt(node);
+    case IrOpcode::kWord64Popcnt:
+      return MarkAsWord32(node), VisitWord64Popcnt(node);
+    case IrOpcode::kWord64And:
+      return MarkAsWord64(node), VisitWord64And(node);
+    case IrOpcode::kWord64Or:
+      return MarkAsWord64(node), VisitWord64Or(node);
+    case IrOpcode::kWord64Xor:
+      return MarkAsWord64(node), VisitWord64Xor(node);
+    case IrOpcode::kWord64Shl:
+      return MarkAsWord64(node), VisitWord64Shl(node);
+    case IrOpcode::kWord64Shr:
+      return MarkAsWord64(node), VisitWord64Shr(node);
+    case IrOpcode::kWord64Sar:
+      return MarkAsWord64(node), VisitWord64Sar(node);
+    case IrOpcode::kWord64Rol:
+      return MarkAsWord64(node), VisitWord64Rol(node);
+    case IrOpcode::kWord64Ror:
+      return MarkAsWord64(node), VisitWord64Ror(node);
+    case IrOpcode::kWord64Clz:
+      return MarkAsWord64(node), VisitWord64Clz(node);
+    case IrOpcode::kWord64Ctz:
+      return MarkAsWord64(node), VisitWord64Ctz(node);
+    case IrOpcode::kWord64ReverseBits:
+      return MarkAsWord64(node), VisitWord64ReverseBits(node);
+    case IrOpcode::kWord64ReverseBytes:
+      return MarkAsWord64(node), VisitWord64ReverseBytes(node);
+    case IrOpcode::kSimd128ReverseBytes:
+      return MarkAsSimd128(node), VisitSimd128ReverseBytes(node);
+    case IrOpcode::kInt64AbsWithOverflow:
+      return MarkAsWord64(node), VisitInt64AbsWithOverflow(node);
+    case IrOpcode::kWord64Equal:
+      return VisitWord64Equal(node);
+    case IrOpcode::kInt32Add:
+      return MarkAsWord32(node), VisitInt32Add(node);
+    case IrOpcode::kInt32AddWithOverflow:
+      return MarkAsWord32(node), VisitInt32AddWithOverflow(node);
+    case IrOpcode::kInt32Sub:
+      return MarkAsWord32(node), VisitInt32Sub(node);
+    case IrOpcode::kInt32SubWithOverflow:
+      return VisitInt32SubWithOverflow(node);
+    case IrOpcode::kInt32Mul:
+      return MarkAsWord32(node), VisitInt32Mul(node);
+    case IrOpcode::kInt32MulWithOverflow:
+      return MarkAsWord32(node), VisitInt32MulWithOverflow(node);
+    case IrOpcode::kInt32MulHigh:
+      return VisitInt32MulHigh(node);
+    case IrOpcode::kInt32Div:
+      return MarkAsWord32(node), VisitInt32Div(node);
+    case IrOpcode::kInt32Mod:
+      return MarkAsWord32(node), VisitInt32Mod(node);
+    case IrOpcode::kInt32LessThan:
+      return VisitInt32LessThan(node);
+    case IrOpcode::kInt32LessThanOrEqual:
+      return VisitInt32LessThanOrEqual(node);
+    case IrOpcode::kUint32Div:
+      return MarkAsWord32(node), VisitUint32Div(node);
+    case IrOpcode::kUint32LessThan:
+      return VisitUint32LessThan(node);
+    case IrOpcode::kUint32LessThanOrEqual:
+      return VisitUint32LessThanOrEqual(node);
+    case IrOpcode::kUint32Mod:
+      return MarkAsWord32(node), VisitUint32Mod(node);
+    case IrOpcode::kUint32MulHigh:
+      return VisitUint32MulHigh(node);
+    case IrOpcode::kInt64Add:
+      return MarkAsWord64(node), VisitInt64Add(node);
+    case IrOpcode::kInt64AddWithOverflow:
+      return MarkAsWord64(node), VisitInt64AddWithOverflow(node);
+    case IrOpcode::kInt64Sub:
+      return MarkAsWord64(node), VisitInt64Sub(node);
+    case IrOpcode::kInt64SubWithOverflow:
+      return MarkAsWord64(node), VisitInt64SubWithOverflow(node);
+    case IrOpcode::kInt64Mul:
+      return MarkAsWord64(node), VisitInt64Mul(node);
+    case IrOpcode::kInt64Div:
+      return MarkAsWord64(node), VisitInt64Div(node);
+    case IrOpcode::kInt64Mod:
+      return MarkAsWord64(node), VisitInt64Mod(node);
+    case IrOpcode::kInt64LessThan:
+      return VisitInt64LessThan(node);
+    case IrOpcode::kInt64LessThanOrEqual:
+      return VisitInt64LessThanOrEqual(node);
+    case IrOpcode::kUint64Div:
+      return MarkAsWord64(node), VisitUint64Div(node);
+    case IrOpcode::kUint64LessThan:
+      return VisitUint64LessThan(node);
+    case IrOpcode::kUint64LessThanOrEqual:
+      return VisitUint64LessThanOrEqual(node);
+    case IrOpcode::kUint64Mod:
+      return MarkAsWord64(node), VisitUint64Mod(node);
+    case IrOpcode::kBitcastTaggedToWord:
+    case IrOpcode::kBitcastTaggedToWordForTagAndSmiBits:
+      return MarkAsRepresentation(MachineType::PointerRepresentation(), node),
+             VisitBitcastTaggedToWord(node);
+    case IrOpcode::kBitcastWordToTagged:
+      return MarkAsTagged(node), VisitBitcastWordToTagged(node);
+    case IrOpcode::kBitcastWordToTaggedSigned:
+      return MarkAsRepresentation(MachineRepresentation::kTaggedSigned, node),
+             EmitIdentity(node);
+    case IrOpcode::kChangeFloat32ToFloat64:
+      return MarkAsFloat64(node), VisitChangeFloat32ToFloat64(node);
+    case IrOpcode::kChangeInt32ToFloat64:
+      return MarkAsFloat64(node), VisitChangeInt32ToFloat64(node);
+    case IrOpcode::kChangeInt64ToFloat64:
+      return MarkAsFloat64(node), VisitChangeInt64ToFloat64(node);
+    case IrOpcode::kChangeUint32ToFloat64:
+      return MarkAsFloat64(node), VisitChangeUint32ToFloat64(node);
+    case IrOpcode::kChangeFloat64ToInt32:
+      return MarkAsWord32(node), VisitChangeFloat64ToInt32(node);
+    case IrOpcode::kChangeFloat64ToInt64:
+      return MarkAsWord64(node), VisitChangeFloat64ToInt64(node);
+    case IrOpcode::kChangeFloat64ToUint32:
+      return MarkAsWord32(node), VisitChangeFloat64ToUint32(node);
+    case IrOpcode::kChangeFloat64ToUint64:
+      return MarkAsWord64(node), VisitChangeFloat64ToUint64(node);
+    case IrOpcode::kFloat64SilenceNaN:
+      MarkAsFloat64(node);
+      if (CanProduceSignalingNaN(node->InputAt(0))) {
+        return VisitFloat64SilenceNaN(node);
+      } else {
+        return EmitIdentity(node);
+      }
+    case IrOpcode::kTruncateFloat64ToInt64:
+      return MarkAsWord64(node), VisitTruncateFloat64ToInt64(node);
+    case IrOpcode::kTruncateFloat64ToUint32:
+      return MarkAsWord32(node), VisitTruncateFloat64ToUint32(node);
+    case IrOpcode::kTruncateFloat32ToInt32:
+      return MarkAsWord32(node), VisitTruncateFloat32ToInt32(node);
+    case IrOpcode::kTruncateFloat32ToUint32:
+      return MarkAsWord32(node), VisitTruncateFloat32ToUint32(node);
+    case IrOpcode::kTryTruncateFloat32ToInt64:
+      return MarkAsWord64(node), VisitTryTruncateFloat32ToInt64(node);
+    case IrOpcode::kTryTruncateFloat64ToInt64:
+      return MarkAsWord64(node), VisitTryTruncateFloat64ToInt64(node);
+    case IrOpcode::kTryTruncateFloat32ToUint64:
+      return MarkAsWord64(node), VisitTryTruncateFloat32ToUint64(node);
+    case IrOpcode::kTryTruncateFloat64ToUint64:
+      return MarkAsWord64(node), VisitTryTruncateFloat64ToUint64(node);
+    case IrOpcode::kBitcastWord32ToWord64:
+      return MarkAsWord64(node), VisitBitcastWord32ToWord64(node);
+    case IrOpcode::kChangeInt32ToInt64:
+      return MarkAsWord64(node), VisitChangeInt32ToInt64(node);
+    case IrOpcode::kChangeUint32ToUint64:
+      return MarkAsWord64(node), VisitChangeUint32ToUint64(node);
+    case IrOpcode::kTruncateFloat64ToFloat32:
+      return MarkAsFloat32(node), VisitTruncateFloat64ToFloat32(node);
+    case IrOpcode::kTruncateFloat64ToWord32:
+      return MarkAsWord32(node), VisitTruncateFloat64ToWord32(node);
+    case IrOpcode::kTruncateInt64ToInt32:
+      return MarkAsWord32(node), VisitTruncateInt64ToInt32(node);
+    case IrOpcode::kRoundFloat64ToInt32:
+      return MarkAsWord32(node), VisitRoundFloat64ToInt32(node);
+    case IrOpcode::kRoundInt64ToFloat32:
+      return MarkAsFloat32(node), VisitRoundInt64ToFloat32(node);
+    case IrOpcode::kRoundInt32ToFloat32:
+      return MarkAsFloat32(node), VisitRoundInt32ToFloat32(node);
+    case IrOpcode::kRoundInt64ToFloat64:
+      return MarkAsFloat64(node), VisitRoundInt64ToFloat64(node);
+    case IrOpcode::kBitcastFloat32ToInt32:
+      return MarkAsWord32(node), VisitBitcastFloat32ToInt32(node);
+    case IrOpcode::kRoundUint32ToFloat32:
+      return MarkAsFloat32(node), VisitRoundUint32ToFloat32(node);
+    case IrOpcode::kRoundUint64ToFloat32:
+      return MarkAsFloat64(node), VisitRoundUint64ToFloat32(node);
+    case IrOpcode::kRoundUint64ToFloat64:
+      return MarkAsFloat64(node), VisitRoundUint64ToFloat64(node);
+    case IrOpcode::kBitcastFloat64ToInt64:
+      return MarkAsWord64(node), VisitBitcastFloat64ToInt64(node);
+    case IrOpcode::kBitcastInt32ToFloat32:
+      return MarkAsFloat32(node), VisitBitcastInt32ToFloat32(node);
+    case IrOpcode::kBitcastInt64ToFloat64:
+      return MarkAsFloat64(node), VisitBitcastInt64ToFloat64(node);
+    case IrOpcode::kFloat32Add:
+      return MarkAsFloat32(node), VisitFloat32Add(node);
+    case IrOpcode::kFloat32Sub:
+      return MarkAsFloat32(node), VisitFloat32Sub(node);
+    case IrOpcode::kFloat32Neg:
+      return MarkAsFloat32(node), VisitFloat32Neg(node);
+    case IrOpcode::kFloat32Mul:
+      return MarkAsFloat32(node), VisitFloat32Mul(node);
+    case IrOpcode::kFloat32Div:
+      return MarkAsFloat32(node), VisitFloat32Div(node);
+    case IrOpcode::kFloat32Abs:
+      return MarkAsFloat32(node), VisitFloat32Abs(node);
+    case IrOpcode::kFloat32Sqrt:
+      return MarkAsFloat32(node), VisitFloat32Sqrt(node);
+    case IrOpcode::kFloat32Equal:
+      return VisitFloat32Equal(node);
+    case IrOpcode::kFloat32LessThan:
+      return VisitFloat32LessThan(node);
+    case IrOpcode::kFloat32LessThanOrEqual:
+      return VisitFloat32LessThanOrEqual(node);
+    case IrOpcode::kFloat32Max:
+      return MarkAsFloat32(node), VisitFloat32Max(node);
+    case IrOpcode::kFloat32Min:
+      return MarkAsFloat32(node), VisitFloat32Min(node);
+    case IrOpcode::kFloat64Add:
+      return MarkAsFloat64(node), VisitFloat64Add(node);
+    case IrOpcode::kFloat64Sub:
+      return MarkAsFloat64(node), VisitFloat64Sub(node);
+    case IrOpcode::kFloat64Neg:
+      return MarkAsFloat64(node), VisitFloat64Neg(node);
+    case IrOpcode::kFloat64Mul:
+      return MarkAsFloat64(node), VisitFloat64Mul(node);
+    case IrOpcode::kFloat64Div:
+      return MarkAsFloat64(node), VisitFloat64Div(node);
+    case IrOpcode::kFloat64Mod:
+      return MarkAsFloat64(node), VisitFloat64Mod(node);
+    case IrOpcode::kFloat64Min:
+      return MarkAsFloat64(node), VisitFloat64Min(node);
+    case IrOpcode::kFloat64Max:
+      return MarkAsFloat64(node), VisitFloat64Max(node);
+    case IrOpcode::kFloat64Abs:
+      return MarkAsFloat64(node), VisitFloat64Abs(node);
+    case IrOpcode::kFloat64Acos:
+      return MarkAsFloat64(node), VisitFloat64Acos(node);
+    case IrOpcode::kFloat64Acosh:
+      return MarkAsFloat64(node), VisitFloat64Acosh(node);
+    case IrOpcode::kFloat64Asin:
+      return MarkAsFloat64(node), VisitFloat64Asin(node);
+    case IrOpcode::kFloat64Asinh:
+      return MarkAsFloat64(node), VisitFloat64Asinh(node);
+    case IrOpcode::kFloat64Atan:
+      return MarkAsFloat64(node), VisitFloat64Atan(node);
+    case IrOpcode::kFloat64Atanh:
+      return MarkAsFloat64(node), VisitFloat64Atanh(node);
+    case IrOpcode::kFloat64Atan2:
+      return MarkAsFloat64(node), VisitFloat64Atan2(node);
+    case IrOpcode::kFloat64Cbrt:
+      return MarkAsFloat64(node), VisitFloat64Cbrt(node);
+    case IrOpcode::kFloat64Cos:
+      return MarkAsFloat64(node), VisitFloat64Cos(node);
+    case IrOpcode::kFloat64Cosh:
+      return MarkAsFloat64(node), VisitFloat64Cosh(node);
+    case IrOpcode::kFloat64Exp:
+      return MarkAsFloat64(node), VisitFloat64Exp(node);
+    case IrOpcode::kFloat64Expm1:
+      return MarkAsFloat64(node), VisitFloat64Expm1(node);
+    case IrOpcode::kFloat64Log:
+      return MarkAsFloat64(node), VisitFloat64Log(node);
+    case IrOpcode::kFloat64Log1p:
+      return MarkAsFloat64(node), VisitFloat64Log1p(node);
+    case IrOpcode::kFloat64Log10:
+      return MarkAsFloat64(node), VisitFloat64Log10(node);
+    case IrOpcode::kFloat64Log2:
+      return MarkAsFloat64(node), VisitFloat64Log2(node);
+    case IrOpcode::kFloat64Pow:
+      return MarkAsFloat64(node), VisitFloat64Pow(node);
+    case IrOpcode::kFloat64Sin:
+      return MarkAsFloat64(node), VisitFloat64Sin(node);
+    case IrOpcode::kFloat64Sinh:
+      return MarkAsFloat64(node), VisitFloat64Sinh(node);
+    case IrOpcode::kFloat64Sqrt:
+      return MarkAsFloat64(node), VisitFloat64Sqrt(node);
+    case IrOpcode::kFloat64Tan:
+      return MarkAsFloat64(node), VisitFloat64Tan(node);
+    case IrOpcode::kFloat64Tanh:
+      return MarkAsFloat64(node), VisitFloat64Tanh(node);
+    case IrOpcode::kFloat64Equal:
+      return VisitFloat64Equal(node);
+    case IrOpcode::kFloat64LessThan:
+      return VisitFloat64LessThan(node);
+    case IrOpcode::kFloat64LessThanOrEqual:
+      return VisitFloat64LessThanOrEqual(node);
+    case IrOpcode::kFloat32RoundDown:
+      return MarkAsFloat32(node), VisitFloat32RoundDown(node);
+    case IrOpcode::kFloat64RoundDown:
+      return MarkAsFloat64(node), VisitFloat64RoundDown(node);
+    case IrOpcode::kFloat32RoundUp:
+      return MarkAsFloat32(node), VisitFloat32RoundUp(node);
+    case IrOpcode::kFloat64RoundUp:
+      return MarkAsFloat64(node), VisitFloat64RoundUp(node);
+    case IrOpcode::kFloat32RoundTruncate:
+      return MarkAsFloat32(node), VisitFloat32RoundTruncate(node);
+    case IrOpcode::kFloat64RoundTruncate:
+      return MarkAsFloat64(node), VisitFloat64RoundTruncate(node);
+    case IrOpcode::kFloat64RoundTiesAway:
+      return MarkAsFloat64(node), VisitFloat64RoundTiesAway(node);
+    case IrOpcode::kFloat32RoundTiesEven:
+      return MarkAsFloat32(node), VisitFloat32RoundTiesEven(node);
+    case IrOpcode::kFloat64RoundTiesEven:
+      return MarkAsFloat64(node), VisitFloat64RoundTiesEven(node);
+    case IrOpcode::kFloat64ExtractLowWord32:
+      return MarkAsWord32(node), VisitFloat64ExtractLowWord32(node);
+    case IrOpcode::kFloat64ExtractHighWord32:
+      return MarkAsWord32(node), VisitFloat64ExtractHighWord32(node);
+    case IrOpcode::kFloat64InsertLowWord32:
+      return MarkAsFloat64(node), VisitFloat64InsertLowWord32(node);
+    case IrOpcode::kFloat64InsertHighWord32:
+      return MarkAsFloat64(node), VisitFloat64InsertHighWord32(node);
+    case IrOpcode::kTaggedPoisonOnSpeculation:
+      return MarkAsTagged(node), VisitTaggedPoisonOnSpeculation(node);
+    case IrOpcode::kWord32PoisonOnSpeculation:
+      return MarkAsWord32(node), VisitWord32PoisonOnSpeculation(node);
+    case IrOpcode::kWord64PoisonOnSpeculation:
+      return MarkAsWord64(node), VisitWord64PoisonOnSpeculation(node);
+    case IrOpcode::kStackSlot:
+      return VisitStackSlot(node);
+    case IrOpcode::kStackPointerGreaterThan:
+      return VisitStackPointerGreaterThan(node);
+    case IrOpcode::kLoadStackCheckOffset:
+      return VisitLoadStackCheckOffset(node);
+    case IrOpcode::kLoadFramePointer:
+      return VisitLoadFramePointer(node);
+    case IrOpcode::kLoadParentFramePointer:
+      return VisitLoadParentFramePointer(node);
+    case IrOpcode::kUnalignedLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitUnalignedLoad(node);
+    }
+    case IrOpcode::kUnalignedStore:
+      return VisitUnalignedStore(node);
+    case IrOpcode::kInt32PairAdd:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitInt32PairAdd(node);
+    case IrOpcode::kInt32PairSub:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitInt32PairSub(node);
+    case IrOpcode::kInt32PairMul:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitInt32PairMul(node);
+    case IrOpcode::kWord32PairShl:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitWord32PairShl(node);
+    case IrOpcode::kWord32PairShr:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitWord32PairShr(node);
+    case IrOpcode::kWord32PairSar:
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitWord32PairSar(node);
+    case IrOpcode::kMemoryBarrier:
+      return VisitMemoryBarrier(node);
+    case IrOpcode::kWord32AtomicLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitWord32AtomicLoad(node);
+    }
+    case IrOpcode::kWord64AtomicLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitWord64AtomicLoad(node);
+    }
+    case IrOpcode::kWord32AtomicStore:
+      return VisitWord32AtomicStore(node);
+    case IrOpcode::kWord64AtomicStore:
+      return VisitWord64AtomicStore(node);
+    case IrOpcode::kWord32AtomicPairStore:
+      return VisitWord32AtomicPairStore(node);
+    case IrOpcode::kWord32AtomicPairLoad: {
+      MarkAsWord32(node);
+      MarkPairProjectionsAsWord32(node);
+      return VisitWord32AtomicPairLoad(node);
+    }
+#define ATOMIC_CASE(name, rep)                         \
+  case IrOpcode::k##rep##Atomic##name: {               \
+    MachineType type = AtomicOpType(node->op());       \
+    MarkAsRepresentation(type.representation(), node); \
+    return Visit##rep##Atomic##name(node);             \
+  }
+      ATOMIC_CASE(Add, Word32)
+      ATOMIC_CASE(Add, Word64)
+      ATOMIC_CASE(Sub, Word32)
+      ATOMIC_CASE(Sub, Word64)
+      ATOMIC_CASE(And, Word32)
+      ATOMIC_CASE(And, Word64)
+      ATOMIC_CASE(Or, Word32)
+      ATOMIC_CASE(Or, Word64)
+      ATOMIC_CASE(Xor, Word32)
+      ATOMIC_CASE(Xor, Word64)
+      ATOMIC_CASE(Exchange, Word32)
+      ATOMIC_CASE(Exchange, Word64)
+      ATOMIC_CASE(CompareExchange, Word32)
+      ATOMIC_CASE(CompareExchange, Word64)
+#undef ATOMIC_CASE
+#define ATOMIC_CASE(name)                     \
+  case IrOpcode::kWord32AtomicPair##name: {   \
+    MarkAsWord32(node);                       \
+    MarkPairProjectionsAsWord32(node);        \
+    return VisitWord32AtomicPair##name(node); \
+  }
+      ATOMIC_CASE(Add)
+      ATOMIC_CASE(Sub)
+      ATOMIC_CASE(And)
+      ATOMIC_CASE(Or)
+      ATOMIC_CASE(Xor)
+      ATOMIC_CASE(Exchange)
+      ATOMIC_CASE(CompareExchange)
+#undef ATOMIC_CASE
+    case IrOpcode::kProtectedLoad: {
+      LoadRepresentation type = LoadRepresentationOf(node->op());
+      MarkAsRepresentation(type.representation(), node);
+      return VisitProtectedLoad(node);
+    }
+    case IrOpcode::kSignExtendWord8ToInt32:
+      return MarkAsWord32(node), VisitSignExtendWord8ToInt32(node);
+    case IrOpcode::kSignExtendWord16ToInt32:
+      return MarkAsWord32(node), VisitSignExtendWord16ToInt32(node);
+    case IrOpcode::kSignExtendWord8ToInt64:
+      return MarkAsWord64(node), VisitSignExtendWord8ToInt64(node);
+    case IrOpcode::kSignExtendWord16ToInt64:
+      return MarkAsWord64(node), VisitSignExtendWord16ToInt64(node);
+    case IrOpcode::kSignExtendWord32ToInt64:
+      return MarkAsWord64(node), VisitSignExtendWord32ToInt64(node);
+    case IrOpcode::kUnsafePointerAdd:
+      MarkAsRepresentation(MachineType::PointerRepresentation(), node);
+      return VisitUnsafePointerAdd(node);
+    case IrOpcode::kF64x2Splat:
+      return MarkAsSimd128(node), VisitF64x2Splat(node);
+    case IrOpcode::kF64x2ExtractLane:
+      return MarkAsFloat64(node), VisitF64x2ExtractLane(node);
+    case IrOpcode::kF64x2ReplaceLane:
+      return MarkAsSimd128(node), VisitF64x2ReplaceLane(node);
+    case IrOpcode::kF64x2Abs:
+      return MarkAsSimd128(node), VisitF64x2Abs(node);
+    case IrOpcode::kF64x2Neg:
+      return MarkAsSimd128(node), VisitF64x2Neg(node);
+    case IrOpcode::kF64x2Sqrt:
+      return MarkAsSimd128(node), VisitF64x2Sqrt(node);
+    case IrOpcode::kF64x2Add:
+      return MarkAsSimd128(node), VisitF64x2Add(node);
+    case IrOpcode::kF64x2Sub:
+      return MarkAsSimd128(node), VisitF64x2Sub(node);
+    case IrOpcode::kF64x2Mul:
+      return MarkAsSimd128(node), VisitF64x2Mul(node);
+    case IrOpcode::kF64x2Div:
+      return MarkAsSimd128(node), VisitF64x2Div(node);
+    case IrOpcode::kF64x2Min:
+      return MarkAsSimd128(node), VisitF64x2Min(node);
+    case IrOpcode::kF64x2Max:
+      return MarkAsSimd128(node), VisitF64x2Max(node);
+    case IrOpcode::kF64x2Eq:
+      return MarkAsSimd128(node), VisitF64x2Eq(node);
+    case IrOpcode::kF64x2Ne:
+      return MarkAsSimd128(node), VisitF64x2Ne(node);
+    case IrOpcode::kF64x2Lt:
+      return MarkAsSimd128(node), VisitF64x2Lt(node);
+    case IrOpcode::kF64x2Le:
+      return MarkAsSimd128(node), VisitF64x2Le(node);
+    case IrOpcode::kF64x2Qfma:
+      return MarkAsSimd128(node), VisitF64x2Qfma(node);
+    case IrOpcode::kF64x2Qfms:
+      return MarkAsSimd128(node), VisitF64x2Qfms(node);
+    case IrOpcode::kF64x2Pmin:
+      return MarkAsSimd128(node), VisitF64x2Pmin(node);
+    case IrOpcode::kF64x2Pmax:
+      return MarkAsSimd128(node), VisitF64x2Pmax(node);
+    case IrOpcode::kF64x2Ceil:
+      return MarkAsSimd128(node), VisitF64x2Ceil(node);
+    case IrOpcode::kF64x2Floor:
+      return MarkAsSimd128(node), VisitF64x2Floor(node);
+    case IrOpcode::kF64x2Trunc:
+      return MarkAsSimd128(node), VisitF64x2Trunc(node);
+    case IrOpcode::kF64x2NearestInt:
+      return MarkAsSimd128(node), VisitF64x2NearestInt(node);
+    case IrOpcode::kF32x4Splat:
+      return MarkAsSimd128(node), VisitF32x4Splat(node);
+    case IrOpcode::kF32x4ExtractLane:
+      return MarkAsFloat32(node), VisitF32x4ExtractLane(node);
+    case IrOpcode::kF32x4ReplaceLane:
+      return MarkAsSimd128(node), VisitF32x4ReplaceLane(node);
+    case IrOpcode::kF32x4SConvertI32x4:
+      return MarkAsSimd128(node), VisitF32x4SConvertI32x4(node);
+    case IrOpcode::kF32x4UConvertI32x4:
+      return MarkAsSimd128(node), VisitF32x4UConvertI32x4(node);
+    case IrOpcode::kF32x4Abs:
+      return MarkAsSimd128(node), VisitF32x4Abs(node);
+    case IrOpcode::kF32x4Neg:
+      return MarkAsSimd128(node), VisitF32x4Neg(node);
+    case IrOpcode::kF32x4Sqrt:
+      return MarkAsSimd128(node), VisitF32x4Sqrt(node);
+    case IrOpcode::kF32x4RecipApprox:
+      return MarkAsSimd128(node), VisitF32x4RecipApprox(node);
+    case IrOpcode::kF32x4RecipSqrtApprox:
+      return MarkAsSimd128(node), VisitF32x4RecipSqrtApprox(node);
+    case IrOpcode::kF32x4Add:
+      return MarkAsSimd128(node), VisitF32x4Add(node);
+    case IrOpcode::kF32x4AddHoriz:
+      return MarkAsSimd128(node), VisitF32x4AddHoriz(node);
+    case IrOpcode::kF32x4Sub:
+      return MarkAsSimd128(node), VisitF32x4Sub(node);
+    case IrOpcode::kF32x4Mul:
+      return MarkAsSimd128(node), VisitF32x4Mul(node);
+    case IrOpcode::kF32x4Div:
+      return MarkAsSimd128(node), VisitF32x4Div(node);
+    case IrOpcode::kF32x4Min:
+      return MarkAsSimd128(node), VisitF32x4Min(node);
+    case IrOpcode::kF32x4Max:
+      return MarkAsSimd128(node), VisitF32x4Max(node);
+    case IrOpcode::kF32x4Eq:
+      return MarkAsSimd128(node), VisitF32x4Eq(node);
+    case IrOpcode::kF32x4Ne:
+      return MarkAsSimd128(node), VisitF32x4Ne(node);
+    case IrOpcode::kF32x4Lt:
+      return MarkAsSimd128(node), VisitF32x4Lt(node);
+    case IrOpcode::kF32x4Le:
+      return MarkAsSimd128(node), VisitF32x4Le(node);
+    case IrOpcode::kF32x4Qfma:
+      return MarkAsSimd128(node), VisitF32x4Qfma(node);
+    case IrOpcode::kF32x4Qfms:
+      return MarkAsSimd128(node), VisitF32x4Qfms(node);
+    case IrOpcode::kF32x4Pmin:
+      return MarkAsSimd128(node), VisitF32x4Pmin(node);
+    case IrOpcode::kF32x4Pmax:
+      return MarkAsSimd128(node), VisitF32x4Pmax(node);
+    case IrOpcode::kF32x4Ceil:
+      return MarkAsSimd128(node), VisitF32x4Ceil(node);
+    case IrOpcode::kF32x4Floor:
+      return MarkAsSimd128(node), VisitF32x4Floor(node);
+    case IrOpcode::kF32x4Trunc:
+      return MarkAsSimd128(node), VisitF32x4Trunc(node);
+    case IrOpcode::kF32x4NearestInt:
+      return MarkAsSimd128(node), VisitF32x4NearestInt(node);
+    case IrOpcode::kI64x2Splat:
+      return MarkAsSimd128(node), VisitI64x2Splat(node);
+    case IrOpcode::kI64x2SplatI32Pair:
+      return MarkAsSimd128(node), VisitI64x2SplatI32Pair(node);
+    case IrOpcode::kI64x2ExtractLane:
+      return MarkAsWord64(node), VisitI64x2ExtractLane(node);
+    case IrOpcode::kI64x2ReplaceLane:
+      return MarkAsSimd128(node), VisitI64x2ReplaceLane(node);
+    case IrOpcode::kI64x2ReplaceLaneI32Pair:
+      return MarkAsSimd128(node), VisitI64x2ReplaceLaneI32Pair(node);
+    case IrOpcode::kI64x2Neg:
+      return MarkAsSimd128(node), VisitI64x2Neg(node);
+    case IrOpcode::kI64x2SConvertI32x4Low:
+      return MarkAsSimd128(node), VisitI64x2SConvertI32x4Low(node);
+    case IrOpcode::kI64x2SConvertI32x4High:
+      return MarkAsSimd128(node), VisitI64x2SConvertI32x4High(node);
+    case IrOpcode::kI64x2UConvertI32x4Low:
+      return MarkAsSimd128(node), VisitI64x2UConvertI32x4Low(node);
+    case IrOpcode::kI64x2UConvertI32x4High:
+      return MarkAsSimd128(node), VisitI64x2UConvertI32x4High(node);
+    case IrOpcode::kI64x2BitMask:
+      return MarkAsWord32(node), VisitI64x2BitMask(node);
+    case IrOpcode::kI64x2Shl:
+      return MarkAsSimd128(node), VisitI64x2Shl(node);
+    case IrOpcode::kI64x2ShrS:
+      return MarkAsSimd128(node), VisitI64x2ShrS(node);
+    case IrOpcode::kI64x2Add:
+      return MarkAsSimd128(node), VisitI64x2Add(node);
+    case IrOpcode::kI64x2Sub:
+      return MarkAsSimd128(node), VisitI64x2Sub(node);
+    case IrOpcode::kI64x2Mul:
+      return MarkAsSimd128(node), VisitI64x2Mul(node);
+    case IrOpcode::kI64x2Eq:
+      return MarkAsSimd128(node), VisitI64x2Eq(node);
+    case IrOpcode::kI64x2ShrU:
+      return MarkAsSimd128(node), VisitI64x2ShrU(node);
+    case IrOpcode::kI64x2ExtMulLowI32x4S:
+      return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4S(node);
+    case IrOpcode::kI64x2ExtMulHighI32x4S:
+      return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4S(node);
+    case IrOpcode::kI64x2ExtMulLowI32x4U:
+      return MarkAsSimd128(node), VisitI64x2ExtMulLowI32x4U(node);
+    case IrOpcode::kI64x2ExtMulHighI32x4U:
+      return MarkAsSimd128(node), VisitI64x2ExtMulHighI32x4U(node);
+    case IrOpcode::kI64x2SignSelect:
+      return MarkAsSimd128(node), VisitI64x2SignSelect(node);
+    case IrOpcode::kI32x4Splat:
+      return MarkAsSimd128(node), VisitI32x4Splat(node);
+    case IrOpcode::kI32x4ExtractLane:
+      return MarkAsWord32(node), VisitI32x4ExtractLane(node);
+    case IrOpcode::kI32x4ReplaceLane:
+      return MarkAsSimd128(node), VisitI32x4ReplaceLane(node);
+    case IrOpcode::kI32x4SConvertF32x4:
+      return MarkAsSimd128(node), VisitI32x4SConvertF32x4(node);
+    case IrOpcode::kI32x4SConvertI16x8Low:
+      return MarkAsSimd128(node), VisitI32x4SConvertI16x8Low(node);
+    case IrOpcode::kI32x4SConvertI16x8High:
+      return MarkAsSimd128(node), VisitI32x4SConvertI16x8High(node);
+    case IrOpcode::kI32x4Neg:
+      return MarkAsSimd128(node), VisitI32x4Neg(node);
+    case IrOpcode::kI32x4Shl:
+      return MarkAsSimd128(node), VisitI32x4Shl(node);
+    case IrOpcode::kI32x4ShrS:
+      return MarkAsSimd128(node), VisitI32x4ShrS(node);
+    case IrOpcode::kI32x4Add:
+      return MarkAsSimd128(node), VisitI32x4Add(node);
+    case IrOpcode::kI32x4AddHoriz:
+      return MarkAsSimd128(node), VisitI32x4AddHoriz(node);
+    case IrOpcode::kI32x4Sub:
+      return MarkAsSimd128(node), VisitI32x4Sub(node);
+    case IrOpcode::kI32x4Mul:
+      return MarkAsSimd128(node), VisitI32x4Mul(node);
+    case IrOpcode::kI32x4MinS:
+      return MarkAsSimd128(node), VisitI32x4MinS(node);
+    case IrOpcode::kI32x4MaxS:
+      return MarkAsSimd128(node), VisitI32x4MaxS(node);
+    case IrOpcode::kI32x4Eq:
+      return MarkAsSimd128(node), VisitI32x4Eq(node);
+    case IrOpcode::kI32x4Ne:
+      return MarkAsSimd128(node), VisitI32x4Ne(node);
+    case IrOpcode::kI32x4GtS:
+      return MarkAsSimd128(node), VisitI32x4GtS(node);
+    case IrOpcode::kI32x4GeS:
+      return MarkAsSimd128(node), VisitI32x4GeS(node);
+    case IrOpcode::kI32x4UConvertF32x4:
+      return MarkAsSimd128(node), VisitI32x4UConvertF32x4(node);
+    case IrOpcode::kI32x4UConvertI16x8Low:
+      return MarkAsSimd128(node), VisitI32x4UConvertI16x8Low(node);
+    case IrOpcode::kI32x4UConvertI16x8High:
+      return MarkAsSimd128(node), VisitI32x4UConvertI16x8High(node);
+    case IrOpcode::kI32x4ShrU:
+      return MarkAsSimd128(node), VisitI32x4ShrU(node);
+    case IrOpcode::kI32x4MinU:
+      return MarkAsSimd128(node), VisitI32x4MinU(node);
+    case IrOpcode::kI32x4MaxU:
+      return MarkAsSimd128(node), VisitI32x4MaxU(node);
+    case IrOpcode::kI32x4GtU:
+      return MarkAsSimd128(node), VisitI32x4GtU(node);
+    case IrOpcode::kI32x4GeU:
+      return MarkAsSimd128(node), VisitI32x4GeU(node);
+    case IrOpcode::kI32x4Abs:
+      return MarkAsSimd128(node), VisitI32x4Abs(node);
+    case IrOpcode::kI32x4BitMask:
+      return MarkAsWord32(node), VisitI32x4BitMask(node);
+    case IrOpcode::kI32x4DotI16x8S:
+      return MarkAsSimd128(node), VisitI32x4DotI16x8S(node);
+    case IrOpcode::kI32x4ExtMulLowI16x8S:
+      return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8S(node);
+    case IrOpcode::kI32x4ExtMulHighI16x8S:
+      return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8S(node);
+    case IrOpcode::kI32x4ExtMulLowI16x8U:
+      return MarkAsSimd128(node), VisitI32x4ExtMulLowI16x8U(node);
+    case IrOpcode::kI32x4ExtMulHighI16x8U:
+      return MarkAsSimd128(node), VisitI32x4ExtMulHighI16x8U(node);
+    case IrOpcode::kI32x4SignSelect:
+      return MarkAsSimd128(node), VisitI32x4SignSelect(node);
+    case IrOpcode::kI32x4ExtAddPairwiseI16x8S:
+      return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8S(node);
+    case IrOpcode::kI32x4ExtAddPairwiseI16x8U:
+      return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8U(node);
+    case IrOpcode::kI16x8Splat:
+      return MarkAsSimd128(node), VisitI16x8Splat(node);
+    case IrOpcode::kI16x8ExtractLaneU:
+      return MarkAsWord32(node), VisitI16x8ExtractLaneU(node);
+    case IrOpcode::kI16x8ExtractLaneS:
+      return MarkAsWord32(node), VisitI16x8ExtractLaneS(node);
+    case IrOpcode::kI16x8ReplaceLane:
+      return MarkAsSimd128(node), VisitI16x8ReplaceLane(node);
+    case IrOpcode::kI16x8SConvertI8x16Low:
+      return MarkAsSimd128(node), VisitI16x8SConvertI8x16Low(node);
+    case IrOpcode::kI16x8SConvertI8x16High:
+      return MarkAsSimd128(node), VisitI16x8SConvertI8x16High(node);
+    case IrOpcode::kI16x8Neg:
+      return MarkAsSimd128(node), VisitI16x8Neg(node);
+    case IrOpcode::kI16x8Shl:
+      return MarkAsSimd128(node), VisitI16x8Shl(node);
+    case IrOpcode::kI16x8ShrS:
+      return MarkAsSimd128(node), VisitI16x8ShrS(node);
+    case IrOpcode::kI16x8SConvertI32x4:
+      return MarkAsSimd128(node), VisitI16x8SConvertI32x4(node);
+    case IrOpcode::kI16x8Add:
+      return MarkAsSimd128(node), VisitI16x8Add(node);
+    case IrOpcode::kI16x8AddSatS:
+      return MarkAsSimd128(node), VisitI16x8AddSatS(node);
+    case IrOpcode::kI16x8AddHoriz:
+      return MarkAsSimd128(node), VisitI16x8AddHoriz(node);
+    case IrOpcode::kI16x8Sub:
+      return MarkAsSimd128(node), VisitI16x8Sub(node);
+    case IrOpcode::kI16x8SubSatS:
+      return MarkAsSimd128(node), VisitI16x8SubSatS(node);
+    case IrOpcode::kI16x8Mul:
+      return MarkAsSimd128(node), VisitI16x8Mul(node);
+    case IrOpcode::kI16x8MinS:
+      return MarkAsSimd128(node), VisitI16x8MinS(node);
+    case IrOpcode::kI16x8MaxS:
+      return MarkAsSimd128(node), VisitI16x8MaxS(node);
+    case IrOpcode::kI16x8Eq:
+      return MarkAsSimd128(node), VisitI16x8Eq(node);
+    case IrOpcode::kI16x8Ne:
+      return MarkAsSimd128(node), VisitI16x8Ne(node);
+    case IrOpcode::kI16x8GtS:
+      return MarkAsSimd128(node), VisitI16x8GtS(node);
+    case IrOpcode::kI16x8GeS:
+      return MarkAsSimd128(node), VisitI16x8GeS(node);
+    case IrOpcode::kI16x8UConvertI8x16Low:
+      return MarkAsSimd128(node), VisitI16x8UConvertI8x16Low(node);
+    case IrOpcode::kI16x8UConvertI8x16High:
+      return MarkAsSimd128(node), VisitI16x8UConvertI8x16High(node);
+    case IrOpcode::kI16x8ShrU:
+      return MarkAsSimd128(node), VisitI16x8ShrU(node);
+    case IrOpcode::kI16x8UConvertI32x4:
+      return MarkAsSimd128(node), VisitI16x8UConvertI32x4(node);
+    case IrOpcode::kI16x8AddSatU:
+      return MarkAsSimd128(node), VisitI16x8AddSatU(node);
+    case IrOpcode::kI16x8SubSatU:
+      return MarkAsSimd128(node), VisitI16x8SubSatU(node);
+    case IrOpcode::kI16x8MinU:
+      return MarkAsSimd128(node), VisitI16x8MinU(node);
+    case IrOpcode::kI16x8MaxU:
+      return MarkAsSimd128(node), VisitI16x8MaxU(node);
+    case IrOpcode::kI16x8GtU:
+      return MarkAsSimd128(node), VisitI16x8GtU(node);
+    case IrOpcode::kI16x8GeU:
+      return MarkAsSimd128(node), VisitI16x8GeU(node);
+    case IrOpcode::kI16x8RoundingAverageU:
+      return MarkAsSimd128(node), VisitI16x8RoundingAverageU(node);
+    case IrOpcode::kI16x8Q15MulRSatS:
+      return MarkAsSimd128(node), VisitI16x8Q15MulRSatS(node);
+    case IrOpcode::kI16x8Abs:
+      return MarkAsSimd128(node), VisitI16x8Abs(node);
+    case IrOpcode::kI16x8BitMask:
+      return MarkAsWord32(node), VisitI16x8BitMask(node);
+    case IrOpcode::kI16x8ExtMulLowI8x16S:
+      return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16S(node);
+    case IrOpcode::kI16x8ExtMulHighI8x16S:
+      return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16S(node);
+    case IrOpcode::kI16x8ExtMulLowI8x16U:
+      return MarkAsSimd128(node), VisitI16x8ExtMulLowI8x16U(node);
+    case IrOpcode::kI16x8ExtMulHighI8x16U:
+      return MarkAsSimd128(node), VisitI16x8ExtMulHighI8x16U(node);
+    case IrOpcode::kI16x8SignSelect:
+      return MarkAsSimd128(node), VisitI16x8SignSelect(node);
+    case IrOpcode::kI16x8ExtAddPairwiseI8x16S:
+      return MarkAsSimd128(node), VisitI16x8ExtAddPairwiseI8x16S(node);
+    case IrOpcode::kI16x8ExtAddPairwiseI8x16U:
+      return MarkAsSimd128(node), VisitI16x8ExtAddPairwiseI8x16U(node);
+    case IrOpcode::kI8x16Splat:
+      return MarkAsSimd128(node), VisitI8x16Splat(node);
+    case IrOpcode::kI8x16ExtractLaneU:
+      return MarkAsWord32(node), VisitI8x16ExtractLaneU(node);
+    case IrOpcode::kI8x16ExtractLaneS:
+      return MarkAsWord32(node), VisitI8x16ExtractLaneS(node);
+    case IrOpcode::kI8x16ReplaceLane:
+      return MarkAsSimd128(node), VisitI8x16ReplaceLane(node);
+    case IrOpcode::kI8x16Neg:
+      return MarkAsSimd128(node), VisitI8x16Neg(node);
+    case IrOpcode::kI8x16Shl:
+      return MarkAsSimd128(node), VisitI8x16Shl(node);
+    case IrOpcode::kI8x16ShrS:
+      return MarkAsSimd128(node), VisitI8x16ShrS(node);
+    case IrOpcode::kI8x16SConvertI16x8:
+      return MarkAsSimd128(node), VisitI8x16SConvertI16x8(node);
+    case IrOpcode::kI8x16Add:
+      return MarkAsSimd128(node), VisitI8x16Add(node);
+    case IrOpcode::kI8x16AddSatS:
+      return MarkAsSimd128(node), VisitI8x16AddSatS(node);
+    case IrOpcode::kI8x16Sub:
+      return MarkAsSimd128(node), VisitI8x16Sub(node);
+    case IrOpcode::kI8x16SubSatS:
+      return MarkAsSimd128(node), VisitI8x16SubSatS(node);
+    case IrOpcode::kI8x16Mul:
+      return MarkAsSimd128(node), VisitI8x16Mul(node);
+    case IrOpcode::kI8x16MinS:
+      return MarkAsSimd128(node), VisitI8x16MinS(node);
+    case IrOpcode::kI8x16MaxS:
+      return MarkAsSimd128(node), VisitI8x16MaxS(node);
+    case IrOpcode::kI8x16Eq:
+      return MarkAsSimd128(node), VisitI8x16Eq(node);
+    case IrOpcode::kI8x16Ne:
+      return MarkAsSimd128(node), VisitI8x16Ne(node);
+    case IrOpcode::kI8x16GtS:
+      return MarkAsSimd128(node), VisitI8x16GtS(node);
+    case IrOpcode::kI8x16GeS:
+      return MarkAsSimd128(node), VisitI8x16GeS(node);
+    case IrOpcode::kI8x16ShrU:
+      return MarkAsSimd128(node), VisitI8x16ShrU(node);
+    case IrOpcode::kI8x16UConvertI16x8:
+      return MarkAsSimd128(node), VisitI8x16UConvertI16x8(node);
+    case IrOpcode::kI8x16AddSatU:
+      return MarkAsSimd128(node), VisitI8x16AddSatU(node);
+    case IrOpcode::kI8x16SubSatU:
+      return MarkAsSimd128(node), VisitI8x16SubSatU(node);
+    case IrOpcode::kI8x16MinU:
+      return MarkAsSimd128(node), VisitI8x16MinU(node);
+    case IrOpcode::kI8x16MaxU:
+      return MarkAsSimd128(node), VisitI8x16MaxU(node);
+    case IrOpcode::kI8x16GtU:
+      return MarkAsSimd128(node), VisitI8x16GtU(node);
+    case IrOpcode::kI8x16GeU:
+      return MarkAsSimd128(node), VisitI8x16GeU(node);
+    case IrOpcode::kI8x16RoundingAverageU:
+      return MarkAsSimd128(node), VisitI8x16RoundingAverageU(node);
+    case IrOpcode::kI8x16Popcnt:
+      return MarkAsSimd128(node), VisitI8x16Popcnt(node);
+    case IrOpcode::kI8x16Abs:
+      return MarkAsSimd128(node), VisitI8x16Abs(node);
+    case IrOpcode::kI8x16BitMask:
+      return MarkAsWord32(node), VisitI8x16BitMask(node);
+    case IrOpcode::kI8x16SignSelect:
+      return MarkAsSimd128(node), VisitI8x16SignSelect(node);
+    case IrOpcode::kS128Const:
+      return MarkAsSimd128(node), VisitS128Const(node);
+    case IrOpcode::kS128Zero:
+      return MarkAsSimd128(node), VisitS128Zero(node);
+    case IrOpcode::kS128And:
+      return MarkAsSimd128(node), VisitS128And(node);
+    case IrOpcode::kS128Or:
+      return MarkAsSimd128(node), VisitS128Or(node);
+    case IrOpcode::kS128Xor:
+      return MarkAsSimd128(node), VisitS128Xor(node);
+    case IrOpcode::kS128Not:
+      return MarkAsSimd128(node), VisitS128Not(node);
+    case IrOpcode::kS128Select:
+      return MarkAsSimd128(node), VisitS128Select(node);
+    case IrOpcode::kS128AndNot:
+      return MarkAsSimd128(node), VisitS128AndNot(node);
+    case IrOpcode::kI8x16Swizzle:
+      return MarkAsSimd128(node), VisitI8x16Swizzle(node);
+    case IrOpcode::kI8x16Shuffle:
+      return MarkAsSimd128(node), VisitI8x16Shuffle(node);
+    case IrOpcode::kV32x4AnyTrue:
+      return MarkAsWord32(node), VisitV32x4AnyTrue(node);
+    case IrOpcode::kV32x4AllTrue:
+      return MarkAsWord32(node), VisitV32x4AllTrue(node);
+    case IrOpcode::kV16x8AnyTrue:
+      return MarkAsWord32(node), VisitV16x8AnyTrue(node);
+    case IrOpcode::kV16x8AllTrue:
+      return MarkAsWord32(node), VisitV16x8AllTrue(node);
+    case IrOpcode::kV8x16AnyTrue:
+      return MarkAsWord32(node), VisitV8x16AnyTrue(node);
+    case IrOpcode::kV8x16AllTrue:
+      return MarkAsWord32(node), VisitV8x16AllTrue(node);
+    default:
+      FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
+            node->op()->mnemonic(), node->id());
+      break;
+  }
+}
+
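+// When poison-on-speculation is enabled, mask the node's value with the
+// speculation poison register; otherwise lower the node to a plain identity
+// move.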
+void InstructionSelector::EmitWordPoisonOnSpeculation(Node* node) {
+  if (poisoning_level_ != PoisoningMitigationLevel::kDontPoison) {
+    OperandGenerator g(this);
+    Node* input_node = NodeProperties::GetValueInput(node, 0);
+    InstructionOperand input = g.UseRegister(input_node);
+    InstructionOperand output = g.DefineSameAsFirst(node);
+    Emit(kArchWordPoisonOnSpeculation, output, input);
+  } else {
+    EmitIdentity(node);
+  }
+}
+
+void InstructionSelector::VisitWord32PoisonOnSpeculation(Node* node) {
+  EmitWordPoisonOnSpeculation(node);
+}
+
+void InstructionSelector::VisitWord64PoisonOnSpeculation(Node* node) {
+  EmitWordPoisonOnSpeculation(node);
+}
+
+void InstructionSelector::VisitTaggedPoisonOnSpeculation(Node* node) {
+  EmitWordPoisonOnSpeculation(node);
+}
+
+void InstructionSelector::VisitStackPointerGreaterThan(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kStackPointerGreaterThanCondition, node);
+  VisitStackPointerGreaterThan(node, &cont);
+}
+
+void InstructionSelector::VisitLoadStackCheckOffset(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchStackCheckOffset, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitLoadFramePointer(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchFramePointer, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitLoadParentFramePointer(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchParentFramePointer, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitFloat64Acos(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Acos);
+}
+
+void InstructionSelector::VisitFloat64Acosh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Acosh);
+}
+
+void InstructionSelector::VisitFloat64Asin(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Asin);
+}
+
+void InstructionSelector::VisitFloat64Asinh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Asinh);
+}
+
+void InstructionSelector::VisitFloat64Atan(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Atan);
+}
+
+void InstructionSelector::VisitFloat64Atanh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Atanh);
+}
+
+void InstructionSelector::VisitFloat64Atan2(Node* node) {
+  VisitFloat64Ieee754Binop(node, kIeee754Float64Atan2);
+}
+
+void InstructionSelector::VisitFloat64Cbrt(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Cbrt);
+}
+
+void InstructionSelector::VisitFloat64Cos(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Cos);
+}
+
+void InstructionSelector::VisitFloat64Cosh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Cosh);
+}
+
+void InstructionSelector::VisitFloat64Exp(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Exp);
+}
+
+void InstructionSelector::VisitFloat64Expm1(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Expm1);
+}
+
+void InstructionSelector::VisitFloat64Log(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Log);
+}
+
+void InstructionSelector::VisitFloat64Log1p(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Log1p);
+}
+
+void InstructionSelector::VisitFloat64Log2(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Log2);
+}
+
+void InstructionSelector::VisitFloat64Log10(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Log10);
+}
+
+void InstructionSelector::VisitFloat64Pow(Node* node) {
+  VisitFloat64Ieee754Binop(node, kIeee754Float64Pow);
+}
+
+void InstructionSelector::VisitFloat64Sin(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Sin);
+}
+
+void InstructionSelector::VisitFloat64Sinh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Sinh);
+}
+
+void InstructionSelector::VisitFloat64Tan(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Tan);
+}
+
+void InstructionSelector::VisitFloat64Tanh(Node* node) {
+  VisitFloat64Ieee754Unop(node, kIeee754Float64Tanh);
+}
+
+void InstructionSelector::EmitTableSwitch(
+    const SwitchInfo& sw, InstructionOperand const& index_operand) {
+  OperandGenerator g(this);
+  size_t input_count = 2 + sw.value_range();
+  DCHECK_LE(sw.value_range(), std::numeric_limits<size_t>::max() - 2);
+  auto* inputs = zone()->NewArray<InstructionOperand>(input_count);
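+  // Layout built below: inputs[0] is the switch index, inputs[1] the default
+  // label, and inputs[2 + (case value - min_value)] the label for that case;
+  // slots with no case keep the default label.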
+  inputs[0] = index_operand;
+  InstructionOperand default_operand = g.Label(sw.default_branch());
+  std::fill(&inputs[1], &inputs[input_count], default_operand);
+  for (const CaseInfo& c : sw.CasesUnsorted()) {
+    size_t value = c.value - sw.min_value();
+    DCHECK_LE(0u, value);
+    DCHECK_LT(value + 2, input_count);
+    inputs[value + 2] = g.Label(c.branch);
+  }
+  Emit(kArchTableSwitch, 0, nullptr, input_count, inputs, 0, nullptr);
+}
+
+void InstructionSelector::EmitBinarySearchSwitch(
+    const SwitchInfo& sw, InstructionOperand const& value_operand) {
+  OperandGenerator g(this);
+  size_t input_count = 2 + sw.case_count() * 2;
+  DCHECK_LE(sw.case_count(), (std::numeric_limits<size_t>::max() - 2) / 2);
+  auto* inputs = zone()->NewArray<InstructionOperand>(input_count);
+  inputs[0] = value_operand;
+  inputs[1] = g.Label(sw.default_branch());
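+  // Layout built below: inputs[0] is the value to switch on, inputs[1] the
+  // default label, followed by (case value, case label) pairs sorted by value
+  // for the binary search.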
+  std::vector<CaseInfo> cases = sw.CasesSortedByValue();
+  for (size_t index = 0; index < cases.size(); ++index) {
+    const CaseInfo& c = cases[index];
+    inputs[index * 2 + 2 + 0] = g.TempImmediate(c.value);
+    inputs[index * 2 + 2 + 1] = g.Label(c.branch);
+  }
+  Emit(kArchBinarySearchSwitch, 0, nullptr, input_count, inputs, 0, nullptr);
+}
+
+void InstructionSelector::VisitBitcastTaggedToWord(Node* node) {
+  EmitIdentity(node);
+}
+
+void InstructionSelector::VisitBitcastWordToTagged(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(node->InputAt(0)));
+}
+
+// 32 bit targets do not implement the following instructions.
+#if V8_TARGET_ARCH_32_BIT
+
+void InstructionSelector::VisitWord64And(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Or(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Xor(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Shl(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Shr(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Sar(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Rol(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Ror(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Clz(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Ctz(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64ReverseBits(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord64Popcnt(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64Equal(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64Add(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64AddWithOverflow(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitInt64Sub(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitInt64Mul(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64Div(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64LessThan(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitUint64Div(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt64Mod(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitUint64LessThan(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitUint64Mod(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeInt64ToFloat64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTruncateFloat64ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat32(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat32(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitBitcastFloat64ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitBitcastInt64ToFloat64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
+  UNIMPLEMENTED();
+}
+#endif  // V8_TARGET_ARCH_32_BIT
+
+// 64 bit targets do not implement the following instructions.
+#if V8_TARGET_ARCH_64_BIT
+void InstructionSelector::VisitInt32PairAdd(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt32PairSub(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitInt32PairMul(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord32PairShl(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
+#endif  // V8_TARGET_ARCH_64_BIT
+
+#if !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS
+void InstructionSelector::VisitWord32AtomicPairLoad(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairStore(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairAdd(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairSub(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairAnd(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairOr(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairXor(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairExchange(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
+  UNIMPLEMENTED();
+}
+#endif  // !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS
+
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS64 && \
+    !V8_TARGET_ARCH_S390 && !V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord64AtomicAdd(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicSub(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicAnd(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicOr(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicXor(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  UNIMPLEMENTED();
+}
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 &&
+        // !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390 &&
+        // !V8_TARGET_ARCH_PPC64
+
+#if !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
+// This is only needed on 32-bit to split the 64-bit value into two operands.
+void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
+  UNIMPLEMENTED();
+}
+#endif  // !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM
+
+#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
+#if !V8_TARGET_ARCH_ARM64
+#if !V8_TARGET_ARCH_MIPS64
+void InstructionSelector::VisitI64x2Splat(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_MIPS64
+void InstructionSelector::VisitI64x2Eq(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Qfma(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF64x2Qfms(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Qfma(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_ARM64
+#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X
+
+#if !V8_TARGET_ARCH_ARM64
+// TODO(v8:10971) Prototype i16x8.q15mulr_sat_s
+void InstructionSelector::VisitI16x8Q15MulRSatS(Node* node) { UNIMPLEMENTED(); }
+
+// TODO(v8:10972) Prototype i64x2 widen i32x4.
+void InstructionSelector::VisitI64x2SConvertI32x4Low(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitI64x2SConvertI32x4High(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitI64x2UConvertI32x4Low(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitI64x2UConvertI32x4High(Node* node) {
+  UNIMPLEMENTED();
+}
+
+// TODO(v8:11002) Prototype i8x16.popcnt.
+void InstructionSelector::VisitI8x16Popcnt(Node* node) { UNIMPLEMENTED(); }
+
+// TODO(v8:11008) Prototype extended multiplication.
+void InstructionSelector::VisitI64x2ExtMulLowI32x4S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI64x2ExtMulHighI32x4S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI64x2ExtMulLowI32x4U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI64x2ExtMulHighI32x4U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI32x4ExtMulLowI16x8S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI32x4ExtMulHighI16x8S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI32x4ExtMulLowI16x8U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI32x4ExtMulHighI16x8U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtMulLowI8x16S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtMulHighI8x16S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtMulLowI8x16U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtMulHighI8x16U(Node* node) {
+  UNIMPLEMENTED();
+}
+
+// TODO(v8:11086) Prototype extended pairwise add.
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
+  UNIMPLEMENTED();
+}
+void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
+  UNIMPLEMENTED();
+}
+#endif  // !V8_TARGET_ARCH_ARM64
+
+#if !V8_TARGET_ARCH_X64
+// TODO(v8:10975): Prototyping load lane and store lane.
+void InstructionSelector::VisitLoadLane(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitStoreLane(Node* node) { UNIMPLEMENTED(); }
+
+// TODO(v8:10997) Prototype i64x2.bitmask.
+void InstructionSelector::VisitI64x2BitMask(Node* node) { UNIMPLEMENTED(); }
+
+// TODO(v8:10983) Prototyping sign select.
+void InstructionSelector::VisitI8x16SignSelect(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI16x8SignSelect(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI32x4SignSelect(Node* node) { UNIMPLEMENTED(); }
+void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_X64
+
+void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
+
+void InstructionSelector::VisitParameter(Node* node) {
+  OperandGenerator g(this);
+  int index = ParameterIndexOf(node->op());
+  InstructionOperand op =
+      linkage()->ParameterHasSecondaryLocation(index)
+          ? g.DefineAsDualLocation(
+                node, linkage()->GetParameterLocation(index),
+                linkage()->GetParameterSecondaryLocation(index))
+          : g.DefineAsLocation(node, linkage()->GetParameterLocation(index));
+
+  Emit(kArchNop, op);
+}
+
+namespace {
+
+LinkageLocation ExceptionLocation() {
+  return LinkageLocation::ForRegister(kReturnRegister0.code(),
+                                      MachineType::IntPtr());
+}
+
+constexpr InstructionCode EncodeCallDescriptorFlags(
+    InstructionCode opcode, CallDescriptor::Flags flags) {
+  // Note: Not all bits of `flags` are preserved.
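+  // Only the low MiscField::kSize bits of `flags` fit into the opcode's
+  // MiscField; higher flag bits are dropped by the mask below.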
+  STATIC_ASSERT(CallDescriptor::kFlagsBitsEncodedInInstructionCode ==
+                MiscField::kSize);
+  CONSTEXPR_DCHECK(Instruction::IsCallWithDescriptorFlags(opcode));
+  return opcode | MiscField::encode(flags & MiscField::kMax);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitIfException(Node* node) {
+  OperandGenerator g(this);
+  DCHECK_EQ(IrOpcode::kCall, node->InputAt(1)->opcode());
+  Emit(kArchNop, g.DefineAsLocation(node, ExceptionLocation()));
+}
+
+void InstructionSelector::VisitOsrValue(Node* node) {
+  OperandGenerator g(this);
+  int index = OsrValueIndexOf(node->op());
+  Emit(kArchNop,
+       g.DefineAsLocation(node, linkage()->GetOsrValueLocation(index)));
+}
+
+void InstructionSelector::VisitPhi(Node* node) {
+  const int input_count = node->op()->ValueInputCount();
+  DCHECK_EQ(input_count, current_block_->PredecessorCount());
+  PhiInstruction* phi = instruction_zone()->New<PhiInstruction>(
+      instruction_zone(), GetVirtualRegister(node),
+      static_cast<size_t>(input_count));
+  sequence()
+      ->InstructionBlockAt(RpoNumber::FromInt(current_block_->rpo_number()))
+      ->AddPhi(phi);
+  for (int i = 0; i < input_count; ++i) {
+    Node* const input = node->InputAt(i);
+    MarkAsUsed(input);
+    phi->SetInput(static_cast<size_t>(i), GetVirtualRegister(input));
+  }
+}
+
+void InstructionSelector::VisitProjection(Node* node) {
+  OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  switch (value->opcode()) {
+    case IrOpcode::kInt32AddWithOverflow:
+    case IrOpcode::kInt32SubWithOverflow:
+    case IrOpcode::kInt32MulWithOverflow:
+    case IrOpcode::kInt64AddWithOverflow:
+    case IrOpcode::kInt64SubWithOverflow:
+    case IrOpcode::kTryTruncateFloat32ToInt64:
+    case IrOpcode::kTryTruncateFloat64ToInt64:
+    case IrOpcode::kTryTruncateFloat32ToUint64:
+    case IrOpcode::kTryTruncateFloat64ToUint64:
+    case IrOpcode::kInt32PairAdd:
+    case IrOpcode::kInt32PairSub:
+    case IrOpcode::kInt32PairMul:
+    case IrOpcode::kWord32PairShl:
+    case IrOpcode::kWord32PairShr:
+    case IrOpcode::kWord32PairSar:
+    case IrOpcode::kInt32AbsWithOverflow:
+    case IrOpcode::kInt64AbsWithOverflow:
+      if (ProjectionIndexOf(node->op()) == 0u) {
+        Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value));
+      } else {
+        DCHECK_EQ(1u, ProjectionIndexOf(node->op()));
+        MarkAsUsed(value);
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+void InstructionSelector::VisitConstant(Node* node) {
+  // We must emit a NOP here because every live range needs a defining
+  // instruction in the register allocator.
+  OperandGenerator g(this);
+  Emit(kArchNop, g.DefineAsConstant(node));
+}
+
+void InstructionSelector::UpdateMaxPushedArgumentCount(size_t count) {
+  *max_pushed_argument_count_ = std::max(count, *max_pushed_argument_count_);
+}
+
+void InstructionSelector::VisitCall(Node* node, BasicBlock* handler) {
+  OperandGenerator g(this);
+  auto call_descriptor = CallDescriptorOf(node->op());
+
+  if (call_descriptor->NeedsCallerSavedRegisters()) {
+    SaveFPRegsMode mode = call_descriptor->NeedsCallerSavedFPRegisters()
+                              ? kSaveFPRegs
+                              : kDontSaveFPRegs;
+    Emit(kArchSaveCallerRegisters | MiscField::encode(static_cast<int>(mode)),
+         g.NoOutput());
+  }
+
+  FrameStateDescriptor* frame_state_descriptor = nullptr;
+  if (call_descriptor->NeedsFrameState()) {
+    frame_state_descriptor = GetFrameStateDescriptor(
+        node->InputAt(static_cast<int>(call_descriptor->InputCount())));
+  }
+
+  CallBuffer buffer(zone(), call_descriptor, frame_state_descriptor);
+  CallDescriptor::Flags flags = call_descriptor->flags();
+
+  // Compute InstructionOperands for inputs and outputs.
+  // TODO(turbofan): on some architectures it's probably better to use
+  // the code object in a register if there are multiple uses of it.
+  // Improve constant pool and the heuristics in the register allocator
+  // for where to emit constants.
+  CallBufferFlags call_buffer_flags(kCallCodeImmediate | kCallAddressImmediate);
+  InitializeCallBuffer(node, &buffer, call_buffer_flags, false);
+
+  EmitPrepareArguments(&buffer.pushed_nodes, call_descriptor, node);
+  UpdateMaxPushedArgumentCount(buffer.pushed_nodes.size());
+
+  // Pass label of exception handler block.
+  if (handler) {
+    DCHECK_EQ(IrOpcode::kIfException, handler->front()->opcode());
+    flags |= CallDescriptor::kHasExceptionHandler;
+    buffer.instruction_args.push_back(g.Label(handler));
+  }
+
+  // Select the appropriate opcode based on the call type.
+  InstructionCode opcode;
+  switch (call_descriptor->kind()) {
+    case CallDescriptor::kCallAddress: {
+      int misc_field = static_cast<int>(call_descriptor->ParameterCount());
+#if ABI_USES_FUNCTION_DESCRIPTORS
+      // The highest misc_field bit is used on AIX to indicate whether a
+      // CFunction call has a function descriptor or not.
+      STATIC_ASSERT(MiscField::kSize == kHasFunctionDescriptorBitShift + 1);
+      if (!call_descriptor->NoFunctionDescriptor()) {
+        misc_field |= 1 << kHasFunctionDescriptorBitShift;
+      }
+#endif
+      opcode = kArchCallCFunction | MiscField::encode(misc_field);
+      break;
+    }
+    case CallDescriptor::kCallCodeObject:
+      opcode = EncodeCallDescriptorFlags(kArchCallCodeObject, flags);
+      break;
+    case CallDescriptor::kCallJSFunction:
+      opcode = EncodeCallDescriptorFlags(kArchCallJSFunction, flags);
+      break;
+    case CallDescriptor::kCallWasmCapiFunction:
+    case CallDescriptor::kCallWasmFunction:
+    case CallDescriptor::kCallWasmImportWrapper:
+      opcode = EncodeCallDescriptorFlags(kArchCallWasmFunction, flags);
+      break;
+    case CallDescriptor::kCallBuiltinPointer:
+      opcode = EncodeCallDescriptorFlags(kArchCallBuiltinPointer, flags);
+      break;
+  }
+
+  // Emit the call instruction.
+  size_t const output_count = buffer.outputs.size();
+  auto* outputs = output_count ? &buffer.outputs.front() : nullptr;
+  Instruction* call_instr =
+      Emit(opcode, output_count, outputs, buffer.instruction_args.size(),
+           &buffer.instruction_args.front());
+  if (instruction_selection_failed()) return;
+  call_instr->MarkAsCall();
+
+  EmitPrepareResults(&(buffer.output_nodes), call_descriptor, node);
+
+  if (call_descriptor->NeedsCallerSavedRegisters()) {
+    SaveFPRegsMode mode = call_descriptor->NeedsCallerSavedFPRegisters()
+                              ? kSaveFPRegs
+                              : kDontSaveFPRegs;
+    Emit(
+        kArchRestoreCallerRegisters | MiscField::encode(static_cast<int>(mode)),
+        g.NoOutput());
+  }
+}
+
+void InstructionSelector::VisitTailCall(Node* node) {
+  OperandGenerator g(this);
+  auto call_descriptor = CallDescriptorOf(node->op());
+
+  CallDescriptor* caller = linkage()->GetIncomingDescriptor();
+  const CallDescriptor* callee = CallDescriptorOf(node->op());
+  DCHECK(caller->CanTailCall(callee));
+  const int stack_param_delta = callee->GetStackParameterDelta(caller);
+  CallBuffer buffer(zone(), call_descriptor, nullptr);
+
+  // Compute InstructionOperands for inputs and outputs.
+  CallBufferFlags flags(kCallCodeImmediate | kCallTail);
+  if (IsTailCallAddressImmediate()) {
+    flags |= kCallAddressImmediate;
+  }
+  if (callee->flags() & CallDescriptor::kFixedTargetRegister) {
+    flags |= kCallFixedTargetRegister;
+  }
+  InitializeCallBuffer(node, &buffer, flags, true, stack_param_delta);
+  UpdateMaxPushedArgumentCount(stack_param_delta);
+
+  // Select the appropriate opcode based on the call type.
+  InstructionCode opcode;
+  InstructionOperandVector temps(zone());
+  if (caller->IsJSFunctionCall()) {
+    switch (call_descriptor->kind()) {
+      case CallDescriptor::kCallCodeObject:
+        opcode = kArchTailCallCodeObjectFromJSFunction;
+        break;
+      default:
+        UNREACHABLE();
+    }
+    int temps_count = GetTempsCountForTailCallFromJSFunction();
+    for (int i = 0; i < temps_count; i++) {
+      temps.push_back(g.TempRegister());
+    }
+  } else {
+    switch (call_descriptor->kind()) {
+      case CallDescriptor::kCallCodeObject:
+        opcode = kArchTailCallCodeObject;
+        break;
+      case CallDescriptor::kCallAddress:
+        opcode = kArchTailCallAddress;
+        break;
+      case CallDescriptor::kCallWasmFunction:
+        opcode = kArchTailCallWasm;
+        break;
+      default:
+        UNREACHABLE();
+    }
+  }
+  opcode = EncodeCallDescriptorFlags(opcode, call_descriptor->flags());
+
+  Emit(kArchPrepareTailCall, g.NoOutput());
+
+  // Add an immediate operand that represents the first slot that is unused
+  // with respect to the stack pointer that has been updated for the tail call
+  // instruction. This is used by backends that need to pad arguments for stack
+  // alignment, in order to store an optional slot of padding above the
+  // arguments.
+  const int optional_padding_slot = callee->GetFirstUnusedStackSlot();
+  buffer.instruction_args.push_back(g.TempImmediate(optional_padding_slot));
+
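+  // The second immediate is the first stack slot left unused after the tail
+  // call: the return address slot(s) plus the stack parameter delta.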
+  const int first_unused_stack_slot =
+      kReturnAddressStackSlotCount + stack_param_delta;
+  buffer.instruction_args.push_back(g.TempImmediate(first_unused_stack_slot));
+
+  // Emit the tailcall instruction.
+  Emit(opcode, 0, nullptr, buffer.instruction_args.size(),
+       &buffer.instruction_args.front(), temps.size(),
+       temps.empty() ? nullptr : &temps.front());
+}
+
+void InstructionSelector::VisitGoto(BasicBlock* target) {
+  // Emit an unconditional jump to the target block.
+  OperandGenerator g(this);
+  Emit(kArchJmp, g.NoOutput(), g.Label(target));
+}
+
+void InstructionSelector::VisitReturn(Node* ret) {
+  OperandGenerator g(this);
+  const int input_count = linkage()->GetIncomingDescriptor()->ReturnCount() == 0
+                              ? 1
+                              : ret->op()->ValueInputCount();
+  DCHECK_GE(input_count, 1);
+  auto value_locations = zone()->NewArray<InstructionOperand>(input_count);
+  Node* pop_count = ret->InputAt(0);
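+  // The first input of the return is its pop count; emit it as an immediate
+  // when it is a constant, otherwise keep it in a register.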
+  value_locations[0] = (pop_count->opcode() == IrOpcode::kInt32Constant ||
+                        pop_count->opcode() == IrOpcode::kInt64Constant)
+                           ? g.UseImmediate(pop_count)
+                           : g.UseRegister(pop_count);
+  for (int i = 1; i < input_count; ++i) {
+    value_locations[i] =
+        g.UseLocation(ret->InputAt(i), linkage()->GetReturnLocation(i - 1));
+  }
+  Emit(kArchRet, 0, nullptr, input_count, value_locations);
+}
+
+void InstructionSelector::VisitBranch(Node* branch, BasicBlock* tbranch,
+                                      BasicBlock* fbranch) {
+  if (NeedsPoisoning(IsSafetyCheckOf(branch->op()))) {
+    FlagsContinuation cont =
+        FlagsContinuation::ForBranchAndPoison(kNotEqual, tbranch, fbranch);
+    VisitWordCompareZero(branch, branch->InputAt(0), &cont);
+  } else {
+    FlagsContinuation cont =
+        FlagsContinuation::ForBranch(kNotEqual, tbranch, fbranch);
+    VisitWordCompareZero(branch, branch->InputAt(0), &cont);
+  }
+}
+
+void InstructionSelector::VisitDeoptimizeIf(Node* node) {
+  DeoptimizeParameters p = DeoptimizeParametersOf(node->op());
+  if (NeedsPoisoning(p.is_safety_check())) {
+    FlagsContinuation cont = FlagsContinuation::ForDeoptimizeAndPoison(
+        kNotEqual, p.kind(), p.reason(), p.feedback(), node->InputAt(1));
+    VisitWordCompareZero(node, node->InputAt(0), &cont);
+  } else {
+    FlagsContinuation cont = FlagsContinuation::ForDeoptimize(
+        kNotEqual, p.kind(), p.reason(), p.feedback(), node->InputAt(1));
+    VisitWordCompareZero(node, node->InputAt(0), &cont);
+  }
+}
+
+void InstructionSelector::VisitDeoptimizeUnless(Node* node) {
+  DeoptimizeParameters p = DeoptimizeParametersOf(node->op());
+  if (NeedsPoisoning(p.is_safety_check())) {
+    FlagsContinuation cont = FlagsContinuation::ForDeoptimizeAndPoison(
+        kEqual, p.kind(), p.reason(), p.feedback(), node->InputAt(1));
+    VisitWordCompareZero(node, node->InputAt(0), &cont);
+  } else {
+    FlagsContinuation cont = FlagsContinuation::ForDeoptimize(
+        kEqual, p.kind(), p.reason(), p.feedback(), node->InputAt(1));
+    VisitWordCompareZero(node, node->InputAt(0), &cont);
+  }
+}
+
+void InstructionSelector::VisitTrapIf(Node* node, TrapId trap_id) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForTrap(kNotEqual, trap_id, node->InputAt(1));
+  VisitWordCompareZero(node, node->InputAt(0), &cont);
+}
+
+void InstructionSelector::VisitTrapUnless(Node* node, TrapId trap_id) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForTrap(kEqual, trap_id, node->InputAt(1));
+  VisitWordCompareZero(node, node->InputAt(0), &cont);
+}
+
+void InstructionSelector::EmitIdentity(Node* node) {
+  MarkAsUsed(node->InputAt(0));
+  SetRename(node, node->InputAt(0));
+}
+
+void InstructionSelector::VisitDeoptimize(DeoptimizeKind kind,
+                                          DeoptimizeReason reason,
+                                          FeedbackSource const& feedback,
+                                          Node* frame_state) {
+  InstructionOperandVector args(instruction_zone());
+  AppendDeoptimizeArguments(&args, kind, reason, feedback, frame_state);
+  Emit(kArchDeoptimize, 0, nullptr, args.size(), &args.front(), 0, nullptr);
+}
+
+void InstructionSelector::VisitThrow(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchThrowTerminator, g.NoOutput());
+}
+
+void InstructionSelector::VisitDebugBreak(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchDebugBreak, g.NoOutput());
+}
+
+void InstructionSelector::VisitUnreachable(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchDebugBreak, g.NoOutput());
+}
+
+void InstructionSelector::VisitStaticAssert(Node* node) {
+  Node* asserted = node->InputAt(0);
+  UnparkedScopeIfNeeded scope(broker_);
+  AllowHandleDereference allow_handle_dereference;
+  asserted->Print(4);
+  FATAL(
+      "Expected Turbofan static assert to hold, but got non-true input:\n  %s",
+      StaticAssertSourceOf(node->op()));
+}
+
+void InstructionSelector::VisitDeadValue(Node* node) {
+  OperandGenerator g(this);
+  MarkAsRepresentation(DeadValueRepresentationOf(node->op()), node);
+  Emit(kArchDebugBreak, g.DefineAsConstant(node));
+}
+
+void InstructionSelector::VisitComment(Node* node) {
+  OperandGenerator g(this);
+  InstructionOperand operand(g.UseImmediate(node));
+  Emit(kArchComment, 0, nullptr, 1, &operand);
+}
+
+void InstructionSelector::VisitUnsafePointerAdd(Node* node) {
+#if V8_TARGET_ARCH_64_BIT
+  VisitInt64Add(node);
+#else   // V8_TARGET_ARCH_64_BIT
+  VisitInt32Add(node);
+#endif  // V8_TARGET_ARCH_64_BIT
+}
+
+void InstructionSelector::VisitRetain(Node* node) {
+  OperandGenerator g(this);
+  Emit(kArchNop, g.NoOutput(), g.UseAny(node->InputAt(0)));
+}
+
+bool InstructionSelector::CanProduceSignalingNaN(Node* node) {
+  // TODO(jarin) Improve the heuristic here.
+  if (node->opcode() == IrOpcode::kFloat64Add ||
+      node->opcode() == IrOpcode::kFloat64Sub ||
+      node->opcode() == IrOpcode::kFloat64Mul) {
+    return false;
+  }
+  return true;
+}
+
+#if V8_TARGET_ARCH_64_BIT
+bool InstructionSelector::ZeroExtendsWord32ToWord64(Node* node,
+                                                    int recursion_depth) {
+  // To compute whether a Node sets its upper 32 bits to zero, there are three
+  // cases.
+  // 1. Phi node, with a computed result already available in phi_states_:
+  //    Read the value from phi_states_.
+  // 2. Phi node, with no result available in phi_states_ yet:
+  //    Recursively check its inputs, and store the result in phi_states_.
+  // 3. Anything else:
+  //    Call the architecture-specific ZeroExtendsWord32ToWord64NoPhis.
+
+  // Limit recursion depth to avoid the possibility of stack overflow on very
+  // large functions.
+  const int kMaxRecursionDepth = 100;
+
+  if (node->opcode() == IrOpcode::kPhi) {
+    Upper32BitsState current = phi_states_[node->id()];
+    if (current != Upper32BitsState::kNotYetChecked) {
+      return current == Upper32BitsState::kUpperBitsGuaranteedZero;
+    }
+
+    // If further recursion is prevented, we can't make any assumptions about
+    // the output of this phi node.
+    if (recursion_depth >= kMaxRecursionDepth) {
+      return false;
+    }
+
+    // Mark the current node so that we skip it if we recursively visit it
+    // again. Or, said differently, we compute a largest fixed-point so we can
+    // be optimistic when we hit cycles.
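+    // For example, when a loop phi eventually feeds back into itself through
+    // other phis, the recursive visit below reads this optimistic state
+    // instead of recursing forever.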
+    phi_states_[node->id()] = Upper32BitsState::kUpperBitsGuaranteedZero;
+
+    int input_count = node->op()->ValueInputCount();
+    for (int i = 0; i < input_count; ++i) {
+      Node* input = NodeProperties::GetValueInput(node, i);
+      if (!ZeroExtendsWord32ToWord64(input, recursion_depth + 1)) {
+        phi_states_[node->id()] = Upper32BitsState::kNoGuarantee;
+        return false;
+      }
+    }
+
+    return true;
+  }
+  return ZeroExtendsWord32ToWord64NoPhis(node);
+}
+#endif  // V8_TARGET_ARCH_64_BIT
+
+namespace {
+
+FrameStateDescriptor* GetFrameStateDescriptorInternal(Zone* zone, Node* state) {
+  DCHECK_EQ(IrOpcode::kFrameState, state->opcode());
+  DCHECK_EQ(kFrameStateInputCount, state->InputCount());
+  const FrameStateInfo& state_info = FrameStateInfoOf(state->op());
+  int parameters = state_info.parameter_count();
+  int locals = state_info.local_count();
+  int stack = state_info.type() == FrameStateType::kInterpretedFunction ? 1 : 0;
+
+  FrameStateDescriptor* outer_state = nullptr;
+  Node* outer_node = state->InputAt(kFrameStateOuterStateInput);
+  if (outer_node->opcode() == IrOpcode::kFrameState) {
+    outer_state = GetFrameStateDescriptorInternal(zone, outer_node);
+  }
+
+  return zone->New<FrameStateDescriptor>(
+      zone, state_info.type(), state_info.bailout_id(),
+      state_info.state_combine(), parameters, locals, stack,
+      state_info.shared_info(), outer_state);
+}
+
+}  // namespace
+
+FrameStateDescriptor* InstructionSelector::GetFrameStateDescriptor(
+    Node* state) {
+  auto* desc = GetFrameStateDescriptorInternal(instruction_zone(), state);
+  *max_unoptimized_frame_height_ =
+      std::max(*max_unoptimized_frame_height_,
+               desc->total_conservative_frame_size_in_bytes());
+  return desc;
+}
+
+void InstructionSelector::CanonicalizeShuffle(Node* node, uint8_t* shuffle,
+                                              bool* is_swizzle) {
+  // Get raw shuffle indices.
+  memcpy(shuffle, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  bool needs_swap;
+  bool inputs_equal = GetVirtualRegister(node->InputAt(0)) ==
+                      GetVirtualRegister(node->InputAt(1));
+  wasm::SimdShuffle::CanonicalizeShuffle(inputs_equal, shuffle, &needs_swap,
+                                         is_swizzle);
+  if (needs_swap) {
+    SwapShuffleInputs(node);
+  }
+  // Duplicate the first input; on some architectures it is easiest to
+  // implement a swizzle as a shuffle, so the duplicated input might be used.
+  if (*is_swizzle) {
+    node->ReplaceInput(1, node->InputAt(0));
+  }
+}
+
+// static
+void InstructionSelector::SwapShuffleInputs(Node* node) {
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  node->ReplaceInput(0, input1);
+  node->ReplaceInput(1, input0);
+}
+
+bool InstructionSelector::NeedsPoisoning(IsSafetyCheck safety_check) const {
+  switch (poisoning_level_) {
+    case PoisoningMitigationLevel::kDontPoison:
+      return false;
+    case PoisoningMitigationLevel::kPoisonAll:
+      return safety_check != IsSafetyCheck::kNoSafetyCheck;
+    case PoisoningMitigationLevel::kPoisonCriticalOnly:
+      return safety_check == IsSafetyCheck::kCriticalSafetyCheck;
+  }
+  UNREACHABLE();
+}
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/instruction-selector.h b/src/compiler/backend/instruction-selector.h
new file mode 100644
index 0000000..fc16814
--- /dev/null
+++ b/src/compiler/backend/instruction-selector.h
@@ -0,0 +1,733 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_H_
+#define V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_H_
+
+#include <map>
+
+#include "src/codegen/cpu-features.h"
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction-scheduler.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/common-operator.h"
+#include "src/compiler/feedback-source.h"
+#include "src/compiler/linkage.h"
+#include "src/compiler/machine-operator.h"
+#include "src/compiler/node.h"
+#include "src/wasm/simd-shuffle.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+
+class TickCounter;
+
+namespace compiler {
+
+// Forward declarations.
+class BasicBlock;
+struct CallBuffer;  // TODO(bmeurer): Remove this.
+class Linkage;
+class OperandGenerator;
+class SwitchInfo;
+class StateObjectDeduplicator;
+
+// The flags continuation is a way to combine a branch or a materialization
+// of a boolean value with an instruction that sets the flags register.
+// The whole instruction is treated as a unit by the register allocator, and
+// thus no spills or moves can be introduced between the flags-setting
+// instruction and the branch or set it should be combined with.
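+//
+// Illustrative use (hypothetical opcode name): a backend visitor might write
+//   FlagsContinuation cont =
+//       FlagsContinuation::ForBranch(kEqual, true_block, false_block);
+//   selector->EmitWithContinuation(kSomeArchCompare, left, right, &cont);
+// so that the compare and the branch are selected as one instruction.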
+class FlagsContinuation final {
+ public:
+  FlagsContinuation() : mode_(kFlags_none) {}
+
+  // Creates a new flags continuation from the given condition and true/false
+  // blocks.
+  static FlagsContinuation ForBranch(FlagsCondition condition,
+                                     BasicBlock* true_block,
+                                     BasicBlock* false_block) {
+    return FlagsContinuation(kFlags_branch, condition, true_block, false_block);
+  }
+
+  static FlagsContinuation ForBranchAndPoison(FlagsCondition condition,
+                                              BasicBlock* true_block,
+                                              BasicBlock* false_block) {
+    return FlagsContinuation(kFlags_branch_and_poison, condition, true_block,
+                             false_block);
+  }
+
+  // Creates a new flags continuation for an eager deoptimization exit.
+  static FlagsContinuation ForDeoptimize(FlagsCondition condition,
+                                         DeoptimizeKind kind,
+                                         DeoptimizeReason reason,
+                                         FeedbackSource const& feedback,
+                                         Node* frame_state) {
+    return FlagsContinuation(kFlags_deoptimize, condition, kind, reason,
+                             feedback, frame_state);
+  }
+
+  // Creates a new flags continuation for an eager deoptimization exit that
+  // also applies speculation poisoning.
+  static FlagsContinuation ForDeoptimizeAndPoison(
+      FlagsCondition condition, DeoptimizeKind kind, DeoptimizeReason reason,
+      FeedbackSource const& feedback, Node* frame_state) {
+    return FlagsContinuation(kFlags_deoptimize_and_poison, condition, kind,
+                             reason, feedback, frame_state);
+  }
+
+  // Creates a new flags continuation for a boolean value.
+  static FlagsContinuation ForSet(FlagsCondition condition, Node* result) {
+    return FlagsContinuation(condition, result);
+  }
+
+  // Creates a new flags continuation for a wasm trap.
+  static FlagsContinuation ForTrap(FlagsCondition condition, TrapId trap_id,
+                                   Node* result) {
+    return FlagsContinuation(condition, trap_id, result);
+  }
+
+  bool IsNone() const { return mode_ == kFlags_none; }
+  bool IsBranch() const {
+    return mode_ == kFlags_branch || mode_ == kFlags_branch_and_poison;
+  }
+  bool IsDeoptimize() const {
+    return mode_ == kFlags_deoptimize || mode_ == kFlags_deoptimize_and_poison;
+  }
+  bool IsPoisoned() const {
+    return mode_ == kFlags_branch_and_poison ||
+           mode_ == kFlags_deoptimize_and_poison;
+  }
+  bool IsSet() const { return mode_ == kFlags_set; }
+  bool IsTrap() const { return mode_ == kFlags_trap; }
+  FlagsCondition condition() const {
+    DCHECK(!IsNone());
+    return condition_;
+  }
+  DeoptimizeKind kind() const {
+    DCHECK(IsDeoptimize());
+    return kind_;
+  }
+  DeoptimizeReason reason() const {
+    DCHECK(IsDeoptimize());
+    return reason_;
+  }
+  FeedbackSource const& feedback() const {
+    DCHECK(IsDeoptimize());
+    return feedback_;
+  }
+  Node* frame_state() const {
+    DCHECK(IsDeoptimize());
+    return frame_state_or_result_;
+  }
+  Node* result() const {
+    DCHECK(IsSet());
+    return frame_state_or_result_;
+  }
+  TrapId trap_id() const {
+    DCHECK(IsTrap());
+    return trap_id_;
+  }
+  BasicBlock* true_block() const {
+    DCHECK(IsBranch());
+    return true_block_;
+  }
+  BasicBlock* false_block() const {
+    DCHECK(IsBranch());
+    return false_block_;
+  }
+
+  void Negate() {
+    DCHECK(!IsNone());
+    condition_ = NegateFlagsCondition(condition_);
+  }
+
+  void Commute() {
+    DCHECK(!IsNone());
+    condition_ = CommuteFlagsCondition(condition_);
+  }
+
+  void Overwrite(FlagsCondition condition) { condition_ = condition; }
+
+  void OverwriteAndNegateIfEqual(FlagsCondition condition) {
+    DCHECK(condition_ == kEqual || condition_ == kNotEqual);
+    bool negate = condition_ == kEqual;
+    condition_ = condition;
+    if (negate) Negate();
+  }
+
+  void OverwriteUnsignedIfSigned() {
+    switch (condition_) {
+      case kSignedLessThan:
+        condition_ = kUnsignedLessThan;
+        break;
+      case kSignedLessThanOrEqual:
+        condition_ = kUnsignedLessThanOrEqual;
+        break;
+      case kSignedGreaterThan:
+        condition_ = kUnsignedGreaterThan;
+        break;
+      case kSignedGreaterThanOrEqual:
+        condition_ = kUnsignedGreaterThanOrEqual;
+        break;
+      default:
+        break;
+    }
+  }
+
+  // Encodes this flags continuation into the given opcode.
+  InstructionCode Encode(InstructionCode opcode) {
+    opcode |= FlagsModeField::encode(mode_);
+    if (mode_ != kFlags_none) {
+      opcode |= FlagsConditionField::encode(condition_);
+    }
+    return opcode;
+  }
+
+ private:
+  FlagsContinuation(FlagsMode mode, FlagsCondition condition,
+                    BasicBlock* true_block, BasicBlock* false_block)
+      : mode_(mode),
+        condition_(condition),
+        true_block_(true_block),
+        false_block_(false_block) {
+    DCHECK(mode == kFlags_branch || mode == kFlags_branch_and_poison);
+    DCHECK_NOT_NULL(true_block);
+    DCHECK_NOT_NULL(false_block);
+  }
+
+  FlagsContinuation(FlagsMode mode, FlagsCondition condition,
+                    DeoptimizeKind kind, DeoptimizeReason reason,
+                    FeedbackSource const& feedback, Node* frame_state)
+      : mode_(mode),
+        condition_(condition),
+        kind_(kind),
+        reason_(reason),
+        feedback_(feedback),
+        frame_state_or_result_(frame_state) {
+    DCHECK(mode == kFlags_deoptimize || mode == kFlags_deoptimize_and_poison);
+    DCHECK_NOT_NULL(frame_state);
+  }
+
+  FlagsContinuation(FlagsCondition condition, Node* result)
+      : mode_(kFlags_set),
+        condition_(condition),
+        frame_state_or_result_(result) {
+    DCHECK_NOT_NULL(result);
+  }
+
+  FlagsContinuation(FlagsCondition condition, TrapId trap_id, Node* result)
+      : mode_(kFlags_trap),
+        condition_(condition),
+        frame_state_or_result_(result),
+        trap_id_(trap_id) {
+    DCHECK_NOT_NULL(result);
+  }
+
+  FlagsMode const mode_;
+  FlagsCondition condition_;
+  DeoptimizeKind kind_;          // Only valid if mode_ == kFlags_deoptimize*
+  DeoptimizeReason reason_;      // Only valid if mode_ == kFlags_deoptimize*
+  FeedbackSource feedback_;      // Only valid if mode_ == kFlags_deoptimize*
+  Node* frame_state_or_result_;  // Only valid if mode_ == kFlags_deoptimize*
+                                 // or mode_ == kFlags_set.
+  BasicBlock* true_block_;       // Only valid if mode_ == kFlags_branch*.
+  BasicBlock* false_block_;      // Only valid if mode_ == kFlags_branch*.
+  TrapId trap_id_;               // Only valid if mode_ == kFlags_trap.
+};
+
+// This struct connects nodes of parameters which are going to be pushed on the
+// call stack with their linkage location in the call descriptor of the callee.
+struct PushParameter {
+  PushParameter(Node* n = nullptr,
+                LinkageLocation l = LinkageLocation::ForAnyRegister())
+      : node(n), location(l) {}
+
+  Node* node;
+  LinkageLocation location;
+};
+
+enum class FrameStateInputKind { kAny, kStackSlot };
+
+// Instruction selection generates an InstructionSequence for a given Schedule.
+class V8_EXPORT_PRIVATE InstructionSelector final {
+ public:
+  // Forward declarations.
+  class Features;
+
+  enum SourcePositionMode { kCallSourcePositions, kAllSourcePositions };
+  enum EnableScheduling { kDisableScheduling, kEnableScheduling };
+  enum EnableRootsRelativeAddressing {
+    kDisableRootsRelativeAddressing,
+    kEnableRootsRelativeAddressing
+  };
+  enum EnableSwitchJumpTable {
+    kDisableSwitchJumpTable,
+    kEnableSwitchJumpTable
+  };
+  enum EnableTraceTurboJson { kDisableTraceTurboJson, kEnableTraceTurboJson };
+
+  InstructionSelector(
+      Zone* zone, size_t node_count, Linkage* linkage,
+      InstructionSequence* sequence, Schedule* schedule,
+      SourcePositionTable* source_positions, Frame* frame,
+      EnableSwitchJumpTable enable_switch_jump_table, TickCounter* tick_counter,
+      JSHeapBroker* broker, size_t* max_unoptimized_frame_height,
+      size_t* max_pushed_argument_count,
+      SourcePositionMode source_position_mode = kCallSourcePositions,
+      Features features = SupportedFeatures(),
+      EnableScheduling enable_scheduling = FLAG_turbo_instruction_scheduling
+                                               ? kEnableScheduling
+                                               : kDisableScheduling,
+      EnableRootsRelativeAddressing enable_roots_relative_addressing =
+          kDisableRootsRelativeAddressing,
+      PoisoningMitigationLevel poisoning_level =
+          PoisoningMitigationLevel::kDontPoison,
+      EnableTraceTurboJson trace_turbo = kDisableTraceTurboJson);
+
+  // Visit code for the entire graph with the included schedule.
+  bool SelectInstructions();
+
+  void StartBlock(RpoNumber rpo);
+  void EndBlock(RpoNumber rpo);
+  void AddInstruction(Instruction* instr);
+  void AddTerminator(Instruction* instr);
+
+  // ===========================================================================
+  // ============= Architecture-independent code emission methods. =============
+  // ===========================================================================
+
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    size_t temp_count = 0, InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, size_t temp_count = 0,
+                    InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, InstructionOperand b,
+                    size_t temp_count = 0, InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, InstructionOperand b,
+                    InstructionOperand c, size_t temp_count = 0,
+                    InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, InstructionOperand b,
+                    InstructionOperand c, InstructionOperand d,
+                    size_t temp_count = 0, InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, InstructionOperand b,
+                    InstructionOperand c, InstructionOperand d,
+                    InstructionOperand e, size_t temp_count = 0,
+                    InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, InstructionOperand output,
+                    InstructionOperand a, InstructionOperand b,
+                    InstructionOperand c, InstructionOperand d,
+                    InstructionOperand e, InstructionOperand f,
+                    size_t temp_count = 0, InstructionOperand* temps = nullptr);
+  Instruction* Emit(InstructionCode opcode, size_t output_count,
+                    InstructionOperand* outputs, size_t input_count,
+                    InstructionOperand* inputs, size_t temp_count = 0,
+                    InstructionOperand* temps = nullptr);
+  Instruction* Emit(Instruction* instr);
+
+  // [0-3] operand instructions with no output, using the labels of the true
+  // and false blocks of the continuation.
+  Instruction* EmitWithContinuation(InstructionCode opcode,
+                                    FlagsContinuation* cont);
+  Instruction* EmitWithContinuation(InstructionCode opcode,
+                                    InstructionOperand a,
+                                    FlagsContinuation* cont);
+  Instruction* EmitWithContinuation(InstructionCode opcode,
+                                    InstructionOperand a, InstructionOperand b,
+                                    FlagsContinuation* cont);
+  Instruction* EmitWithContinuation(InstructionCode opcode,
+                                    InstructionOperand a, InstructionOperand b,
+                                    InstructionOperand c,
+                                    FlagsContinuation* cont);
+  Instruction* EmitWithContinuation(InstructionCode opcode, size_t output_count,
+                                    InstructionOperand* outputs,
+                                    size_t input_count,
+                                    InstructionOperand* inputs,
+                                    FlagsContinuation* cont);
+  Instruction* EmitWithContinuation(
+      InstructionCode opcode, size_t output_count, InstructionOperand* outputs,
+      size_t input_count, InstructionOperand* inputs, size_t temp_count,
+      InstructionOperand* temps, FlagsContinuation* cont);
+
+  void EmitIdentity(Node* node);
+
+  // ===========================================================================
+  // ============== Architecture-independent CPU feature methods. ==============
+  // ===========================================================================
+
+  class Features final {
+   public:
+    Features() : bits_(0) {}
+    explicit Features(unsigned bits) : bits_(bits) {}
+    explicit Features(CpuFeature f) : bits_(1u << f) {}
+    Features(CpuFeature f1, CpuFeature f2) : bits_((1u << f1) | (1u << f2)) {}
+
+    bool Contains(CpuFeature f) const { return (bits_ & (1u << f)); }
+
+   private:
+    unsigned bits_;
+  };
+
+  bool IsSupported(CpuFeature feature) const {
+    return features_.Contains(feature);
+  }
+
+  // Returns the features supported on the target platform.
+  static Features SupportedFeatures() {
+    return Features(CpuFeatures::SupportedFeatures());
+  }
+
+  // TODO(sigurds) This should take a CpuFeatures argument.
+  static MachineOperatorBuilder::Flags SupportedMachineOperatorFlags();
+
+  static MachineOperatorBuilder::AlignmentRequirements AlignmentRequirements();
+
+  bool NeedsPoisoning(IsSafetyCheck safety_check) const;
+
+  // ===========================================================================
+  // ============ Architecture-independent graph covering methods. =============
+  // ===========================================================================
+
+  // Used in pattern matching during code generation.
+  // Check if {node} can be covered while generating code for the current
+  // instruction. A node can be covered if {user} holds the only edge to
+  // {node} and the two are in the same basic block.
+  bool CanCover(Node* user, Node* node) const;
+  // CanCover is not transitive. A counterexample is nodes A, B, C such that
+  // CanCover(A, B) and CanCover(B, C) hold while B is pure: the effect levels
+  // of A and B might differ. CanCoverTransitively does the additional checks.
+  bool CanCoverTransitively(Node* user, Node* node, Node* node_input) const;
+
+  // Used in pattern matching during code generation.
+  // This function checks that {node} and {user} are in the same basic block,
+  // and that {user} is the only user of {node} in this basic block.  This
+  // check guarantees that there are no users of {node} scheduled between
+  // {node} and {user}, and thus we can select a single instruction for both
+  // nodes, if such an instruction exists. This check can be used for example
+  // when selecting instructions for:
+  //   n = Int32Add(a, b)
+  //   c = Word32Compare(n, 0, cond)
+  //   Branch(c, true_label, false_label)
+  // Here we can generate a flag-setting add instruction, even if the add has
+  // uses in other basic blocks, since the flag-setting add instruction will
+  // still generate the result of the addition and not just set the flags.
+  // However, if we had uses of the add in the same basic block, we could have:
+  //   n = Int32Add(a, b)
+  //   o = OtherOp(n, ...)
+  //   c = Word32Compare(n, 0, cond)
+  //   Branch(c, true_label, false_label)
+  // where we cannot select the add and the compare together.  If we were to
+  // select a flag-setting add instruction for Word32Compare and Int32Add while
+  // visiting Word32Compare, we would then have to select an instruction for
+  // OtherOp *afterwards*, which means we would attempt to use the result of
+  // the add before we have defined it.
+  bool IsOnlyUserOfNodeInSameBlock(Node* user, Node* node) const;
+
+  // Checks if {node} was already defined, and therefore code was already
+  // generated for it.
+  bool IsDefined(Node* node) const;
+
+  // Checks if {node} has any uses, and therefore code has to be generated for
+  // it.
+  bool IsUsed(Node* node) const;
+
+  // Checks if {node} is currently live.
+  bool IsLive(Node* node) const { return !IsDefined(node) && IsUsed(node); }
+
+  // Gets the effect level of {node}.
+  int GetEffectLevel(Node* node) const;
+
+  // Gets the effect level of {node}, appropriately adjusted based on
+  // continuation flags if the node is a branch.
+  int GetEffectLevel(Node* node, FlagsContinuation* cont) const;
+
+  int GetVirtualRegister(const Node* node);
+  const std::map<NodeId, int> GetVirtualRegistersForTesting() const;
+
+  // Check if we can generate loads and stores of ExternalConstants relative
+  // to the roots register.
+  bool CanAddressRelativeToRootsRegister(
+      const ExternalReference& reference) const;
+  // Check if we can use the roots register to access GC roots.
+  bool CanUseRootsRegister() const;
+
+  Isolate* isolate() const { return sequence()->isolate(); }
+
+  const ZoneVector<std::pair<int, int>>& instr_origins() const {
+    return instr_origins_;
+  }
+
+ private:
+  friend class OperandGenerator;
+
+  bool UseInstructionScheduling() const {
+    return (enable_scheduling_ == kEnableScheduling) &&
+           InstructionScheduler::SchedulerSupported();
+  }
+
+  void AppendDeoptimizeArguments(InstructionOperandVector* args,
+                                 DeoptimizeKind kind, DeoptimizeReason reason,
+                                 FeedbackSource const& feedback,
+                                 Node* frame_state);
+
+  void EmitTableSwitch(const SwitchInfo& sw,
+                       InstructionOperand const& index_operand);
+  void EmitBinarySearchSwitch(const SwitchInfo& sw,
+                              InstructionOperand const& value_operand);
+
+  void TryRename(InstructionOperand* op);
+  int GetRename(int virtual_register);
+  void SetRename(const Node* node, const Node* rename);
+  void UpdateRenames(Instruction* instruction);
+  void UpdateRenamesInPhi(PhiInstruction* phi);
+
+  // Inform the instruction selection that {node} was just defined.
+  void MarkAsDefined(Node* node);
+
+  // Inform the instruction selection that {node} has at least one use and we
+  // will need to generate code for it.
+  void MarkAsUsed(Node* node);
+
+  // Sets the effect level of {node}.
+  void SetEffectLevel(Node* node, int effect_level);
+
+  // Inform the register allocation of the representation of the value produced
+  // by {node}.
+  void MarkAsRepresentation(MachineRepresentation rep, Node* node);
+  void MarkAsWord32(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kWord32, node);
+  }
+  void MarkAsWord64(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kWord64, node);
+  }
+  void MarkAsFloat32(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kFloat32, node);
+  }
+  void MarkAsFloat64(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kFloat64, node);
+  }
+  void MarkAsSimd128(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kSimd128, node);
+  }
+  void MarkAsTagged(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kTagged, node);
+  }
+  void MarkAsCompressed(Node* node) {
+    MarkAsRepresentation(MachineRepresentation::kCompressed, node);
+  }
+
+  // Inform the register allocation of the representation of the unallocated
+  // operand {op}.
+  void MarkAsRepresentation(MachineRepresentation rep,
+                            const InstructionOperand& op);
+
+  enum CallBufferFlag {
+    kCallCodeImmediate = 1u << 0,
+    kCallAddressImmediate = 1u << 1,
+    kCallTail = 1u << 2,
+    kCallFixedTargetRegister = 1u << 3
+  };
+  using CallBufferFlags = base::Flags<CallBufferFlag>;
+
+  // Initialize the call buffer with the InstructionOperands, nodes, etc.,
+  // corresponding to the inputs and outputs of the call.
+  // {call_code_immediate} to generate immediate operands for calls to code;
+  // {call_address_immediate} to generate immediate operands for calls to
+  // addresses.
+  void InitializeCallBuffer(Node* call, CallBuffer* buffer,
+                            CallBufferFlags flags, bool is_tail_call,
+                            int stack_slot_delta = 0);
+  bool IsTailCallAddressImmediate();
+  int GetTempsCountForTailCallFromJSFunction();
+
+  void UpdateMaxPushedArgumentCount(size_t count);
+
+  FrameStateDescriptor* GetFrameStateDescriptor(Node* node);
+  size_t AddInputsToFrameStateDescriptor(FrameStateDescriptor* descriptor,
+                                         Node* state, OperandGenerator* g,
+                                         StateObjectDeduplicator* deduplicator,
+                                         InstructionOperandVector* inputs,
+                                         FrameStateInputKind kind, Zone* zone);
+  size_t AddInputsToFrameStateDescriptor(StateValueList* values,
+                                         InstructionOperandVector* inputs,
+                                         OperandGenerator* g,
+                                         StateObjectDeduplicator* deduplicator,
+                                         Node* node, FrameStateInputKind kind,
+                                         Zone* zone);
+  size_t AddOperandToStateValueDescriptor(StateValueList* values,
+                                          InstructionOperandVector* inputs,
+                                          OperandGenerator* g,
+                                          StateObjectDeduplicator* deduplicator,
+                                          Node* input, MachineType type,
+                                          FrameStateInputKind kind, Zone* zone);
+
+  // ===========================================================================
+  // ============= Architecture-specific graph covering methods. ===============
+  // ===========================================================================
+
+  // Visit nodes in the given block and generate code.
+  void VisitBlock(BasicBlock* block);
+
+  // Visit the node for the control flow at the end of the block, generating
+  // code if necessary.
+  void VisitControl(BasicBlock* block);
+
+  // Visit the node and generate code, if any.
+  void VisitNode(Node* node);
+
+  // Visit the node and generate code for IEEE 754 functions.
+  void VisitFloat64Ieee754Binop(Node*, InstructionCode code);
+  void VisitFloat64Ieee754Unop(Node*, InstructionCode code);
+
+#define DECLARE_GENERATOR(x) void Visit##x(Node* node);
+  MACHINE_OP_LIST(DECLARE_GENERATOR)
+  MACHINE_SIMD_OP_LIST(DECLARE_GENERATOR)
+#undef DECLARE_GENERATOR
+
+  // Visit the load node with a value and opcode to replace with.
+  void VisitLoad(Node* node, Node* value, InstructionCode opcode);
+  void VisitLoadTransform(Node* node, Node* value, InstructionCode opcode);
+  void VisitFinishRegion(Node* node);
+  void VisitParameter(Node* node);
+  void VisitIfException(Node* node);
+  void VisitOsrValue(Node* node);
+  void VisitPhi(Node* node);
+  void VisitProjection(Node* node);
+  void VisitConstant(Node* node);
+  void VisitCall(Node* call, BasicBlock* handler = nullptr);
+  void VisitDeoptimizeIf(Node* node);
+  void VisitDeoptimizeUnless(Node* node);
+  void VisitTrapIf(Node* node, TrapId trap_id);
+  void VisitTrapUnless(Node* node, TrapId trap_id);
+  void VisitTailCall(Node* call);
+  void VisitGoto(BasicBlock* target);
+  void VisitBranch(Node* input, BasicBlock* tbranch, BasicBlock* fbranch);
+  void VisitSwitch(Node* node, const SwitchInfo& sw);
+  void VisitDeoptimize(DeoptimizeKind kind, DeoptimizeReason reason,
+                       FeedbackSource const& feedback, Node* frame_state);
+  void VisitReturn(Node* ret);
+  void VisitThrow(Node* node);
+  void VisitRetain(Node* node);
+  void VisitUnreachable(Node* node);
+  void VisitStaticAssert(Node* node);
+  void VisitDeadValue(Node* node);
+
+  void VisitStackPointerGreaterThan(Node* node, FlagsContinuation* cont);
+
+  void VisitWordCompareZero(Node* user, Node* value, FlagsContinuation* cont);
+
+  void EmitWordPoisonOnSpeculation(Node* node);
+
+  void EmitPrepareArguments(ZoneVector<compiler::PushParameter>* arguments,
+                            const CallDescriptor* call_descriptor, Node* node);
+  void EmitPrepareResults(ZoneVector<compiler::PushParameter>* results,
+                          const CallDescriptor* call_descriptor, Node* node);
+
+  bool CanProduceSignalingNaN(Node* node);
+
+  // ===========================================================================
+  // ============= Vector instruction (SIMD) helper fns. =======================
+  // ===========================================================================
+
+  // Canonicalize shuffles to make pattern matching simpler. Returns the shuffle
+  // indices, and a boolean indicating if the shuffle is a swizzle (one input).
+  void CanonicalizeShuffle(Node* node, uint8_t* shuffle, bool* is_swizzle);
+
+  // Swaps the first two input operands of the node, to help match shuffles
+  // to specific architectural instructions.
+  void SwapShuffleInputs(Node* node);
+
+  // ===========================================================================
+
+  Schedule* schedule() const { return schedule_; }
+  Linkage* linkage() const { return linkage_; }
+  InstructionSequence* sequence() const { return sequence_; }
+  Zone* instruction_zone() const { return sequence()->zone(); }
+  Zone* zone() const { return zone_; }
+
+  void set_instruction_selection_failed() {
+    instruction_selection_failed_ = true;
+  }
+  bool instruction_selection_failed() { return instruction_selection_failed_; }
+
+  void MarkPairProjectionsAsWord32(Node* node);
+  bool IsSourcePositionUsed(Node* node);
+  void VisitWord32AtomicBinaryOperation(Node* node, ArchOpcode int8_op,
+                                        ArchOpcode uint8_op,
+                                        ArchOpcode int16_op,
+                                        ArchOpcode uint16_op,
+                                        ArchOpcode word32_op);
+  void VisitWord64AtomicBinaryOperation(Node* node, ArchOpcode uint8_op,
+                                        ArchOpcode uint16_op,
+                                        ArchOpcode uint32_op,
+                                        ArchOpcode uint64_op);
+  void VisitWord64AtomicNarrowBinop(Node* node, ArchOpcode uint8_op,
+                                    ArchOpcode uint16_op, ArchOpcode uint32_op);
+
+#if V8_TARGET_ARCH_64_BIT
+  bool ZeroExtendsWord32ToWord64(Node* node, int recursion_depth = 0);
+  bool ZeroExtendsWord32ToWord64NoPhis(Node* node);
+
+  enum Upper32BitsState : uint8_t {
+    kNotYetChecked,
+    kUpperBitsGuaranteedZero,
+    kNoGuarantee,
+  };
+#endif  // V8_TARGET_ARCH_64_BIT
+
+  // ===========================================================================
+
+  Zone* const zone_;
+  Linkage* const linkage_;
+  InstructionSequence* const sequence_;
+  SourcePositionTable* const source_positions_;
+  SourcePositionMode const source_position_mode_;
+  Features features_;
+  Schedule* const schedule_;
+  BasicBlock* current_block_;
+  ZoneVector<Instruction*> instructions_;
+  InstructionOperandVector continuation_inputs_;
+  InstructionOperandVector continuation_outputs_;
+  InstructionOperandVector continuation_temps_;
+  BoolVector defined_;
+  BoolVector used_;
+  IntVector effect_level_;
+  IntVector virtual_registers_;
+  IntVector virtual_register_rename_;
+  InstructionScheduler* scheduler_;
+  EnableScheduling enable_scheduling_;
+  EnableRootsRelativeAddressing enable_roots_relative_addressing_;
+  EnableSwitchJumpTable enable_switch_jump_table_;
+
+  PoisoningMitigationLevel poisoning_level_;
+  Frame* frame_;
+  bool instruction_selection_failed_;
+  ZoneVector<std::pair<int, int>> instr_origins_;
+  EnableTraceTurboJson trace_turbo_;
+  TickCounter* const tick_counter_;
+  // The broker is only used for unparking the LocalHeap for diagnostic printing
+  // for failed StaticAsserts.
+  JSHeapBroker* const broker_;
+
+  // Store the maximal unoptimized frame height and the maximal number of pushed
+  // arguments (for calls). Later used to apply an offset to stack checks.
+  size_t* max_unoptimized_frame_height_;
+  size_t* max_pushed_argument_count_;
+
+#if V8_TARGET_ARCH_64_BIT
+  // Holds lazily-computed results for whether phi nodes guarantee their upper
+  // 32 bits to be zero. Indexed by node ID; nobody reads or writes the values
+  // for non-phi nodes.
+  ZoneVector<Upper32BitsState> phi_states_;
+#endif
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_INSTRUCTION_SELECTOR_H_
diff --git a/src/compiler/backend/instruction.cc b/src/compiler/backend/instruction.cc
new file mode 100644
index 0000000..e1e54c9
--- /dev/null
+++ b/src/compiler/backend/instruction.cc
@@ -0,0 +1,1148 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction.h"
+
+#include <cstddef>
+#include <iomanip>
+
+#include "src/codegen/interface-descriptors.h"
+#include "src/codegen/register-configuration.h"
+#include "src/codegen/source-position.h"
+#include "src/compiler/common-operator.h"
+#include "src/compiler/graph.h"
+#include "src/compiler/node.h"
+#include "src/compiler/schedule.h"
+#include "src/execution/frames.h"
+#include "src/utils/ostreams.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+const RegisterConfiguration* (*GetRegConfig)() = RegisterConfiguration::Default;
+
+FlagsCondition CommuteFlagsCondition(FlagsCondition condition) {
+  switch (condition) {
+    case kSignedLessThan:
+      return kSignedGreaterThan;
+    case kSignedGreaterThanOrEqual:
+      return kSignedLessThanOrEqual;
+    case kSignedLessThanOrEqual:
+      return kSignedGreaterThanOrEqual;
+    case kSignedGreaterThan:
+      return kSignedLessThan;
+    case kUnsignedLessThan:
+      return kUnsignedGreaterThan;
+    case kUnsignedGreaterThanOrEqual:
+      return kUnsignedLessThanOrEqual;
+    case kUnsignedLessThanOrEqual:
+      return kUnsignedGreaterThanOrEqual;
+    case kUnsignedGreaterThan:
+      return kUnsignedLessThan;
+    case kFloatLessThanOrUnordered:
+      return kFloatGreaterThanOrUnordered;
+    case kFloatGreaterThanOrEqual:
+      return kFloatLessThanOrEqual;
+    case kFloatLessThanOrEqual:
+      return kFloatGreaterThanOrEqual;
+    case kFloatGreaterThanOrUnordered:
+      return kFloatLessThanOrUnordered;
+    case kFloatLessThan:
+      return kFloatGreaterThan;
+    case kFloatGreaterThanOrEqualOrUnordered:
+      return kFloatLessThanOrEqualOrUnordered;
+    case kFloatLessThanOrEqualOrUnordered:
+      return kFloatGreaterThanOrEqualOrUnordered;
+    case kFloatGreaterThan:
+      return kFloatLessThan;
+    case kPositiveOrZero:
+    case kNegative:
+      UNREACHABLE();
+    case kEqual:
+    case kNotEqual:
+    case kOverflow:
+    case kNotOverflow:
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      return condition;
+  }
+  UNREACHABLE();
+}
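+// An illustrative example of why commuting matters: if an instruction
+// selector swaps the operands of a comparison (say, to move an immediate to
+// the right-hand side), "x < y" becomes "y > x", so
+//
+//   CommuteFlagsCondition(kSignedLessThan) == kSignedGreaterThan
+//
+// whereas symmetric conditions such as kEqual or kOverflow map to themselves.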
+
+bool InstructionOperand::InterferesWith(const InstructionOperand& other) const {
+  if (kSimpleFPAliasing || !this->IsFPLocationOperand() ||
+      !other.IsFPLocationOperand())
+    return EqualsCanonicalized(other);
+  // Aliasing is complex and both operands are fp locations.
+  const LocationOperand& loc = *LocationOperand::cast(this);
+  const LocationOperand& other_loc = LocationOperand::cast(other);
+  LocationOperand::LocationKind kind = loc.location_kind();
+  LocationOperand::LocationKind other_kind = other_loc.location_kind();
+  if (kind != other_kind) return false;
+  MachineRepresentation rep = loc.representation();
+  MachineRepresentation other_rep = other_loc.representation();
+  if (rep == other_rep) return EqualsCanonicalized(other);
+  if (kind == LocationOperand::REGISTER) {
+    // FP register-register interference.
+    return GetRegConfig()->AreAliases(rep, loc.register_code(), other_rep,
+                                      other_loc.register_code());
+  } else {
+    // FP slot-slot interference. Slots of different FP reps can alias because
+    // the gap resolver may break a move into 2 or 4 equivalent smaller moves.
+    DCHECK_EQ(LocationOperand::STACK_SLOT, kind);
+    int index_hi = loc.index();
+    int index_lo =
+        index_hi - (1 << ElementSizeLog2Of(rep)) / kSystemPointerSize + 1;
+    int other_index_hi = other_loc.index();
+    int other_index_lo =
+        other_index_hi -
+        (1 << ElementSizeLog2Of(other_rep)) / kSystemPointerSize + 1;
+    return other_index_hi >= index_lo && index_hi >= other_index_lo;
+  }
+  return false;
+}
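+// A worked example of the slot arithmetic above, assuming a 32-bit target
+// (kSystemPointerSize == 4): a kFloat64 slot with index 5 spans
+// (1 << 3) / 4 == 2 slots, so index_lo == 4 and the slot covers {4, 5}. It
+// therefore interferes with a kFloat32 slot at index 4 (which covers only
+// {4}), but not with one at index 3.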
+
+bool LocationOperand::IsCompatible(LocationOperand* op) {
+  if (IsRegister() || IsStackSlot()) {
+    return op->IsRegister() || op->IsStackSlot();
+  } else if (kSimpleFPAliasing) {
+    // A backend may choose to generate the same instruction sequence regardless
+    // of the FP representation. As a result, we can relax the compatibility and
+    // allow, for example, a Double to be moved into a Float. However, this
+    // is only allowed if registers do not overlap.
+    return (IsFPRegister() || IsFPStackSlot()) &&
+           (op->IsFPRegister() || op->IsFPStackSlot());
+  } else if (IsFloatRegister() || IsFloatStackSlot()) {
+    return op->IsFloatRegister() || op->IsFloatStackSlot();
+  } else if (IsDoubleRegister() || IsDoubleStackSlot()) {
+    return op->IsDoubleRegister() || op->IsDoubleStackSlot();
+  } else {
+    return (IsSimd128Register() || IsSimd128StackSlot()) &&
+           (op->IsSimd128Register() || op->IsSimd128StackSlot());
+  }
+}
+
+void InstructionOperand::Print() const { StdoutStream{} << *this << std::endl; }
+
+std::ostream& operator<<(std::ostream& os, const InstructionOperand& op) {
+  switch (op.kind()) {
+    case InstructionOperand::UNALLOCATED: {
+      const UnallocatedOperand* unalloc = UnallocatedOperand::cast(&op);
+      os << "v" << unalloc->virtual_register();
+      if (unalloc->basic_policy() == UnallocatedOperand::FIXED_SLOT) {
+        return os << "(=" << unalloc->fixed_slot_index() << "S)";
+      }
+      switch (unalloc->extended_policy()) {
+        case UnallocatedOperand::NONE:
+          return os;
+        case UnallocatedOperand::FIXED_REGISTER:
+          return os << "(="
+                    << Register::from_code(unalloc->fixed_register_index())
+                    << ")";
+        case UnallocatedOperand::FIXED_FP_REGISTER:
+          return os << "(="
+                    << DoubleRegister::from_code(
+                           unalloc->fixed_register_index())
+                    << ")";
+        case UnallocatedOperand::MUST_HAVE_REGISTER:
+          return os << "(R)";
+        case UnallocatedOperand::MUST_HAVE_SLOT:
+          return os << "(S)";
+        case UnallocatedOperand::SAME_AS_FIRST_INPUT:
+          return os << "(1)";
+        case UnallocatedOperand::REGISTER_OR_SLOT:
+          return os << "(-)";
+        case UnallocatedOperand::REGISTER_OR_SLOT_OR_CONSTANT:
+          return os << "(*)";
+      }
+    }
+    case InstructionOperand::CONSTANT:
+      return os << "[constant:" << ConstantOperand::cast(op).virtual_register()
+                << "]";
+    case InstructionOperand::IMMEDIATE: {
+      ImmediateOperand imm = ImmediateOperand::cast(op);
+      switch (imm.type()) {
+        case ImmediateOperand::INLINE:
+          return os << "#" << imm.inline_value();
+        case ImmediateOperand::INDEXED:
+          return os << "[immediate:" << imm.indexed_value() << "]";
+      }
+    }
+    case InstructionOperand::PENDING:
+      return os << "[pending: " << PendingOperand::cast(op).next() << "]";
+    case InstructionOperand::ALLOCATED: {
+      LocationOperand allocated = LocationOperand::cast(op);
+      if (op.IsStackSlot()) {
+        os << "[stack:" << allocated.index();
+      } else if (op.IsFPStackSlot()) {
+        os << "[fp_stack:" << allocated.index();
+      } else if (op.IsRegister()) {
+        const char* name =
+            allocated.register_code() < Register::kNumRegisters
+                ? RegisterName(Register::from_code(allocated.register_code()))
+                : Register::GetSpecialRegisterName(allocated.register_code());
+        os << "[" << name << "|R";
+      } else if (op.IsDoubleRegister()) {
+        os << "[" << DoubleRegister::from_code(allocated.register_code())
+           << "|R";
+      } else if (op.IsFloatRegister()) {
+        os << "[" << FloatRegister::from_code(allocated.register_code())
+           << "|R";
+      } else {
+        DCHECK(op.IsSimd128Register());
+        os << "[" << Simd128Register::from_code(allocated.register_code())
+           << "|R";
+      }
+      switch (allocated.representation()) {
+        case MachineRepresentation::kNone:
+          os << "|-";
+          break;
+        case MachineRepresentation::kBit:
+          os << "|b";
+          break;
+        case MachineRepresentation::kWord8:
+          os << "|w8";
+          break;
+        case MachineRepresentation::kWord16:
+          os << "|w16";
+          break;
+        case MachineRepresentation::kWord32:
+          os << "|w32";
+          break;
+        case MachineRepresentation::kWord64:
+          os << "|w64";
+          break;
+        case MachineRepresentation::kFloat32:
+          os << "|f32";
+          break;
+        case MachineRepresentation::kFloat64:
+          os << "|f64";
+          break;
+        case MachineRepresentation::kSimd128:
+          os << "|s128";
+          break;
+        case MachineRepresentation::kTaggedSigned:
+          os << "|ts";
+          break;
+        case MachineRepresentation::kTaggedPointer:
+          os << "|tp";
+          break;
+        case MachineRepresentation::kTagged:
+          os << "|t";
+          break;
+        case MachineRepresentation::kCompressedPointer:
+          os << "|cp";
+          break;
+        case MachineRepresentation::kCompressed:
+          os << "|c";
+          break;
+      }
+      return os << "]";
+    }
+    case InstructionOperand::INVALID:
+      return os << "(x)";
+  }
+  UNREACHABLE();
+}
+
+void MoveOperands::Print() const {
+  StdoutStream{} << destination() << " = " << source() << std::endl;
+}
+
+std::ostream& operator<<(std::ostream& os, const MoveOperands& mo) {
+  os << mo.destination();
+  if (!mo.source().Equals(mo.destination())) {
+    os << " = " << mo.source();
+  }
+  return os << ";";
+}
+
+bool ParallelMove::IsRedundant() const {
+  for (MoveOperands* move : *this) {
+    if (!move->IsRedundant()) return false;
+  }
+  return true;
+}
+
+void ParallelMove::PrepareInsertAfter(
+    MoveOperands* move, ZoneVector<MoveOperands*>* to_eliminate) const {
+  bool no_aliasing =
+      kSimpleFPAliasing || !move->destination().IsFPLocationOperand();
+  MoveOperands* replacement = nullptr;
+  MoveOperands* eliminated = nullptr;
+  for (MoveOperands* curr : *this) {
+    if (curr->IsEliminated()) continue;
+    if (curr->destination().EqualsCanonicalized(move->source())) {
+      // curr writes to move's source, so to insert {move} after this
+      // ParallelMove we must forward curr's source into move's source (see
+      // the set_source call below).
+      DCHECK(!replacement);
+      replacement = curr;
+      if (no_aliasing && eliminated != nullptr) break;
+    } else if (curr->destination().InterferesWith(move->destination())) {
+      // We can eliminate curr, since move overwrites at least a part of its
+      // destination, implying its value is no longer live.
+      eliminated = curr;
+      to_eliminate->push_back(curr);
+      if (no_aliasing && replacement != nullptr) break;
+    }
+  }
+  if (replacement != nullptr) move->set_source(replacement->source());
+}
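+// A worked example: if this ParallelMove already contains "r1 = r0" and we
+// prepare to insert "r2 = r1" after it, then r1 is both curr's destination
+// and move's source, so the move to be inserted is rewritten to "r2 = r0".
+// Any existing move whose destination interferes with r2 is marked for
+// elimination, since the inserted move overwrites it.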
+
+Instruction::Instruction(InstructionCode opcode)
+    : opcode_(opcode),
+      bit_field_(OutputCountField::encode(0) | InputCountField::encode(0) |
+                 TempCountField::encode(0) | IsCallField::encode(false)),
+      reference_map_(nullptr),
+      block_(nullptr) {
+  parallel_moves_[0] = nullptr;
+  parallel_moves_[1] = nullptr;
+
+  // PendingOperands are required to be 8 byte aligned.
+  STATIC_ASSERT(offsetof(Instruction, operands_) % 8 == 0);
+}
+
+Instruction::Instruction(InstructionCode opcode, size_t output_count,
+                         InstructionOperand* outputs, size_t input_count,
+                         InstructionOperand* inputs, size_t temp_count,
+                         InstructionOperand* temps)
+    : opcode_(opcode),
+      bit_field_(OutputCountField::encode(output_count) |
+                 InputCountField::encode(input_count) |
+                 TempCountField::encode(temp_count) |
+                 IsCallField::encode(false)),
+      reference_map_(nullptr),
+      block_(nullptr) {
+  parallel_moves_[0] = nullptr;
+  parallel_moves_[1] = nullptr;
+  size_t offset = 0;
+  for (size_t i = 0; i < output_count; ++i) {
+    DCHECK(!outputs[i].IsInvalid());
+    operands_[offset++] = outputs[i];
+  }
+  for (size_t i = 0; i < input_count; ++i) {
+    DCHECK(!inputs[i].IsInvalid());
+    operands_[offset++] = inputs[i];
+  }
+  for (size_t i = 0; i < temp_count; ++i) {
+    DCHECK(!temps[i].IsInvalid());
+    operands_[offset++] = temps[i];
+  }
+}
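+// Note on the layout established above: outputs, inputs and temps are packed
+// back to back into operands_, so the i-th input lives at
+// operands_[OutputCount() + i] and the i-th temp at
+// operands_[OutputCount() + InputCount() + i], which is what the accessors in
+// instruction.h rely on.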
+
+bool Instruction::AreMovesRedundant() const {
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; i++) {
+    if (parallel_moves_[i] != nullptr && !parallel_moves_[i]->IsRedundant()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+void Instruction::Print() const { StdoutStream{} << *this << std::endl; }
+
+std::ostream& operator<<(std::ostream& os, const ParallelMove& pm) {
+  const char* space = "";
+  for (MoveOperands* move : pm) {
+    if (move->IsEliminated()) continue;
+    os << space << *move;
+    space = " ";
+  }
+  return os;
+}
+
+void ReferenceMap::RecordReference(const AllocatedOperand& op) {
+  // Do not record arguments as pointers.
+  if (op.IsStackSlot() && LocationOperand::cast(op).index() < 0) return;
+  DCHECK(!op.IsFPRegister() && !op.IsFPStackSlot());
+  reference_operands_.push_back(op);
+}
+
+std::ostream& operator<<(std::ostream& os, const ReferenceMap& pm) {
+  os << "{";
+  const char* separator = "";
+  for (const InstructionOperand& op : pm.reference_operands_) {
+    os << separator << op;
+    separator = ";";
+  }
+  return os << "}";
+}
+
+std::ostream& operator<<(std::ostream& os, const ArchOpcode& ao) {
+  switch (ao) {
+#define CASE(Name) \
+  case k##Name:    \
+    return os << #Name;
+    ARCH_OPCODE_LIST(CASE)
+#undef CASE
+  }
+  UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const AddressingMode& am) {
+  switch (am) {
+    case kMode_None:
+      return os;
+#define CASE(Name)   \
+  case kMode_##Name: \
+    return os << #Name;
+      TARGET_ADDRESSING_MODE_LIST(CASE)
+#undef CASE
+  }
+  UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const FlagsMode& fm) {
+  switch (fm) {
+    case kFlags_none:
+      return os;
+    case kFlags_branch:
+      return os << "branch";
+    case kFlags_branch_and_poison:
+      return os << "branch_and_poison";
+    case kFlags_deoptimize:
+      return os << "deoptimize";
+    case kFlags_deoptimize_and_poison:
+      return os << "deoptimize_and_poison";
+    case kFlags_set:
+      return os << "set";
+    case kFlags_trap:
+      return os << "trap";
+  }
+  UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const FlagsCondition& fc) {
+  switch (fc) {
+    case kEqual:
+      return os << "equal";
+    case kNotEqual:
+      return os << "not equal";
+    case kSignedLessThan:
+      return os << "signed less than";
+    case kSignedGreaterThanOrEqual:
+      return os << "signed greater than or equal";
+    case kSignedLessThanOrEqual:
+      return os << "signed less than or equal";
+    case kSignedGreaterThan:
+      return os << "signed greater than";
+    case kUnsignedLessThan:
+      return os << "unsigned less than";
+    case kUnsignedGreaterThanOrEqual:
+      return os << "unsigned greater than or equal";
+    case kUnsignedLessThanOrEqual:
+      return os << "unsigned less than or equal";
+    case kUnsignedGreaterThan:
+      return os << "unsigned greater than";
+    case kFloatLessThanOrUnordered:
+      return os << "less than or unordered (FP)";
+    case kFloatGreaterThanOrEqual:
+      return os << "greater than or equal (FP)";
+    case kFloatLessThanOrEqual:
+      return os << "less than or equal (FP)";
+    case kFloatGreaterThanOrUnordered:
+      return os << "greater than or unordered (FP)";
+    case kFloatLessThan:
+      return os << "less than (FP)";
+    case kFloatGreaterThanOrEqualOrUnordered:
+      return os << "greater than, equal or unordered (FP)";
+    case kFloatLessThanOrEqualOrUnordered:
+      return os << "less than, equal or unordered (FP)";
+    case kFloatGreaterThan:
+      return os << "greater than (FP)";
+    case kUnorderedEqual:
+      return os << "unordered equal";
+    case kUnorderedNotEqual:
+      return os << "unordered not equal";
+    case kOverflow:
+      return os << "overflow";
+    case kNotOverflow:
+      return os << "not overflow";
+    case kPositiveOrZero:
+      return os << "positive or zero";
+    case kNegative:
+      return os << "negative";
+  }
+  UNREACHABLE();
+}
+
+std::ostream& operator<<(std::ostream& os, const Instruction& instr) {
+  os << "gap ";
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; i++) {
+    os << "(";
+    if (instr.parallel_moves()[i] != nullptr) {
+      os << *instr.parallel_moves()[i];
+    }
+    os << ") ";
+  }
+  os << "\n          ";
+
+  if (instr.OutputCount() == 1) {
+    os << *instr.OutputAt(0) << " = ";
+  } else if (instr.OutputCount() > 1) {
+    os << "(" << *instr.OutputAt(0);
+    for (size_t i = 1; i < instr.OutputCount(); i++) {
+      os << ", " << *instr.OutputAt(i);
+    }
+    os << ") = ";
+  }
+
+  os << ArchOpcodeField::decode(instr.opcode());
+  AddressingMode am = AddressingModeField::decode(instr.opcode());
+  if (am != kMode_None) {
+    os << " : " << AddressingModeField::decode(instr.opcode());
+  }
+  FlagsMode fm = FlagsModeField::decode(instr.opcode());
+  if (fm != kFlags_none) {
+    os << " && " << fm << " if " << FlagsConditionField::decode(instr.opcode());
+  }
+  for (size_t i = 0; i < instr.InputCount(); i++) {
+    os << " " << *instr.InputAt(i);
+  }
+  return os;
+}
+
+Constant::Constant(int32_t v) : type_(kInt32), value_(v) {}
+
+Constant::Constant(RelocatablePtrConstantInfo info) {
+  if (info.type() == RelocatablePtrConstantInfo::kInt32) {
+    type_ = kInt32;
+  } else if (info.type() == RelocatablePtrConstantInfo::kInt64) {
+    type_ = kInt64;
+  } else {
+    UNREACHABLE();
+  }
+  value_ = info.value();
+  rmode_ = info.rmode();
+}
+
+Handle<HeapObject> Constant::ToHeapObject() const {
+  DCHECK(kHeapObject == type() || kCompressedHeapObject == type());
+  Handle<HeapObject> value(
+      reinterpret_cast<Address*>(static_cast<intptr_t>(value_)));
+  return value;
+}
+
+Handle<Code> Constant::ToCode() const {
+  DCHECK_EQ(kHeapObject, type());
+  Handle<Code> value(reinterpret_cast<Address*>(static_cast<intptr_t>(value_)));
+  return value;
+}
+
+const StringConstantBase* Constant::ToDelayedStringConstant() const {
+  DCHECK_EQ(kDelayedStringConstant, type());
+  const StringConstantBase* value =
+      bit_cast<StringConstantBase*>(static_cast<intptr_t>(value_));
+  return value;
+}
+
+std::ostream& operator<<(std::ostream& os, const Constant& constant) {
+  switch (constant.type()) {
+    case Constant::kInt32:
+      return os << constant.ToInt32();
+    case Constant::kInt64:
+      return os << constant.ToInt64() << "l";
+    case Constant::kFloat32:
+      return os << constant.ToFloat32() << "f";
+    case Constant::kFloat64:
+      return os << constant.ToFloat64().value();
+    case Constant::kExternalReference:
+      return os << constant.ToExternalReference().address();
+    case Constant::kHeapObject:  // Fall through.
+    case Constant::kCompressedHeapObject:
+      return os << Brief(*constant.ToHeapObject());
+    case Constant::kRpoNumber:
+      return os << "RPO" << constant.ToRpoNumber().ToInt();
+    case Constant::kDelayedStringConstant:
+      return os << "DelayedStringConstant: "
+                << constant.ToDelayedStringConstant();
+  }
+  UNREACHABLE();
+}
+
+PhiInstruction::PhiInstruction(Zone* zone, int virtual_register,
+                               size_t input_count)
+    : virtual_register_(virtual_register),
+      output_(UnallocatedOperand(UnallocatedOperand::NONE, virtual_register)),
+      operands_(input_count, InstructionOperand::kInvalidVirtualRegister,
+                zone) {}
+
+void PhiInstruction::SetInput(size_t offset, int virtual_register) {
+  DCHECK_EQ(InstructionOperand::kInvalidVirtualRegister, operands_[offset]);
+  operands_[offset] = virtual_register;
+}
+
+void PhiInstruction::RenameInput(size_t offset, int virtual_register) {
+  DCHECK_NE(InstructionOperand::kInvalidVirtualRegister, operands_[offset]);
+  operands_[offset] = virtual_register;
+}
+
+InstructionBlock::InstructionBlock(Zone* zone, RpoNumber rpo_number,
+                                   RpoNumber loop_header, RpoNumber loop_end,
+                                   RpoNumber dominator, bool deferred,
+                                   bool handler)
+    : successors_(zone),
+      predecessors_(zone),
+      phis_(zone),
+      ao_number_(RpoNumber::Invalid()),
+      rpo_number_(rpo_number),
+      loop_header_(loop_header),
+      loop_end_(loop_end),
+      dominator_(dominator),
+      deferred_(deferred),
+      handler_(handler) {}
+
+size_t InstructionBlock::PredecessorIndexOf(RpoNumber rpo_number) const {
+  size_t j = 0;
+  for (InstructionBlock::Predecessors::const_iterator i = predecessors_.begin();
+       i != predecessors_.end(); ++i, ++j) {
+    if (*i == rpo_number) break;
+  }
+  return j;
+}
+
+static RpoNumber GetRpo(const BasicBlock* block) {
+  if (block == nullptr) return RpoNumber::Invalid();
+  return RpoNumber::FromInt(block->rpo_number());
+}
+
+static RpoNumber GetLoopEndRpo(const BasicBlock* block) {
+  if (!block->IsLoopHeader()) return RpoNumber::Invalid();
+  return RpoNumber::FromInt(block->loop_end()->rpo_number());
+}
+
+static InstructionBlock* InstructionBlockFor(Zone* zone,
+                                             const BasicBlock* block) {
+  bool is_handler =
+      !block->empty() && block->front()->opcode() == IrOpcode::kIfException;
+  InstructionBlock* instr_block = zone->New<InstructionBlock>(
+      zone, GetRpo(block), GetRpo(block->loop_header()), GetLoopEndRpo(block),
+      GetRpo(block->dominator()), block->deferred(), is_handler);
+  // Map successors and predecessors.
+  instr_block->successors().reserve(block->SuccessorCount());
+  for (BasicBlock* successor : block->successors()) {
+    instr_block->successors().push_back(GetRpo(successor));
+  }
+  instr_block->predecessors().reserve(block->PredecessorCount());
+  for (BasicBlock* predecessor : block->predecessors()) {
+    instr_block->predecessors().push_back(GetRpo(predecessor));
+  }
+  if (block->PredecessorCount() == 1 &&
+      block->predecessors()[0]->control() == BasicBlock::Control::kSwitch) {
+    instr_block->set_switch_target(true);
+  }
+  return instr_block;
+}
+
+std::ostream& operator<<(std::ostream& os,
+                         const PrintableInstructionBlock& printable_block) {
+  const InstructionBlock* block = printable_block.block_;
+  const InstructionSequence* code = printable_block.code_;
+
+  os << "B" << block->rpo_number();
+  if (block->ao_number().IsValid()) {
+    os << ": AO#" << block->ao_number();
+  } else {
+    os << ": AO#?";
+  }
+  if (block->IsDeferred()) os << " (deferred)";
+  if (!block->needs_frame()) os << " (no frame)";
+  if (block->must_construct_frame()) os << " (construct frame)";
+  if (block->must_deconstruct_frame()) os << " (deconstruct frame)";
+  if (block->IsLoopHeader()) {
+    os << " loop blocks: [" << block->rpo_number() << ", " << block->loop_end()
+       << ")";
+  }
+  os << "  instructions: [" << block->code_start() << ", " << block->code_end()
+     << ")" << std::endl
+     << " predecessors:";
+
+  for (RpoNumber pred : block->predecessors()) {
+    os << " B" << pred.ToInt();
+  }
+  os << std::endl;
+
+  for (const PhiInstruction* phi : block->phis()) {
+    os << "     phi: " << phi->output() << " =";
+    for (int input : phi->operands()) {
+      os << " v" << input;
+    }
+    os << std::endl;
+  }
+
+  for (int j = block->first_instruction_index();
+       j <= block->last_instruction_index(); j++) {
+    os << "   " << std::setw(5) << j << ": " << *code->InstructionAt(j)
+       << std::endl;
+  }
+
+  os << " successors:";
+  for (RpoNumber succ : block->successors()) {
+    os << " B" << succ.ToInt();
+  }
+  os << std::endl;
+  return os;
+}
+
+InstructionBlocks* InstructionSequence::InstructionBlocksFor(
+    Zone* zone, const Schedule* schedule) {
+  InstructionBlocks* blocks = zone->NewArray<InstructionBlocks>(1);
+  new (blocks) InstructionBlocks(
+      static_cast<int>(schedule->rpo_order()->size()), nullptr, zone);
+  size_t rpo_number = 0;
+  for (BasicBlockVector::const_iterator it = schedule->rpo_order()->begin();
+       it != schedule->rpo_order()->end(); ++it, ++rpo_number) {
+    DCHECK(!(*blocks)[rpo_number]);
+    DCHECK(GetRpo(*it).ToSize() == rpo_number);
+    (*blocks)[rpo_number] = InstructionBlockFor(zone, *it);
+  }
+  return blocks;
+}
+
+void InstructionSequence::ValidateEdgeSplitForm() const {
+  // Validate blocks are in edge-split form: no block with multiple successors
+  // has an edge to a block (== a successor) with more than one predecessor.
+  for (const InstructionBlock* block : instruction_blocks()) {
+    if (block->SuccessorCount() > 1) {
+      for (const RpoNumber& successor_id : block->successors()) {
+        const InstructionBlock* successor = InstructionBlockAt(successor_id);
+        // Expect precisely one predecessor: "block".
+        CHECK(successor->PredecessorCount() == 1 &&
+              successor->predecessors()[0] == block->rpo_number());
+      }
+    }
+  }
+}
+
+void InstructionSequence::ValidateDeferredBlockExitPaths() const {
+  // A deferred block with more than one successor must have all its successors
+  // deferred.
+  for (const InstructionBlock* block : instruction_blocks()) {
+    if (!block->IsDeferred() || block->SuccessorCount() <= 1) continue;
+    for (RpoNumber successor_id : block->successors()) {
+      CHECK(InstructionBlockAt(successor_id)->IsDeferred());
+    }
+  }
+}
+
+void InstructionSequence::ValidateDeferredBlockEntryPaths() const {
+  // If a deferred block has multiple predecessors, they have to
+  // all be deferred. Otherwise, we can run into a situation where a range
+  // that spills only in deferred blocks inserts its spill in the block, but
+  // other ranges need moves inserted by ResolveControlFlow in the predecessors,
+  // which may clobber the register of this range.
+  for (const InstructionBlock* block : instruction_blocks()) {
+    if (!block->IsDeferred() || block->PredecessorCount() <= 1) continue;
+    for (RpoNumber predecessor_id : block->predecessors()) {
+      CHECK(InstructionBlockAt(predecessor_id)->IsDeferred());
+    }
+  }
+}
+
+void InstructionSequence::ValidateSSA() const {
+  // TODO(mtrofin): We could use a local zone here instead.
+  BitVector definitions(VirtualRegisterCount(), zone());
+  for (const Instruction* instruction : *this) {
+    for (size_t i = 0; i < instruction->OutputCount(); ++i) {
+      const InstructionOperand* output = instruction->OutputAt(i);
+      int vreg = (output->IsConstant())
+                     ? ConstantOperand::cast(output)->virtual_register()
+                     : UnallocatedOperand::cast(output)->virtual_register();
+      CHECK(!definitions.Contains(vreg));
+      definitions.Add(vreg);
+    }
+  }
+}
+
+void InstructionSequence::ComputeAssemblyOrder() {
+  int ao = 0;
+  RpoNumber invalid = RpoNumber::Invalid();
+
+  ao_blocks_ = zone()->NewArray<InstructionBlocks>(1);
+  new (ao_blocks_) InstructionBlocks(zone());
+  ao_blocks_->reserve(instruction_blocks_->size());
+
+  // Place non-deferred blocks.
+  for (InstructionBlock* const block : *instruction_blocks_) {
+    DCHECK_NOT_NULL(block);
+    if (block->IsDeferred()) continue;            // skip deferred blocks.
+    if (block->ao_number() != invalid) continue;  // loop rotated.
+    if (block->IsLoopHeader()) {
+      bool header_align = true;
+      if (FLAG_turbo_loop_rotation) {
+        // Perform loop rotation for non-deferred loops.
+        InstructionBlock* loop_end =
+            instruction_blocks_->at(block->loop_end().ToSize() - 1);
+        if (loop_end->SuccessorCount() == 1 && /* ends with goto */
+            loop_end != block /* not a degenerate infinite loop */) {
+          // If the last block has an unconditional jump back to the header,
+          // then move it to be in front of the header in the assembly order.
+          DCHECK_EQ(block->rpo_number(), loop_end->successors()[0]);
+          loop_end->set_ao_number(RpoNumber::FromInt(ao++));
+          ao_blocks_->push_back(loop_end);
+          // This block will be the new machine-level loop header, so align
+          // this block instead of the loop header block.
+          loop_end->set_alignment(true);
+          header_align = false;
+        }
+      }
+      block->set_alignment(header_align);
+    }
+    if (block->loop_header().IsValid() && block->IsSwitchTarget()) {
+      block->set_alignment(true);
+    }
+    block->set_ao_number(RpoNumber::FromInt(ao++));
+    ao_blocks_->push_back(block);
+  }
+  // Add all leftover (deferred) blocks.
+  for (InstructionBlock* const block : *instruction_blocks_) {
+    if (block->ao_number() == invalid) {
+      block->set_ao_number(RpoNumber::FromInt(ao++));
+      ao_blocks_->push_back(block);
+    }
+  }
+  DCHECK_EQ(instruction_blocks_->size(), ao);
+}
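+// A worked example of the loop rotation above: given the RPO order B0,
+// B1 (loop header), B2, B3 (ending in an unconditional jump back to B1), and
+// assuming none of the blocks is deferred, B3 is placed in front of the
+// header, so the assembly order becomes B0, B3, B1, B2, and B3 rather than
+// B1 receives the loop alignment.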
+
+void InstructionSequence::RecomputeAssemblyOrderForTesting() {
+  RpoNumber invalid = RpoNumber::Invalid();
+  for (InstructionBlock* block : *instruction_blocks_) {
+    block->set_ao_number(invalid);
+  }
+  ComputeAssemblyOrder();
+}
+
+InstructionSequence::InstructionSequence(Isolate* isolate,
+                                         Zone* instruction_zone,
+                                         InstructionBlocks* instruction_blocks)
+    : isolate_(isolate),
+      zone_(instruction_zone),
+      instruction_blocks_(instruction_blocks),
+      ao_blocks_(nullptr),
+      source_positions_(zone()),
+      constants_(ConstantMap::key_compare(),
+                 ConstantMap::allocator_type(zone())),
+      immediates_(zone()),
+      instructions_(zone()),
+      next_virtual_register_(0),
+      reference_maps_(zone()),
+      representations_(zone()),
+      representation_mask_(0),
+      deoptimization_entries_(zone()),
+      current_block_(nullptr) {
+  ComputeAssemblyOrder();
+}
+
+int InstructionSequence::NextVirtualRegister() {
+  int virtual_register = next_virtual_register_++;
+  CHECK_NE(virtual_register, InstructionOperand::kInvalidVirtualRegister);
+  return virtual_register;
+}
+
+Instruction* InstructionSequence::GetBlockStart(RpoNumber rpo) const {
+  const InstructionBlock* block = InstructionBlockAt(rpo);
+  return InstructionAt(block->code_start());
+}
+
+void InstructionSequence::StartBlock(RpoNumber rpo) {
+  DCHECK_NULL(current_block_);
+  current_block_ = InstructionBlockAt(rpo);
+  int code_start = static_cast<int>(instructions_.size());
+  current_block_->set_code_start(code_start);
+}
+
+void InstructionSequence::EndBlock(RpoNumber rpo) {
+  int end = static_cast<int>(instructions_.size());
+  DCHECK_EQ(current_block_->rpo_number(), rpo);
+  CHECK(current_block_->code_start() >= 0 &&
+        current_block_->code_start() < end);
+  current_block_->set_code_end(end);
+  current_block_ = nullptr;
+}
+
+int InstructionSequence::AddInstruction(Instruction* instr) {
+  DCHECK_NOT_NULL(current_block_);
+  int index = static_cast<int>(instructions_.size());
+  instr->set_block(current_block_);
+  instructions_.push_back(instr);
+  if (instr->NeedsReferenceMap()) {
+    DCHECK_NULL(instr->reference_map());
+    ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+    reference_map->set_instruction_position(index);
+    instr->set_reference_map(reference_map);
+    reference_maps_.push_back(reference_map);
+  }
+  return index;
+}
+
+InstructionBlock* InstructionSequence::GetInstructionBlock(
+    int instruction_index) const {
+  return instructions()[instruction_index]->block();
+}
+
+static MachineRepresentation FilterRepresentation(MachineRepresentation rep) {
+  switch (rep) {
+    case MachineRepresentation::kBit:
+    case MachineRepresentation::kWord8:
+    case MachineRepresentation::kWord16:
+      return InstructionSequence::DefaultRepresentation();
+    case MachineRepresentation::kWord32:
+    case MachineRepresentation::kWord64:
+    case MachineRepresentation::kTaggedSigned:
+    case MachineRepresentation::kTaggedPointer:
+    case MachineRepresentation::kTagged:
+    case MachineRepresentation::kFloat32:
+    case MachineRepresentation::kFloat64:
+    case MachineRepresentation::kSimd128:
+    case MachineRepresentation::kCompressedPointer:
+    case MachineRepresentation::kCompressed:
+      return rep;
+    case MachineRepresentation::kNone:
+      break;
+  }
+
+  UNREACHABLE();
+}
+
+MachineRepresentation InstructionSequence::GetRepresentation(
+    int virtual_register) const {
+  DCHECK_LE(0, virtual_register);
+  DCHECK_LT(virtual_register, VirtualRegisterCount());
+  if (virtual_register >= static_cast<int>(representations_.size())) {
+    return DefaultRepresentation();
+  }
+  return representations_[virtual_register];
+}
+
+void InstructionSequence::MarkAsRepresentation(MachineRepresentation rep,
+                                               int virtual_register) {
+  DCHECK_LE(0, virtual_register);
+  DCHECK_LT(virtual_register, VirtualRegisterCount());
+  if (virtual_register >= static_cast<int>(representations_.size())) {
+    representations_.resize(VirtualRegisterCount(), DefaultRepresentation());
+  }
+  rep = FilterRepresentation(rep);
+  DCHECK_IMPLIES(representations_[virtual_register] != rep,
+                 representations_[virtual_register] == DefaultRepresentation());
+  representations_[virtual_register] = rep;
+  representation_mask_ |= RepresentationBit(rep);
+}
+
+int InstructionSequence::AddDeoptimizationEntry(
+    FrameStateDescriptor* descriptor, DeoptimizeKind kind,
+    DeoptimizeReason reason, FeedbackSource const& feedback) {
+  int deoptimization_id = static_cast<int>(deoptimization_entries_.size());
+  deoptimization_entries_.push_back(
+      DeoptimizationEntry(descriptor, kind, reason, feedback));
+  return deoptimization_id;
+}
+
+DeoptimizationEntry const& InstructionSequence::GetDeoptimizationEntry(
+    int state_id) {
+  return deoptimization_entries_[state_id];
+}
+
+RpoNumber InstructionSequence::InputRpo(Instruction* instr, size_t index) {
+  InstructionOperand* operand = instr->InputAt(index);
+  Constant constant =
+      operand->IsImmediate()
+          ? GetImmediate(ImmediateOperand::cast(operand))
+          : GetConstant(ConstantOperand::cast(operand)->virtual_register());
+  return constant.ToRpoNumber();
+}
+
+bool InstructionSequence::GetSourcePosition(const Instruction* instr,
+                                            SourcePosition* result) const {
+  auto it = source_positions_.find(instr);
+  if (it == source_positions_.end()) return false;
+  *result = it->second;
+  return true;
+}
+
+void InstructionSequence::SetSourcePosition(const Instruction* instr,
+                                            SourcePosition value) {
+  source_positions_.insert(std::make_pair(instr, value));
+}
+
+void InstructionSequence::Print() const {
+  StdoutStream{} << *this << std::endl;
+}
+
+void InstructionSequence::PrintBlock(int block_id) const {
+  RpoNumber rpo = RpoNumber::FromInt(block_id);
+  const InstructionBlock* block = InstructionBlockAt(rpo);
+  CHECK(block->rpo_number() == rpo);
+  StdoutStream{} << PrintableInstructionBlock{block, this} << std::endl;
+}
+
+const RegisterConfiguration*
+    InstructionSequence::registerConfigurationForTesting_ = nullptr;
+
+const RegisterConfiguration*
+InstructionSequence::RegisterConfigurationForTesting() {
+  DCHECK_NOT_NULL(registerConfigurationForTesting_);
+  return registerConfigurationForTesting_;
+}
+
+void InstructionSequence::SetRegisterConfigurationForTesting(
+    const RegisterConfiguration* regConfig) {
+  registerConfigurationForTesting_ = regConfig;
+  GetRegConfig = InstructionSequence::RegisterConfigurationForTesting;
+}
+
+namespace {
+
+size_t GetConservativeFrameSizeInBytes(FrameStateType type,
+                                       size_t parameters_count,
+                                       size_t locals_count,
+                                       BailoutId bailout_id) {
+  switch (type) {
+    case FrameStateType::kInterpretedFunction: {
+      auto info = InterpretedFrameInfo::Conservative(
+          static_cast<int>(parameters_count), static_cast<int>(locals_count));
+      return info.frame_size_in_bytes();
+    }
+    case FrameStateType::kArgumentsAdaptor: {
+      auto info = ArgumentsAdaptorFrameInfo::Conservative(
+          static_cast<int>(parameters_count));
+      return info.frame_size_in_bytes();
+    }
+    case FrameStateType::kConstructStub: {
+      auto info = ConstructStubFrameInfo::Conservative(
+          static_cast<int>(parameters_count));
+      return info.frame_size_in_bytes();
+    }
+    case FrameStateType::kBuiltinContinuation:
+    case FrameStateType::kJavaScriptBuiltinContinuation:
+    case FrameStateType::kJavaScriptBuiltinContinuationWithCatch: {
+      const RegisterConfiguration* config = RegisterConfiguration::Default();
+      auto info = BuiltinContinuationFrameInfo::Conservative(
+          static_cast<int>(parameters_count),
+          Builtins::CallInterfaceDescriptorFor(
+              Builtins::GetBuiltinFromBailoutId(bailout_id)),
+          config);
+      return info.frame_size_in_bytes();
+    }
+  }
+  UNREACHABLE();
+}
+
+size_t GetTotalConservativeFrameSizeInBytes(FrameStateType type,
+                                            size_t parameters_count,
+                                            size_t locals_count,
+                                            BailoutId bailout_id,
+                                            FrameStateDescriptor* outer_state) {
+  size_t outer_total_conservative_frame_size_in_bytes =
+      (outer_state == nullptr)
+          ? 0
+          : outer_state->total_conservative_frame_size_in_bytes();
+  return GetConservativeFrameSizeInBytes(type, parameters_count, locals_count,
+                                         bailout_id) +
+         outer_total_conservative_frame_size_in_bytes;
+}
+
+}  // namespace
+
+FrameStateDescriptor::FrameStateDescriptor(
+    Zone* zone, FrameStateType type, BailoutId bailout_id,
+    OutputFrameStateCombine state_combine, size_t parameters_count,
+    size_t locals_count, size_t stack_count,
+    MaybeHandle<SharedFunctionInfo> shared_info,
+    FrameStateDescriptor* outer_state)
+    : type_(type),
+      bailout_id_(bailout_id),
+      frame_state_combine_(state_combine),
+      parameters_count_(parameters_count),
+      locals_count_(locals_count),
+      stack_count_(stack_count),
+      total_conservative_frame_size_in_bytes_(
+          GetTotalConservativeFrameSizeInBytes(
+              type, parameters_count, locals_count, bailout_id, outer_state)),
+      values_(zone),
+      shared_info_(shared_info),
+      outer_state_(outer_state) {}
+
+size_t FrameStateDescriptor::GetHeight() const {
+  switch (type()) {
+    case FrameStateType::kInterpretedFunction:
+      return locals_count();  // The accumulator is *not* included.
+    case FrameStateType::kBuiltinContinuation:
+      // Custom, non-JS calling convention (that does not have a notion of
+      // a receiver or context).
+      return parameters_count();
+    case FrameStateType::kArgumentsAdaptor:
+    case FrameStateType::kConstructStub:
+    case FrameStateType::kJavaScriptBuiltinContinuation:
+    case FrameStateType::kJavaScriptBuiltinContinuationWithCatch:
+      // JS linkage. The parameters count
+      // - includes the receiver (input 1 in CreateArtificialFrameState, and
+      //   passed as part of stack parameters to
+      //   CreateJavaScriptBuiltinContinuationFrameState), and
+      // - does *not* include the context.
+      return parameters_count();
+  }
+  UNREACHABLE();
+}
+
+size_t FrameStateDescriptor::GetSize() const {
+  return 1 + parameters_count() + locals_count() + stack_count() +
+         (HasContext() ? 1 : 0);
+}
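+// A worked example: for an interpreted frame with 2 parameters, 3 locals, one
+// stack value and a context, GetSize() returns 1 + 2 + 3 + 1 + 1 = 8, where
+// the leading 1 presumably accounts for the function itself.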
+
+size_t FrameStateDescriptor::GetTotalSize() const {
+  size_t total_size = 0;
+  for (const FrameStateDescriptor* iter = this; iter != nullptr;
+       iter = iter->outer_state_) {
+    total_size += iter->GetSize();
+  }
+  return total_size;
+}
+
+size_t FrameStateDescriptor::GetFrameCount() const {
+  size_t count = 0;
+  for (const FrameStateDescriptor* iter = this; iter != nullptr;
+       iter = iter->outer_state_) {
+    ++count;
+  }
+  return count;
+}
+
+size_t FrameStateDescriptor::GetJSFrameCount() const {
+  size_t count = 0;
+  for (const FrameStateDescriptor* iter = this; iter != nullptr;
+       iter = iter->outer_state_) {
+    if (FrameStateFunctionInfo::IsJSFunctionType(iter->type_)) {
+      ++count;
+    }
+  }
+  return count;
+}
+
+std::ostream& operator<<(std::ostream& os, const RpoNumber& rpo) {
+  return os << rpo.ToSize();
+}
+
+std::ostream& operator<<(std::ostream& os, const InstructionSequence& code) {
+  for (size_t i = 0; i < code.immediates_.size(); ++i) {
+    Constant constant = code.immediates_[i];
+    os << "IMM#" << i << ": " << constant << "\n";
+  }
+  int i = 0;
+  for (ConstantMap::const_iterator it = code.constants_.begin();
+       it != code.constants_.end(); ++i, ++it) {
+    os << "CST#" << i << ": v" << it->first << " = " << it->second << "\n";
+  }
+  for (int i = 0; i < code.InstructionBlockCount(); i++) {
+    auto* block = code.InstructionBlockAt(RpoNumber::FromInt(i));
+    os << PrintableInstructionBlock{block, &code};
+  }
+  return os;
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/instruction.h b/src/compiler/backend/instruction.h
new file mode 100644
index 0000000..55fce0a
--- /dev/null
+++ b/src/compiler/backend/instruction.h
@@ -0,0 +1,1729 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_INSTRUCTION_H_
+#define V8_COMPILER_BACKEND_INSTRUCTION_H_
+
+#include <deque>
+#include <iosfwd>
+#include <map>
+#include <set>
+
+#include "src/base/compiler-specific.h"
+#include "src/codegen/external-reference.h"
+#include "src/codegen/register-arch.h"
+#include "src/codegen/source-position.h"
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction-codes.h"
+#include "src/compiler/common-operator.h"
+#include "src/compiler/feedback-source.h"
+#include "src/compiler/frame.h"
+#include "src/compiler/opcodes.h"
+#include "src/numbers/double.h"
+#include "src/zone/zone-allocator.h"
+
+namespace v8 {
+namespace internal {
+
+class RegisterConfiguration;
+
+namespace compiler {
+
+class Schedule;
+class SourcePositionTable;
+
+#if defined(V8_CC_MSVC) && defined(V8_TARGET_ARCH_IA32)
+// MSVC on x86 has issues with ALIGNAS(8) on InstructionOperand, but does
+// align the object to 8 bytes anyway (covered by a static assert below).
+// See crbug.com/v8/10796
+#define INSTRUCTION_OPERAND_ALIGN
+#else
+#define INSTRUCTION_OPERAND_ALIGN ALIGNAS(8)
+#endif
+
+class V8_EXPORT_PRIVATE INSTRUCTION_OPERAND_ALIGN InstructionOperand {
+ public:
+  static const int kInvalidVirtualRegister = -1;
+
+  enum Kind {
+    INVALID,
+    UNALLOCATED,
+    CONSTANT,
+    IMMEDIATE,
+    PENDING,
+    // Location operand kinds.
+    ALLOCATED,
+    FIRST_LOCATION_OPERAND_KIND = ALLOCATED
+    // Location operand kinds must be last.
+  };
+
+  InstructionOperand() : InstructionOperand(INVALID) {}
+
+  Kind kind() const { return KindField::decode(value_); }
+
+#define INSTRUCTION_OPERAND_PREDICATE(name, type) \
+  bool Is##name() const { return kind() == type; }
+  INSTRUCTION_OPERAND_PREDICATE(Invalid, INVALID)
+  // UnallocatedOperands are placeholder operands created before register
+  // allocation. They are later assigned registers and become AllocatedOperands.
+  INSTRUCTION_OPERAND_PREDICATE(Unallocated, UNALLOCATED)
+  // Constant operands participate in register allocation. They are allocated to
+  // registers but have a special "spilling" behavior. When a ConstantOperand
+  // value must be rematerialized, it is loaded from an immediate constant
+  // rather than from an unspilled slot.
+  INSTRUCTION_OPERAND_PREDICATE(Constant, CONSTANT)
+  // ImmediateOperands do not participate in register allocation and are only
+  // embedded directly in instructions, e.g. small integers and on some
+  // platforms Objects.
+  INSTRUCTION_OPERAND_PREDICATE(Immediate, IMMEDIATE)
+  // PendingOperands are pending allocation during register allocation and
+  // shouldn't be seen elsewhere. They chain together multiple operands that
+  // will be replaced together with the same value when finalized.
+  INSTRUCTION_OPERAND_PREDICATE(Pending, PENDING)
+  // AllocatedOperands are registers or stack slots that are assigned by the
+  // register allocator and are always associated with a virtual register.
+  INSTRUCTION_OPERAND_PREDICATE(Allocated, ALLOCATED)
+#undef INSTRUCTION_OPERAND_PREDICATE
+
+  inline bool IsAnyLocationOperand() const;
+  inline bool IsLocationOperand() const;
+  inline bool IsFPLocationOperand() const;
+  inline bool IsAnyRegister() const;
+  inline bool IsRegister() const;
+  inline bool IsFPRegister() const;
+  inline bool IsFloatRegister() const;
+  inline bool IsDoubleRegister() const;
+  inline bool IsSimd128Register() const;
+  inline bool IsAnyStackSlot() const;
+  inline bool IsStackSlot() const;
+  inline bool IsFPStackSlot() const;
+  inline bool IsFloatStackSlot() const;
+  inline bool IsDoubleStackSlot() const;
+  inline bool IsSimd128StackSlot() const;
+
+  template <typename SubKindOperand>
+  static SubKindOperand* New(Zone* zone, const SubKindOperand& op) {
+    return zone->New<SubKindOperand>(op);
+  }
+
+  static void ReplaceWith(InstructionOperand* dest,
+                          const InstructionOperand* src) {
+    *dest = *src;
+  }
+
+  bool Equals(const InstructionOperand& that) const {
+    if (IsPending()) {
+      // Pending operands are only equal if they are the same operand.
+      return this == &that;
+    }
+    return this->value_ == that.value_;
+  }
+
+  bool Compare(const InstructionOperand& that) const {
+    return this->value_ < that.value_;
+  }
+
+  bool EqualsCanonicalized(const InstructionOperand& that) const {
+    if (IsPending()) {
+      // Pending operands can't be canonicalized, so just compare for equality.
+      return Equals(that);
+    }
+    return this->GetCanonicalizedValue() == that.GetCanonicalizedValue();
+  }
+
+  bool CompareCanonicalized(const InstructionOperand& that) const {
+    DCHECK(!IsPending());
+    return this->GetCanonicalizedValue() < that.GetCanonicalizedValue();
+  }
+
+  bool InterferesWith(const InstructionOperand& other) const;
+
+  // APIs to aid debugging. For general-stream APIs, use operator<<.
+  void Print() const;
+
+ protected:
+  explicit InstructionOperand(Kind kind) : value_(KindField::encode(kind)) {}
+
+  inline uint64_t GetCanonicalizedValue() const;
+
+  using KindField = base::BitField64<Kind, 0, 3>;
+
+  uint64_t value_;
+};
+
+using InstructionOperandVector = ZoneVector<InstructionOperand>;
+
+std::ostream& operator<<(std::ostream&, const InstructionOperand&);
+
+#define INSTRUCTION_OPERAND_CASTS(OperandType, OperandKind)      \
+                                                                 \
+  static OperandType* cast(InstructionOperand* op) {             \
+    DCHECK_EQ(OperandKind, op->kind());                          \
+    return static_cast<OperandType*>(op);                        \
+  }                                                              \
+                                                                 \
+  static const OperandType* cast(const InstructionOperand* op) { \
+    DCHECK_EQ(OperandKind, op->kind());                          \
+    return static_cast<const OperandType*>(op);                  \
+  }                                                              \
+                                                                 \
+  static OperandType cast(const InstructionOperand& op) {        \
+    DCHECK_EQ(OperandKind, op.kind());                           \
+    return *static_cast<const OperandType*>(&op);                \
+  }
+
+class UnallocatedOperand final : public InstructionOperand {
+ public:
+  enum BasicPolicy { FIXED_SLOT, EXTENDED_POLICY };
+
+  enum ExtendedPolicy {
+    NONE,
+    REGISTER_OR_SLOT,
+    REGISTER_OR_SLOT_OR_CONSTANT,
+    FIXED_REGISTER,
+    FIXED_FP_REGISTER,
+    MUST_HAVE_REGISTER,
+    MUST_HAVE_SLOT,
+    SAME_AS_FIRST_INPUT
+  };
+
+  // Lifetime of operand inside the instruction.
+  enum Lifetime {
+    // USED_AT_START operand is guaranteed to be live only at instruction start.
+    // The register allocator is free to assign the same register to some other
+    // operand used inside instruction (i.e. temporary or output).
+    USED_AT_START,
+
+    // USED_AT_END operand is treated as live until the end of instruction.
+    // This means that register allocator will not reuse its register for any
+    // other operand inside instruction.
+    USED_AT_END
+  };
+
+  UnallocatedOperand(ExtendedPolicy policy, int virtual_register)
+      : UnallocatedOperand(virtual_register) {
+    value_ |= BasicPolicyField::encode(EXTENDED_POLICY);
+    value_ |= ExtendedPolicyField::encode(policy);
+    value_ |= LifetimeField::encode(USED_AT_END);
+  }
+
+  UnallocatedOperand(BasicPolicy policy, int index, int virtual_register)
+      : UnallocatedOperand(virtual_register) {
+    DCHECK(policy == FIXED_SLOT);
+    value_ |= BasicPolicyField::encode(policy);
+    value_ |= static_cast<uint64_t>(static_cast<int64_t>(index))
+              << FixedSlotIndexField::kShift;
+    DCHECK(this->fixed_slot_index() == index);
+  }
+
+  UnallocatedOperand(ExtendedPolicy policy, int index, int virtual_register)
+      : UnallocatedOperand(virtual_register) {
+    DCHECK(policy == FIXED_REGISTER || policy == FIXED_FP_REGISTER);
+    value_ |= BasicPolicyField::encode(EXTENDED_POLICY);
+    value_ |= ExtendedPolicyField::encode(policy);
+    value_ |= LifetimeField::encode(USED_AT_END);
+    value_ |= FixedRegisterField::encode(index);
+  }
+
+  UnallocatedOperand(ExtendedPolicy policy, Lifetime lifetime,
+                     int virtual_register)
+      : UnallocatedOperand(virtual_register) {
+    value_ |= BasicPolicyField::encode(EXTENDED_POLICY);
+    value_ |= ExtendedPolicyField::encode(policy);
+    value_ |= LifetimeField::encode(lifetime);
+  }
+
+  UnallocatedOperand(int reg_id, int slot_id, int virtual_register)
+      : UnallocatedOperand(FIXED_REGISTER, reg_id, virtual_register) {
+    value_ |= HasSecondaryStorageField::encode(true);
+    value_ |= SecondaryStorageField::encode(slot_id);
+  }
+
+  UnallocatedOperand(const UnallocatedOperand& other, int virtual_register) {
+    DCHECK_NE(kInvalidVirtualRegister, virtual_register);
+    value_ = VirtualRegisterField::update(
+        other.value_, static_cast<uint32_t>(virtual_register));
+  }
+
+  // Predicates for the operand policy.
+  bool HasRegisterOrSlotPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == REGISTER_OR_SLOT;
+  }
+  bool HasRegisterOrSlotOrConstantPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == REGISTER_OR_SLOT_OR_CONSTANT;
+  }
+  bool HasFixedPolicy() const {
+    return basic_policy() == FIXED_SLOT ||
+           extended_policy() == FIXED_REGISTER ||
+           extended_policy() == FIXED_FP_REGISTER;
+  }
+  bool HasRegisterPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == MUST_HAVE_REGISTER;
+  }
+  bool HasSlotPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == MUST_HAVE_SLOT;
+  }
+  bool HasSameAsInputPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == SAME_AS_FIRST_INPUT;
+  }
+  bool HasFixedSlotPolicy() const { return basic_policy() == FIXED_SLOT; }
+  bool HasFixedRegisterPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == FIXED_REGISTER;
+  }
+  bool HasFixedFPRegisterPolicy() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == FIXED_FP_REGISTER;
+  }
+  bool HasSecondaryStorage() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           extended_policy() == FIXED_REGISTER &&
+           HasSecondaryStorageField::decode(value_);
+  }
+  int GetSecondaryStorage() const {
+    DCHECK(HasSecondaryStorage());
+    return SecondaryStorageField::decode(value_);
+  }
+
+  // [basic_policy]: Distinguish between FIXED_SLOT and all other policies.
+  BasicPolicy basic_policy() const { return BasicPolicyField::decode(value_); }
+
+  // [extended_policy]: Only for non-FIXED_SLOT. The finer-grained policy.
+  ExtendedPolicy extended_policy() const {
+    DCHECK(basic_policy() == EXTENDED_POLICY);
+    return ExtendedPolicyField::decode(value_);
+  }
+
+  // [fixed_slot_index]: Only for FIXED_SLOT.
+  int fixed_slot_index() const {
+    DCHECK(HasFixedSlotPolicy());
+    return static_cast<int>(static_cast<int64_t>(value_) >>
+                            FixedSlotIndexField::kShift);
+  }
+
+  // [fixed_register_index]: Only for FIXED_REGISTER or FIXED_FP_REGISTER.
+  int fixed_register_index() const {
+    DCHECK(HasFixedRegisterPolicy() || HasFixedFPRegisterPolicy());
+    return FixedRegisterField::decode(value_);
+  }
+
+  // [virtual_register]: The virtual register ID for this operand.
+  int32_t virtual_register() const {
+    return static_cast<int32_t>(VirtualRegisterField::decode(value_));
+  }
+
+  // [lifetime]: Only for non-FIXED_SLOT.
+  bool IsUsedAtStart() const {
+    return basic_policy() == EXTENDED_POLICY &&
+           LifetimeField::decode(value_) == USED_AT_START;
+  }
+
+  INSTRUCTION_OPERAND_CASTS(UnallocatedOperand, UNALLOCATED)
+
+  // The encoding used for UnallocatedOperand operands depends on the policy
+  // that is stored within the operand. The FIXED_SLOT policy uses a compact
+  // encoding because it accommodates a larger payload.
+  //
+  // For FIXED_SLOT policy:
+  //     +------------------------------------------------+
+  //     |      slot_index   | 0 | virtual_register | 001 |
+  //     +------------------------------------------------+
+  //
+  // For all other (extended) policies:
+  //     +-----------------------------------------------------+
+  //     |  reg_index  | L | PPP |  1 | virtual_register | 001 |
+  //     +-----------------------------------------------------+
+  //     L ... Lifetime
+  //     P ... Policy
+  //
+  // The slot index is a signed value which requires us to decode it manually
+  // instead of using the base::BitField utility class.
+
+  STATIC_ASSERT(KindField::kSize == 3);
+
+  using VirtualRegisterField = base::BitField64<uint32_t, 3, 32>;
+
+  // base::BitFields for all unallocated operands.
+  using BasicPolicyField = base::BitField64<BasicPolicy, 35, 1>;
+
+  // BitFields specific to BasicPolicy::FIXED_SLOT.
+  using FixedSlotIndexField = base::BitField64<int, 36, 28>;
+
+  // BitFields specific to BasicPolicy::EXTENDED_POLICY.
+  using ExtendedPolicyField = base::BitField64<ExtendedPolicy, 36, 3>;
+  using LifetimeField = base::BitField64<Lifetime, 39, 1>;
+  using HasSecondaryStorageField = base::BitField64<bool, 40, 1>;
+  using FixedRegisterField = base::BitField64<int, 41, 6>;
+  using SecondaryStorageField = base::BitField64<int, 47, 3>;
+
+ private:
+  explicit UnallocatedOperand(int virtual_register)
+      : InstructionOperand(UNALLOCATED) {
+    value_ |=
+        VirtualRegisterField::encode(static_cast<uint32_t>(virtual_register));
+  }
+};
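+
+// Illustrative sketch of constructing unallocated operands; |vreg| is assumed
+// to be a virtual register id, e.g. one returned by
+// InstructionSequence::NextVirtualRegister().
+//
+//   // Any machine register, live until the end of the instruction:
+//   UnallocatedOperand any_reg(UnallocatedOperand::MUST_HAVE_REGISTER, vreg);
+//
+//   // Pinned to the general-purpose register with code 2:
+//   UnallocatedOperand fixed_reg(UnallocatedOperand::FIXED_REGISTER, 2, vreg);
+//
+//   // Pinned to spill slot 1:
+//   UnallocatedOperand fixed_slot(UnallocatedOperand::FIXED_SLOT, 1, vreg);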
+
+class ConstantOperand : public InstructionOperand {
+ public:
+  explicit ConstantOperand(int virtual_register)
+      : InstructionOperand(CONSTANT) {
+    value_ |=
+        VirtualRegisterField::encode(static_cast<uint32_t>(virtual_register));
+  }
+
+  int32_t virtual_register() const {
+    return static_cast<int32_t>(VirtualRegisterField::decode(value_));
+  }
+
+  static ConstantOperand* New(Zone* zone, int virtual_register) {
+    return InstructionOperand::New(zone, ConstantOperand(virtual_register));
+  }
+
+  INSTRUCTION_OPERAND_CASTS(ConstantOperand, CONSTANT)
+
+  STATIC_ASSERT(KindField::kSize == 3);
+  using VirtualRegisterField = base::BitField64<uint32_t, 3, 32>;
+};
+
+class ImmediateOperand : public InstructionOperand {
+ public:
+  enum ImmediateType { INLINE, INDEXED };
+
+  explicit ImmediateOperand(ImmediateType type, int32_t value)
+      : InstructionOperand(IMMEDIATE) {
+    value_ |= TypeField::encode(type);
+    value_ |= static_cast<uint64_t>(static_cast<int64_t>(value))
+              << ValueField::kShift;
+  }
+
+  ImmediateType type() const { return TypeField::decode(value_); }
+
+  int32_t inline_value() const {
+    DCHECK_EQ(INLINE, type());
+    return static_cast<int64_t>(value_) >> ValueField::kShift;
+  }
+
+  int32_t indexed_value() const {
+    DCHECK_EQ(INDEXED, type());
+    return static_cast<int64_t>(value_) >> ValueField::kShift;
+  }
+
+  static ImmediateOperand* New(Zone* zone, ImmediateType type, int32_t value) {
+    return InstructionOperand::New(zone, ImmediateOperand(type, value));
+  }
+
+  INSTRUCTION_OPERAND_CASTS(ImmediateOperand, IMMEDIATE)
+
+  STATIC_ASSERT(KindField::kSize == 3);
+  using TypeField = base::BitField64<ImmediateType, 3, 1>;
+  using ValueField = base::BitField64<int32_t, 32, 32>;
+};
+
+class PendingOperand : public InstructionOperand {
+ public:
+  PendingOperand() : InstructionOperand(PENDING) {}
+  explicit PendingOperand(PendingOperand* next_operand) : PendingOperand() {
+    set_next(next_operand);
+  }
+
+  void set_next(PendingOperand* next) {
+    DCHECK_NULL(this->next());
+    uintptr_t shifted_value =
+        reinterpret_cast<uintptr_t>(next) >> kPointerShift;
+    DCHECK_EQ(reinterpret_cast<uintptr_t>(next),
+              shifted_value << kPointerShift);
+    value_ |= NextOperandField::encode(static_cast<uint64_t>(shifted_value));
+  }
+
+  PendingOperand* next() const {
+    uintptr_t shifted_value =
+        static_cast<uint64_t>(NextOperandField::decode(value_));
+    return reinterpret_cast<PendingOperand*>(shifted_value << kPointerShift);
+  }
+
+  static PendingOperand* New(Zone* zone, PendingOperand* previous_operand) {
+    return InstructionOperand::New(zone, PendingOperand(previous_operand));
+  }
+
+  INSTRUCTION_OPERAND_CASTS(PendingOperand, PENDING)
+
+ private:
+  // Operands are uint64_t values and therefore aligned to 8-byte boundaries,
+  // so we can shift off the bottom three zero bits without losing data.
+  static const uint64_t kPointerShift = 3;
+  STATIC_ASSERT(alignof(InstructionOperand) >= (1 << kPointerShift));
+
+  STATIC_ASSERT(KindField::kSize == 3);
+  using NextOperandField = base::BitField64<uint64_t, 3, 61>;
+};
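+
+// Minimal sketch of how a chain of pending operands is built (|zone| is
+// assumed to be the operand allocation zone):
+//
+//   PendingOperand* first = PendingOperand::New(zone, nullptr);
+//   PendingOperand* second = PendingOperand::New(zone, first);
+//   DCHECK_EQ(first, second->next());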
+
+class LocationOperand : public InstructionOperand {
+ public:
+  enum LocationKind { REGISTER, STACK_SLOT };
+
+  LocationOperand(InstructionOperand::Kind operand_kind,
+                  LocationOperand::LocationKind location_kind,
+                  MachineRepresentation rep, int index)
+      : InstructionOperand(operand_kind) {
+    DCHECK_IMPLIES(location_kind == REGISTER, index >= 0);
+    DCHECK(IsSupportedRepresentation(rep));
+    value_ |= LocationKindField::encode(location_kind);
+    value_ |= RepresentationField::encode(rep);
+    value_ |= static_cast<uint64_t>(static_cast<int64_t>(index))
+              << IndexField::kShift;
+  }
+
+  int index() const {
+    DCHECK(IsStackSlot() || IsFPStackSlot());
+    return static_cast<int64_t>(value_) >> IndexField::kShift;
+  }
+
+  int register_code() const {
+    DCHECK(IsRegister() || IsFPRegister());
+    return static_cast<int64_t>(value_) >> IndexField::kShift;
+  }
+
+  Register GetRegister() const {
+    DCHECK(IsRegister());
+    return Register::from_code(register_code());
+  }
+
+  FloatRegister GetFloatRegister() const {
+    DCHECK(IsFloatRegister());
+    return FloatRegister::from_code(register_code());
+  }
+
+  DoubleRegister GetDoubleRegister() const {
+    // On platforms where FloatRegister, DoubleRegister, and Simd128Register
+    // are all the same type, it's convenient to treat everything as a
+    // DoubleRegister, so be lax about type checking here.
+    DCHECK(IsFPRegister());
+    return DoubleRegister::from_code(register_code());
+  }
+
+  Simd128Register GetSimd128Register() const {
+    DCHECK(IsSimd128Register());
+    return Simd128Register::from_code(register_code());
+  }
+
+  LocationKind location_kind() const {
+    return LocationKindField::decode(value_);
+  }
+
+  MachineRepresentation representation() const {
+    return RepresentationField::decode(value_);
+  }
+
+  static bool IsSupportedRepresentation(MachineRepresentation rep) {
+    switch (rep) {
+      case MachineRepresentation::kWord32:
+      case MachineRepresentation::kWord64:
+      case MachineRepresentation::kFloat32:
+      case MachineRepresentation::kFloat64:
+      case MachineRepresentation::kSimd128:
+      case MachineRepresentation::kTaggedSigned:
+      case MachineRepresentation::kTaggedPointer:
+      case MachineRepresentation::kTagged:
+      case MachineRepresentation::kCompressedPointer:
+      case MachineRepresentation::kCompressed:
+        return true;
+      case MachineRepresentation::kBit:
+      case MachineRepresentation::kWord8:
+      case MachineRepresentation::kWord16:
+      case MachineRepresentation::kNone:
+        return false;
+    }
+    UNREACHABLE();
+  }
+
+  // Return true if the locations can be moved to one another.
+  bool IsCompatible(LocationOperand* op);
+
+  static LocationOperand* cast(InstructionOperand* op) {
+    DCHECK(op->IsAnyLocationOperand());
+    return static_cast<LocationOperand*>(op);
+  }
+
+  static const LocationOperand* cast(const InstructionOperand* op) {
+    DCHECK(op->IsAnyLocationOperand());
+    return static_cast<const LocationOperand*>(op);
+  }
+
+  static LocationOperand cast(const InstructionOperand& op) {
+    DCHECK(op.IsAnyLocationOperand());
+    return *static_cast<const LocationOperand*>(&op);
+  }
+
+  STATIC_ASSERT(KindField::kSize == 3);
+  using LocationKindField = base::BitField64<LocationKind, 3, 2>;
+  using RepresentationField = base::BitField64<MachineRepresentation, 5, 8>;
+  using IndexField = base::BitField64<int32_t, 35, 29>;
+};
+
+class AllocatedOperand : public LocationOperand {
+ public:
+  AllocatedOperand(LocationKind kind, MachineRepresentation rep, int index)
+      : LocationOperand(ALLOCATED, kind, rep, index) {}
+
+  static AllocatedOperand* New(Zone* zone, LocationKind kind,
+                               MachineRepresentation rep, int index) {
+    return InstructionOperand::New(zone, AllocatedOperand(kind, rep, index));
+  }
+
+  INSTRUCTION_OPERAND_CASTS(AllocatedOperand, ALLOCATED)
+};
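+
+// Illustrative sketch of operands the register allocator might produce (the
+// register code and slot index here are arbitrary):
+//
+//   // General-purpose register with code 3 holding a tagged value:
+//   AllocatedOperand reg(LocationOperand::REGISTER,
+//                        MachineRepresentation::kTagged, 3);
+//
+//   // Stack slot 2 holding a float64:
+//   AllocatedOperand slot(LocationOperand::STACK_SLOT,
+//                         MachineRepresentation::kFloat64, 2);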
+
+#undef INSTRUCTION_OPERAND_CASTS
+
+bool InstructionOperand::IsAnyLocationOperand() const {
+  return this->kind() >= FIRST_LOCATION_OPERAND_KIND;
+}
+
+bool InstructionOperand::IsLocationOperand() const {
+  return IsAnyLocationOperand() &&
+         !IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsFPLocationOperand() const {
+  return IsAnyLocationOperand() &&
+         IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsAnyRegister() const {
+  return IsAnyLocationOperand() &&
+         LocationOperand::cast(this)->location_kind() ==
+             LocationOperand::REGISTER;
+}
+
+bool InstructionOperand::IsRegister() const {
+  return IsAnyRegister() &&
+         !IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsFPRegister() const {
+  return IsAnyRegister() &&
+         IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsFloatRegister() const {
+  return IsAnyRegister() && LocationOperand::cast(this)->representation() ==
+                                MachineRepresentation::kFloat32;
+}
+
+bool InstructionOperand::IsDoubleRegister() const {
+  return IsAnyRegister() && LocationOperand::cast(this)->representation() ==
+                                MachineRepresentation::kFloat64;
+}
+
+bool InstructionOperand::IsSimd128Register() const {
+  return IsAnyRegister() && LocationOperand::cast(this)->representation() ==
+                                MachineRepresentation::kSimd128;
+}
+
+bool InstructionOperand::IsAnyStackSlot() const {
+  return IsAnyLocationOperand() &&
+         LocationOperand::cast(this)->location_kind() ==
+             LocationOperand::STACK_SLOT;
+}
+
+bool InstructionOperand::IsStackSlot() const {
+  return IsAnyStackSlot() &&
+         !IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsFPStackSlot() const {
+  return IsAnyStackSlot() &&
+         IsFloatingPoint(LocationOperand::cast(this)->representation());
+}
+
+bool InstructionOperand::IsFloatStackSlot() const {
+  return IsAnyLocationOperand() &&
+         LocationOperand::cast(this)->location_kind() ==
+             LocationOperand::STACK_SLOT &&
+         LocationOperand::cast(this)->representation() ==
+             MachineRepresentation::kFloat32;
+}
+
+bool InstructionOperand::IsDoubleStackSlot() const {
+  return IsAnyLocationOperand() &&
+         LocationOperand::cast(this)->location_kind() ==
+             LocationOperand::STACK_SLOT &&
+         LocationOperand::cast(this)->representation() ==
+             MachineRepresentation::kFloat64;
+}
+
+bool InstructionOperand::IsSimd128StackSlot() const {
+  return IsAnyLocationOperand() &&
+         LocationOperand::cast(this)->location_kind() ==
+             LocationOperand::STACK_SLOT &&
+         LocationOperand::cast(this)->representation() ==
+             MachineRepresentation::kSimd128;
+}
+
+uint64_t InstructionOperand::GetCanonicalizedValue() const {
+  if (IsAnyLocationOperand()) {
+    MachineRepresentation canonical = MachineRepresentation::kNone;
+    if (IsFPRegister()) {
+      if (kSimpleFPAliasing) {
+        // We treat all FP register operands the same for simple aliasing.
+        canonical = MachineRepresentation::kFloat64;
+      } else {
+        // We need to distinguish FP register operands of different reps when
+        // aliasing is not simple (e.g. ARM).
+        canonical = LocationOperand::cast(this)->representation();
+      }
+    }
+    return InstructionOperand::KindField::update(
+        LocationOperand::RepresentationField::update(this->value_, canonical),
+        LocationOperand::ALLOCATED);
+  }
+  return this->value_;
+}
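+
+// Canonicalization example (illustrative): with simple FP aliasing, two FP
+// registers with the same code but different representations canonicalize to
+// the same value.
+//
+//   AllocatedOperand s(LocationOperand::REGISTER,
+//                      MachineRepresentation::kFloat32, 7);
+//   AllocatedOperand d(LocationOperand::REGISTER,
+//                      MachineRepresentation::kFloat64, 7);
+//   // s.EqualsCanonicalized(d) is true when kSimpleFPAliasing holds.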
+
+// Required for maps that don't care about machine type.
+struct CompareOperandModuloType {
+  bool operator()(const InstructionOperand& a,
+                  const InstructionOperand& b) const {
+    return a.CompareCanonicalized(b);
+  }
+};
+
+class V8_EXPORT_PRIVATE MoveOperands final
+    : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  MoveOperands(const InstructionOperand& source,
+               const InstructionOperand& destination)
+      : source_(source), destination_(destination) {
+    DCHECK(!source.IsInvalid() && !destination.IsInvalid());
+  }
+
+  MoveOperands(const MoveOperands&) = delete;
+  MoveOperands& operator=(const MoveOperands&) = delete;
+
+  const InstructionOperand& source() const { return source_; }
+  InstructionOperand& source() { return source_; }
+  void set_source(const InstructionOperand& operand) { source_ = operand; }
+
+  const InstructionOperand& destination() const { return destination_; }
+  InstructionOperand& destination() { return destination_; }
+  void set_destination(const InstructionOperand& operand) {
+    destination_ = operand;
+  }
+
+  // The gap resolver marks moves as "in-progress" by clearing the
+  // destination (but not the source).
+  bool IsPending() const {
+    return destination_.IsInvalid() && !source_.IsInvalid();
+  }
+  void SetPending() { destination_ = InstructionOperand(); }
+
+  // A move is redundant if it's been eliminated or if its source and
+  // destination are the same.
+  bool IsRedundant() const {
+    DCHECK_IMPLIES(!destination_.IsInvalid(), !destination_.IsConstant());
+    return IsEliminated() || source_.EqualsCanonicalized(destination_);
+  }
+
+  // We clear both operands to indicate a move that has been eliminated.
+  void Eliminate() { source_ = destination_ = InstructionOperand(); }
+  bool IsEliminated() const {
+    DCHECK_IMPLIES(source_.IsInvalid(), destination_.IsInvalid());
+    return source_.IsInvalid();
+  }
+
+  // APIs to aid debugging. For general-stream APIs, use operator<<.
+  void Print() const;
+
+ private:
+  InstructionOperand source_;
+  InstructionOperand destination_;
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, const MoveOperands&);
+
+class V8_EXPORT_PRIVATE ParallelMove final
+    : public NON_EXPORTED_BASE(ZoneVector<MoveOperands*>),
+      public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  explicit ParallelMove(Zone* zone) : ZoneVector<MoveOperands*>(zone) {}
+  ParallelMove(const ParallelMove&) = delete;
+  ParallelMove& operator=(const ParallelMove&) = delete;
+
+  MoveOperands* AddMove(const InstructionOperand& from,
+                        const InstructionOperand& to) {
+    Zone* zone = get_allocator().zone();
+    return AddMove(from, to, zone);
+  }
+
+  MoveOperands* AddMove(const InstructionOperand& from,
+                        const InstructionOperand& to,
+                        Zone* operand_allocation_zone) {
+    if (from.EqualsCanonicalized(to)) return nullptr;
+    MoveOperands* move = operand_allocation_zone->New<MoveOperands>(from, to);
+    if (empty()) reserve(4);
+    push_back(move);
+    return move;
+  }
+
+  bool IsRedundant() const;
+
+  // Prepare this ParallelMove to insert move as if it happened in a subsequent
+  // ParallelMove.  move->source() may be changed.  Any MoveOperands added to
+  // to_eliminate must be Eliminated.
+  void PrepareInsertAfter(MoveOperands* move,
+                          ZoneVector<MoveOperands*>* to_eliminate) const;
+};
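+
+// Sketch of recording gap moves (|zone| is assumed to be an instruction zone,
+// |r0| and |r1| two allocated operands):
+//
+//   ParallelMove* moves = zone->New<ParallelMove>(zone);
+//   moves->AddMove(r0, r1);  // Returns the new MoveOperands.
+//   moves->AddMove(r0, r0);  // Redundant: returns nullptr, nothing is added.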
+
+std::ostream& operator<<(std::ostream&, const ParallelMove&);
+
+class ReferenceMap final : public ZoneObject {
+ public:
+  explicit ReferenceMap(Zone* zone)
+      : reference_operands_(8, zone), instruction_position_(-1) {}
+
+  const ZoneVector<InstructionOperand>& reference_operands() const {
+    return reference_operands_;
+  }
+  int instruction_position() const { return instruction_position_; }
+
+  void set_instruction_position(int pos) {
+    DCHECK_EQ(-1, instruction_position_);
+    instruction_position_ = pos;
+  }
+
+  void RecordReference(const AllocatedOperand& op);
+
+ private:
+  friend std::ostream& operator<<(std::ostream&, const ReferenceMap&);
+
+  ZoneVector<InstructionOperand> reference_operands_;
+  int instruction_position_;
+};
+
+std::ostream& operator<<(std::ostream&, const ReferenceMap&);
+
+class InstructionBlock;
+
+class V8_EXPORT_PRIVATE Instruction final {
+ public:
+  Instruction(const Instruction&) = delete;
+  Instruction& operator=(const Instruction&) = delete;
+
+  size_t OutputCount() const { return OutputCountField::decode(bit_field_); }
+  const InstructionOperand* OutputAt(size_t i) const {
+    DCHECK_LT(i, OutputCount());
+    return &operands_[i];
+  }
+  InstructionOperand* OutputAt(size_t i) {
+    DCHECK_LT(i, OutputCount());
+    return &operands_[i];
+  }
+
+  bool HasOutput() const { return OutputCount() > 0; }
+  const InstructionOperand* Output() const { return OutputAt(0); }
+  InstructionOperand* Output() { return OutputAt(0); }
+
+  size_t InputCount() const { return InputCountField::decode(bit_field_); }
+  const InstructionOperand* InputAt(size_t i) const {
+    DCHECK_LT(i, InputCount());
+    return &operands_[OutputCount() + i];
+  }
+  InstructionOperand* InputAt(size_t i) {
+    DCHECK_LT(i, InputCount());
+    return &operands_[OutputCount() + i];
+  }
+
+  size_t TempCount() const { return TempCountField::decode(bit_field_); }
+  const InstructionOperand* TempAt(size_t i) const {
+    DCHECK_LT(i, TempCount());
+    return &operands_[OutputCount() + InputCount() + i];
+  }
+  InstructionOperand* TempAt(size_t i) {
+    DCHECK_LT(i, TempCount());
+    return &operands_[OutputCount() + InputCount() + i];
+  }
+
+  InstructionCode opcode() const { return opcode_; }
+  ArchOpcode arch_opcode() const { return ArchOpcodeField::decode(opcode()); }
+  AddressingMode addressing_mode() const {
+    return AddressingModeField::decode(opcode());
+  }
+  FlagsMode flags_mode() const { return FlagsModeField::decode(opcode()); }
+  FlagsCondition flags_condition() const {
+    return FlagsConditionField::decode(opcode());
+  }
+
+  static Instruction* New(Zone* zone, InstructionCode opcode) {
+    return New(zone, opcode, 0, nullptr, 0, nullptr, 0, nullptr);
+  }
+
+  static Instruction* New(Zone* zone, InstructionCode opcode,
+                          size_t output_count, InstructionOperand* outputs,
+                          size_t input_count, InstructionOperand* inputs,
+                          size_t temp_count, InstructionOperand* temps) {
+    DCHECK(output_count == 0 || outputs != nullptr);
+    DCHECK(input_count == 0 || inputs != nullptr);
+    DCHECK(temp_count == 0 || temps != nullptr);
+    // TODO(turbofan): Handle this gracefully. See crbug.com/582702.
+    CHECK(InputCountField::is_valid(input_count));
+
+    size_t total_extra_ops = output_count + input_count + temp_count;
+    if (total_extra_ops != 0) total_extra_ops--;
+    int size = static_cast<int>(
+        RoundUp(sizeof(Instruction), sizeof(InstructionOperand)) +
+        total_extra_ops * sizeof(InstructionOperand));
+    return new (zone->Allocate<Instruction>(size)) Instruction(
+        opcode, output_count, outputs, input_count, inputs, temp_count, temps);
+  }
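+
+  // Illustrative call, assuming |outputs| and |inputs| are operand arrays
+  // built by the instruction selector:
+  //
+  //   Instruction* instr =
+  //       Instruction::New(zone, opcode, 1, outputs, 2, inputs, 0, nullptr);
+  //
+  // The trailing operands_[1] member is over-allocated so that all
+  // 1 + 2 + 0 operands are stored inline after the Instruction itself.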
+
+  Instruction* MarkAsCall() {
+    bit_field_ = IsCallField::update(bit_field_, true);
+    return this;
+  }
+  bool IsCall() const { return IsCallField::decode(bit_field_); }
+  bool NeedsReferenceMap() const { return IsCall(); }
+  bool HasReferenceMap() const { return reference_map_ != nullptr; }
+
+  bool ClobbersRegisters() const { return IsCall(); }
+  bool ClobbersTemps() const { return IsCall(); }
+  bool ClobbersDoubleRegisters() const { return IsCall(); }
+  ReferenceMap* reference_map() const { return reference_map_; }
+
+  void set_reference_map(ReferenceMap* map) {
+    DCHECK(NeedsReferenceMap());
+    DCHECK(!reference_map_);
+    reference_map_ = map;
+  }
+
+  void OverwriteWithNop() {
+    opcode_ = ArchOpcodeField::encode(kArchNop);
+    bit_field_ = 0;
+    reference_map_ = nullptr;
+  }
+
+  bool IsNop() const { return arch_opcode() == kArchNop; }
+
+  bool IsDeoptimizeCall() const {
+    return arch_opcode() == ArchOpcode::kArchDeoptimize ||
+           FlagsModeField::decode(opcode()) == kFlags_deoptimize ||
+           FlagsModeField::decode(opcode()) == kFlags_deoptimize_and_poison;
+  }
+
+  bool IsTrap() const {
+    return FlagsModeField::decode(opcode()) == kFlags_trap;
+  }
+
+  bool IsJump() const { return arch_opcode() == ArchOpcode::kArchJmp; }
+  bool IsRet() const { return arch_opcode() == ArchOpcode::kArchRet; }
+  bool IsTailCall() const {
+    return arch_opcode() <= ArchOpcode::kArchTailCallWasm;
+  }
+  bool IsThrow() const {
+    return arch_opcode() == ArchOpcode::kArchThrowTerminator;
+  }
+
+  static constexpr bool IsCallWithDescriptorFlags(InstructionCode arch_opcode) {
+    return arch_opcode <= ArchOpcode::kArchCallBuiltinPointer;
+  }
+  bool IsCallWithDescriptorFlags() const {
+    return IsCallWithDescriptorFlags(arch_opcode());
+  }
+  bool HasCallDescriptorFlag(CallDescriptor::Flag flag) const {
+    DCHECK(IsCallWithDescriptorFlags());
+    STATIC_ASSERT(CallDescriptor::kFlagsBitsEncodedInInstructionCode == 10);
+#ifdef DEBUG
+    static constexpr int kInstructionCodeFlagsMask =
+        ((1 << CallDescriptor::kFlagsBitsEncodedInInstructionCode) - 1);
+    DCHECK_EQ(static_cast<int>(flag) & kInstructionCodeFlagsMask, flag);
+#endif
+    return MiscField::decode(opcode()) & flag;
+  }
+
+  enum GapPosition {
+    START,
+    END,
+    FIRST_GAP_POSITION = START,
+    LAST_GAP_POSITION = END
+  };
+
+  ParallelMove* GetOrCreateParallelMove(GapPosition pos, Zone* zone) {
+    if (parallel_moves_[pos] == nullptr) {
+      parallel_moves_[pos] = zone->New<ParallelMove>(zone);
+    }
+    return parallel_moves_[pos];
+  }
+
+  ParallelMove* GetParallelMove(GapPosition pos) {
+    return parallel_moves_[pos];
+  }
+
+  const ParallelMove* GetParallelMove(GapPosition pos) const {
+    return parallel_moves_[pos];
+  }
+
+  bool AreMovesRedundant() const;
+
+  ParallelMove* const* parallel_moves() const { return &parallel_moves_[0]; }
+  ParallelMove** parallel_moves() { return &parallel_moves_[0]; }
+
+  // The block_id may be invalidated in JumpThreading. It is only important for
+  // register allocation, to avoid searching for blocks from instruction
+  // indexes.
+  InstructionBlock* block() const { return block_; }
+  void set_block(InstructionBlock* block) {
+    DCHECK_NOT_NULL(block);
+    block_ = block;
+  }
+
+  // APIs to aid debugging. For general-stream APIs, use operator<<.
+  void Print() const;
+
+  using OutputCountField = base::BitField<size_t, 0, 8>;
+  using InputCountField = base::BitField<size_t, 8, 16>;
+  using TempCountField = base::BitField<size_t, 24, 6>;
+
+  static const size_t kMaxOutputCount = OutputCountField::kMax;
+  static const size_t kMaxInputCount = InputCountField::kMax;
+  static const size_t kMaxTempCount = TempCountField::kMax;
+
+ private:
+  explicit Instruction(InstructionCode opcode);
+
+  Instruction(InstructionCode opcode, size_t output_count,
+              InstructionOperand* outputs, size_t input_count,
+              InstructionOperand* inputs, size_t temp_count,
+              InstructionOperand* temps);
+
+  using IsCallField = base::BitField<bool, 30, 1>;
+
+  InstructionCode opcode_;
+  uint32_t bit_field_;
+  ParallelMove* parallel_moves_[2];
+  ReferenceMap* reference_map_;
+  InstructionBlock* block_;
+  InstructionOperand operands_[1];
+};
+
+std::ostream& operator<<(std::ostream&, const Instruction&);
+
+class RpoNumber final {
+ public:
+  static const int kInvalidRpoNumber = -1;
+  int ToInt() const {
+    DCHECK(IsValid());
+    return index_;
+  }
+  size_t ToSize() const {
+    DCHECK(IsValid());
+    return static_cast<size_t>(index_);
+  }
+  bool IsValid() const { return index_ >= 0; }
+  static RpoNumber FromInt(int index) { return RpoNumber(index); }
+  static RpoNumber Invalid() { return RpoNumber(kInvalidRpoNumber); }
+
+  bool IsNext(const RpoNumber other) const {
+    DCHECK(IsValid());
+    return other.index_ == this->index_ + 1;
+  }
+
+  RpoNumber Next() const {
+    DCHECK(IsValid());
+    return RpoNumber(index_ + 1);
+  }
+
+  // Comparison operators.
+  bool operator==(RpoNumber other) const { return index_ == other.index_; }
+  bool operator!=(RpoNumber other) const { return index_ != other.index_; }
+  bool operator>(RpoNumber other) const { return index_ > other.index_; }
+  bool operator<(RpoNumber other) const { return index_ < other.index_; }
+  bool operator<=(RpoNumber other) const { return index_ <= other.index_; }
+  bool operator>=(RpoNumber other) const { return index_ >= other.index_; }
+
+ private:
+  explicit RpoNumber(int32_t index) : index_(index) {}
+  int32_t index_;
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&, const RpoNumber&);
+
+class V8_EXPORT_PRIVATE Constant final {
+ public:
+  enum Type {
+    kInt32,
+    kInt64,
+    kFloat32,
+    kFloat64,
+    kExternalReference,
+    kCompressedHeapObject,
+    kHeapObject,
+    kRpoNumber,
+    kDelayedStringConstant
+  };
+
+  explicit Constant(int32_t v);
+  explicit Constant(int64_t v) : type_(kInt64), value_(v) {}
+  explicit Constant(float v) : type_(kFloat32), value_(bit_cast<int32_t>(v)) {}
+  explicit Constant(double v) : type_(kFloat64), value_(bit_cast<int64_t>(v)) {}
+  explicit Constant(ExternalReference ref)
+      : type_(kExternalReference), value_(bit_cast<intptr_t>(ref.address())) {}
+  explicit Constant(Handle<HeapObject> obj, bool is_compressed = false)
+      : type_(is_compressed ? kCompressedHeapObject : kHeapObject),
+        value_(bit_cast<intptr_t>(obj)) {}
+  explicit Constant(RpoNumber rpo) : type_(kRpoNumber), value_(rpo.ToInt()) {}
+  explicit Constant(const StringConstantBase* str)
+      : type_(kDelayedStringConstant), value_(bit_cast<intptr_t>(str)) {}
+  explicit Constant(RelocatablePtrConstantInfo info);
+
+  Type type() const { return type_; }
+
+  RelocInfo::Mode rmode() const { return rmode_; }
+
+  int32_t ToInt32() const {
+    DCHECK(type() == kInt32 || type() == kInt64);
+    const int32_t value = static_cast<int32_t>(value_);
+    DCHECK_EQ(value_, static_cast<int64_t>(value));
+    return value;
+  }
+
+  int64_t ToInt64() const {
+    if (type() == kInt32) return ToInt32();
+    DCHECK_EQ(kInt64, type());
+    return value_;
+  }
+
+  float ToFloat32() const {
+    // TODO(ahaas): We should remove this function. If value_ has the bit
+    // representation of a signalling NaN, then returning it as float can cause
+    // the signalling bit to flip, and value_ is returned as a quiet NaN.
+    DCHECK_EQ(kFloat32, type());
+    return bit_cast<float>(static_cast<int32_t>(value_));
+  }
+
+  uint32_t ToFloat32AsInt() const {
+    DCHECK_EQ(kFloat32, type());
+    return bit_cast<uint32_t>(static_cast<int32_t>(value_));
+  }
+
+  Double ToFloat64() const {
+    DCHECK_EQ(kFloat64, type());
+    return Double(bit_cast<uint64_t>(value_));
+  }
+
+  ExternalReference ToExternalReference() const {
+    DCHECK_EQ(kExternalReference, type());
+    return ExternalReference::FromRawAddress(static_cast<Address>(value_));
+  }
+
+  RpoNumber ToRpoNumber() const {
+    DCHECK_EQ(kRpoNumber, type());
+    return RpoNumber::FromInt(static_cast<int>(value_));
+  }
+
+  Handle<HeapObject> ToHeapObject() const;
+  Handle<Code> ToCode() const;
+  const StringConstantBase* ToDelayedStringConstant() const;
+
+ private:
+  Type type_;
+  RelocInfo::Mode rmode_ = RelocInfo::NONE;
+  int64_t value_;
+};
+
+std::ostream& operator<<(std::ostream&, const Constant&);
+
+// Forward declarations.
+class FrameStateDescriptor;
+
+enum class StateValueKind : uint8_t {
+  kArgumentsElements,
+  kArgumentsLength,
+  kPlain,
+  kOptimizedOut,
+  kNested,
+  kDuplicate
+};
+
+class StateValueDescriptor {
+ public:
+  StateValueDescriptor()
+      : kind_(StateValueKind::kPlain), type_(MachineType::AnyTagged()) {}
+
+  static StateValueDescriptor ArgumentsElements(ArgumentsStateType type) {
+    StateValueDescriptor descr(StateValueKind::kArgumentsElements,
+                               MachineType::AnyTagged());
+    descr.args_type_ = type;
+    return descr;
+  }
+  static StateValueDescriptor ArgumentsLength() {
+    return StateValueDescriptor(StateValueKind::kArgumentsLength,
+                                MachineType::AnyTagged());
+  }
+  static StateValueDescriptor Plain(MachineType type) {
+    return StateValueDescriptor(StateValueKind::kPlain, type);
+  }
+  static StateValueDescriptor OptimizedOut() {
+    return StateValueDescriptor(StateValueKind::kOptimizedOut,
+                                MachineType::AnyTagged());
+  }
+  static StateValueDescriptor Recursive(size_t id) {
+    StateValueDescriptor descr(StateValueKind::kNested,
+                               MachineType::AnyTagged());
+    descr.id_ = id;
+    return descr;
+  }
+  static StateValueDescriptor Duplicate(size_t id) {
+    StateValueDescriptor descr(StateValueKind::kDuplicate,
+                               MachineType::AnyTagged());
+    descr.id_ = id;
+    return descr;
+  }
+
+  bool IsArgumentsElements() const {
+    return kind_ == StateValueKind::kArgumentsElements;
+  }
+  bool IsArgumentsLength() const {
+    return kind_ == StateValueKind::kArgumentsLength;
+  }
+  bool IsPlain() const { return kind_ == StateValueKind::kPlain; }
+  bool IsOptimizedOut() const { return kind_ == StateValueKind::kOptimizedOut; }
+  bool IsNested() const { return kind_ == StateValueKind::kNested; }
+  bool IsDuplicate() const { return kind_ == StateValueKind::kDuplicate; }
+  MachineType type() const { return type_; }
+  size_t id() const {
+    DCHECK(kind_ == StateValueKind::kDuplicate ||
+           kind_ == StateValueKind::kNested);
+    return id_;
+  }
+  ArgumentsStateType arguments_type() const {
+    DCHECK(kind_ == StateValueKind::kArgumentsElements);
+    return args_type_;
+  }
+
+ private:
+  StateValueDescriptor(StateValueKind kind, MachineType type)
+      : kind_(kind), type_(type) {}
+
+  StateValueKind kind_;
+  MachineType type_;
+  union {
+    size_t id_;
+    ArgumentsStateType args_type_;
+  };
+};
+
+class StateValueList {
+ public:
+  explicit StateValueList(Zone* zone) : fields_(zone), nested_(zone) {}
+
+  size_t size() { return fields_.size(); }
+
+  struct Value {
+    StateValueDescriptor* desc;
+    StateValueList* nested;
+
+    Value(StateValueDescriptor* desc, StateValueList* nested)
+        : desc(desc), nested(nested) {}
+  };
+
+  class iterator {
+   public:
+    // Bare minimum of operators needed for range iteration.
+    bool operator!=(const iterator& other) const {
+      return field_iterator != other.field_iterator;
+    }
+    bool operator==(const iterator& other) const {
+      return field_iterator == other.field_iterator;
+    }
+    iterator& operator++() {
+      if (field_iterator->IsNested()) {
+        nested_iterator++;
+      }
+      ++field_iterator;
+      return *this;
+    }
+    Value operator*() {
+      StateValueDescriptor* desc = &(*field_iterator);
+      StateValueList* nested = desc->IsNested() ? *nested_iterator : nullptr;
+      return Value(desc, nested);
+    }
+
+   private:
+    friend class StateValueList;
+
+    iterator(ZoneVector<StateValueDescriptor>::iterator it,
+             ZoneVector<StateValueList*>::iterator nested)
+        : field_iterator(it), nested_iterator(nested) {}
+
+    ZoneVector<StateValueDescriptor>::iterator field_iterator;
+    ZoneVector<StateValueList*>::iterator nested_iterator;
+  };
+
+  void ReserveSize(size_t size) { fields_.reserve(size); }
+
+  StateValueList* PushRecursiveField(Zone* zone, size_t id) {
+    fields_.push_back(StateValueDescriptor::Recursive(id));
+    StateValueList* nested = zone->New<StateValueList>(zone);
+    nested_.push_back(nested);
+    return nested;
+  }
+  void PushArgumentsElements(ArgumentsStateType type) {
+    fields_.push_back(StateValueDescriptor::ArgumentsElements(type));
+  }
+  void PushArgumentsLength() {
+    fields_.push_back(StateValueDescriptor::ArgumentsLength());
+  }
+  void PushDuplicate(size_t id) {
+    fields_.push_back(StateValueDescriptor::Duplicate(id));
+  }
+  void PushPlain(MachineType type) {
+    fields_.push_back(StateValueDescriptor::Plain(type));
+  }
+  void PushOptimizedOut(size_t num = 1) {
+    fields_.insert(fields_.end(), num, StateValueDescriptor::OptimizedOut());
+  }
+
+  iterator begin() { return iterator(fields_.begin(), nested_.begin()); }
+  iterator end() { return iterator(fields_.end(), nested_.end()); }
+
+ private:
+  ZoneVector<StateValueDescriptor> fields_;
+  ZoneVector<StateValueList*> nested_;
+};
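+
+// Sketch of populating the value list for a frame state (|descriptor| is
+// assumed to be a FrameStateDescriptor; the machine types are arbitrary):
+//
+//   StateValueList* values = descriptor->GetStateValueDescriptors();
+//   values->ReserveSize(3);
+//   values->PushPlain(MachineType::AnyTagged());  // A live tagged value.
+//   values->PushOptimizedOut();                   // A value optimized away.
+//   values->PushDuplicate(0);                     // Re-references value id 0.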
+
+class FrameStateDescriptor : public ZoneObject {
+ public:
+  FrameStateDescriptor(Zone* zone, FrameStateType type, BailoutId bailout_id,
+                       OutputFrameStateCombine state_combine,
+                       size_t parameters_count, size_t locals_count,
+                       size_t stack_count,
+                       MaybeHandle<SharedFunctionInfo> shared_info,
+                       FrameStateDescriptor* outer_state = nullptr);
+
+  FrameStateType type() const { return type_; }
+  BailoutId bailout_id() const { return bailout_id_; }
+  OutputFrameStateCombine state_combine() const { return frame_state_combine_; }
+  size_t parameters_count() const { return parameters_count_; }
+  size_t locals_count() const { return locals_count_; }
+  size_t stack_count() const { return stack_count_; }
+  MaybeHandle<SharedFunctionInfo> shared_info() const { return shared_info_; }
+  FrameStateDescriptor* outer_state() const { return outer_state_; }
+  bool HasContext() const {
+    return FrameStateFunctionInfo::IsJSFunctionType(type_) ||
+           type_ == FrameStateType::kBuiltinContinuation ||
+           type_ == FrameStateType::kConstructStub;
+  }
+
+  // The frame height on the stack, in number of slots, as serialized into a
+  // Translation and later used by the deoptimizer. Does *not* include
+  // information from the chain of outer states. Unlike |GetSize| this does not
+  // always include parameters, locals, and stack slots; instead, the returned
+  // slot kinds depend on the frame type.
+  size_t GetHeight() const;
+
+  // Returns an overapproximation of the unoptimized stack frame size in bytes,
+  // as later produced by the deoptimizer. Considers both this and the chain of
+  // outer states.
+  size_t total_conservative_frame_size_in_bytes() const {
+    return total_conservative_frame_size_in_bytes_;
+  }
+
+  size_t GetSize() const;
+  size_t GetTotalSize() const;
+  size_t GetFrameCount() const;
+  size_t GetJSFrameCount() const;
+
+  StateValueList* GetStateValueDescriptors() { return &values_; }
+
+  static const int kImpossibleValue = 0xdead;
+
+ private:
+  FrameStateType type_;
+  BailoutId bailout_id_;
+  OutputFrameStateCombine frame_state_combine_;
+  const size_t parameters_count_;
+  const size_t locals_count_;
+  const size_t stack_count_;
+  const size_t total_conservative_frame_size_in_bytes_;
+  StateValueList values_;
+  MaybeHandle<SharedFunctionInfo> const shared_info_;
+  FrameStateDescriptor* const outer_state_;
+};
+
+// A deoptimization entry is a pair of the reason why we deoptimize and the
+// frame state descriptor that we have to go back to.
+class DeoptimizationEntry final {
+ public:
+  DeoptimizationEntry() = default;
+  DeoptimizationEntry(FrameStateDescriptor* descriptor, DeoptimizeKind kind,
+                      DeoptimizeReason reason, FeedbackSource const& feedback)
+      : descriptor_(descriptor),
+        kind_(kind),
+        reason_(reason),
+        feedback_(feedback) {}
+
+  FrameStateDescriptor* descriptor() const { return descriptor_; }
+  DeoptimizeKind kind() const { return kind_; }
+  DeoptimizeReason reason() const { return reason_; }
+  FeedbackSource const& feedback() const { return feedback_; }
+
+ private:
+  FrameStateDescriptor* descriptor_ = nullptr;
+  DeoptimizeKind kind_ = DeoptimizeKind::kEager;
+  DeoptimizeReason reason_ = DeoptimizeReason::kUnknown;
+  FeedbackSource feedback_ = FeedbackSource();
+};
+
+using DeoptimizationVector = ZoneVector<DeoptimizationEntry>;
+
+class V8_EXPORT_PRIVATE PhiInstruction final
+    : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  using Inputs = ZoneVector<InstructionOperand>;
+
+  PhiInstruction(Zone* zone, int virtual_register, size_t input_count);
+
+  void SetInput(size_t offset, int virtual_register);
+  void RenameInput(size_t offset, int virtual_register);
+
+  int virtual_register() const { return virtual_register_; }
+  const IntVector& operands() const { return operands_; }
+
+  // TODO(dcarney): this has no real business being here, since it's internal to
+  // the register allocator, but putting it here was convenient.
+  const InstructionOperand& output() const { return output_; }
+  InstructionOperand& output() { return output_; }
+
+ private:
+  const int virtual_register_;
+  InstructionOperand output_;
+  IntVector operands_;
+};
+
+// Analogue of BasicBlock for Instructions instead of Nodes.
+class V8_EXPORT_PRIVATE InstructionBlock final
+    : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  InstructionBlock(Zone* zone, RpoNumber rpo_number, RpoNumber loop_header,
+                   RpoNumber loop_end, RpoNumber dominator, bool deferred,
+                   bool handler);
+
+  // Instruction indexes (used by the register allocator).
+  int first_instruction_index() const {
+    DCHECK_LE(0, code_start_);
+    DCHECK_LT(0, code_end_);
+    DCHECK_GE(code_end_, code_start_);
+    return code_start_;
+  }
+  int last_instruction_index() const {
+    DCHECK_LE(0, code_start_);
+    DCHECK_LT(0, code_end_);
+    DCHECK_GE(code_end_, code_start_);
+    return code_end_ - 1;
+  }
+
+  int32_t code_start() const { return code_start_; }
+  void set_code_start(int32_t start) { code_start_ = start; }
+
+  int32_t code_end() const { return code_end_; }
+  void set_code_end(int32_t end) { code_end_ = end; }
+
+  bool IsDeferred() const { return deferred_; }
+  bool IsHandler() const { return handler_; }
+  void MarkHandler() { handler_ = true; }
+  void UnmarkHandler() { handler_ = false; }
+
+  RpoNumber ao_number() const { return ao_number_; }
+  RpoNumber rpo_number() const { return rpo_number_; }
+  RpoNumber loop_header() const { return loop_header_; }
+  RpoNumber loop_end() const {
+    DCHECK(IsLoopHeader());
+    return loop_end_;
+  }
+  inline bool IsLoopHeader() const { return loop_end_.IsValid(); }
+  inline bool IsSwitchTarget() const { return switch_target_; }
+  inline bool ShouldAlign() const { return alignment_; }
+
+  using Predecessors = ZoneVector<RpoNumber>;
+  Predecessors& predecessors() { return predecessors_; }
+  const Predecessors& predecessors() const { return predecessors_; }
+  size_t PredecessorCount() const { return predecessors_.size(); }
+  size_t PredecessorIndexOf(RpoNumber rpo_number) const;
+
+  using Successors = ZoneVector<RpoNumber>;
+  Successors& successors() { return successors_; }
+  const Successors& successors() const { return successors_; }
+  size_t SuccessorCount() const { return successors_.size(); }
+
+  RpoNumber dominator() const { return dominator_; }
+  void set_dominator(RpoNumber dominator) { dominator_ = dominator; }
+
+  using PhiInstructions = ZoneVector<PhiInstruction*>;
+  const PhiInstructions& phis() const { return phis_; }
+  PhiInstruction* PhiAt(size_t i) const { return phis_[i]; }
+  void AddPhi(PhiInstruction* phi) { phis_.push_back(phi); }
+
+  void set_ao_number(RpoNumber ao_number) { ao_number_ = ao_number; }
+
+  void set_alignment(bool val) { alignment_ = val; }
+
+  void set_switch_target(bool val) { switch_target_ = val; }
+
+  bool needs_frame() const { return needs_frame_; }
+  void mark_needs_frame() { needs_frame_ = true; }
+
+  bool must_construct_frame() const { return must_construct_frame_; }
+  void mark_must_construct_frame() { must_construct_frame_ = true; }
+
+  bool must_deconstruct_frame() const { return must_deconstruct_frame_; }
+  void mark_must_deconstruct_frame() { must_deconstruct_frame_ = true; }
+  void clear_must_deconstruct_frame() { must_deconstruct_frame_ = false; }
+
+ private:
+  Successors successors_;
+  Predecessors predecessors_;
+  PhiInstructions phis_;
+  RpoNumber ao_number_;  // Assembly order number.
+  const RpoNumber rpo_number_;
+  const RpoNumber loop_header_;
+  const RpoNumber loop_end_;
+  RpoNumber dominator_;
+  int32_t code_start_;        // start index of arch-specific code.
+  int32_t code_end_ = -1;     // end index of arch-specific code.
+  const bool deferred_;       // Block contains deferred code.
+  bool handler_;              // Block is a handler entry point.
+  bool switch_target_ = false;
+  bool alignment_ = false;  // insert alignment before this block
+  bool needs_frame_ = false;
+  bool must_construct_frame_ = false;
+  bool must_deconstruct_frame_ = false;
+};
+
+class InstructionSequence;
+
+struct PrintableInstructionBlock {
+  const InstructionBlock* block_;
+  const InstructionSequence* code_;
+};
+
+std::ostream& operator<<(std::ostream&, const PrintableInstructionBlock&);
+
+using ConstantDeque = ZoneDeque<Constant>;
+using ConstantMap = std::map<int, Constant, std::less<int>,
+                             ZoneAllocator<std::pair<const int, Constant> > >;
+
+using InstructionDeque = ZoneDeque<Instruction*>;
+using ReferenceMapDeque = ZoneDeque<ReferenceMap*>;
+using InstructionBlocks = ZoneVector<InstructionBlock*>;
+
+// Represents architecture-specific generated code before, during, and after
+// register allocation.
+class V8_EXPORT_PRIVATE InstructionSequence final
+    : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  static InstructionBlocks* InstructionBlocksFor(Zone* zone,
+                                                 const Schedule* schedule);
+  InstructionSequence(Isolate* isolate, Zone* zone,
+                      InstructionBlocks* instruction_blocks);
+  InstructionSequence(const InstructionSequence&) = delete;
+  InstructionSequence& operator=(const InstructionSequence&) = delete;
+
+  int NextVirtualRegister();
+  int VirtualRegisterCount() const { return next_virtual_register_; }
+
+  const InstructionBlocks& instruction_blocks() const {
+    return *instruction_blocks_;
+  }
+
+  const InstructionBlocks& ao_blocks() const { return *ao_blocks_; }
+
+  int InstructionBlockCount() const {
+    return static_cast<int>(instruction_blocks_->size());
+  }
+
+  InstructionBlock* InstructionBlockAt(RpoNumber rpo_number) {
+    return instruction_blocks_->at(rpo_number.ToSize());
+  }
+
+  int LastLoopInstructionIndex(const InstructionBlock* block) {
+    return instruction_blocks_->at(block->loop_end().ToSize() - 1)
+        ->last_instruction_index();
+  }
+
+  const InstructionBlock* InstructionBlockAt(RpoNumber rpo_number) const {
+    return instruction_blocks_->at(rpo_number.ToSize());
+  }
+
+  InstructionBlock* GetInstructionBlock(int instruction_index) const;
+
+  static MachineRepresentation DefaultRepresentation() {
+    return MachineType::PointerRepresentation();
+  }
+  MachineRepresentation GetRepresentation(int virtual_register) const;
+  void MarkAsRepresentation(MachineRepresentation rep, int virtual_register);
+
+  bool IsReference(int virtual_register) const {
+    return CanBeTaggedOrCompressedPointer(GetRepresentation(virtual_register));
+  }
+  bool IsFP(int virtual_register) const {
+    return IsFloatingPoint(GetRepresentation(virtual_register));
+  }
+  int representation_mask() const { return representation_mask_; }
+  bool HasFPVirtualRegisters() const {
+    constexpr int kFPRepMask =
+        RepresentationBit(MachineRepresentation::kFloat32) |
+        RepresentationBit(MachineRepresentation::kFloat64) |
+        RepresentationBit(MachineRepresentation::kSimd128);
+    return (representation_mask() & kFPRepMask) != 0;
+  }
+
+  Instruction* GetBlockStart(RpoNumber rpo) const;
+
+  using const_iterator = InstructionDeque::const_iterator;
+  const_iterator begin() const { return instructions_.begin(); }
+  const_iterator end() const { return instructions_.end(); }
+  const InstructionDeque& instructions() const { return instructions_; }
+  int LastInstructionIndex() const {
+    return static_cast<int>(instructions().size()) - 1;
+  }
+
+  Instruction* InstructionAt(int index) const {
+    DCHECK_LE(0, index);
+    DCHECK_GT(instructions_.size(), index);
+    return instructions_[index];
+  }
+
+  Isolate* isolate() const { return isolate_; }
+  const ReferenceMapDeque* reference_maps() const { return &reference_maps_; }
+  Zone* zone() const { return zone_; }
+
+  // Used by the instruction selector while adding instructions.
+  int AddInstruction(Instruction* instr);
+  void StartBlock(RpoNumber rpo);
+  void EndBlock(RpoNumber rpo);
+
+  int AddConstant(int virtual_register, Constant constant) {
+    // TODO(titzer): allow RPO numbers as constants?
+    DCHECK_NE(Constant::kRpoNumber, constant.type());
+    DCHECK(virtual_register >= 0 && virtual_register < next_virtual_register_);
+    DCHECK(constants_.find(virtual_register) == constants_.end());
+    constants_.insert(std::make_pair(virtual_register, constant));
+    return virtual_register;
+  }
+  Constant GetConstant(int virtual_register) const {
+    auto it = constants_.find(virtual_register);
+    DCHECK(it != constants_.end());
+    DCHECK_EQ(virtual_register, it->first);
+    return it->second;
+  }
+
+  using Immediates = ZoneVector<Constant>;
+  Immediates& immediates() { return immediates_; }
+
+  ImmediateOperand AddImmediate(const Constant& constant) {
+    if (constant.type() == Constant::kInt32 &&
+        RelocInfo::IsNone(constant.rmode())) {
+      return ImmediateOperand(ImmediateOperand::INLINE, constant.ToInt32());
+    }
+    int index = static_cast<int>(immediates_.size());
+    immediates_.push_back(constant);
+    return ImmediateOperand(ImmediateOperand::INDEXED, index);
+  }
+
+  Constant GetImmediate(const ImmediateOperand* op) const {
+    switch (op->type()) {
+      case ImmediateOperand::INLINE:
+        return Constant(op->inline_value());
+      case ImmediateOperand::INDEXED: {
+        int index = op->indexed_value();
+        DCHECK_LE(0, index);
+        DCHECK_GT(immediates_.size(), index);
+        return immediates_[index];
+      }
+    }
+    UNREACHABLE();
+  }
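+
+  // Example of the two immediate forms (illustrative): a plain int32 constant
+  // with no relocation info is encoded inline; anything else is stored in the
+  // indexed immediates table.
+  //
+  //   ImmediateOperand a = AddImmediate(Constant(int32_t{7}));
+  //   // a.type() == ImmediateOperand::INLINE, a.inline_value() == 7.
+  //   ImmediateOperand b = AddImmediate(Constant(3.25));
+  //   // b.type() == ImmediateOperand::INDEXED; GetImmediate(&b) returns the
+  //   // stored Constant.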
+
+  int AddDeoptimizationEntry(FrameStateDescriptor* descriptor,
+                             DeoptimizeKind kind, DeoptimizeReason reason,
+                             FeedbackSource const& feedback);
+  DeoptimizationEntry const& GetDeoptimizationEntry(int deoptimization_id);
+  int GetDeoptimizationEntryCount() const {
+    return static_cast<int>(deoptimization_entries_.size());
+  }
+
+  RpoNumber InputRpo(Instruction* instr, size_t index);
+
+  bool GetSourcePosition(const Instruction* instr,
+                         SourcePosition* result) const;
+  void SetSourcePosition(const Instruction* instr, SourcePosition value);
+
+  bool ContainsCall() const {
+    for (Instruction* instr : instructions_) {
+      if (instr->IsCall()) return true;
+    }
+    return false;
+  }
+
+  // APIs to aid debugging. For general-stream APIs, use operator<<.
+  void Print() const;
+
+  void PrintBlock(int block_id) const;
+
+  void ValidateEdgeSplitForm() const;
+  void ValidateDeferredBlockExitPaths() const;
+  void ValidateDeferredBlockEntryPaths() const;
+  void ValidateSSA() const;
+
+  static void SetRegisterConfigurationForTesting(
+      const RegisterConfiguration* regConfig);
+  static void ClearRegisterConfigurationForTesting();
+
+  void RecomputeAssemblyOrderForTesting();
+
+ private:
+  friend V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
+                                                    const InstructionSequence&);
+
+  using SourcePositionMap = ZoneMap<const Instruction*, SourcePosition>;
+
+  static const RegisterConfiguration* RegisterConfigurationForTesting();
+  static const RegisterConfiguration* registerConfigurationForTesting_;
+
+  // Puts the deferred blocks last and may rotate loops.
+  void ComputeAssemblyOrder();
+
+  Isolate* isolate_;
+  Zone* const zone_;
+  InstructionBlocks* const instruction_blocks_;
+  InstructionBlocks* ao_blocks_;
+  SourcePositionMap source_positions_;
+  ConstantMap constants_;
+  Immediates immediates_;
+  InstructionDeque instructions_;
+  int next_virtual_register_;
+  ReferenceMapDeque reference_maps_;
+  ZoneVector<MachineRepresentation> representations_;
+  int representation_mask_;
+  DeoptimizationVector deoptimization_entries_;
+
+  // Used at construction time
+  InstructionBlock* current_block_;
+};
+
+V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream&,
+                                           const InstructionSequence&);
+#undef INSTRUCTION_OPERAND_ALIGN
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_INSTRUCTION_H_
diff --git a/src/compiler/backend/jump-threading.cc b/src/compiler/backend/jump-threading.cc
new file mode 100644
index 0000000..c09274c
--- /dev/null
+++ b/src/compiler/backend/jump-threading.cc
@@ -0,0 +1,269 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/jump-threading.h"
+#include "src/compiler/backend/code-generator-impl.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define TRACE(...)                                \
+  do {                                            \
+    if (FLAG_trace_turbo_jt) PrintF(__VA_ARGS__); \
+  } while (false)
+
+namespace {
+
+struct JumpThreadingState {
+  bool forwarded;
+  ZoneVector<RpoNumber>& result;
+  ZoneStack<RpoNumber>& stack;
+
+  void Clear(size_t count) { result.assign(count, unvisited()); }
+  void PushIfUnvisited(RpoNumber num) {
+    if (result[num.ToInt()] == unvisited()) {
+      stack.push(num);
+      result[num.ToInt()] = onstack();
+    }
+  }
+  void Forward(RpoNumber to) {
+    RpoNumber from = stack.top();
+    RpoNumber to_to = result[to.ToInt()];
+    bool pop = true;
+    if (to == from) {
+      TRACE("  xx %d\n", from.ToInt());
+      result[from.ToInt()] = from;
+    } else if (to_to == unvisited()) {
+      TRACE("  fw %d -> %d (recurse)\n", from.ToInt(), to.ToInt());
+      stack.push(to);
+      result[to.ToInt()] = onstack();
+      pop = false;  // recurse.
+    } else if (to_to == onstack()) {
+      TRACE("  fw %d -> %d (cycle)\n", from.ToInt(), to.ToInt());
+      result[from.ToInt()] = to;  // break the cycle.
+      forwarded = true;
+    } else {
+      TRACE("  fw %d -> %d (forward)\n", from.ToInt(), to.ToInt());
+      result[from.ToInt()] = to_to;  // forward the block.
+      forwarded = true;
+    }
+    if (pop) stack.pop();
+  }
+  RpoNumber unvisited() { return RpoNumber::FromInt(-1); }
+  RpoNumber onstack() { return RpoNumber::FromInt(-2); }
+};
+
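+// The struct above is, in effect, an iterative depth-first computation of each
+// block's transitive jump target. A minimal standalone sketch of the same
+// computation (plain ints instead of RpoNumber; target[b] is the single block
+// an empty block b jumps to, or b itself otherwise; -1 marks "unvisited" and
+// -2 "on stack", exactly as above):
+//
+//   std::vector<int> Forwarding(const std::vector<int>& target) {
+//     std::vector<int> result(target.size(), -1);
+//     std::vector<int> stack;
+//     for (int b = 0; b < static_cast<int>(target.size()); ++b) {
+//       if (result[b] == -1) { stack.push_back(b); result[b] = -2; }
+//       while (!stack.empty()) {
+//         int from = stack.back();
+//         int to = target[from];
+//         if (to == from) {               // Block keeps its own label.
+//           result[from] = from; stack.pop_back();
+//         } else if (result[to] == -1) {  // Recurse into the target first.
+//           stack.push_back(to); result[to] = -2;
+//         } else if (result[to] == -2) {  // Cycle of empty blocks.
+//           result[from] = to; stack.pop_back();
+//         } else {                        // Forward to the resolved target.
+//           result[from] = result[to]; stack.pop_back();
+//         }
+//       }
+//     }
+//     return result;
+//   }
+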
+bool IsBlockWithBranchPoisoning(InstructionSequence* code,
+                                InstructionBlock* block) {
+  if (block->PredecessorCount() != 1) return false;
+  RpoNumber pred_rpo = (block->predecessors())[0];
+  const InstructionBlock* pred = code->InstructionBlockAt(pred_rpo);
+  if (pred->code_start() == pred->code_end()) return false;
+  Instruction* instr = code->InstructionAt(pred->code_end() - 1);
+  FlagsMode mode = FlagsModeField::decode(instr->opcode());
+  return mode == kFlags_branch_and_poison;
+}
+
+}  // namespace
+
+bool JumpThreading::ComputeForwarding(Zone* local_zone,
+                                      ZoneVector<RpoNumber>* result,
+                                      InstructionSequence* code,
+                                      bool frame_at_start) {
+  ZoneStack<RpoNumber> stack(local_zone);
+  JumpThreadingState state = {false, *result, stack};
+  state.Clear(code->InstructionBlockCount());
+  RpoNumber empty_deconstruct_frame_return_block = RpoNumber::Invalid();
+  int32_t empty_deconstruct_frame_return_size;
+  RpoNumber empty_no_deconstruct_frame_return_block = RpoNumber::Invalid();
+  int32_t empty_no_deconstruct_frame_return_size;
+
+  // Iterate over the blocks forward, pushing the blocks onto the stack.
+  for (auto const block : code->instruction_blocks()) {
+    RpoNumber current = block->rpo_number();
+    state.PushIfUnvisited(current);
+
+    // Process the stack, which implements DFS through empty blocks.
+    while (!state.stack.empty()) {
+      InstructionBlock* block = code->InstructionBlockAt(state.stack.top());
+      // Process the instructions in a block up to a non-empty instruction.
+      TRACE("jt [%d] B%d\n", static_cast<int>(stack.size()),
+            block->rpo_number().ToInt());
+      RpoNumber fw = block->rpo_number();
+      if (!IsBlockWithBranchPoisoning(code, block)) {
+        bool fallthru = true;
+        for (int i = block->code_start(); i < block->code_end(); ++i) {
+          Instruction* instr = code->InstructionAt(i);
+          if (!instr->AreMovesRedundant()) {
+            // can't skip instructions with non-redundant moves.
+            TRACE("  parallel move\n");
+            fallthru = false;
+          } else if (FlagsModeField::decode(instr->opcode()) != kFlags_none) {
+            // can't skip instructions with flags continuations.
+            TRACE("  flags\n");
+            fallthru = false;
+          } else if (instr->IsNop()) {
+            // skip nops.
+            TRACE("  nop\n");
+            continue;
+          } else if (instr->arch_opcode() == kArchJmp) {
+            // try to forward the jump instruction.
+            TRACE("  jmp\n");
+            // if this block deconstructs the frame, we can't forward it.
+            // TODO(mtrofin): we can still forward if we end up building
+            // the frame at start. So we should move the decision of whether
+            // to build a frame or not into the register allocator, and trickle it
+            // here and to the code generator.
+            if (frame_at_start || !(block->must_deconstruct_frame() ||
+                                    block->must_construct_frame())) {
+              fw = code->InputRpo(instr, 0);
+            }
+            fallthru = false;
+          } else if (instr->IsRet()) {
+            TRACE("  ret\n");
+            if (fallthru) {
+              CHECK_IMPLIES(block->must_construct_frame(),
+                            block->must_deconstruct_frame());
+              // Only handle returns with immediate/constant operands, since
+              // they must always be the same for all returns in a function.
+              // Dynamic return values might use different registers at
+              // different return sites and therefore cannot be shared.
+              if (instr->InputAt(0)->IsImmediate()) {
+                int32_t return_size =
+                    ImmediateOperand::cast(instr->InputAt(0))->inline_value();
+                // Instructions can be shared only for blocks that share
+                // the same |must_deconstruct_frame| attribute.
+                if (block->must_deconstruct_frame()) {
+                  if (empty_deconstruct_frame_return_block ==
+                      RpoNumber::Invalid()) {
+                    empty_deconstruct_frame_return_block = block->rpo_number();
+                    empty_deconstruct_frame_return_size = return_size;
+                  } else if (empty_deconstruct_frame_return_size ==
+                             return_size) {
+                    fw = empty_deconstruct_frame_return_block;
+                    block->clear_must_deconstruct_frame();
+                  }
+                } else {
+                  if (empty_no_deconstruct_frame_return_block ==
+                      RpoNumber::Invalid()) {
+                    empty_no_deconstruct_frame_return_block =
+                        block->rpo_number();
+                    empty_no_deconstruct_frame_return_size = return_size;
+                  } else if (empty_no_deconstruct_frame_return_size ==
+                             return_size) {
+                    fw = empty_no_deconstruct_frame_return_block;
+                  }
+                }
+              }
+            }
+            fallthru = false;
+          } else {
+            // can't skip other instructions.
+            TRACE("  other\n");
+            fallthru = false;
+          }
+          break;
+        }
+        if (fallthru) {
+          int next = 1 + block->rpo_number().ToInt();
+          if (next < code->InstructionBlockCount())
+            fw = RpoNumber::FromInt(next);
+        }
+      }
+      state.Forward(fw);
+    }
+  }
+
+#ifdef DEBUG
+  for (RpoNumber num : *result) {
+    DCHECK(num.IsValid());
+  }
+#endif
+
+  if (FLAG_trace_turbo_jt) {
+    for (int i = 0; i < static_cast<int>(result->size()); i++) {
+      TRACE("B%d ", i);
+      int to = (*result)[i].ToInt();
+      if (i != to) {
+        TRACE("-> B%d\n", to);
+      } else {
+        TRACE("\n");
+      }
+    }
+  }
+
+  return state.forwarded;
+}
+
+void JumpThreading::ApplyForwarding(Zone* local_zone,
+                                    ZoneVector<RpoNumber> const& result,
+                                    InstructionSequence* code) {
+  if (!FLAG_turbo_jt) return;
+
+  ZoneVector<bool> skip(static_cast<int>(result.size()), false, local_zone);
+
+  // Skip empty blocks when the previous block doesn't fall through.
+  bool prev_fallthru = true;
+  for (auto const block : code->instruction_blocks()) {
+    RpoNumber block_rpo = block->rpo_number();
+    int block_num = block_rpo.ToInt();
+    RpoNumber result_rpo = result[block_num];
+    skip[block_num] = !prev_fallthru && result_rpo != block_rpo;
+
+    if (result_rpo != block_rpo) {
+      // We need the handler information to be propagated, so that branch
+      // targets are annotated as necessary for control flow integrity
+      // checks (when enabled).
+      if (code->InstructionBlockAt(block_rpo)->IsHandler()) {
+        code->InstructionBlockAt(result_rpo)->MarkHandler();
+      }
+    }
+
+    bool fallthru = true;
+    for (int i = block->code_start(); i < block->code_end(); ++i) {
+      Instruction* instr = code->InstructionAt(i);
+      FlagsMode mode = FlagsModeField::decode(instr->opcode());
+      if (mode == kFlags_branch || mode == kFlags_branch_and_poison) {
+        fallthru = false;  // branches don't fall through to the next block.
+      } else if (instr->arch_opcode() == kArchJmp ||
+                 instr->arch_opcode() == kArchRet) {
+        if (skip[block_num]) {
+          // Overwrite a redundant jump with a nop.
+          TRACE("jt-fw nop @%d\n", i);
+          instr->OverwriteWithNop();
+          // If this block was marked as a handler, it can be unmarked now.
+          code->InstructionBlockAt(block_rpo)->UnmarkHandler();
+        }
+        fallthru = false;  // jumps don't fall through to the next block.
+      }
+    }
+    prev_fallthru = fallthru;
+  }
+
+  // Patch RPO immediates.
+  InstructionSequence::Immediates& immediates = code->immediates();
+  for (size_t i = 0; i < immediates.size(); i++) {
+    Constant constant = immediates[i];
+    if (constant.type() == Constant::kRpoNumber) {
+      RpoNumber rpo = constant.ToRpoNumber();
+      RpoNumber fw = result[rpo.ToInt()];
+      if (!(fw == rpo)) immediates[i] = Constant(fw);
+    }
+  }
+
+  // Renumber the blocks so that IsNextInAssemblyOrder() will return true,
+  // even if there are skipped blocks in-between.
+  int ao = 0;
+  for (auto const block : code->ao_blocks()) {
+    block->set_ao_number(RpoNumber::FromInt(ao));
+    if (!skip[block->rpo_number().ToInt()]) ao++;
+  }
+}
+
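+// Worked example (sketch): take three blocks where B1 is empty and only jumps
+// on,
+//
+//   B0: ...; jmp B1      B1: jmp B2      B2: ...
+//
+// ComputeForwarding() yields result = {B0, B2, B2}. ApplyForwarding() then
+// overwrites B1's jump with a nop (B0 does not fall through into B1), patches
+// the RPO immediate of B0's jump from B1 to B2, and renumbers the assembly
+// order so that B2 immediately follows B0.
+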
+#undef TRACE
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/jump-threading.h b/src/compiler/backend/jump-threading.h
new file mode 100644
index 0000000..ce9e394
--- /dev/null
+++ b/src/compiler/backend/jump-threading.h
@@ -0,0 +1,34 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_JUMP_THREADING_H_
+#define V8_COMPILER_BACKEND_JUMP_THREADING_H_
+
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Forwards jumps that target empty basic blocks ending in another jump to the
+// destination of that second jump, transitively.
+class V8_EXPORT_PRIVATE JumpThreading {
+ public:
+  // Compute the forwarding map of basic blocks to their ultimate destination.
+  // Returns {true} if there is at least one block that is forwarded.
+  static bool ComputeForwarding(Zone* local_zone, ZoneVector<RpoNumber>* result,
+                                InstructionSequence* code, bool frame_at_start);
+
+  // Rewrite the instructions to forward jumps and branches.
+  // May also negate some branches.
+  static void ApplyForwarding(Zone* local_zone,
+                              ZoneVector<RpoNumber> const& forwarding,
+                              InstructionSequence* code);
+};
+
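+// Typical two-phase use (a sketch; the zone, instruction sequence and
+// |frame_at_start| flag are assumed to be provided by the caller, as in the
+// compilation pipeline):
+//
+//   ZoneVector<RpoNumber> forwarding(local_zone);
+//   if (JumpThreading::ComputeForwarding(local_zone, &forwarding, code,
+//                                        frame_at_start)) {
+//     JumpThreading::ApplyForwarding(local_zone, forwarding, code);
+//   }
+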
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_JUMP_THREADING_H_
diff --git a/src/compiler/backend/mid-tier-register-allocator.cc b/src/compiler/backend/mid-tier-register-allocator.cc
new file mode 100644
index 0000000..4380852
--- /dev/null
+++ b/src/compiler/backend/mid-tier-register-allocator.cc
@@ -0,0 +1,3179 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/mid-tier-register-allocator.h"
+
+#include "src/base/bits.h"
+#include "src/base/logging.h"
+#include "src/base/macros.h"
+#include "src/base/optional.h"
+#include "src/codegen/machine-type.h"
+#include "src/codegen/register-configuration.h"
+#include "src/codegen/tick-counter.h"
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/linkage.h"
+#include "src/logging/counters.h"
+#include "src/utils/bit-vector.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class RegisterState;
+class DeferredBlocksRegion;
+
+// BlockState stores details associated with a particular basic block.
+class BlockState final {
+ public:
+  BlockState(int block_count, Zone* zone)
+      : general_registers_in_state_(nullptr),
+        double_registers_in_state_(nullptr),
+        deferred_blocks_region_(nullptr),
+        dominated_blocks_(block_count, zone),
+        successors_phi_index_(-1),
+        is_deferred_block_boundary_(false) {}
+
+  // Returns the RegisterState that applies to the input of this block. Can be
+  // |nullptr| if no registers of |kind| have been allocated up to this
+  // point.
+  RegisterState* register_in_state(RegisterKind kind);
+  void set_register_in_state(RegisterState* register_state, RegisterKind kind);
+
+  // Returns a bitvector representing all the basic blocks that are dominated
+  // by this basic block.
+  BitVector* dominated_blocks() { return &dominated_blocks_; }
+
+  // Set / get this block's index for its successors' phi operations. Will
+  // return -1 if this block has no successors with phi operations.
+  int successors_phi_index() const { return successors_phi_index_; }
+  void set_successors_phi_index(int index) {
+    DCHECK_EQ(successors_phi_index_, -1);
+    successors_phi_index_ = index;
+  }
+
+  // If this block is deferred, this represents the region of deferred blocks
+  // that are directly reachable from this block.
+  DeferredBlocksRegion* deferred_blocks_region() const {
+    return deferred_blocks_region_;
+  }
+  void set_deferred_blocks_region(DeferredBlocksRegion* region) {
+    DCHECK_NULL(deferred_blocks_region_);
+    deferred_blocks_region_ = region;
+  }
+
+  // Returns true if this block represents either a transition from
+  // non-deferred to deferred or vice versa.
+  bool is_deferred_block_boundary() const {
+    return is_deferred_block_boundary_;
+  }
+  void MarkAsDeferredBlockBoundary() { is_deferred_block_boundary_ = true; }
+
+  MOVE_ONLY_NO_DEFAULT_CONSTRUCTOR(BlockState);
+
+ private:
+  RegisterState* general_registers_in_state_;
+  RegisterState* double_registers_in_state_;
+
+  DeferredBlocksRegion* deferred_blocks_region_;
+
+  BitVector dominated_blocks_;
+  int successors_phi_index_;
+  bool is_deferred_block_boundary_;
+};
+
+RegisterState* BlockState::register_in_state(RegisterKind kind) {
+  switch (kind) {
+    case RegisterKind::kGeneral:
+      return general_registers_in_state_;
+    case RegisterKind::kDouble:
+      return double_registers_in_state_;
+  }
+}
+
+void BlockState::set_register_in_state(RegisterState* register_state,
+                                       RegisterKind kind) {
+  switch (kind) {
+    case RegisterKind::kGeneral:
+      DCHECK_NULL(general_registers_in_state_);
+      general_registers_in_state_ = register_state;
+      break;
+    case RegisterKind::kDouble:
+      DCHECK_NULL(double_registers_in_state_);
+      double_registers_in_state_ = register_state;
+      break;
+  }
+}
+
+MidTierRegisterAllocationData::MidTierRegisterAllocationData(
+    const RegisterConfiguration* config, Zone* zone, Frame* frame,
+    InstructionSequence* code, TickCounter* tick_counter,
+    const char* debug_name)
+    : RegisterAllocationData(Type::kMidTier),
+      allocation_zone_(zone),
+      frame_(frame),
+      code_(code),
+      debug_name_(debug_name),
+      config_(config),
+      virtual_register_data_(code->VirtualRegisterCount(), allocation_zone()),
+      block_states_(allocation_zone()),
+      reference_map_instructions_(allocation_zone()),
+      spilled_virtual_registers_(code->VirtualRegisterCount(),
+                                 allocation_zone()),
+      tick_counter_(tick_counter) {
+  int basic_block_count = code->InstructionBlockCount();
+  block_states_.reserve(basic_block_count);
+  for (int i = 0; i < basic_block_count; i++) {
+    block_states_.emplace_back(basic_block_count, allocation_zone());
+  }
+}
+
+MoveOperands* MidTierRegisterAllocationData::AddGapMove(
+    int instr_index, Instruction::GapPosition position,
+    const InstructionOperand& from, const InstructionOperand& to) {
+  Instruction* instr = code()->InstructionAt(instr_index);
+  ParallelMove* moves = instr->GetOrCreateParallelMove(position, code_zone());
+  return moves->AddMove(from, to);
+}
+
+MoveOperands* MidTierRegisterAllocationData::AddPendingOperandGapMove(
+    int instr_index, Instruction::GapPosition position) {
+  return AddGapMove(instr_index, position, PendingOperand(), PendingOperand());
+}
+
+MachineRepresentation MidTierRegisterAllocationData::RepresentationFor(
+    int virtual_register) {
+  if (virtual_register == InstructionOperand::kInvalidVirtualRegister) {
+    return InstructionSequence::DefaultRepresentation();
+  } else {
+    DCHECK_LT(virtual_register, code()->VirtualRegisterCount());
+    return code()->GetRepresentation(virtual_register);
+  }
+}
+
+BlockState& MidTierRegisterAllocationData::block_state(RpoNumber rpo_number) {
+  return block_states_[rpo_number.ToInt()];
+}
+
+const InstructionBlock* MidTierRegisterAllocationData::GetBlock(
+    RpoNumber rpo_number) {
+  return code()->InstructionBlockAt(rpo_number);
+}
+
+const InstructionBlock* MidTierRegisterAllocationData::GetBlock(
+    int instr_index) {
+  return code()->InstructionAt(instr_index)->block();
+}
+
+const BitVector* MidTierRegisterAllocationData::GetBlocksDominatedBy(
+    const InstructionBlock* block) {
+  return block_state(block->rpo_number()).dominated_blocks();
+}
+
+// RegisterIndex represents a particular register of a given kind (depending
+// on the RegisterKind of the allocator).
+class RegisterIndex final {
+ public:
+  RegisterIndex() : index_(kInvalidIndex) {}
+  explicit RegisterIndex(int index) : index_(index) {}
+  static RegisterIndex Invalid() { return RegisterIndex(); }
+
+  bool is_valid() const { return index_ != kInvalidIndex; }
+
+  int ToInt() const {
+    DCHECK(is_valid());
+    return index_;
+  }
+
+  uintptr_t ToBit(MachineRepresentation rep) const {
+    if (kSimpleFPAliasing || rep != MachineRepresentation::kSimd128) {
+      return 1ull << ToInt();
+    } else {
+      DCHECK_EQ(rep, MachineRepresentation::kSimd128);
+      return 3ull << ToInt();
+    }
+  }
+
+  bool operator==(const RegisterIndex& rhs) const {
+    return index_ == rhs.index_;
+  }
+  bool operator!=(const RegisterIndex& rhs) const {
+    return index_ != rhs.index_;
+  }
+
+  class Iterator {
+   public:
+    explicit Iterator(int index) : index_(index) {}
+
+    bool operator!=(const Iterator& rhs) const { return index_ != rhs.index_; }
+    void operator++() { index_++; }
+    RegisterIndex operator*() const { return RegisterIndex(index_); }
+
+   private:
+    int index_;
+  };
+
+ private:
+  static const int kInvalidIndex = -1;
+  int8_t index_;
+};
+
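+// Note on RegisterIndex::ToBit(): each register normally occupies a single bit
+// in an in-use mask, but on targets without simple FP aliasing a Simd128 value
+// occupies an aliased register pair, so two adjacent bits are set. Sketch:
+//
+//   RegisterIndex r(2);
+//   r.ToBit(MachineRepresentation::kFloat64);   // 1 << 2  == 0b00100
+//   r.ToBit(MachineRepresentation::kSimd128);   // 3 << 2  == 0b01100
+//                                               // (when !kSimpleFPAliasing)
+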
+// A Range from [start, end] of instructions, inclusive of start and end.
+class Range {
+ public:
+  Range() : start_(kMaxInt), end_(0) {}
+  Range(int start, int end) : start_(start), end_(end) {}
+
+  void AddInstr(int index) {
+    start_ = std::min(start_, index);
+    end_ = std::max(end_, index);
+  }
+
+  void AddRange(const Range& other) {
+    start_ = std::min(start_, other.start_);
+    end_ = std::max(end_, other.end_);
+  }
+
+  // Returns true if index is within [start, end], inclusive of both bounds.
+  bool Contains(int index) { return index >= start_ && index <= end_; }
+
+  int start() const { return start_; }
+  int end() const { return end_; }
+
+ private:
+  int start_;
+  int end_;
+};
+
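+// Example of how a Range grows (sketch):
+//
+//   Range r;          // Starts as the empty range [kMaxInt, 0].
+//   r.AddInstr(7);    // [7, 7]
+//   r.AddInstr(3);    // [3, 7]
+//   r.Contains(5);    // true; both bounds are inclusive.
+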
+// Represents a connected region of deferred basic blocks.
+class DeferredBlocksRegion final {
+ public:
+  explicit DeferredBlocksRegion(Zone* zone, int number_of_blocks)
+      : spilled_vregs_(zone), blocks_covered_(number_of_blocks, zone) {}
+
+  void AddBlock(RpoNumber block, MidTierRegisterAllocationData* data) {
+    DCHECK(data->GetBlock(block)->IsDeferred());
+    blocks_covered_.Add(block.ToInt());
+    data->block_state(block).set_deferred_blocks_region(this);
+  }
+
+  // Adds |vreg| to the set of virtual registers whose output spill may be
+  // deferred until entry to this deferred block region.
+  void DeferSpillOutputUntilEntry(int vreg) { spilled_vregs_.insert(vreg); }
+
+  ZoneSet<int>::iterator begin() const { return spilled_vregs_.begin(); }
+  ZoneSet<int>::iterator end() const { return spilled_vregs_.end(); }
+
+  const BitVector* blocks_covered() const { return &blocks_covered_; }
+
+ private:
+  ZoneSet<int> spilled_vregs_;
+  BitVector blocks_covered_;
+};
+
+// VirtualRegisterData stores data specific to a particular virtual register,
+// and tracks spilled operands for that virtual register.
+class VirtualRegisterData final {
+ public:
+  VirtualRegisterData() = default;
+
+  // Define VirtualRegisterData with the type of output that produces this
+  // virtual register.
+  void DefineAsUnallocatedOperand(int virtual_register, int instr_index,
+                                  bool is_deferred_block,
+                                  bool is_exceptional_call_output);
+  void DefineAsFixedSpillOperand(AllocatedOperand* operand,
+                                 int virtual_register, int instr_index,
+                                 bool is_deferred_block,
+                                 bool is_exceptional_call_output);
+  void DefineAsConstantOperand(ConstantOperand* operand, int instr_index,
+                               bool is_deferred_block);
+  void DefineAsPhi(int virtual_register, int instr_index,
+                   bool is_deferred_block);
+
+  // Spill an operand that is assigned to this virtual register.
+  void SpillOperand(InstructionOperand* operand, int instr_index,
+                    MidTierRegisterAllocationData* data);
+
+  // Emit gap moves to / from the spill slot.
+  void EmitGapMoveToInputFromSpillSlot(AllocatedOperand to_operand,
+                                       int instr_index,
+                                       MidTierRegisterAllocationData* data);
+  void EmitGapMoveFromOutputToSpillSlot(AllocatedOperand from_operand,
+                                        const InstructionBlock* current_block,
+                                        int instr_index,
+                                        MidTierRegisterAllocationData* data);
+  void EmitGapMoveToSpillSlot(AllocatedOperand from_operand, int instr_index,
+                              MidTierRegisterAllocationData* data);
+
+  // Adds pending spills for deferred blocks.
+  void AddDeferredSpillUse(int instr_index,
+                           MidTierRegisterAllocationData* data);
+  void AddDeferredSpillOutput(AllocatedOperand allocated_op, int instr_index,
+                              MidTierRegisterAllocationData* data);
+
+  // Accessors for spill operand, which may still be pending allocation.
+  bool HasSpillOperand() const { return spill_operand_ != nullptr; }
+  InstructionOperand* spill_operand() const {
+    DCHECK(HasSpillOperand());
+    return spill_operand_;
+  }
+
+  bool HasPendingSpillOperand() const {
+    return HasSpillOperand() && spill_operand_->IsPending();
+  }
+  bool HasAllocatedSpillOperand() const {
+    return HasSpillOperand() && spill_operand_->IsAllocated();
+  }
+  bool HasConstantSpillOperand() const {
+    DCHECK_EQ(is_constant(), HasSpillOperand() && spill_operand_->IsConstant());
+    return is_constant();
+  }
+
+  // Returns true if the virtual register should be spilled when it is output.
+  bool NeedsSpillAtOutput() const { return needs_spill_at_output_; }
+  void MarkAsNeedsSpillAtOutput() {
+    if (is_constant()) return;
+    needs_spill_at_output_ = true;
+    if (HasSpillRange()) spill_range()->ClearDeferredBlockSpills();
+  }
+
+  // Returns true if the virtual register should be spilled at entry to the
+  // deferred blocks in which it is spilled (to avoid spilling the output in
+  // non-deferred blocks).
+  bool NeedsSpillAtDeferredBlocks() const;
+  void EmitDeferredSpillOutputs(MidTierRegisterAllocationData* data);
+
+  bool IsSpilledAt(int instr_index, MidTierRegisterAllocationData* data) {
+    DCHECK_GE(instr_index, output_instr_index());
+    if (NeedsSpillAtOutput() || HasConstantSpillOperand()) return true;
+    if (HasSpillOperand() && data->GetBlock(instr_index)->IsDeferred()) {
+      return true;
+    }
+    return false;
+  }
+
+  // Allocates pending spill operands to the |allocated| spill slot.
+  void AllocatePendingSpillOperand(const AllocatedOperand& allocated);
+
+  int vreg() const { return vreg_; }
+  int output_instr_index() const { return output_instr_index_; }
+  bool is_constant() const { return is_constant_; }
+  bool is_phi() const { return is_phi_; }
+  bool is_defined_in_deferred_block() const {
+    return is_defined_in_deferred_block_;
+  }
+  bool is_exceptional_call_output() const {
+    return is_exceptional_call_output_;
+  }
+
+  struct DeferredSpillSlotOutput {
+   public:
+    explicit DeferredSpillSlotOutput(int instr, AllocatedOperand op,
+                                     const BitVector* blocks)
+        : instr_index(instr), operand(op), live_blocks(blocks) {}
+
+    int instr_index;
+    AllocatedOperand operand;
+    const BitVector* live_blocks;
+  };
+
+  // Represents the range of instructions for which this virtual register needs
+  // to be spilled on the stack.
+  class SpillRange : public ZoneObject {
+   public:
+    // Defines a spill range for an output operand.
+    SpillRange(int definition_instr_index,
+               const InstructionBlock* definition_block,
+               MidTierRegisterAllocationData* data)
+        : live_range_(definition_instr_index, definition_instr_index),
+          live_blocks_(data->GetBlocksDominatedBy(definition_block)),
+          deferred_spill_outputs_(nullptr) {}
+
+    // Defines a spill range for a Phi variable.
+    SpillRange(const InstructionBlock* phi_block,
+               MidTierRegisterAllocationData* data)
+        : live_range_(phi_block->first_instruction_index(),
+                      phi_block->first_instruction_index()),
+          live_blocks_(data->GetBlocksDominatedBy(phi_block)),
+          deferred_spill_outputs_(nullptr) {
+      // For phis, add the gap move instructions in the predecessor blocks to
+      // the live range.
+      for (RpoNumber pred_rpo : phi_block->predecessors()) {
+        const InstructionBlock* block = data->GetBlock(pred_rpo);
+        live_range_.AddInstr(block->last_instruction_index());
+      }
+    }
+
+    SpillRange(const SpillRange&) = delete;
+    SpillRange& operator=(const SpillRange&) = delete;
+
+    bool IsLiveAt(int instr_index, InstructionBlock* block) {
+      if (!live_range_.Contains(instr_index)) return false;
+
+      int block_rpo = block->rpo_number().ToInt();
+      if (!live_blocks_->Contains(block_rpo)) return false;
+
+      if (!HasDeferredBlockSpills()) {
+        return true;
+      } else {
+        // If this spill range is only output in deferred blocks, then the
+        // spill slot will only be live in those deferred blocks, not in all
+        // blocks in which the virtual register is live.
+        for (auto deferred_spill_output : *deferred_spill_outputs()) {
+          if (deferred_spill_output.live_blocks->Contains(block_rpo)) {
+            return true;
+          }
+        }
+        return false;
+      }
+    }
+
+    void ExtendRangeTo(int instr_index) { live_range_.AddInstr(instr_index); }
+
+    void AddDeferredSpillOutput(AllocatedOperand allocated_op, int instr_index,
+                                MidTierRegisterAllocationData* data) {
+      if (deferred_spill_outputs_ == nullptr) {
+        Zone* zone = data->allocation_zone();
+        deferred_spill_outputs_ =
+            zone->New<ZoneVector<DeferredSpillSlotOutput>>(zone);
+      }
+      const InstructionBlock* block = data->GetBlock(instr_index);
+      DCHECK_EQ(block->first_instruction_index(), instr_index);
+      BlockState& block_state = data->block_state(block->rpo_number());
+      const BitVector* deferred_blocks =
+          block_state.deferred_blocks_region()->blocks_covered();
+      deferred_spill_outputs_->emplace_back(instr_index, allocated_op,
+                                            deferred_blocks);
+    }
+
+    void ClearDeferredBlockSpills() { deferred_spill_outputs_ = nullptr; }
+    bool HasDeferredBlockSpills() const {
+      return deferred_spill_outputs_ != nullptr;
+    }
+    const ZoneVector<DeferredSpillSlotOutput>* deferred_spill_outputs() const {
+      DCHECK(HasDeferredBlockSpills());
+      return deferred_spill_outputs_;
+    }
+
+    Range& live_range() { return live_range_; }
+
+   private:
+    Range live_range_;
+    const BitVector* live_blocks_;
+    ZoneVector<DeferredSpillSlotOutput>* deferred_spill_outputs_;
+  };
+
+  bool HasSpillRange() const { return spill_range_ != nullptr; }
+  SpillRange* spill_range() const {
+    DCHECK(HasSpillRange());
+    return spill_range_;
+  }
+
+ private:
+  void Initialize(int virtual_register, InstructionOperand* spill_operand,
+                  int instr_index, bool is_phi, bool is_constant,
+                  bool is_defined_in_deferred_block,
+                  bool is_exceptional_call_output);
+
+  void AddSpillUse(int instr_index, MidTierRegisterAllocationData* data);
+  void AddPendingSpillOperand(PendingOperand* pending_operand);
+  void EnsureSpillRange(MidTierRegisterAllocationData* data);
+  bool CouldSpillOnEntryToDeferred(const InstructionBlock* block);
+
+  InstructionOperand* spill_operand_;
+  SpillRange* spill_range_;
+  int output_instr_index_;
+
+  int vreg_;
+  bool is_phi_ : 1;
+  bool is_constant_ : 1;
+  bool is_defined_in_deferred_block_ : 1;
+  bool needs_spill_at_output_ : 1;
+  bool is_exceptional_call_output_ : 1;
+};
+
+VirtualRegisterData& MidTierRegisterAllocationData::VirtualRegisterDataFor(
+    int virtual_register) {
+  DCHECK_GE(virtual_register, 0);
+  DCHECK_LT(virtual_register, virtual_register_data_.size());
+  return virtual_register_data_[virtual_register];
+}
+
+void VirtualRegisterData::Initialize(int virtual_register,
+                                     InstructionOperand* spill_operand,
+                                     int instr_index, bool is_phi,
+                                     bool is_constant,
+                                     bool is_defined_in_deferred_block,
+                                     bool is_exceptional_call_output) {
+  vreg_ = virtual_register;
+  spill_operand_ = spill_operand;
+  spill_range_ = nullptr;
+  output_instr_index_ = instr_index;
+  is_phi_ = is_phi;
+  is_constant_ = is_constant;
+  is_defined_in_deferred_block_ = is_defined_in_deferred_block;
+  needs_spill_at_output_ = !is_constant_ && spill_operand_ != nullptr;
+  is_exceptional_call_output_ = is_exceptional_call_output;
+}
+
+void VirtualRegisterData::DefineAsConstantOperand(ConstantOperand* operand,
+                                                  int instr_index,
+                                                  bool is_deferred_block) {
+  Initialize(operand->virtual_register(), operand, instr_index, false, true,
+             is_deferred_block, false);
+}
+
+void VirtualRegisterData::DefineAsFixedSpillOperand(
+    AllocatedOperand* operand, int virtual_register, int instr_index,
+    bool is_deferred_block, bool is_exceptional_call_output) {
+  Initialize(virtual_register, operand, instr_index, false, false,
+             is_deferred_block, is_exceptional_call_output);
+}
+
+void VirtualRegisterData::DefineAsUnallocatedOperand(
+    int virtual_register, int instr_index, bool is_deferred_block,
+    bool is_exceptional_call_output) {
+  Initialize(virtual_register, nullptr, instr_index, false, false,
+             is_deferred_block, is_exceptional_call_output);
+}
+
+void VirtualRegisterData::DefineAsPhi(int virtual_register, int instr_index,
+                                      bool is_deferred_block) {
+  Initialize(virtual_register, nullptr, instr_index, true, false,
+             is_deferred_block, false);
+}
+
+void VirtualRegisterData::EnsureSpillRange(
+    MidTierRegisterAllocationData* data) {
+  DCHECK(!is_constant());
+  if (HasSpillRange()) return;
+
+  const InstructionBlock* definition_block =
+      data->GetBlock(output_instr_index_);
+  if (is_phi()) {
+    // Define a spill range that covers the phi's range.
+    spill_range_ =
+        data->allocation_zone()->New<SpillRange>(definition_block, data);
+  } else {
+    if (is_exceptional_call_output()) {
+      // If this virtual register is output by a call which has an exception
+      // catch handler, then the output will only be live in the IfSuccess
+      // successor block, not the IfException side, so make the definition block
+      // the IfSuccess successor block explicitly.
+      DCHECK_EQ(output_instr_index_,
+                definition_block->last_instruction_index() - 1);
+      DCHECK_EQ(definition_block->SuccessorCount(), 2);
+      DCHECK(data->GetBlock(definition_block->successors()[1])->IsHandler());
+      definition_block = data->GetBlock(definition_block->successors()[0]);
+    }
+    // The spill slot will be defined after the instruction that outputs it.
+    spill_range_ = data->allocation_zone()->New<SpillRange>(
+        output_instr_index_ + 1, definition_block, data);
+  }
+  data->spilled_virtual_registers().Add(vreg());
+}
+
+void VirtualRegisterData::AddSpillUse(int instr_index,
+                                      MidTierRegisterAllocationData* data) {
+  if (is_constant()) return;
+
+  EnsureSpillRange(data);
+  spill_range_->ExtendRangeTo(instr_index);
+
+  const InstructionBlock* block = data->GetBlock(instr_index);
+  if (CouldSpillOnEntryToDeferred(block)) {
+    data->block_state(block->rpo_number())
+        .deferred_blocks_region()
+        ->DeferSpillOutputUntilEntry(vreg());
+  } else {
+    MarkAsNeedsSpillAtOutput();
+  }
+}
+
+void VirtualRegisterData::AddDeferredSpillUse(
+    int instr_index, MidTierRegisterAllocationData* data) {
+  DCHECK(data->GetBlock(instr_index)->IsDeferred());
+  DCHECK(!is_defined_in_deferred_block());
+  AddSpillUse(instr_index, data);
+}
+
+bool VirtualRegisterData::CouldSpillOnEntryToDeferred(
+    const InstructionBlock* block) {
+  return !NeedsSpillAtOutput() && block->IsDeferred() &&
+         !is_defined_in_deferred_block() && !is_constant();
+}
+
+void VirtualRegisterData::AddDeferredSpillOutput(
+    AllocatedOperand allocated_op, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  DCHECK(!NeedsSpillAtOutput());
+  spill_range_->AddDeferredSpillOutput(allocated_op, instr_index, data);
+}
+
+void VirtualRegisterData::SpillOperand(InstructionOperand* operand,
+                                       int instr_index,
+                                       MidTierRegisterAllocationData* data) {
+  AddSpillUse(instr_index, data);
+  if (HasAllocatedSpillOperand() || HasConstantSpillOperand()) {
+    InstructionOperand::ReplaceWith(operand, spill_operand());
+  } else {
+    PendingOperand pending_op;
+    InstructionOperand::ReplaceWith(operand, &pending_op);
+    AddPendingSpillOperand(PendingOperand::cast(operand));
+  }
+}
+
+bool VirtualRegisterData::NeedsSpillAtDeferredBlocks() const {
+  return HasSpillRange() && spill_range()->HasDeferredBlockSpills();
+}
+
+void VirtualRegisterData::EmitDeferredSpillOutputs(
+    MidTierRegisterAllocationData* data) {
+  DCHECK(NeedsSpillAtDeferredBlocks());
+  for (auto deferred_spill : *spill_range()->deferred_spill_outputs()) {
+    EmitGapMoveToSpillSlot(deferred_spill.operand, deferred_spill.instr_index,
+                           data);
+  }
+}
+
+void VirtualRegisterData::EmitGapMoveToInputFromSpillSlot(
+    AllocatedOperand to_operand, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  AddSpillUse(instr_index, data);
+  DCHECK(!to_operand.IsPending());
+  if (HasAllocatedSpillOperand() || HasConstantSpillOperand()) {
+    data->AddGapMove(instr_index, Instruction::END, *spill_operand(),
+                     to_operand);
+  } else {
+    MoveOperands* move_ops =
+        data->AddPendingOperandGapMove(instr_index, Instruction::END);
+    AddPendingSpillOperand(PendingOperand::cast(&move_ops->source()));
+    InstructionOperand::ReplaceWith(&move_ops->destination(), &to_operand);
+  }
+}
+
+void VirtualRegisterData::EmitGapMoveToSpillSlot(
+    AllocatedOperand from_operand, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  AddSpillUse(instr_index, data);
+  if (HasAllocatedSpillOperand() || HasConstantSpillOperand()) {
+    data->AddGapMove(instr_index, Instruction::START, from_operand,
+                     *spill_operand());
+  } else {
+    MoveOperands* move_ops =
+        data->AddPendingOperandGapMove(instr_index, Instruction::START);
+    InstructionOperand::ReplaceWith(&move_ops->source(), &from_operand);
+    AddPendingSpillOperand(PendingOperand::cast(&move_ops->destination()));
+  }
+}
+
+void VirtualRegisterData::EmitGapMoveFromOutputToSpillSlot(
+    AllocatedOperand from_operand, const InstructionBlock* current_block,
+    int instr_index, MidTierRegisterAllocationData* data) {
+  DCHECK_EQ(data->GetBlock(instr_index), current_block);
+  if (instr_index == current_block->last_instruction_index()) {
+    // Add gap move to the first instruction of every successor block.
+    for (const RpoNumber& succ : current_block->successors()) {
+      const InstructionBlock* successor = data->GetBlock(succ);
+      DCHECK_EQ(1, successor->PredecessorCount());
+      EmitGapMoveToSpillSlot(from_operand, successor->first_instruction_index(),
+                             data);
+    }
+  } else {
+    // Add gap move to the next instruction.
+    EmitGapMoveToSpillSlot(from_operand, instr_index + 1, data);
+  }
+}
+
+void VirtualRegisterData::AddPendingSpillOperand(PendingOperand* pending_op) {
+  DCHECK(HasSpillRange());
+  DCHECK_NULL(pending_op->next());
+  if (HasSpillOperand()) {
+    pending_op->set_next(PendingOperand::cast(spill_operand()));
+  }
+  spill_operand_ = pending_op;
+}
+
+void VirtualRegisterData::AllocatePendingSpillOperand(
+    const AllocatedOperand& allocated) {
+  DCHECK(!HasAllocatedSpillOperand() && !HasConstantSpillOperand());
+  PendingOperand* current = PendingOperand::cast(spill_operand_);
+  while (current) {
+    PendingOperand* next = current->next();
+    InstructionOperand::ReplaceWith(current, &allocated);
+    current = next;
+  }
+}
+
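+// The pending-operand handling above is a deferred-fixup pattern: operands
+// whose spill slot is not yet known are threaded into a singly linked list
+// (via PendingOperand::next()), and once the slot is allocated the whole list
+// is rewritten in one pass. A minimal standalone sketch of the pattern, with
+// plain pointers and illustrative names only:
+//
+//   struct Pending { Pending* next = nullptr; };
+//   Pending* head = nullptr;
+//   void AddPending(Pending* p) { p->next = head; head = p; }
+//   template <typename F>
+//   void ResolveAll(F replace_with_allocated) {
+//     for (Pending* p = head; p != nullptr;) {
+//       Pending* next = p->next;
+//       replace_with_allocated(p);  // Plays the role of ReplaceWith() above.
+//       p = next;
+//     }
+//     head = nullptr;
+//   }
+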
+// RegisterState represents the state of the |kind| registers at a particular
+// point in program execution. The RegisterState can be cloned or merged with
+// other RegisterStates to model branches and merges in program control flow.
+class RegisterState final : public ZoneObject {
+ public:
+  static RegisterState* New(RegisterKind kind, int num_allocatable_registers,
+                            Zone* zone) {
+    return zone->New<RegisterState>(kind, num_allocatable_registers, zone);
+  }
+
+  RegisterState(RegisterKind kind, int num_allocatable_registers, Zone* zone);
+  RegisterState(const RegisterState& other) V8_NOEXCEPT;
+
+  bool IsAllocated(RegisterIndex reg);
+  bool IsShared(RegisterIndex reg);
+  int VirtualRegisterForRegister(RegisterIndex reg);
+
+  // Commit the |reg| with the |allocated| operand.
+  void Commit(RegisterIndex reg, AllocatedOperand allocated,
+              InstructionOperand* operand, MidTierRegisterAllocationData* data);
+
+  // Spill the contents of |reg| for an instruction in |current_block| using
+  // the |allocated| operand to commit the spill gap move.
+  void Spill(RegisterIndex reg, AllocatedOperand allocated,
+             const InstructionBlock* current_block,
+             MidTierRegisterAllocationData* data);
+
+  // Add a pending spill of the contents of |reg| at the exit point of a
+  // deferred block at |instr_index| using |allocated| operand to commit the
+  // spill gap move, if the register never gets spilled in a non-deferred block.
+  void SpillForDeferred(RegisterIndex reg, AllocatedOperand allocated,
+                        int instr_index, MidTierRegisterAllocationData* data);
+
+  // Add a pending gap move from |reg| to |virtual_register|'s spill at the
+  // entry point of a deferred block at |instr_index|, if |virtual_register| is
+  // never spilled in a non-deferred block.
+  void MoveToSpillSlotOnDeferred(RegisterIndex reg, int virtual_register,
+                                 int instr_index,
+                                 MidTierRegisterAllocationData* data);
+
+  // Allocate |reg| to |virtual_register| for the instruction at |instr_index|.
+  // If the register is later spilled, a gap move will be added immediately
+  // before |instr_index| to move |virtual_register| into this register.
+  void AllocateUse(RegisterIndex reg, int virtual_register,
+                   InstructionOperand* operand, int instr_index,
+                   MidTierRegisterAllocationData* data);
+
+  // Allocate |reg| as a pending use of |virtual_register| for |operand| in the
+  // instruction at |instr_index|. If |virtual_register| later gets committed to
+  // this register, then |operand| will be too, otherwise |operand| will be
+  // replaced with |virtual_register|'s spill operand.
+  void AllocatePendingUse(RegisterIndex reg, int virtual_register,
+                          InstructionOperand* operand, int instr_index);
+
+  // Mark that the register is holding a phi operand that is yet to be allocated
+  // by the source block in the gap just before the last instruction in the
+  // source block.
+  void UseForPhiGapMove(RegisterIndex reg);
+  bool IsPhiGapMove(RegisterIndex reg);
+
+  // Returns true if |reg| only has pending uses allocated to it.
+  bool HasPendingUsesOnly(RegisterIndex reg);
+
+  // Clone this RegisterState for a successor block.
+  RegisterState* Clone();
+
+  // Copy register details for |reg| from |source| to |this| RegisterState.
+  void CopyFrom(RegisterIndex reg, RegisterState* source);
+
+  // Returns true if the register details for |reg| are equal in |source| and
+  // |this| RegisterStates.
+  bool Equals(RegisterIndex reg, RegisterState* source);
+
+  // Signals that the registers in this state are going to be shared across
+  // |shared_use_count| blocks.
+  void AddSharedUses(int shared_use_count);
+
+  // When merging multiple blocks' RegisterState into the successor block with
+  // |this| RegisterState, commit |reg| as being merged from a given predecessor
+  // block.
+  void CommitAtMerge(RegisterIndex reg);
+
+  // Resets |reg| if it has register data that was shared with other basic
+  // blocks and was spilled in those blocks.
+  void ResetIfSpilledWhileShared(RegisterIndex reg);
+
+  // Enable range-based for on allocatable register indices.
+  RegisterIndex::Iterator begin() const { return RegisterIndex::Iterator(0); }
+  RegisterIndex::Iterator end() const {
+    return RegisterIndex::Iterator(num_allocatable_registers());
+  }
+
+ private:
+  // Represents a particular register and details of what virtual_register it is
+  // currently holding, and how it should be updated if committed or spilled.
+  class Register final : public ZoneObject {
+   public:
+    Register();
+    void Reset();
+
+    // Operations for committing, spilling and allocating uses of the register.
+    void Commit(AllocatedOperand allocated_operand,
+                MidTierRegisterAllocationData* data);
+    void Spill(AllocatedOperand allocated_op,
+               const InstructionBlock* current_block,
+               MidTierRegisterAllocationData* data);
+    void Use(int virtual_register, int instr_index);
+    void PendingUse(InstructionOperand* operand, int virtual_register,
+                    int instr_index);
+    void SpillForDeferred(AllocatedOperand allocated, int instr_index,
+                          MidTierRegisterAllocationData* data);
+    void MoveToSpillSlotOnDeferred(int virtual_register, int instr_index,
+                                   MidTierRegisterAllocationData* data);
+
+    // Mark register as holding a phi.
+    void MarkAsPhiMove();
+    bool is_phi_gap_move() const { return is_phi_gap_move_; }
+
+    // The register has deferred block spills that will be emitted if the
+    // register is committed without having been spilled in a non-deferred
+    // block.
+    void AddDeferredBlockSpill(int instr_index, bool on_exit, Zone* zone);
+    bool has_deferred_block_spills() const {
+      return deferred_block_spills_.has_value();
+    }
+
+    // Operations related to dealing with a Register that is shared across
+    // multiple basic blocks.
+    void CommitAtMerge();
+    void AddSharedUses(int shared_use_count);
+    bool is_shared() const { return is_shared_; }
+    bool was_spilled_while_shared() const {
+      return is_shared() && !is_allocated();
+    }
+
+    bool is_allocated() const {
+      return virtual_register_ != InstructionOperand::kInvalidVirtualRegister;
+    }
+
+    // The current virtual register held by this register.
+    int virtual_register() const { return virtual_register_; }
+
+    // The instruction index for the last use of the current in-progress
+    // allocation of this register in the instruction stream. Used both as the
+    // instruction at which to add a gap move if |needs_gap_move_on_spill|, and
+    // as the instruction to which the virtual register's spill range should be
+    // extended if the register is spilled.
+    int last_use_instr_index() const { return last_use_instr_index_; }
+
+    // Returns true if a gap move should be added if the register is spilled.
+    bool needs_gap_move_on_spill() const { return needs_gap_move_on_spill_; }
+
+    // Returns a threaded list of the operands that have pending uses of this
+    // register and will be resolved either to the register, or a spill slot
+    // depending on whether this register is spilled or committed.
+    PendingOperand* pending_uses() const { return pending_uses_; }
+
+   private:
+    struct DeferredBlockSpill {
+      DeferredBlockSpill(int instr, bool on_exit)
+          : instr_index(instr), on_deferred_exit(on_exit) {}
+
+      int instr_index;
+      bool on_deferred_exit;
+    };
+
+    void SpillPendingUses(MidTierRegisterAllocationData* data);
+    void SpillPhiGapMove(AllocatedOperand allocated_op,
+                         const InstructionBlock* block,
+                         MidTierRegisterAllocationData* data);
+
+    bool needs_gap_move_on_spill_;
+    bool is_shared_;
+    bool is_phi_gap_move_;
+    int last_use_instr_index_;
+
+    int num_commits_required_;
+    int virtual_register_;
+    PendingOperand* pending_uses_;
+    base::Optional<ZoneVector<DeferredBlockSpill>> deferred_block_spills_;
+  };
+
+  void ResetDataFor(RegisterIndex reg);
+
+  bool HasRegisterData(RegisterIndex reg);
+  void EnsureRegisterData(RegisterIndex reg);
+
+  int num_allocatable_registers() const {
+    return static_cast<int>(register_data_.size());
+  }
+  Register& reg_data(RegisterIndex reg);
+  Zone* zone() const { return zone_; }
+
+  ZoneVector<Register*> register_data_;
+  Zone* zone_;
+};
+
+RegisterState::Register::Register() { Reset(); }
+
+void RegisterState::Register::Reset() {
+  is_shared_ = false;
+  is_phi_gap_move_ = false;
+  needs_gap_move_on_spill_ = false;
+  last_use_instr_index_ = -1;
+  num_commits_required_ = 0;
+  virtual_register_ = InstructionOperand::kInvalidVirtualRegister;
+  pending_uses_ = nullptr;
+  deferred_block_spills_.reset();
+}
+
+void RegisterState::Register::Use(int virtual_register, int instr_index) {
+  // A register can have many pending uses, but should only ever have a single
+  // non-pending use, since any subsequent use will commit the preceding use
+  // first.
+  DCHECK(!is_allocated());
+  needs_gap_move_on_spill_ = true;
+  virtual_register_ = virtual_register;
+  last_use_instr_index_ = instr_index;
+  num_commits_required_ = 1;
+}
+
+void RegisterState::Register::PendingUse(InstructionOperand* operand,
+                                         int virtual_register,
+                                         int instr_index) {
+  if (!is_allocated()) {
+    virtual_register_ = virtual_register;
+    last_use_instr_index_ = instr_index;
+    num_commits_required_ = 1;
+  }
+  DCHECK_EQ(virtual_register_, virtual_register);
+
+  PendingOperand pending_op(pending_uses());
+  InstructionOperand::ReplaceWith(operand, &pending_op);
+  pending_uses_ = PendingOperand::cast(operand);
+}
+
+void RegisterState::Register::MarkAsPhiMove() {
+  DCHECK(is_allocated());
+  is_phi_gap_move_ = true;
+}
+
+void RegisterState::Register::AddDeferredBlockSpill(int instr_index,
+                                                    bool on_exit, Zone* zone) {
+  DCHECK(is_allocated());
+  if (!deferred_block_spills_) {
+    deferred_block_spills_.emplace(zone);
+  }
+  deferred_block_spills_->emplace_back(instr_index, on_exit);
+}
+
+void RegisterState::Register::AddSharedUses(int shared_use_count) {
+  is_shared_ = true;
+  num_commits_required_ += shared_use_count;
+}
+
+void RegisterState::Register::CommitAtMerge() {
+  DCHECK(is_shared());
+  DCHECK(is_allocated());
+  --num_commits_required_;
+  // We should still have commits required that will be resolved in the merge
+  // block.
+  DCHECK_GT(num_commits_required_, 0);
+}
+
+void RegisterState::Register::Commit(AllocatedOperand allocated_op,
+                                     MidTierRegisterAllocationData* data) {
+  DCHECK(is_allocated());
+  DCHECK_GT(num_commits_required_, 0);
+
+  if (--num_commits_required_ == 0) {
+    // Allocate all pending uses to |allocated_op| if this commit is non-shared,
+    // or if it is the final commit required on a register data shared across
+    // blocks.
+    PendingOperand* pending_use = pending_uses();
+    while (pending_use) {
+      PendingOperand* next = pending_use->next();
+      InstructionOperand::ReplaceWith(pending_use, &allocated_op);
+      pending_use = next;
+    }
+    pending_uses_ = nullptr;
+
+    VirtualRegisterData& vreg_data =
+        data->VirtualRegisterDataFor(virtual_register());
+
+    // If there are deferred block gap moves pending, emit them now that the
+    // register has been committed.
+    if (has_deferred_block_spills()) {
+      for (DeferredBlockSpill& spill : *deferred_block_spills_) {
+        if (spill.on_deferred_exit) {
+          vreg_data.EmitGapMoveToInputFromSpillSlot(allocated_op,
+                                                    spill.instr_index, data);
+        } else if (!vreg_data.NeedsSpillAtOutput()) {
+          vreg_data.AddDeferredSpillOutput(allocated_op, spill.instr_index,
+                                           data);
+        }
+      }
+    }
+
+    // If this register was used as a phi gap move, then committing it is the
+    // point at which we have output the phi.
+    if (is_phi_gap_move() && vreg_data.NeedsSpillAtDeferredBlocks()) {
+      vreg_data.EmitDeferredSpillOutputs(data);
+    }
+  }
+  DCHECK_IMPLIES(num_commits_required_ > 0, is_shared());
+}
+
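+// The commit counter above behaves like a reference count. A hypothetical
+// sequence for a register shared with two additional blocks (a numeric sketch
+// only; the actual call sites are in the merge handling):
+//
+//   Use(vreg, instr)   // num_commits_required_ == 1
+//   AddSharedUses(2)   // num_commits_required_ == 3
+//   CommitAtMerge()    // num_commits_required_ == 2
+//   Commit(op, data)   // num_commits_required_ == 1, pending uses kept
+//   Commit(op, data)   // num_commits_required_ == 0, pending uses replaced
+//                      // with |op| and deferred block spills emitted.
+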
+void RegisterState::Register::Spill(AllocatedOperand allocated_op,
+                                    const InstructionBlock* current_block,
+                                    MidTierRegisterAllocationData* data) {
+  VirtualRegisterData& vreg_data =
+      data->VirtualRegisterDataFor(virtual_register());
+  SpillPendingUses(data);
+  if (is_phi_gap_move()) {
+    SpillPhiGapMove(allocated_op, current_block, data);
+  }
+  if (needs_gap_move_on_spill()) {
+    vreg_data.EmitGapMoveToInputFromSpillSlot(allocated_op,
+                                              last_use_instr_index(), data);
+  }
+  if (has_deferred_block_spills() || !current_block->IsDeferred()) {
+    vreg_data.MarkAsNeedsSpillAtOutput();
+  }
+  virtual_register_ = InstructionOperand::kInvalidVirtualRegister;
+}
+
+void RegisterState::Register::SpillPhiGapMove(
+    AllocatedOperand allocated_op, const InstructionBlock* current_block,
+    MidTierRegisterAllocationData* data) {
+  DCHECK_EQ(current_block->SuccessorCount(), 1);
+  const InstructionBlock* phi_block =
+      data->GetBlock(current_block->successors()[0]);
+
+  // Add gap moves to the spilled phi for all blocks we previously allocated
+  // assuming the phi was in a register.
+  VirtualRegisterData& vreg_data =
+      data->VirtualRegisterDataFor(virtual_register());
+  for (RpoNumber predecessor : phi_block->predecessors()) {
+    // If the predecessor has a higher rpo number than the current block, then
+    // we have already processed it (allocation walks the blocks in reverse
+    // order), so add the required gap move.
+    if (predecessor > current_block->rpo_number()) {
+      const InstructionBlock* predecessor_block = data->GetBlock(predecessor);
+      vreg_data.EmitGapMoveToSpillSlot(
+          allocated_op, predecessor_block->last_instruction_index(), data);
+    }
+  }
+}
+
+void RegisterState::Register::SpillPendingUses(
+    MidTierRegisterAllocationData* data) {
+  VirtualRegisterData& vreg_data =
+      data->VirtualRegisterDataFor(virtual_register());
+  PendingOperand* pending_use = pending_uses();
+  while (pending_use) {
+    // Spill all the pending operands associated with this register.
+    PendingOperand* next = pending_use->next();
+    vreg_data.SpillOperand(pending_use, last_use_instr_index(), data);
+    pending_use = next;
+  }
+  pending_uses_ = nullptr;
+}
+
+void RegisterState::Register::SpillForDeferred(
+    AllocatedOperand allocated, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  DCHECK(is_allocated());
+  DCHECK(is_shared());
+  // Add a pending deferred spill, then commit the register (with the commit
+  // being fulfilled by the deferred spill if the register is fully committed).
+  data->VirtualRegisterDataFor(virtual_register())
+      .AddDeferredSpillUse(instr_index, data);
+  AddDeferredBlockSpill(instr_index, true, data->allocation_zone());
+  Commit(allocated, data);
+}
+
+void RegisterState::Register::MoveToSpillSlotOnDeferred(
+    int virtual_register, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  if (!is_allocated()) {
+    virtual_register_ = virtual_register;
+    last_use_instr_index_ = instr_index;
+    num_commits_required_ = 1;
+  }
+  AddDeferredBlockSpill(instr_index, false, data->allocation_zone());
+}
+
+RegisterState::RegisterState(RegisterKind kind, int num_allocatable_registers,
+                             Zone* zone)
+    : register_data_(num_allocatable_registers, zone), zone_(zone) {}
+
+RegisterState::RegisterState(const RegisterState& other) V8_NOEXCEPT
+    : register_data_(other.register_data_.begin(), other.register_data_.end(),
+                     other.zone_),
+      zone_(other.zone_) {}
+
+int RegisterState::VirtualRegisterForRegister(RegisterIndex reg) {
+  if (IsAllocated(reg)) {
+    return reg_data(reg).virtual_register();
+  } else {
+    return InstructionOperand::kInvalidVirtualRegister;
+  }
+}
+
+bool RegisterState::IsPhiGapMove(RegisterIndex reg) {
+  DCHECK(IsAllocated(reg));
+  return reg_data(reg).is_phi_gap_move();
+}
+
+void RegisterState::Commit(RegisterIndex reg, AllocatedOperand allocated,
+                           InstructionOperand* operand,
+                           MidTierRegisterAllocationData* data) {
+  InstructionOperand::ReplaceWith(operand, &allocated);
+  if (IsAllocated(reg)) {
+    reg_data(reg).Commit(allocated, data);
+    ResetDataFor(reg);
+  }
+}
+
+void RegisterState::Spill(RegisterIndex reg, AllocatedOperand allocated,
+                          const InstructionBlock* current_block,
+                          MidTierRegisterAllocationData* data) {
+  DCHECK(IsAllocated(reg));
+  reg_data(reg).Spill(allocated, current_block, data);
+  ResetDataFor(reg);
+}
+
+void RegisterState::SpillForDeferred(RegisterIndex reg,
+                                     AllocatedOperand allocated,
+                                     int instr_index,
+                                     MidTierRegisterAllocationData* data) {
+  DCHECK(IsAllocated(reg));
+  reg_data(reg).SpillForDeferred(allocated, instr_index, data);
+  ResetDataFor(reg);
+}
+
+void RegisterState::MoveToSpillSlotOnDeferred(
+    RegisterIndex reg, int virtual_register, int instr_index,
+    MidTierRegisterAllocationData* data) {
+  EnsureRegisterData(reg);
+  reg_data(reg).MoveToSpillSlotOnDeferred(virtual_register, instr_index, data);
+}
+
+void RegisterState::AllocateUse(RegisterIndex reg, int virtual_register,
+                                InstructionOperand* operand, int instr_index,
+                                MidTierRegisterAllocationData* data) {
+  EnsureRegisterData(reg);
+  reg_data(reg).Use(virtual_register, instr_index);
+}
+
+void RegisterState::AllocatePendingUse(RegisterIndex reg, int virtual_register,
+                                       InstructionOperand* operand,
+                                       int instr_index) {
+  EnsureRegisterData(reg);
+  reg_data(reg).PendingUse(operand, virtual_register, instr_index);
+}
+
+void RegisterState::UseForPhiGapMove(RegisterIndex reg) {
+  DCHECK(IsAllocated(reg));
+  reg_data(reg).MarkAsPhiMove();
+}
+
+RegisterState::Register& RegisterState::reg_data(RegisterIndex reg) {
+  DCHECK(HasRegisterData(reg));
+  return *register_data_[reg.ToInt()];
+}
+
+bool RegisterState::IsShared(RegisterIndex reg) {
+  return HasRegisterData(reg) && reg_data(reg).is_shared();
+}
+
+bool RegisterState::IsAllocated(RegisterIndex reg) {
+  return HasRegisterData(reg) && reg_data(reg).is_allocated();
+}
+
+bool RegisterState::HasPendingUsesOnly(RegisterIndex reg) {
+  DCHECK(IsAllocated(reg));
+  return !reg_data(reg).needs_gap_move_on_spill();
+}
+
+void RegisterState::ResetDataFor(RegisterIndex reg) {
+  DCHECK(HasRegisterData(reg));
+  if (reg_data(reg).is_shared()) {
+    register_data_[reg.ToInt()] = nullptr;
+  } else {
+    reg_data(reg).Reset();
+  }
+}
+
+bool RegisterState::HasRegisterData(RegisterIndex reg) {
+  DCHECK_LT(reg.ToInt(), register_data_.size());
+  return register_data_[reg.ToInt()] != nullptr;
+}
+
+void RegisterState::EnsureRegisterData(RegisterIndex reg) {
+  if (!HasRegisterData(reg)) {
+    register_data_[reg.ToInt()] = zone()->New<RegisterState::Register>();
+  }
+}
+
+void RegisterState::ResetIfSpilledWhileShared(RegisterIndex reg) {
+  if (HasRegisterData(reg) && reg_data(reg).was_spilled_while_shared()) {
+    ResetDataFor(reg);
+  }
+}
+
+void RegisterState::CommitAtMerge(RegisterIndex reg) {
+  DCHECK(IsAllocated(reg));
+  reg_data(reg).CommitAtMerge();
+}
+
+void RegisterState::CopyFrom(RegisterIndex reg, RegisterState* source) {
+  register_data_[reg.ToInt()] = source->register_data_[reg.ToInt()];
+}
+
+bool RegisterState::Equals(RegisterIndex reg, RegisterState* other) {
+  return register_data_[reg.ToInt()] == other->register_data_[reg.ToInt()];
+}
+
+void RegisterState::AddSharedUses(int shared_use_count) {
+  for (RegisterIndex reg : *this) {
+    if (HasRegisterData(reg)) {
+      reg_data(reg).AddSharedUses(shared_use_count);
+    }
+  }
+}
+
+RegisterState* RegisterState::Clone() {
+  return zone_->New<RegisterState>(*this);
+}
+
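+// A simple bit set over the allocatable registers of a single RegisterKind,
+// keyed by RegisterIndex and MachineRepresentation. Used below to track the
+// in-use and allocated register sets.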
+class RegisterBitVector {
+ public:
+  RegisterBitVector() : bits_(0) {}
+
+  bool Contains(RegisterIndex reg, MachineRepresentation rep) const {
+    return bits_ & reg.ToBit(rep);
+  }
+
+  RegisterIndex GetFirstSet() const {
+    return RegisterIndex(base::bits::CountTrailingZeros(bits_));
+  }
+
+  RegisterIndex GetFirstCleared(int max_reg) const {
+    int reg_index = base::bits::CountTrailingZeros(~bits_);
+    if (reg_index < max_reg) {
+      return RegisterIndex(reg_index);
+    } else {
+      return RegisterIndex::Invalid();
+    }
+  }
+
+  void Add(RegisterIndex reg, MachineRepresentation rep) {
+    bits_ |= reg.ToBit(rep);
+  }
+
+  void Clear(RegisterIndex reg, MachineRepresentation rep) {
+    bits_ &= ~reg.ToBit(rep);
+  }
+
+  RegisterBitVector Union(const RegisterBitVector& other) {
+    return RegisterBitVector(bits_ | other.bits_);
+  }
+
+  void Reset() { bits_ = 0; }
+  bool IsEmpty() const { return bits_ == 0; }
+
+ private:
+  explicit RegisterBitVector(uintptr_t bits) : bits_(bits) {}
+
+  static_assert(RegisterConfiguration::kMaxRegisters <= sizeof(uintptr_t) * 8,
+                "Maximum registers must fit in uintptr_t bitmap");
+  uintptr_t bits_;
+};
+
+// A SinglePassRegisterAllocator is a fast register allocator that does a single
+// pass through the instruction stream without performing any live-range
+// analysis beforehand. It deals with a single RegisterKind, either general or
+// double registers, with the MidTierRegisterAllocator choosing the correct
+// SinglePassRegisterAllocator based on a value's representation.
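+//
+// A rough sketch of how the allocator is driven (illustrative only, not the
+// actual driver code); blocks and instructions are processed in reverse
+// order, with successors handled before their predecessors:
+//
+//   allocator.StartBlock(block);
+//   for each instruction in the block (in reverse):
+//     reserve fixed registers via ReserveFixed{Input,Temp,Output}Register();
+//     allocate operands via AllocateInput()/AllocateOutput()/AllocateTemp();
+//     allocator.EndInstruction();
+//   allocator.EndBlock(block);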
+class SinglePassRegisterAllocator final {
+ public:
+  SinglePassRegisterAllocator(RegisterKind kind,
+                              MidTierRegisterAllocationData* data);
+
+  // Convert to / from a register code and a register index.
+  RegisterIndex FromRegCode(int reg_code, MachineRepresentation rep) const;
+  int ToRegCode(RegisterIndex index, MachineRepresentation rep) const;
+
+  // Allocation routines used to allocate a particular operand to either a
+  // register or a spill slot.
+  void AllocateConstantOutput(ConstantOperand* operand);
+  void AllocateOutput(UnallocatedOperand* operand, int instr_index);
+  void AllocateInput(UnallocatedOperand* operand, int instr_index);
+  void AllocateSameInputOutput(UnallocatedOperand* output,
+                               UnallocatedOperand* input, int instr_index);
+  void AllocateGapMoveInput(UnallocatedOperand* operand, int instr_index);
+  void AllocateTemp(UnallocatedOperand* operand, int instr_index);
+  void AllocatePhi(int virtual_register, const InstructionBlock* block);
+  void AllocatePhiGapMove(int to_vreg, int from_vreg, int instr_index);
+
+  // Reserve any fixed registers for an instruction's operands before
+  // allocating those operands.
+  void ReserveFixedInputRegister(const UnallocatedOperand* operand,
+                                 int instr_index);
+  void ReserveFixedTempRegister(const UnallocatedOperand* operand,
+                                int instr_index);
+  void ReserveFixedOutputRegister(const UnallocatedOperand* operand,
+                                  int instr_index);
+
+  // Spills all registers that are currently holding data, for example, due to
+  // an instruction that clobbers all registers.
+  void SpillAllRegisters();
+
+  // Inform the allocator that we are starting / ending a block or ending
+  // allocation for the current instruction.
+  void StartBlock(const InstructionBlock* block);
+  void EndBlock(const InstructionBlock* block);
+  void EndInstruction();
+
+  void UpdateForDeferredBlock(int instr_index);
+  void AllocateDeferredBlockSpillOutput(int instr_index,
+                                        RpoNumber deferred_block,
+                                        int virtual_register);
+
+  RegisterKind kind() const { return kind_; }
+  BitVector* assigned_registers() const { return assigned_registers_; }
+
+ private:
+  enum class UsePosition {
+    // Operand used at start of instruction.
+    kStart,
+    // Operand used at end of instruction.
+    kEnd,
+    // Operand is used at both the start and end of instruction.
+    kAll,
+    // Operand is not used in the instruction (used when initializing register
+    // state on block entry).
+    kNone,
+  };
+
+  // The allocator is initialized without any RegisterState by default to avoid
+  // having to allocate per-block allocator state for functions that don't
+  // allocate registers of a particular type. All allocation functions should
+  // call EnsureRegisterState to allocate a RegisterState if necessary.
+  void EnsureRegisterState();
+
+  // Clone the register state from |successor| into the current register state.
+  void CloneStateFrom(RpoNumber successor);
+
+  // Merge the register state of |successors| into the current register state.
+  void MergeStateFrom(const InstructionBlock::Successors& successors);
+
+  // Spill a register in a previously processed successor block when merging
+  // state into the current block.
+  void SpillRegisterAtMerge(RegisterState* reg_state, RegisterIndex reg);
+
+  // Introduce a gap move to move |virtual_register| from reg |from| to reg |to|
+  // on entry to a |successor| block.
+  void MoveRegisterOnMerge(RegisterIndex from, RegisterIndex to,
+                           int virtual_register, RpoNumber successor,
+                           RegisterState* succ_state);
+
+  // Update the virtual register data with the data in register_state().
+  void UpdateVirtualRegisterState();
+
+  // Returns true if |virtual_register| is defined after use position |pos| at
+  // |instr_index|.
+  bool DefinedAfter(int virtual_register, int instr_index, UsePosition pos);
+
+  // Allocate |reg| to |virtual_register| for |operand| of the instruction at
+  // |instr_index|. The register will be reserved for this use for the specified
+  // |pos| use position.
+  void AllocateUse(RegisterIndex reg, int virtual_register,
+                   InstructionOperand* operand, int instr_index,
+                   UsePosition pos);
+
+  // Allocate |reg| to |virtual_register| as a pending use (i.e., only if the
+  // register is not subsequently spilled) for |operand| of the instruction at
+  // |instr_index|.
+  void AllocatePendingUse(RegisterIndex reg, int virtual_register,
+                          InstructionOperand* operand, int instr_index);
+
+  // Allocate |operand| to |reg| and add a gap move to move |virtual_register|
+  // to this register for the instruction at |instr_index|. |reg| will be
+  // reserved for this use for the specified |pos| use position.
+  void AllocateUseWithMove(RegisterIndex reg, int virtual_register,
+                           UnallocatedOperand* operand, int instr_index,
+                           UsePosition pos);
+
+  void CommitRegister(RegisterIndex reg, int virtual_register,
+                      InstructionOperand* operand, UsePosition pos);
+  void SpillRegister(RegisterIndex reg);
+  void SpillRegisterForVirtualRegister(int virtual_register);
+
+  // Pre-emptively spill the register at the exit of deferred blocks such that
+  // uses of this register in non-deferred blocks don't need to be spilled.
+  void SpillRegisterForDeferred(RegisterIndex reg, int instr_index);
+
+  // Returns an AllocatedOperand corresponding to the use of |reg| for
+  // |virtual_register|.
+  AllocatedOperand AllocatedOperandForReg(RegisterIndex reg,
+                                          int virtual_register);
+
+  void ReserveFixedRegister(const UnallocatedOperand* operand, int instr_index,
+                            UsePosition pos);
+  RegisterIndex AllocateOutput(UnallocatedOperand* operand, int instr_index,
+                               UsePosition pos);
+  void EmitGapMoveFromOutput(InstructionOperand from, InstructionOperand to,
+                             int instr_index);
+
+  // Helper functions to choose the best register for a given operand.
+  V8_INLINE RegisterIndex
+  ChooseRegisterFor(VirtualRegisterData& virtual_register, int instr_index,
+                    UsePosition pos, bool must_use_register);
+  V8_INLINE RegisterIndex ChooseRegisterFor(MachineRepresentation rep,
+                                            UsePosition pos,
+                                            bool must_use_register);
+  V8_INLINE RegisterIndex ChooseFreeRegister(MachineRepresentation rep,
+                                             UsePosition pos);
+  V8_INLINE RegisterIndex ChooseFreeRegister(
+      const RegisterBitVector& allocated_regs, MachineRepresentation rep);
+  V8_INLINE RegisterIndex ChooseRegisterToSpill(MachineRepresentation rep,
+                                                UsePosition pos);
+
+  // Assign, free and mark uses of |reg| for a |virtual_register| at use
+  // position |pos|.
+  V8_INLINE void AssignRegister(RegisterIndex reg, int virtual_register,
+                                UsePosition pos);
+  V8_INLINE void FreeRegister(RegisterIndex reg, int virtual_register);
+  V8_INLINE void MarkRegisterUse(RegisterIndex reg, MachineRepresentation rep,
+                                 UsePosition pos);
+  V8_INLINE RegisterBitVector InUseBitmap(UsePosition pos);
+  V8_INLINE bool IsValidForRep(RegisterIndex reg, MachineRepresentation rep);
+
+  // Return the register allocated to |virtual_register|, if any.
+  RegisterIndex RegisterForVirtualRegister(int virtual_register);
+  // Return the virtual register being held by |reg|, or kInvalidVirtualRegister
+  // if |reg| is unallocated.
+  int VirtualRegisterForRegister(RegisterIndex reg);
+
+  // Returns true if |reg| is unallocated or holds |virtual_register|.
+  bool IsFreeOrSameVirtualRegister(RegisterIndex reg, int virtual_register);
+  // Returns true if |virtual_register| is unallocated or is allocated to |reg|.
+  bool VirtualRegisterIsUnallocatedOrInReg(int virtual_register,
+                                           RegisterIndex reg);
+
+  // Returns a RegisterBitVector representing the allocated registers in
+  // reg_state.
+  RegisterBitVector GetAllocatedRegBitVector(RegisterState* reg_state);
+
+  // Check the consistency of reg->vreg and vreg->reg mappings in debug builds.
+  void CheckConsistency();
+
+  bool HasRegisterState() const { return register_state_; }
+  RegisterState* register_state() const {
+    DCHECK(HasRegisterState());
+    return register_state_;
+  }
+
+  VirtualRegisterData& VirtualRegisterDataFor(int virtual_register) const {
+    return data()->VirtualRegisterDataFor(virtual_register);
+  }
+
+  MachineRepresentation RepresentationFor(int virtual_register) const {
+    return data()->RepresentationFor(virtual_register);
+  }
+
+  int num_allocatable_registers() const { return num_allocatable_registers_; }
+  const InstructionBlock* current_block() const { return current_block_; }
+  MidTierRegisterAllocationData* data() const { return data_; }
+
+  // Virtual register to register mapping.
+  ZoneVector<RegisterIndex> virtual_register_to_reg_;
+
+  // Current register state during allocation.
+  RegisterState* register_state_;
+
+  // The current block being processed.
+  const InstructionBlock* current_block_;
+
+  const RegisterKind kind_;
+  const int num_allocatable_registers_;
+  ZoneVector<RegisterIndex> reg_code_to_index_;
+  const int* index_to_reg_code_;
+  BitVector* assigned_registers_;
+
+  MidTierRegisterAllocationData* data_;
+
+  RegisterBitVector in_use_at_instr_start_bits_;
+  RegisterBitVector in_use_at_instr_end_bits_;
+  RegisterBitVector allocated_registers_bits_;
+
+  // These fields are only used when kSimpleFPAliasing == false.
+  base::Optional<ZoneVector<RegisterIndex>> float32_reg_code_to_index_;
+  base::Optional<ZoneVector<int>> index_to_float32_reg_code_;
+  base::Optional<ZoneVector<RegisterIndex>> simd128_reg_code_to_index_;
+  base::Optional<ZoneVector<int>> index_to_simd128_reg_code_;
+};
+
+SinglePassRegisterAllocator::SinglePassRegisterAllocator(
+    RegisterKind kind, MidTierRegisterAllocationData* data)
+    : virtual_register_to_reg_(data->code()->VirtualRegisterCount(),
+                               data->allocation_zone()),
+      register_state_(nullptr),
+      current_block_(nullptr),
+      kind_(kind),
+      num_allocatable_registers_(
+          GetAllocatableRegisterCount(data->config(), kind)),
+      reg_code_to_index_(GetRegisterCount(data->config(), kind),
+                         data->allocation_zone()),
+      index_to_reg_code_(GetAllocatableRegisterCodes(data->config(), kind)),
+      assigned_registers_(data->code_zone()->New<BitVector>(
+          GetRegisterCount(data->config(), kind), data->code_zone())),
+      data_(data),
+      in_use_at_instr_start_bits_(),
+      in_use_at_instr_end_bits_(),
+      allocated_registers_bits_() {
+  for (int i = 0; i < num_allocatable_registers_; i++) {
+    int reg_code = index_to_reg_code_[i];
+    reg_code_to_index_[reg_code] = RegisterIndex(i);
+  }
+
+  // If the architecture has non-simple FP aliasing, initialize float and
+  // simd128 specific register details.
+  if (!kSimpleFPAliasing && kind == RegisterKind::kDouble) {
+    const RegisterConfiguration* config = data->config();
+
+    //  Float registers.
+    float32_reg_code_to_index_.emplace(config->num_float_registers(),
+                                       data->allocation_zone());
+    index_to_float32_reg_code_.emplace(num_allocatable_registers_, -1,
+                                       data->allocation_zone());
+    for (int i = 0; i < config->num_allocatable_float_registers(); i++) {
+      int reg_code = config->allocatable_float_codes()[i];
+      // Only add even float register codes to avoid overlapping multiple float
+      // registers on each RegisterIndex.
+      if (reg_code % 2 != 0) continue;
+      int double_reg_base_code;
+      CHECK_EQ(1, config->GetAliases(MachineRepresentation::kFloat32, reg_code,
+                                     MachineRepresentation::kFloat64,
+                                     &double_reg_base_code));
+      RegisterIndex double_reg(reg_code_to_index_[double_reg_base_code]);
+      float32_reg_code_to_index_->at(reg_code) = double_reg;
+      index_to_float32_reg_code_->at(double_reg.ToInt()) = reg_code;
+    }
+
+    //  Simd128 registers.
+    simd128_reg_code_to_index_.emplace(config->num_simd128_registers(),
+                                       data->allocation_zone());
+    index_to_simd128_reg_code_.emplace(num_allocatable_registers_, -1,
+                                       data->allocation_zone());
+    for (int i = 0; i < config->num_allocatable_simd128_registers(); i++) {
+      int reg_code = config->allocatable_simd128_codes()[i];
+      int double_reg_base_code;
+      CHECK_EQ(2, config->GetAliases(MachineRepresentation::kSimd128, reg_code,
+                                     MachineRepresentation::kFloat64,
+                                     &double_reg_base_code));
+      RegisterIndex double_reg(reg_code_to_index_[double_reg_base_code]);
+      simd128_reg_code_to_index_->at(reg_code) = double_reg;
+      index_to_simd128_reg_code_->at(double_reg.ToInt()) = reg_code;
+    }
+  }
+}
+
+int SinglePassRegisterAllocator::VirtualRegisterForRegister(RegisterIndex reg) {
+  return register_state()->VirtualRegisterForRegister(reg);
+}
+
+RegisterIndex SinglePassRegisterAllocator::RegisterForVirtualRegister(
+    int virtual_register) {
+  DCHECK_NE(virtual_register, InstructionOperand::kInvalidVirtualRegister);
+  return virtual_register_to_reg_[virtual_register];
+}
+
+void SinglePassRegisterAllocator::UpdateForDeferredBlock(int instr_index) {
+  if (!HasRegisterState()) return;
+  for (RegisterIndex reg : *register_state()) {
+    SpillRegisterForDeferred(reg, instr_index);
+  }
+}
+
+void SinglePassRegisterAllocator::EndInstruction() {
+  in_use_at_instr_end_bits_.Reset();
+  in_use_at_instr_start_bits_.Reset();
+}
+
+void SinglePassRegisterAllocator::StartBlock(const InstructionBlock* block) {
+  DCHECK(!HasRegisterState());
+  DCHECK_NULL(current_block_);
+  DCHECK(in_use_at_instr_start_bits_.IsEmpty());
+  DCHECK(in_use_at_instr_end_bits_.IsEmpty());
+  DCHECK(allocated_registers_bits_.IsEmpty());
+
+  // Update the current block we are processing.
+  current_block_ = block;
+
+  if (block->SuccessorCount() == 1) {
+    // If we have only a single successor, we can directly clone our state
+    // from that successor.
+    CloneStateFrom(block->successors()[0]);
+  } else if (block->SuccessorCount() > 1) {
+    // If we have multiple successors, merge the state from all the successors
+    // into our block.
+    MergeStateFrom(block->successors());
+  }
+}
+
+void SinglePassRegisterAllocator::EndBlock(const InstructionBlock* block) {
+  DCHECK(in_use_at_instr_start_bits_.IsEmpty());
+  DCHECK(in_use_at_instr_end_bits_.IsEmpty());
+
+  // If we didn't allocate any registers of this kind, or we have reached the
+  // entry block (which has no predecessors), there is nothing to do here.
+  if (!HasRegisterState() || block->PredecessorCount() == 0) {
+    current_block_ = nullptr;
+    return;
+  }
+
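+  // If this block has multiple predecessors, they will all reuse this register
+  // state, so record the additional shared uses of its registers.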
+  if (block->PredecessorCount() > 1) {
+    register_state()->AddSharedUses(
+        static_cast<int>(block->PredecessorCount()) - 1);
+  }
+
+  BlockState& block_state = data()->block_state(block->rpo_number());
+  block_state.set_register_in_state(register_state(), kind());
+
+  // Remove virtual register to register mappings and clear register state.
+  // We will update the register state when starting the next block.
+  while (!allocated_registers_bits_.IsEmpty()) {
+    RegisterIndex reg = allocated_registers_bits_.GetFirstSet();
+    FreeRegister(reg, VirtualRegisterForRegister(reg));
+  }
+  current_block_ = nullptr;
+  register_state_ = nullptr;
+}
+
+void SinglePassRegisterAllocator::CloneStateFrom(RpoNumber successor) {
+  BlockState& block_state = data()->block_state(successor);
+  RegisterState* successor_registers = block_state.register_in_state(kind());
+  if (successor_registers != nullptr) {
+    if (data()->GetBlock(successor)->PredecessorCount() == 1) {
+      // Avoid cloning for successors where we are the only predecessor.
+      register_state_ = successor_registers;
+    } else {
+      register_state_ = successor_registers->Clone();
+    }
+    UpdateVirtualRegisterState();
+  }
+}
+
+void SinglePassRegisterAllocator::MergeStateFrom(
+    const InstructionBlock::Successors& successors) {
+  for (RpoNumber successor : successors) {
+    BlockState& block_state = data()->block_state(successor);
+    RegisterState* successor_registers = block_state.register_in_state(kind());
+    if (successor_registers == nullptr) {
+      continue;
+    }
+
+    if (register_state_ == nullptr) {
+      // If we haven't merged any register state yet, just use the successor's
+      // register state directly.
+      register_state_ = successor_registers;
+      UpdateVirtualRegisterState();
+    } else {
+      // Otherwise try to merge our state with the existing state.
+      RegisterBitVector processed_regs;
+      RegisterBitVector succ_allocated_regs =
+          GetAllocatedRegBitVector(successor_registers);
+      for (RegisterIndex reg : *successor_registers) {
+        // If |reg| isn't allocated in successor registers, nothing to do.
+        if (!successor_registers->IsAllocated(reg)) continue;
+
+        int virtual_register =
+            successor_registers->VirtualRegisterForRegister(reg);
+        MachineRepresentation rep = RepresentationFor(virtual_register);
+
+        // If we have already processed |reg|, e.g., by adding a gap move to
+        // that register, then we can continue.
+        if (processed_regs.Contains(reg, rep)) continue;
+        processed_regs.Add(reg, rep);
+
+        if (register_state()->IsAllocated(reg)) {
+          if (successor_registers->Equals(reg, register_state())) {
+            // Both match, keep the merged register data.
+            register_state()->CommitAtMerge(reg);
+          } else {
+            // Try to find a new register for this successor register in the
+            // merge block, and add a gap move on entry of the successor block.
+            RegisterIndex new_reg =
+                RegisterForVirtualRegister(virtual_register);
+            if (!new_reg.is_valid()) {
+              new_reg = ChooseFreeRegister(
+                  allocated_registers_bits_.Union(succ_allocated_regs), rep);
+            } else if (new_reg != reg) {
+              // Spill |new_reg| in the successor block so it can be used for
+              // this gap move. It would have to be spilled anyway, since in
+              // the successor it holds a different virtual register than it
+              // does in the merge block.
+              SpillRegisterAtMerge(successor_registers, new_reg);
+            }
+
+            if (new_reg.is_valid()) {
+              MoveRegisterOnMerge(new_reg, reg, virtual_register, successor,
+                                  successor_registers);
+              processed_regs.Add(new_reg, rep);
+            } else {
+              SpillRegisterAtMerge(successor_registers, reg);
+            }
+          }
+        } else {
+          DCHECK(successor_registers->IsAllocated(reg));
+          if (RegisterForVirtualRegister(virtual_register).is_valid()) {
+            // If we already hold the virtual register in a different register,
+            // then spill this register in the successor block to avoid
+            // invalidating the 1:1 vreg<->reg mapping.
+            // TODO(rmcilroy): Add a gap move to avoid spilling.
+            SpillRegisterAtMerge(successor_registers, reg);
+          } else {
+            // Register is free in our current register state, so merge the
+            // successor block's register details into it.
+            register_state()->CopyFrom(reg, successor_registers);
+            AssignRegister(reg, virtual_register, UsePosition::kNone);
+          }
+        }
+      }
+    }
+  }
+}
+
+RegisterBitVector SinglePassRegisterAllocator::GetAllocatedRegBitVector(
+    RegisterState* reg_state) {
+  RegisterBitVector allocated_regs;
+  for (RegisterIndex reg : *reg_state) {
+    if (reg_state->IsAllocated(reg)) {
+      int virtual_register = reg_state->VirtualRegisterForRegister(reg);
+      allocated_regs.Add(reg, RepresentationFor(virtual_register));
+    }
+  }
+  return allocated_regs;
+}
+
+void SinglePassRegisterAllocator::SpillRegisterAtMerge(RegisterState* reg_state,
+                                                       RegisterIndex reg) {
+  DCHECK_NE(reg_state, register_state());
+  if (reg_state->IsAllocated(reg)) {
+    int virtual_register = reg_state->VirtualRegisterForRegister(reg);
+    AllocatedOperand allocated = AllocatedOperandForReg(reg, virtual_register);
+    reg_state->Spill(reg, allocated, current_block(), data());
+  }
+}
+
+void SinglePassRegisterAllocator::MoveRegisterOnMerge(
+    RegisterIndex from, RegisterIndex to, int virtual_register,
+    RpoNumber successor, RegisterState* succ_state) {
+  int instr_index = data()->GetBlock(successor)->first_instruction_index();
+  MoveOperands* move =
+      data()->AddPendingOperandGapMove(instr_index, Instruction::START);
+  succ_state->Commit(to, AllocatedOperandForReg(to, virtual_register),
+                     &move->destination(), data());
+  AllocatePendingUse(from, virtual_register, &move->source(), instr_index);
+}
+
+void SinglePassRegisterAllocator::UpdateVirtualRegisterState() {
+  // Update to the new register state, updating the virtual register to
+  // register map and resetting any shared registers that were spilled by
+  // another block.
+  DCHECK(HasRegisterState());
+  for (RegisterIndex reg : *register_state()) {
+    register_state()->ResetIfSpilledWhileShared(reg);
+    int virtual_register = VirtualRegisterForRegister(reg);
+    if (virtual_register != InstructionOperand::kInvalidVirtualRegister) {
+      AssignRegister(reg, virtual_register, UsePosition::kNone);
+    }
+  }
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::CheckConsistency() {
+#ifdef DEBUG
+  for (int virtual_register = 0;
+       virtual_register < data()->code()->VirtualRegisterCount();
+       virtual_register++) {
+    RegisterIndex reg = RegisterForVirtualRegister(virtual_register);
+    if (reg.is_valid()) {
+      CHECK_EQ(virtual_register, VirtualRegisterForRegister(reg));
+      CHECK(allocated_registers_bits_.Contains(
+          reg, RepresentationFor(virtual_register)));
+    }
+  }
+
+  for (RegisterIndex reg : *register_state()) {
+    int virtual_register = VirtualRegisterForRegister(reg);
+    if (virtual_register != InstructionOperand::kInvalidVirtualRegister) {
+      CHECK_EQ(reg, RegisterForVirtualRegister(virtual_register));
+      CHECK(allocated_registers_bits_.Contains(
+          reg, RepresentationFor(virtual_register)));
+    }
+  }
+#endif
+}
+
+RegisterIndex SinglePassRegisterAllocator::FromRegCode(
+    int reg_code, MachineRepresentation rep) const {
+  if (!kSimpleFPAliasing && kind() == RegisterKind::kDouble) {
+    if (rep == MachineRepresentation::kFloat32) {
+      return RegisterIndex(float32_reg_code_to_index_->at(reg_code));
+    } else if (rep == MachineRepresentation::kSimd128) {
+      return RegisterIndex(simd128_reg_code_to_index_->at(reg_code));
+    }
+    DCHECK_EQ(rep, MachineRepresentation::kFloat64);
+  }
+
+  return RegisterIndex(reg_code_to_index_[reg_code]);
+}
+
+int SinglePassRegisterAllocator::ToRegCode(RegisterIndex reg,
+                                           MachineRepresentation rep) const {
+  if (!kSimpleFPAliasing && kind() == RegisterKind::kDouble) {
+    if (rep == MachineRepresentation::kFloat32) {
+      DCHECK_NE(-1, index_to_float32_reg_code_->at(reg.ToInt()));
+      return index_to_float32_reg_code_->at(reg.ToInt());
+    } else if (rep == MachineRepresentation::kSimd128) {
+      DCHECK_NE(-1, index_to_simd128_reg_code_->at(reg.ToInt()));
+      return index_to_simd128_reg_code_->at(reg.ToInt());
+    }
+    DCHECK_EQ(rep, MachineRepresentation::kFloat64);
+  }
+  return index_to_reg_code_[reg.ToInt()];
+}
+
+bool SinglePassRegisterAllocator::VirtualRegisterIsUnallocatedOrInReg(
+    int virtual_register, RegisterIndex reg) {
+  RegisterIndex existing_reg = RegisterForVirtualRegister(virtual_register);
+  return !existing_reg.is_valid() || existing_reg == reg;
+}
+
+bool SinglePassRegisterAllocator::IsFreeOrSameVirtualRegister(
+    RegisterIndex reg, int virtual_register) {
+  int allocated_vreg = VirtualRegisterForRegister(reg);
+  return allocated_vreg == InstructionOperand::kInvalidVirtualRegister ||
+         allocated_vreg == virtual_register;
+}
+
+void SinglePassRegisterAllocator::EmitGapMoveFromOutput(InstructionOperand from,
+                                                        InstructionOperand to,
+                                                        int instr_index) {
+  DCHECK(from.IsAllocated());
+  DCHECK(to.IsAllocated());
+  const InstructionBlock* block = current_block();
+  DCHECK_EQ(data()->GetBlock(instr_index), block);
+  if (instr_index == block->last_instruction_index()) {
+    // Add gap move to the first instruction of every successor block.
+    for (const RpoNumber succ : block->successors()) {
+      const InstructionBlock* successor = data()->GetBlock(succ);
+      DCHECK_EQ(1, successor->PredecessorCount());
+      data()->AddGapMove(successor->first_instruction_index(),
+                         Instruction::START, from, to);
+    }
+  } else {
+    data()->AddGapMove(instr_index + 1, Instruction::START, from, to);
+  }
+}
+
+void SinglePassRegisterAllocator::AssignRegister(RegisterIndex reg,
+                                                 int virtual_register,
+                                                 UsePosition pos) {
+  MachineRepresentation rep = RepresentationFor(virtual_register);
+  assigned_registers()->Add(ToRegCode(reg, rep));
+  allocated_registers_bits_.Add(reg, rep);
+  MarkRegisterUse(reg, rep, pos);
+  if (virtual_register != InstructionOperand::kInvalidVirtualRegister) {
+    virtual_register_to_reg_[virtual_register] = reg;
+  }
+}
+
+void SinglePassRegisterAllocator::MarkRegisterUse(RegisterIndex reg,
+                                                  MachineRepresentation rep,
+                                                  UsePosition pos) {
+  if (pos == UsePosition::kStart || pos == UsePosition::kAll) {
+    in_use_at_instr_start_bits_.Add(reg, rep);
+  }
+  if (pos == UsePosition::kEnd || pos == UsePosition::kAll) {
+    in_use_at_instr_end_bits_.Add(reg, rep);
+  }
+}
+
+void SinglePassRegisterAllocator::FreeRegister(RegisterIndex reg,
+                                               int virtual_register) {
+  allocated_registers_bits_.Clear(reg, RepresentationFor(virtual_register));
+  if (virtual_register != InstructionOperand::kInvalidVirtualRegister) {
+    virtual_register_to_reg_[virtual_register] = RegisterIndex::Invalid();
+  }
+}
+
+RegisterIndex SinglePassRegisterAllocator::ChooseRegisterFor(
+    VirtualRegisterData& virtual_register, int instr_index, UsePosition pos,
+    bool must_use_register) {
+  // If register is already allocated to the virtual register, use that.
+  RegisterIndex reg = RegisterForVirtualRegister(virtual_register.vreg());
+
+  // If we don't need a register, only try to allocate one if the virtual
+  // register hasn't yet been spilled, to try to avoid spilling it.
+  if (!reg.is_valid() && (must_use_register ||
+                          !virtual_register.IsSpilledAt(instr_index, data()))) {
+    reg = ChooseRegisterFor(RepresentationFor(virtual_register.vreg()), pos,
+                            must_use_register);
+  }
+  return reg;
+}
+
+RegisterIndex SinglePassRegisterAllocator::ChooseRegisterFor(
+    MachineRepresentation rep, UsePosition pos, bool must_use_register) {
+  RegisterIndex reg = ChooseFreeRegister(rep, pos);
+  if (!reg.is_valid() && must_use_register) {
+    reg = ChooseRegisterToSpill(rep, pos);
+    SpillRegister(reg);
+  }
+  return reg;
+}
+
+RegisterBitVector SinglePassRegisterAllocator::InUseBitmap(UsePosition pos) {
+  switch (pos) {
+    case UsePosition::kStart:
+      return in_use_at_instr_start_bits_;
+    case UsePosition::kEnd:
+      return in_use_at_instr_end_bits_;
+    case UsePosition::kAll:
+      return in_use_at_instr_start_bits_.Union(in_use_at_instr_end_bits_);
+    case UsePosition::kNone:
+      UNREACHABLE();
+  }
+}
+
+bool SinglePassRegisterAllocator::IsValidForRep(RegisterIndex reg,
+                                                MachineRepresentation rep) {
+  if (kSimpleFPAliasing || kind() == RegisterKind::kGeneral) {
+    return true;
+  } else {
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        return index_to_float32_reg_code_->at(reg.ToInt()) != -1;
+      case MachineRepresentation::kFloat64:
+        return true;
+      case MachineRepresentation::kSimd128:
+        return index_to_simd128_reg_code_->at(reg.ToInt()) != -1;
+      default:
+        UNREACHABLE();
+    }
+  }
+}
+
+RegisterIndex SinglePassRegisterAllocator::ChooseFreeRegister(
+    MachineRepresentation rep, UsePosition pos) {
+  // Take the first free, non-blocked register, if available.
+  // TODO(rmcilroy): Consider a better heuristic.
+  RegisterBitVector allocated_or_in_use =
+      InUseBitmap(pos).Union(allocated_registers_bits_);
+  return ChooseFreeRegister(allocated_or_in_use, rep);
+}
+
+RegisterIndex SinglePassRegisterAllocator::ChooseFreeRegister(
+    const RegisterBitVector& allocated_regs, MachineRepresentation rep) {
+  RegisterIndex chosen_reg = RegisterIndex::Invalid();
+  if (kSimpleFPAliasing || kind() == RegisterKind::kGeneral) {
+    chosen_reg = allocated_regs.GetFirstCleared(num_allocatable_registers());
+  } else {
+    // If we don't have simple fp aliasing, we need to check each register
+    // individually to get one with the required representation.
+    for (RegisterIndex reg : *register_state()) {
+      if (IsValidForRep(reg, rep) && !allocated_regs.Contains(reg, rep)) {
+        chosen_reg = reg;
+        break;
+      }
+    }
+  }
+
+  DCHECK_IMPLIES(chosen_reg.is_valid(), IsValidForRep(chosen_reg, rep));
+  return chosen_reg;
+}
+
+RegisterIndex SinglePassRegisterAllocator::ChooseRegisterToSpill(
+    MachineRepresentation rep, UsePosition pos) {
+  RegisterBitVector in_use = InUseBitmap(pos);
+
+  // Choose a register that will need to be spilled. Preferentially choose:
+  //  - A register with only pending uses, to avoid having to add a gap move for
+  //    a non-pending use.
+  //  - A register holding a virtual register that has already been spilled, to
+  //    avoid adding a new gap move to spill the virtual register when it is
+  //    output.
+  //  - The register holding the virtual register with the earliest
+  //    definition point, since it is more likely to be spilled anyway.
+  RegisterIndex chosen_reg;
+  int earliest_definition = kMaxInt;
+  bool pending_only_use = false;
+  bool already_spilled = false;
+  for (RegisterIndex reg : *register_state()) {
+    // Skip if register is in use, or not valid for representation.
+    if (!IsValidForRep(reg, rep) || in_use.Contains(reg, rep)) continue;
+
+    VirtualRegisterData& vreg_data =
+        VirtualRegisterDataFor(VirtualRegisterForRegister(reg));
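+    // Take |reg| as the new candidate if it is the first register seen with
+    // only pending uses, the first one whose virtual register already has a
+    // spill operand, or if its virtual register is defined earlier than the
+    // current candidate's.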
+    if ((!pending_only_use && register_state()->HasPendingUsesOnly(reg)) ||
+        (!already_spilled && vreg_data.HasSpillOperand()) ||
+        vreg_data.output_instr_index() < earliest_definition) {
+      chosen_reg = reg;
+      earliest_definition = vreg_data.output_instr_index();
+      pending_only_use = register_state()->HasPendingUsesOnly(reg);
+      already_spilled = vreg_data.HasSpillOperand();
+    }
+  }
+
+  // There should always be an unblocked register available.
+  DCHECK(chosen_reg.is_valid());
+  DCHECK(IsValidForRep(chosen_reg, rep));
+  return chosen_reg;
+}
+
+void SinglePassRegisterAllocator::CommitRegister(RegisterIndex reg,
+                                                 int virtual_register,
+                                                 InstructionOperand* operand,
+                                                 UsePosition pos) {
+  // Commit the register to the given operand, mark the register's use in this
+  // instruction, then mark the register as free going forward.
+  AllocatedOperand allocated = AllocatedOperandForReg(reg, virtual_register);
+  register_state()->Commit(reg, allocated, operand, data());
+  MarkRegisterUse(reg, RepresentationFor(virtual_register), pos);
+  FreeRegister(reg, virtual_register);
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::SpillRegister(RegisterIndex reg) {
+  if (!register_state()->IsAllocated(reg)) return;
+
+  // Spill the register and then free it.
+  int virtual_register = VirtualRegisterForRegister(reg);
+  AllocatedOperand allocated = AllocatedOperandForReg(reg, virtual_register);
+  register_state()->Spill(reg, allocated, current_block(), data());
+  FreeRegister(reg, virtual_register);
+}
+
+void SinglePassRegisterAllocator::SpillAllRegisters() {
+  if (!HasRegisterState()) return;
+
+  for (RegisterIndex reg : *register_state()) {
+    SpillRegister(reg);
+  }
+}
+
+void SinglePassRegisterAllocator::SpillRegisterForVirtualRegister(
+    int virtual_register) {
+  DCHECK_NE(virtual_register, InstructionOperand::kInvalidVirtualRegister);
+  RegisterIndex reg = RegisterForVirtualRegister(virtual_register);
+  if (reg.is_valid()) {
+    SpillRegister(reg);
+  }
+}
+
+void SinglePassRegisterAllocator::SpillRegisterForDeferred(RegisterIndex reg,
+                                                           int instr_index) {
+  // If the register is both allocated and shared with other blocks, spill it
+  // for the deferred block and mark it as free going forward.
+  if (register_state()->IsAllocated(reg) && register_state()->IsShared(reg)) {
+    int virtual_register = VirtualRegisterForRegister(reg);
+    AllocatedOperand allocated = AllocatedOperandForReg(reg, virtual_register);
+    register_state()->SpillForDeferred(reg, allocated, instr_index, data());
+    FreeRegister(reg, virtual_register);
+  }
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::AllocateDeferredBlockSpillOutput(
+    int instr_index, RpoNumber deferred_block, int virtual_register) {
+  DCHECK(data()->GetBlock(deferred_block)->IsDeferred());
+  VirtualRegisterData& vreg_data =
+      data()->VirtualRegisterDataFor(virtual_register);
+  if (!vreg_data.NeedsSpillAtOutput() &&
+      !DefinedAfter(virtual_register, instr_index, UsePosition::kEnd)) {
+    // If a register has been assigned to the virtual register, and the virtual
+    // register still doesn't need to be spilled at its output, then add a
+    // pending move to output the virtual register to its spill slot on entry
+    // to the deferred block (to avoid spilling in non-deferred code).
+    // TODO(rmcilroy): Consider assigning a register even if the virtual
+    // register isn't yet assigned - currently doing this regresses performance.
+    RegisterIndex reg = RegisterForVirtualRegister(virtual_register);
+    if (reg.is_valid()) {
+      int deferred_block_start =
+          data()->GetBlock(deferred_block)->first_instruction_index();
+      register_state()->MoveToSpillSlotOnDeferred(reg, virtual_register,
+                                                  deferred_block_start, data());
+      return;
+    } else {
+      vreg_data.MarkAsNeedsSpillAtOutput();
+    }
+  }
+}
+
+AllocatedOperand SinglePassRegisterAllocator::AllocatedOperandForReg(
+    RegisterIndex reg, int virtual_register) {
+  MachineRepresentation rep = RepresentationFor(virtual_register);
+  return AllocatedOperand(AllocatedOperand::REGISTER, rep, ToRegCode(reg, rep));
+}
+
+void SinglePassRegisterAllocator::AllocateUse(RegisterIndex reg,
+                                              int virtual_register,
+                                              InstructionOperand* operand,
+                                              int instr_index,
+                                              UsePosition pos) {
+  DCHECK_NE(virtual_register, InstructionOperand::kInvalidVirtualRegister);
+  DCHECK(IsFreeOrSameVirtualRegister(reg, virtual_register));
+
+  AllocatedOperand allocated = AllocatedOperandForReg(reg, virtual_register);
+  register_state()->Commit(reg, allocated, operand, data());
+  register_state()->AllocateUse(reg, virtual_register, operand, instr_index,
+                                data());
+  AssignRegister(reg, virtual_register, pos);
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::AllocatePendingUse(
+    RegisterIndex reg, int virtual_register, InstructionOperand* operand,
+    int instr_index) {
+  DCHECK_NE(virtual_register, InstructionOperand::kInvalidVirtualRegister);
+  DCHECK(IsFreeOrSameVirtualRegister(reg, virtual_register));
+
+  register_state()->AllocatePendingUse(reg, virtual_register, operand,
+                                       instr_index);
+  // Since this is a pending use and the operand doesn't need to use a register,
+  // allocate with UsePosition::kNone to avoid blocking its use by other
+  // operands in this instruction.
+  AssignRegister(reg, virtual_register, UsePosition::kNone);
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::AllocateUseWithMove(
+    RegisterIndex reg, int virtual_register, UnallocatedOperand* operand,
+    int instr_index, UsePosition pos) {
+  AllocatedOperand to = AllocatedOperandForReg(reg, virtual_register);
+  UnallocatedOperand from = UnallocatedOperand(
+      UnallocatedOperand::REGISTER_OR_SLOT, virtual_register);
+  data()->AddGapMove(instr_index, Instruction::END, from, to);
+  InstructionOperand::ReplaceWith(operand, &to);
+  MarkRegisterUse(reg, RepresentationFor(virtual_register), pos);
+  CheckConsistency();
+}
+
+void SinglePassRegisterAllocator::AllocateInput(UnallocatedOperand* operand,
+                                                int instr_index) {
+  EnsureRegisterState();
+  int virtual_register = operand->virtual_register();
+  MachineRepresentation rep = RepresentationFor(virtual_register);
+  VirtualRegisterData& vreg_data = VirtualRegisterDataFor(virtual_register);
+
+  // Spill slot policy operands.
+  if (operand->HasFixedSlotPolicy()) {
+    // If the operand is from a fixed slot, allocate it to that fixed slot,
+    // then add a gap move from an unconstrained copy of that input operand,
+    // and spill the gap move's input operand.
+    // TODO(rmcilroy): We could allocate a register for the gap move however
+    // we would need to wait until we've done all the allocations for the
+    // instruction since the allocation needs to reflect the state before
+    // the instruction (at the gap move). For now spilling is fine since
+    // fixed slot inputs are uncommon.
+    UnallocatedOperand input_copy(UnallocatedOperand::REGISTER_OR_SLOT,
+                                  virtual_register);
+    AllocatedOperand allocated = AllocatedOperand(
+        AllocatedOperand::STACK_SLOT, rep, operand->fixed_slot_index());
+    InstructionOperand::ReplaceWith(operand, &allocated);
+    MoveOperands* move_op =
+        data()->AddGapMove(instr_index, Instruction::END, input_copy, *operand);
+    vreg_data.SpillOperand(&move_op->source(), instr_index, data());
+    return;
+  } else if (operand->HasSlotPolicy()) {
+    vreg_data.SpillOperand(operand, instr_index, data());
+    return;
+  }
+
+  // Otherwise try to allocate a register for the operation.
+  UsePosition pos =
+      operand->IsUsedAtStart() ? UsePosition::kStart : UsePosition::kAll;
+  if (operand->HasFixedRegisterPolicy() ||
+      operand->HasFixedFPRegisterPolicy()) {
+    // With a fixed register operand, we must use that register.
+    RegisterIndex reg = FromRegCode(operand->fixed_register_index(), rep);
+    if (!VirtualRegisterIsUnallocatedOrInReg(virtual_register, reg)) {
+      // If the virtual register is already in a different register, then just
+      // add a gap move from that register to the fixed register.
+      AllocateUseWithMove(reg, virtual_register, operand, instr_index, pos);
+    } else {
+      // Otherwise allocate a use of the fixed register for |virtual_register|.
+      AllocateUse(reg, virtual_register, operand, instr_index, pos);
+    }
+  } else {
+    bool must_use_register = operand->HasRegisterPolicy() ||
+                             (vreg_data.is_constant() &&
+                              !operand->HasRegisterOrSlotOrConstantPolicy());
+    RegisterIndex reg =
+        ChooseRegisterFor(vreg_data, instr_index, pos, must_use_register);
+
+    if (reg.is_valid()) {
+      if (must_use_register) {
+        AllocateUse(reg, virtual_register, operand, instr_index, pos);
+      } else {
+        AllocatePendingUse(reg, virtual_register, operand, instr_index);
+      }
+    } else {
+      vreg_data.SpillOperand(operand, instr_index, data());
+    }
+  }
+}
+
+void SinglePassRegisterAllocator::AllocateGapMoveInput(
+    UnallocatedOperand* operand, int instr_index) {
+  EnsureRegisterState();
+  int virtual_register = operand->virtual_register();
+  VirtualRegisterData& vreg_data = VirtualRegisterDataFor(virtual_register);
+
+  // Gap move inputs should be unconstrained.
+  DCHECK(operand->HasRegisterOrSlotPolicy());
+  RegisterIndex reg =
+      ChooseRegisterFor(vreg_data, instr_index, UsePosition::kStart, false);
+  if (reg.is_valid()) {
+    AllocatePendingUse(reg, virtual_register, operand, instr_index);
+  } else {
+    vreg_data.SpillOperand(operand, instr_index, data());
+  }
+}
+
+void SinglePassRegisterAllocator::AllocateConstantOutput(
+    ConstantOperand* operand) {
+  EnsureRegisterState();
+  // If the constant is allocated to a register, spill it now to add the
+  // necessary gap moves from the constant operand to the register.
+  int virtual_register = operand->virtual_register();
+  SpillRegisterForVirtualRegister(virtual_register);
+}
+
+void SinglePassRegisterAllocator::AllocateOutput(UnallocatedOperand* operand,
+                                                 int instr_index) {
+  AllocateOutput(operand, instr_index, UsePosition::kEnd);
+}
+
+RegisterIndex SinglePassRegisterAllocator::AllocateOutput(
+    UnallocatedOperand* operand, int instr_index, UsePosition pos) {
+  EnsureRegisterState();
+  int virtual_register = operand->virtual_register();
+  VirtualRegisterData& vreg_data = VirtualRegisterDataFor(virtual_register);
+
+  RegisterIndex reg;
+  if (operand->HasSlotPolicy() || operand->HasFixedSlotPolicy()) {
+    // We can't allocate a register for output given the policy, so make sure
+    // to spill the register holding this virtual register if any.
+    SpillRegisterForVirtualRegister(virtual_register);
+    reg = RegisterIndex::Invalid();
+  } else if (operand->HasFixedPolicy()) {
+    reg = FromRegCode(operand->fixed_register_index(),
+                      RepresentationFor(virtual_register));
+  } else {
+    reg = ChooseRegisterFor(vreg_data, instr_index, pos,
+                            operand->HasRegisterPolicy());
+  }
+
+  // TODO(rmcilroy): support secondary storage.
+  if (!reg.is_valid()) {
+    vreg_data.SpillOperand(operand, instr_index, data());
+  } else {
+    InstructionOperand move_output_to;
+    if (!VirtualRegisterIsUnallocatedOrInReg(virtual_register, reg)) {
+      // If the |virtual_register| was in a different register (e.g., due to
+      // the output having a fixed register), then commit its use in that
+      // register here, and move it from the output operand below.
+      RegisterIndex existing_reg = RegisterForVirtualRegister(virtual_register);
+      // Don't mark |existing_reg| as used in this instruction, since it is used
+      // in the (already allocated) following instruction's gap-move.
+      CommitRegister(existing_reg, virtual_register, &move_output_to,
+                     UsePosition::kNone);
+    }
+    CommitRegister(reg, virtual_register, operand, pos);
+    if (move_output_to.IsAllocated()) {
+      // Emit a move from output to the register that the |virtual_register| was
+      // allocated to.
+      EmitGapMoveFromOutput(*operand, move_output_to, instr_index);
+    }
+    if (vreg_data.NeedsSpillAtOutput()) {
+      vreg_data.EmitGapMoveFromOutputToSpillSlot(
+          *AllocatedOperand::cast(operand), current_block(), instr_index,
+          data());
+    } else if (vreg_data.NeedsSpillAtDeferredBlocks()) {
+      vreg_data.EmitDeferredSpillOutputs(data());
+    }
+  }
+
+  return reg;
+}
+
+void SinglePassRegisterAllocator::AllocateSameInputOutput(
+    UnallocatedOperand* output, UnallocatedOperand* input, int instr_index) {
+  EnsureRegisterState();
+  int input_vreg = input->virtual_register();
+  int output_vreg = output->virtual_register();
+
+  // The input operand has the details of the register constraints, so replace
+  // the output operand with a copy of the input, with the output's vreg.
+  UnallocatedOperand output_as_input(*input, output_vreg);
+  InstructionOperand::ReplaceWith(output, &output_as_input);
+  RegisterIndex reg = AllocateOutput(output, instr_index, UsePosition::kAll);
+
+  if (reg.is_valid()) {
+    // Replace the input operand with an unallocated fixed register policy for
+    // the same register.
+    UnallocatedOperand::ExtendedPolicy policy =
+        kind() == RegisterKind::kGeneral
+            ? UnallocatedOperand::FIXED_REGISTER
+            : UnallocatedOperand::FIXED_FP_REGISTER;
+    MachineRepresentation rep = RepresentationFor(input_vreg);
+    UnallocatedOperand fixed_input(policy, ToRegCode(reg, rep), input_vreg);
+    InstructionOperand::ReplaceWith(input, &fixed_input);
+  } else {
+    // Output was spilled. Due to the SameAsInput allocation policy, we need to
+    // make the input operand the same as the output, i.e., the output virtual
+    // register's spill slot. As such, spill this input operand using the output
+    // virtual register's spill slot, then add a gap-move to move the input
+    // value into this spill slot.
+    VirtualRegisterData& output_vreg_data = VirtualRegisterDataFor(output_vreg);
+    output_vreg_data.SpillOperand(input, instr_index, data());
+
+    // Add an unconstrained gap move for the input virtual register.
+    UnallocatedOperand unconstrained_input(UnallocatedOperand::REGISTER_OR_SLOT,
+                                           input_vreg);
+    MoveOperands* move_ops = data()->AddGapMove(
+        instr_index, Instruction::END, unconstrained_input, PendingOperand());
+    output_vreg_data.SpillOperand(&move_ops->destination(), instr_index,
+                                  data());
+  }
+}
+
+void SinglePassRegisterAllocator::AllocateTemp(UnallocatedOperand* operand,
+                                               int instr_index) {
+  EnsureRegisterState();
+  int virtual_register = operand->virtual_register();
+  RegisterIndex reg;
+  DCHECK(!operand->HasFixedSlotPolicy());
+  if (operand->HasSlotPolicy()) {
+    reg = RegisterIndex::Invalid();
+  } else if (operand->HasFixedRegisterPolicy() ||
+             operand->HasFixedFPRegisterPolicy()) {
+    reg = FromRegCode(operand->fixed_register_index(),
+                      RepresentationFor(virtual_register));
+  } else {
+    reg = ChooseRegisterFor(RepresentationFor(virtual_register),
+                            UsePosition::kAll, operand->HasRegisterPolicy());
+  }
+
+  if (reg.is_valid()) {
+    DCHECK(virtual_register == InstructionOperand::kInvalidVirtualRegister ||
+           VirtualRegisterIsUnallocatedOrInReg(virtual_register, reg));
+    CommitRegister(reg, virtual_register, operand, UsePosition::kAll);
+  } else {
+    VirtualRegisterData& vreg_data = VirtualRegisterDataFor(virtual_register);
+    vreg_data.SpillOperand(operand, instr_index, data());
+  }
+}
+
+bool SinglePassRegisterAllocator::DefinedAfter(int virtual_register,
+                                               int instr_index,
+                                               UsePosition pos) {
+  if (virtual_register == InstructionOperand::kInvalidVirtualRegister)
+    return false;
+  int defined_at =
+      VirtualRegisterDataFor(virtual_register).output_instr_index();
+  return defined_at > instr_index ||
+         (defined_at == instr_index && pos == UsePosition::kStart);
+}
+
+void SinglePassRegisterAllocator::ReserveFixedInputRegister(
+    const UnallocatedOperand* operand, int instr_index) {
+  ReserveFixedRegister(
+      operand, instr_index,
+      operand->IsUsedAtStart() ? UsePosition::kStart : UsePosition::kAll);
+}
+
+void SinglePassRegisterAllocator::ReserveFixedTempRegister(
+    const UnallocatedOperand* operand, int instr_index) {
+  ReserveFixedRegister(operand, instr_index, UsePosition::kAll);
+}
+
+void SinglePassRegisterAllocator::ReserveFixedOutputRegister(
+    const UnallocatedOperand* operand, int instr_index) {
+  ReserveFixedRegister(operand, instr_index, UsePosition::kEnd);
+}
+
+void SinglePassRegisterAllocator::ReserveFixedRegister(
+    const UnallocatedOperand* operand, int instr_index, UsePosition pos) {
+  EnsureRegisterState();
+  int virtual_register = operand->virtual_register();
+  MachineRepresentation rep = RepresentationFor(virtual_register);
+  RegisterIndex reg = FromRegCode(operand->fixed_register_index(), rep);
+  if (!IsFreeOrSameVirtualRegister(reg, virtual_register) &&
+      !DefinedAfter(virtual_register, instr_index, pos)) {
+    // If register is in-use by a different virtual register, spill it now.
+    // TODO(rmcilroy): Consider moving to an unconstrained register instead of
+    // spilling.
+    SpillRegister(reg);
+  }
+  MarkRegisterUse(reg, rep, pos);
+}
+
+void SinglePassRegisterAllocator::AllocatePhiGapMove(int to_vreg, int from_vreg,
+                                                     int instr_index) {
+  EnsureRegisterState();
+  RegisterIndex from_register = RegisterForVirtualRegister(from_vreg);
+  RegisterIndex to_register = RegisterForVirtualRegister(to_vreg);
+
+  // If to_register isn't marked as a phi gap move, we can't use it as such.
+  if (to_register.is_valid() && !register_state()->IsPhiGapMove(to_register)) {
+    to_register = RegisterIndex::Invalid();
+  }
+
+  if (to_register.is_valid() && !from_register.is_valid()) {
+    // If |to| virtual register is allocated to a register, and the |from|
+    // virtual register isn't allocated, then commit this register and
+    // re-allocate it to the |from| virtual register.
+    InstructionOperand operand;
+    CommitRegister(to_register, to_vreg, &operand, UsePosition::kAll);
+    AllocateUse(to_register, from_vreg, &operand, instr_index,
+                UsePosition::kAll);
+  } else {
+    // Otherwise add a gap move.
+    MoveOperands* move =
+        data()->AddPendingOperandGapMove(instr_index, Instruction::END);
+    PendingOperand* to_operand = PendingOperand::cast(&move->destination());
+    PendingOperand* from_operand = PendingOperand::cast(&move->source());
+
+    // Commit the |to| side to either a register or the pending spills.
+    if (to_register.is_valid()) {
+      CommitRegister(to_register, to_vreg, to_operand, UsePosition::kAll);
+    } else {
+      VirtualRegisterDataFor(to_vreg).SpillOperand(to_operand, instr_index,
+                                                   data());
+    }
+
+    // The |from| side is unconstrained.
+    UnallocatedOperand unconstrained_input(UnallocatedOperand::REGISTER_OR_SLOT,
+                                           from_vreg);
+    InstructionOperand::ReplaceWith(from_operand, &unconstrained_input);
+  }
+}
+
+void SinglePassRegisterAllocator::AllocatePhi(int virtual_register,
+                                              const InstructionBlock* block) {
+  VirtualRegisterData& vreg_data = VirtualRegisterDataFor(virtual_register);
+  if (vreg_data.NeedsSpillAtOutput() || block->IsLoopHeader()) {
+    // If the Phi needs to be spilled, just spill here directly so that all
+    // gap moves into the Phi move into the spill slot.
+    SpillRegisterForVirtualRegister(virtual_register);
+  } else {
+    RegisterIndex reg = RegisterForVirtualRegister(virtual_register);
+    if (reg.is_valid()) {
+      // If the register is valid, assign it as a phi gap move to be processed
+      // at the successor blocks. If no register or spill slot was used then
+      // the virtual register was never used.
+      register_state()->UseForPhiGapMove(reg);
+    }
+  }
+}
+
+void SinglePassRegisterAllocator::EnsureRegisterState() {
+  if (!HasRegisterState()) {
+    register_state_ = RegisterState::New(kind(), num_allocatable_registers_,
+                                         data()->allocation_zone());
+  }
+}
+
+class MidTierOutputProcessor final {
+ public:
+  explicit MidTierOutputProcessor(MidTierRegisterAllocationData* data);
+
+  void InitializeBlockState(const InstructionBlock* block);
+  void DefineOutputs(const InstructionBlock* block);
+
+ private:
+  void PopulateDeferredBlockRegion(RpoNumber initial_block);
+
+  VirtualRegisterData& VirtualRegisterDataFor(int virtual_register) const {
+    return data()->VirtualRegisterDataFor(virtual_register);
+  }
+  MachineRepresentation RepresentationFor(int virtual_register) const {
+    return data()->RepresentationFor(virtual_register);
+  }
+
+  bool IsDeferredBlockBoundary(const ZoneVector<RpoNumber>& blocks) {
+    return blocks.size() == 1 && !data()->GetBlock(blocks[0])->IsDeferred();
+  }
+
+  MidTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Zone* zone() const { return data()->allocation_zone(); }
+
+  MidTierRegisterAllocationData* const data_;
+  ZoneQueue<RpoNumber> deferred_blocks_worklist_;
+  ZoneSet<RpoNumber> deferred_blocks_processed_;
+};
+
+MidTierOutputProcessor::MidTierOutputProcessor(
+    MidTierRegisterAllocationData* data)
+    : data_(data),
+      deferred_blocks_worklist_(data->allocation_zone()),
+      deferred_blocks_processed_(data->allocation_zone()) {}
+
+void MidTierOutputProcessor::PopulateDeferredBlockRegion(
+    RpoNumber initial_block) {
+  DeferredBlocksRegion* deferred_blocks_region =
+      zone()->New<DeferredBlocksRegion>(zone(),
+                                        code()->InstructionBlockCount());
+  DCHECK(deferred_blocks_worklist_.empty());
+  deferred_blocks_worklist_.push(initial_block);
+  deferred_blocks_processed_.insert(initial_block);
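+  // For example (hypothetical block numbers): if B3 and B4 are deferred, B2
+  // (non-deferred) branches into B3, and B4 exits to the non-deferred B5,
+  // then starting the worklist at B3 puts B3 and B4 into this region, marks
+  // B2 as a deferred block boundary (B3's only predecessor is non-deferred),
+  // and marks B4 as a deferred block boundary (B4's only successor is
+  // non-deferred).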
+  while (!deferred_blocks_worklist_.empty()) {
+    RpoNumber current = deferred_blocks_worklist_.front();
+    deferred_blocks_worklist_.pop();
+    deferred_blocks_region->AddBlock(current, data());
+
+    const InstructionBlock* curr_block = data()->GetBlock(current);
+    // Check whether the predecessor blocks are still deferred.
+    if (IsDeferredBlockBoundary(curr_block->predecessors())) {
+      // If not, mark the predecessor as having a deferred successor.
+      data()
+          ->block_state(curr_block->predecessors()[0])
+          .MarkAsDeferredBlockBoundary();
+    } else {
+      // Otherwise process predecessors.
+      for (RpoNumber pred : curr_block->predecessors()) {
+        if (deferred_blocks_processed_.count(pred) == 0) {
+          deferred_blocks_worklist_.push(pred);
+          deferred_blocks_processed_.insert(pred);
+        }
+      }
+    }
+
+    // Check whether the successor blocks are still deferred.
+    // Process any unprocessed successors if we aren't at a boundary.
+    if (IsDeferredBlockBoundary(curr_block->successors())) {
+      // If not, mark this block as a deferred block boundary.
+      data()->block_state(current).MarkAsDeferredBlockBoundary();
+    } else {
+      // Otherwise process successors.
+      for (RpoNumber succ : curr_block->successors()) {
+        if (deferred_blocks_processed_.count(succ) == 0) {
+          deferred_blocks_worklist_.push(succ);
+          deferred_blocks_processed_.insert(succ);
+        }
+      }
+    }
+  }
+}
+
+void MidTierOutputProcessor::InitializeBlockState(
+    const InstructionBlock* block) {
+  // Update our predecessor blocks with their successors_phi_index if we have
+  // phis.
+  if (block->phis().size()) {
+    for (int i = 0; i < static_cast<int>(block->PredecessorCount()); ++i) {
+      data()->block_state(block->predecessors()[i]).set_successors_phi_index(i);
+    }
+  }
+
+  BlockState& block_state = data()->block_state(block->rpo_number());
+
+  if (block->IsDeferred() && !block_state.deferred_blocks_region()) {
+    PopulateDeferredBlockRegion(block->rpo_number());
+  }
+
+  // Mark this block as dominating itself.
+  block_state.dominated_blocks()->Add(block->rpo_number().ToInt());
+
+  if (block->dominator().IsValid()) {
+    // Add all the blocks this block dominates to its dominator.
+    BlockState& dominator_block_state = data()->block_state(block->dominator());
+    dominator_block_state.dominated_blocks()->Union(
+        *block_state.dominated_blocks());
+  } else {
+    // Only the first block shouldn't have a dominator.
+    DCHECK_EQ(block, code()->instruction_blocks().front());
+  }
+}
+
+void MidTierOutputProcessor::DefineOutputs(const InstructionBlock* block) {
+  int block_start = block->first_instruction_index();
+  bool is_deferred = block->IsDeferred();
+
+  for (int index = block->last_instruction_index(); index >= block_start;
+       index--) {
+    Instruction* instr = code()->InstructionAt(index);
+
+    // For each instruction, define details of the output with the associated
+    // virtual register data.
+    for (size_t i = 0; i < instr->OutputCount(); i++) {
+      InstructionOperand* output = instr->OutputAt(i);
+      if (output->IsConstant()) {
+        ConstantOperand* constant_operand = ConstantOperand::cast(output);
+        int virtual_register = constant_operand->virtual_register();
+        VirtualRegisterDataFor(virtual_register)
+            .DefineAsConstantOperand(constant_operand, index, is_deferred);
+      } else {
+        DCHECK(output->IsUnallocated());
+        UnallocatedOperand* unallocated_operand =
+            UnallocatedOperand::cast(output);
+        int virtual_register = unallocated_operand->virtual_register();
+        bool is_exceptional_call_output =
+            instr->IsCallWithDescriptorFlags() &&
+            instr->HasCallDescriptorFlag(CallDescriptor::kHasExceptionHandler);
+        if (unallocated_operand->HasFixedSlotPolicy()) {
+          // If output has a fixed slot policy, allocate its spill operand now
+          // so that the register allocator can use this knowledge.
+          MachineRepresentation rep = RepresentationFor(virtual_register);
+          AllocatedOperand* fixed_spill_operand =
+              AllocatedOperand::New(zone(), AllocatedOperand::STACK_SLOT, rep,
+                                    unallocated_operand->fixed_slot_index());
+          VirtualRegisterDataFor(virtual_register)
+              .DefineAsFixedSpillOperand(fixed_spill_operand, virtual_register,
+                                         index, is_deferred,
+                                         is_exceptional_call_output);
+        } else {
+          VirtualRegisterDataFor(virtual_register)
+              .DefineAsUnallocatedOperand(virtual_register, index, is_deferred,
+                                          is_exceptional_call_output);
+        }
+      }
+    }
+
+    // Mark any instructions that require reference maps for later reference map
+    // processing.
+    if (instr->HasReferenceMap()) {
+      data()->reference_map_instructions().push_back(index);
+    }
+  }
+
+  // Define phi output operands.
+  for (PhiInstruction* phi : block->phis()) {
+    int virtual_register = phi->virtual_register();
+    VirtualRegisterDataFor(virtual_register)
+        .DefineAsPhi(virtual_register, block->first_instruction_index(),
+                     is_deferred);
+  }
+}
+
+void DefineOutputs(MidTierRegisterAllocationData* data) {
+  MidTierOutputProcessor processor(data);
+
+  for (const InstructionBlock* block :
+       base::Reversed(data->code()->instruction_blocks())) {
+    data->tick_counter()->TickAndMaybeEnterSafepoint();
+
+    processor.InitializeBlockState(block);
+    processor.DefineOutputs(block);
+  }
+}
+
+class MidTierRegisterAllocator final {
+ public:
+  explicit MidTierRegisterAllocator(MidTierRegisterAllocationData* data);
+  MidTierRegisterAllocator(const MidTierRegisterAllocator&) = delete;
+  MidTierRegisterAllocator& operator=(const MidTierRegisterAllocator&) = delete;
+
+  void AllocateRegisters(const InstructionBlock* block);
+  void UpdateSpillRangesForLoops();
+
+  SinglePassRegisterAllocator& general_reg_allocator() {
+    return general_reg_allocator_;
+  }
+  SinglePassRegisterAllocator& double_reg_allocator() {
+    return double_reg_allocator_;
+  }
+
+ private:
+  void AllocatePhis(const InstructionBlock* block);
+  void AllocatePhiGapMoves(const InstructionBlock* block);
+
+  bool IsFixedRegisterPolicy(const UnallocatedOperand* operand);
+  void ReserveFixedRegisters(int instr_index);
+
+  SinglePassRegisterAllocator& AllocatorFor(MachineRepresentation rep);
+  SinglePassRegisterAllocator& AllocatorFor(const UnallocatedOperand* operand);
+  SinglePassRegisterAllocator& AllocatorFor(const ConstantOperand* operand);
+
+  VirtualRegisterData& VirtualRegisterDataFor(int virtual_register) const {
+    return data()->VirtualRegisterDataFor(virtual_register);
+  }
+  MachineRepresentation RepresentationFor(int virtual_register) const {
+    return data()->RepresentationFor(virtual_register);
+  }
+  MidTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Zone* allocation_zone() const { return data()->allocation_zone(); }
+
+  MidTierRegisterAllocationData* const data_;
+  SinglePassRegisterAllocator general_reg_allocator_;
+  SinglePassRegisterAllocator double_reg_allocator_;
+};
+
+MidTierRegisterAllocator::MidTierRegisterAllocator(
+    MidTierRegisterAllocationData* data)
+    : data_(data),
+      general_reg_allocator_(RegisterKind::kGeneral, data),
+      double_reg_allocator_(RegisterKind::kDouble, data) {}
+
+void MidTierRegisterAllocator::AllocateRegisters(
+    const InstructionBlock* block) {
+  RpoNumber block_rpo = block->rpo_number();
+  bool is_deferred_block_boundary =
+      data()->block_state(block_rpo).is_deferred_block_boundary();
+
+  general_reg_allocator().StartBlock(block);
+  double_reg_allocator().StartBlock(block);
+
+  // If this block is not deferred but has deferred successors, then for any
+  // virtual register that is only spilled inside those deferred blocks, emit
+  // its spill at the start of the deferred blocks rather than at the
+  // register's output here, so the spill stays out of non-deferred code.
+  if (is_deferred_block_boundary && !block->IsDeferred()) {
+    for (RpoNumber successor : block->successors()) {
+      if (!data()->GetBlock(successor)->IsDeferred()) continue;
+      DCHECK_GT(successor, block_rpo);
+      for (int virtual_register :
+           *data()->block_state(successor).deferred_blocks_region()) {
+        USE(virtual_register);
+        AllocatorFor(RepresentationFor(virtual_register))
+            .AllocateDeferredBlockSpillOutput(block->last_instruction_index(),
+                                              successor, virtual_register);
+      }
+    }
+  }
+
+  // Allocate registers for instructions in reverse, from the end of the block
+  // to the start.
+  int block_start = block->first_instruction_index();
+  for (int instr_index = block->last_instruction_index();
+       instr_index >= block_start; instr_index--) {
+    Instruction* instr = code()->InstructionAt(instr_index);
+
+    // Reserve any fixed register operands to prevent the register being
+    // allocated to another operand.
+    ReserveFixedRegisters(instr_index);
+
+    // Allocate outputs.
+    for (size_t i = 0; i < instr->OutputCount(); i++) {
+      InstructionOperand* output = instr->OutputAt(i);
+      DCHECK(!output->IsAllocated());
+      if (output->IsConstant()) {
+        ConstantOperand* constant_operand = ConstantOperand::cast(output);
+        AllocatorFor(constant_operand).AllocateConstantOutput(constant_operand);
+      } else {
+        UnallocatedOperand* unallocated_output =
+            UnallocatedOperand::cast(output);
+        if (unallocated_output->HasSameAsInputPolicy()) {
+          DCHECK_EQ(i, 0);
+          UnallocatedOperand* unallocated_input =
+              UnallocatedOperand::cast(instr->InputAt(0));
+          DCHECK_EQ(AllocatorFor(unallocated_input).kind(),
+                    AllocatorFor(unallocated_output).kind());
+          AllocatorFor(unallocated_output)
+              .AllocateSameInputOutput(unallocated_output, unallocated_input,
+                                       instr_index);
+        } else {
+          AllocatorFor(unallocated_output)
+              .AllocateOutput(unallocated_output, instr_index);
+        }
+      }
+    }
+
+    if (instr->ClobbersRegisters()) {
+      general_reg_allocator().SpillAllRegisters();
+    }
+    if (instr->ClobbersDoubleRegisters()) {
+      double_reg_allocator().SpillAllRegisters();
+    }
+
+    // Allocate temporaries.
+    for (size_t i = 0; i < instr->TempCount(); i++) {
+      UnallocatedOperand* temp = UnallocatedOperand::cast(instr->TempAt(i));
+      AllocatorFor(temp).AllocateTemp(temp, instr_index);
+    }
+
+    // Allocate inputs that are used across the whole instruction.
+    for (size_t i = 0; i < instr->InputCount(); i++) {
+      if (!instr->InputAt(i)->IsUnallocated()) continue;
+      UnallocatedOperand* input = UnallocatedOperand::cast(instr->InputAt(i));
+      if (input->IsUsedAtStart()) continue;
+      AllocatorFor(input).AllocateInput(input, instr_index);
+    }
+
+    // Then allocate inputs that are only used at the start of the instruction.
+    for (size_t i = 0; i < instr->InputCount(); i++) {
+      if (!instr->InputAt(i)->IsUnallocated()) continue;
+      UnallocatedOperand* input = UnallocatedOperand::cast(instr->InputAt(i));
+      DCHECK(input->IsUsedAtStart());
+      AllocatorFor(input).AllocateInput(input, instr_index);
+    }
+
+    // If we are allocating for the last instruction in the block, allocate any
+    // phi gap move operations that are needed to resolve phis in our successor.
+    if (instr_index == block->last_instruction_index()) {
+      AllocatePhiGapMoves(block);
+
+      // If this block is deferred but its successor isn't, update the state to
+      // limit spills to the deferred blocks where possible.
+      if (is_deferred_block_boundary && block->IsDeferred()) {
+        general_reg_allocator().UpdateForDeferredBlock(instr_index);
+        double_reg_allocator().UpdateForDeferredBlock(instr_index);
+      }
+    }
+
+    // Allocate any unallocated gap move inputs.
+    ParallelMove* moves = instr->GetParallelMove(Instruction::END);
+    if (moves != nullptr) {
+      for (MoveOperands* move : *moves) {
+        DCHECK(!move->destination().IsUnallocated());
+        if (move->source().IsUnallocated()) {
+          UnallocatedOperand* source =
+              UnallocatedOperand::cast(&move->source());
+          AllocatorFor(source).AllocateGapMoveInput(source, instr_index);
+        }
+      }
+    }
+
+    general_reg_allocator().EndInstruction();
+    double_reg_allocator().EndInstruction();
+  }
+
+  // For now we spill all registers at a loop header.
+  // TODO(rmcilroy): Add support for register allocations across loops.
+  if (block->IsLoopHeader()) {
+    general_reg_allocator().SpillAllRegisters();
+    double_reg_allocator().SpillAllRegisters();
+  }
+
+  AllocatePhis(block);
+
+  general_reg_allocator().EndBlock(block);
+  double_reg_allocator().EndBlock(block);
+}
+
+SinglePassRegisterAllocator& MidTierRegisterAllocator::AllocatorFor(
+    MachineRepresentation rep) {
+  if (IsFloatingPoint(rep)) {
+    return double_reg_allocator();
+  } else {
+    return general_reg_allocator();
+  }
+}
+
+SinglePassRegisterAllocator& MidTierRegisterAllocator::AllocatorFor(
+    const UnallocatedOperand* operand) {
+  return AllocatorFor(RepresentationFor(operand->virtual_register()));
+}
+
+SinglePassRegisterAllocator& MidTierRegisterAllocator::AllocatorFor(
+    const ConstantOperand* operand) {
+  return AllocatorFor(RepresentationFor(operand->virtual_register()));
+}
+
+bool MidTierRegisterAllocator::IsFixedRegisterPolicy(
+    const UnallocatedOperand* operand) {
+  return operand->HasFixedRegisterPolicy() ||
+         operand->HasFixedFPRegisterPolicy();
+}
+
+void MidTierRegisterAllocator::ReserveFixedRegisters(int instr_index) {
+  Instruction* instr = code()->InstructionAt(instr_index);
+  for (size_t i = 0; i < instr->OutputCount(); i++) {
+    if (!instr->OutputAt(i)->IsUnallocated()) continue;
+    const UnallocatedOperand* operand =
+        UnallocatedOperand::cast(instr->OutputAt(i));
+    if (operand->HasSameAsInputPolicy()) {
+      // The input operand carries the register constraints; use it here to
+      // reserve the register for the output (it will be reserved for the
+      // input below).
+      operand = UnallocatedOperand::cast(instr->InputAt(i));
+    }
+    if (IsFixedRegisterPolicy(operand)) {
+      AllocatorFor(operand).ReserveFixedOutputRegister(operand, instr_index);
+    }
+  }
+  for (size_t i = 0; i < instr->TempCount(); i++) {
+    if (!instr->TempAt(i)->IsUnallocated()) continue;
+    const UnallocatedOperand* operand =
+        UnallocatedOperand::cast(instr->TempAt(i));
+    if (IsFixedRegisterPolicy(operand)) {
+      AllocatorFor(operand).ReserveFixedTempRegister(operand, instr_index);
+    }
+  }
+  for (size_t i = 0; i < instr->InputCount(); i++) {
+    if (!instr->InputAt(i)->IsUnallocated()) continue;
+    const UnallocatedOperand* operand =
+        UnallocatedOperand::cast(instr->InputAt(i));
+    if (IsFixedRegisterPolicy(operand)) {
+      AllocatorFor(operand).ReserveFixedInputRegister(operand, instr_index);
+    }
+  }
+}
+
+void MidTierRegisterAllocator::AllocatePhiGapMoves(
+    const InstructionBlock* block) {
+  int successors_phi_index =
+      data()->block_state(block->rpo_number()).successors_phi_index();
+
+  // If successors_phi_index is -1, there are no phis in the successor.
+  if (successors_phi_index == -1) return;
+
+  // The last instruction of a block with phis can't require a reference map,
+  // since we won't record phi gap moves that get spilled when populating the
+  // reference maps.
+  int instr_index = block->last_instruction_index();
+  DCHECK(!code()->InstructionAt(instr_index)->HasReferenceMap());
+
+  // If there are phis, we only have a single successor due to edge-split form.
+  DCHECK_EQ(block->SuccessorCount(), 1);
+  const InstructionBlock* successor = data()->GetBlock(block->successors()[0]);
+
+  for (PhiInstruction* phi : successor->phis()) {
+    int to_vreg = phi->virtual_register();
+    int from_vreg = phi->operands()[successors_phi_index];
+
+    MachineRepresentation rep = RepresentationFor(to_vreg);
+    AllocatorFor(rep).AllocatePhiGapMove(to_vreg, from_vreg, instr_index);
+  }
+}
+
+void MidTierRegisterAllocator::AllocatePhis(const InstructionBlock* block) {
+  for (PhiInstruction* phi : block->phis()) {
+    int virtual_register = phi->virtual_register();
+    MachineRepresentation rep = RepresentationFor(virtual_register);
+    AllocatorFor(rep).AllocatePhi(virtual_register, block);
+  }
+}
+
+void MidTierRegisterAllocator::UpdateSpillRangesForLoops() {
+  // Extend the spill range of any spill that crosses a loop header to
+  // the full loop.
+  for (InstructionBlock* block : code()->instruction_blocks()) {
+    if (block->IsLoopHeader()) {
+      RpoNumber last_loop_block =
+          RpoNumber::FromInt(block->loop_end().ToInt() - 1);
+      int last_loop_instr =
+          data()->GetBlock(last_loop_block)->last_instruction_index();
+      // Extend spill range for all spilled values that are live on entry to the
+      // loop header.
+      BitVector::Iterator iterator(&data()->spilled_virtual_registers());
+      for (; !iterator.Done(); iterator.Advance()) {
+        const VirtualRegisterData& vreg_data =
+            VirtualRegisterDataFor(iterator.Current());
+        if (vreg_data.HasSpillRange() &&
+            vreg_data.spill_range()->IsLiveAt(block->first_instruction_index(),
+                                              block)) {
+          vreg_data.spill_range()->ExtendRangeTo(last_loop_instr);
+        }
+      }
+    }
+  }
+}
+
+void AllocateRegisters(MidTierRegisterAllocationData* data) {
+  MidTierRegisterAllocator allocator(data);
+  for (InstructionBlock* block :
+       base::Reversed(data->code()->instruction_blocks())) {
+    data->tick_counter()->TickAndMaybeEnterSafepoint();
+    allocator.AllocateRegisters(block);
+  }
+
+  allocator.UpdateSpillRangesForLoops();
+
+  data->frame()->SetAllocatedRegisters(
+      allocator.general_reg_allocator().assigned_registers());
+  data->frame()->SetAllocatedDoubleRegisters(
+      allocator.double_reg_allocator().assigned_registers());
+}
+
+// Spill slot allocator for mid-tier register allocation.
+class MidTierSpillSlotAllocator final {
+ public:
+  explicit MidTierSpillSlotAllocator(MidTierRegisterAllocationData* data);
+  MidTierSpillSlotAllocator(const MidTierSpillSlotAllocator&) = delete;
+  MidTierSpillSlotAllocator& operator=(const MidTierSpillSlotAllocator&) =
+      delete;
+
+  void Allocate(VirtualRegisterData* virtual_register);
+
+ private:
+  class SpillSlot;
+
+  void AdvanceTo(int instr_index);
+  SpillSlot* GetFreeSpillSlot(int byte_width);
+
+  MidTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Frame* frame() const { return data()->frame(); }
+  Zone* zone() const { return data()->allocation_zone(); }
+
+  struct OrderByLastUse {
+    bool operator()(const SpillSlot* a, const SpillSlot* b) const;
+  };
+
+  MidTierRegisterAllocationData* const data_;
+  ZonePriorityQueue<SpillSlot*, OrderByLastUse> allocated_slots_;
+  ZoneLinkedList<SpillSlot*> free_slots_;
+  int position_;
+};
+
+class MidTierSpillSlotAllocator::SpillSlot : public ZoneObject {
+ public:
+  SpillSlot(int stack_slot, int byte_width)
+      : stack_slot_(stack_slot), byte_width_(byte_width), range_() {}
+  SpillSlot(const SpillSlot&) = delete;
+  SpillSlot& operator=(const SpillSlot&) = delete;
+
+  void AddRange(const Range& range) { range_.AddRange(range); }
+
+  AllocatedOperand ToOperand(MachineRepresentation rep) const {
+    return AllocatedOperand(AllocatedOperand::STACK_SLOT, rep, stack_slot_);
+  }
+
+  int byte_width() const { return byte_width_; }
+  int last_use() const { return range_.end(); }
+
+ private:
+  int stack_slot_;
+  int byte_width_;
+  Range range_;
+};
+
+bool MidTierSpillSlotAllocator::OrderByLastUse::operator()(
+    const SpillSlot* a, const SpillSlot* b) const {
+  return a->last_use() > b->last_use();
+}
+
+MidTierSpillSlotAllocator::MidTierSpillSlotAllocator(
+    MidTierRegisterAllocationData* data)
+    : data_(data),
+      allocated_slots_(data->allocation_zone()),
+      free_slots_(data->allocation_zone()),
+      position_(0) {}
+
+void MidTierSpillSlotAllocator::AdvanceTo(int instr_index) {
+  // Move any slots that are no longer in use to the free slots list.
+  DCHECK_LE(position_, instr_index);
+  while (!allocated_slots_.empty() &&
+         instr_index > allocated_slots_.top()->last_use()) {
+    free_slots_.push_front(allocated_slots_.top());
+    allocated_slots_.pop();
+  }
+  position_ = instr_index;
+}
+
+MidTierSpillSlotAllocator::SpillSlot*
+MidTierSpillSlotAllocator::GetFreeSpillSlot(int byte_width) {
+  for (auto it = free_slots_.begin(); it != free_slots_.end(); ++it) {
+    SpillSlot* slot = *it;
+    if (slot->byte_width() == byte_width) {
+      free_slots_.erase(it);
+      return slot;
+    }
+  }
+  return nullptr;
+}
+
+void MidTierSpillSlotAllocator::Allocate(
+    VirtualRegisterData* virtual_register) {
+  DCHECK(virtual_register->HasPendingSpillOperand());
+  VirtualRegisterData::SpillRange* spill_range =
+      virtual_register->spill_range();
+  MachineRepresentation rep =
+      data()->RepresentationFor(virtual_register->vreg());
+  int byte_width = ByteWidthForStackSlot(rep);
+  Range live_range = spill_range->live_range();
+
+  AdvanceTo(live_range.start());
+
+  // Try to re-use an existing free spill slot.
+  SpillSlot* slot = GetFreeSpillSlot(byte_width);
+  if (slot == nullptr) {
+    // Otherwise allocate a new slot.
+    int stack_slot_ = frame()->AllocateSpillSlot(byte_width);
+    slot = zone()->New<SpillSlot>(stack_slot_, byte_width);
+  }
+
+  // Extend the range of the slot to include this spill range, and allocate the
+  // pending spill operands with this slot.
+  slot->AddRange(live_range);
+  virtual_register->AllocatePendingSpillOperand(slot->ToOperand(rep));
+  allocated_slots_.push(slot);
+}
+
+void AllocateSpillSlots(MidTierRegisterAllocationData* data) {
+  ZoneVector<VirtualRegisterData*> spilled(data->allocation_zone());
+  BitVector::Iterator iterator(&data->spilled_virtual_registers());
+  for (; !iterator.Done(); iterator.Advance()) {
+    VirtualRegisterData& vreg_data =
+        data->VirtualRegisterDataFor(iterator.Current());
+    if (vreg_data.HasPendingSpillOperand()) {
+      spilled.push_back(&vreg_data);
+    }
+  }
+
+  // Sort the spill ranges by order of their first use to enable linear
+  // allocation of spill slots.
+  std::sort(spilled.begin(), spilled.end(),
+            [](const VirtualRegisterData* a, const VirtualRegisterData* b) {
+              return a->spill_range()->live_range().start() <
+                     b->spill_range()->live_range().start();
+            });
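+  // For example (illustrative values): two spilled virtual registers of the
+  // same byte width whose live ranges are [2, 10] and [12, 20] can share one
+  // stack slot; when the allocator advances to instruction 12, the first
+  // slot's last use (10) has passed, so the slot is moved to the free list
+  // and re-used for the second range.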
+
+  // Allocate a spill slot for each virtual register with a spill range.
+  MidTierSpillSlotAllocator allocator(data);
+  for (VirtualRegisterData* spill : spilled) {
+    allocator.Allocate(spill);
+  }
+}
+
+// Populates reference maps for mid-tier register allocation.
+class MidTierReferenceMapPopulator final {
+ public:
+  explicit MidTierReferenceMapPopulator(MidTierRegisterAllocationData* data);
+  MidTierReferenceMapPopulator(const MidTierReferenceMapPopulator&) = delete;
+  MidTierReferenceMapPopulator& operator=(const MidTierReferenceMapPopulator&) =
+      delete;
+
+  void RecordReferences(const VirtualRegisterData& virtual_register);
+
+ private:
+  MidTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+
+  MidTierRegisterAllocationData* const data_;
+};
+
+MidTierReferenceMapPopulator::MidTierReferenceMapPopulator(
+    MidTierRegisterAllocationData* data)
+    : data_(data) {}
+
+void MidTierReferenceMapPopulator::RecordReferences(
+    const VirtualRegisterData& virtual_register) {
+  if (!virtual_register.HasAllocatedSpillOperand()) return;
+  if (!code()->IsReference(virtual_register.vreg())) return;
+
+  VirtualRegisterData::SpillRange* spill_range = virtual_register.spill_range();
+  Range& live_range = spill_range->live_range();
+  AllocatedOperand allocated =
+      *AllocatedOperand::cast(virtual_register.spill_operand());
+  for (int instr_index : data()->reference_map_instructions()) {
+    if (instr_index > live_range.end() || instr_index < live_range.start())
+      continue;
+    Instruction* instr = data()->code()->InstructionAt(instr_index);
+    DCHECK(instr->HasReferenceMap());
+
+    if (spill_range->IsLiveAt(instr_index, instr->block())) {
+      instr->reference_map()->RecordReference(allocated);
+    }
+  }
+}
+
+void PopulateReferenceMaps(MidTierRegisterAllocationData* data) {
+  MidTierReferenceMapPopulator populator(data);
+  BitVector::Iterator iterator(&data->spilled_virtual_registers());
+  for (; !iterator.Done(); iterator.Advance()) {
+    populator.RecordReferences(
+        data->VirtualRegisterDataFor(iterator.Current()));
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/mid-tier-register-allocator.h b/src/compiler/backend/mid-tier-register-allocator.h
new file mode 100644
index 0000000..2440115
--- /dev/null
+++ b/src/compiler/backend/mid-tier-register-allocator.h
@@ -0,0 +1,122 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_MID_TIER_REGISTER_ALLOCATOR_H_
+#define V8_COMPILER_BACKEND_MID_TIER_REGISTER_ALLOCATOR_H_
+
+#include "src/base/compiler-specific.h"
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/backend/register-allocation.h"
+#include "src/flags/flags.h"
+#include "src/utils/bit-vector.h"
+#include "src/zone/zone-containers.h"
+#include "src/zone/zone.h"
+
+namespace v8 {
+namespace internal {
+
+class TickCounter;
+
+namespace compiler {
+
+class BlockState;
+class VirtualRegisterData;
+
+// The MidTierRegisterAllocator is a register allocator specifically designed to
+// perform register allocation as fast as possible while minimizing spill moves.
+
+class MidTierRegisterAllocationData final : public RegisterAllocationData {
+ public:
+  MidTierRegisterAllocationData(const RegisterConfiguration* config,
+                                Zone* allocation_zone, Frame* frame,
+                                InstructionSequence* code,
+                                TickCounter* tick_counter,
+                                const char* debug_name = nullptr);
+  MidTierRegisterAllocationData(const MidTierRegisterAllocationData&) = delete;
+  MidTierRegisterAllocationData& operator=(
+      const MidTierRegisterAllocationData&) = delete;
+
+  static MidTierRegisterAllocationData* cast(RegisterAllocationData* data) {
+    DCHECK_EQ(data->type(), Type::kMidTier);
+    return static_cast<MidTierRegisterAllocationData*>(data);
+  }
+
+  VirtualRegisterData& VirtualRegisterDataFor(int virtual_register);
+  MachineRepresentation RepresentationFor(int virtual_register);
+
+  // Add a gap move between the given operands |from| and |to|.
+  MoveOperands* AddGapMove(int instr_index, Instruction::GapPosition position,
+                           const InstructionOperand& from,
+                           const InstructionOperand& to);
+
+  // Adds a gap move where both sides are PendingOperand operands.
+  MoveOperands* AddPendingOperandGapMove(int instr_index,
+                                         Instruction::GapPosition position);
+
+  // Helpers to get a block from an |rpo_number| or |instr_index|.
+  const InstructionBlock* GetBlock(const RpoNumber rpo_number);
+  const InstructionBlock* GetBlock(int instr_index);
+
+  // Returns a bitvector representing all the blocks that are dominated by the
+  // output of the instruction in |block|.
+  const BitVector* GetBlocksDominatedBy(const InstructionBlock* block);
+
+  // List of all instruction indexes that require a reference map.
+  ZoneVector<int>& reference_map_instructions() {
+    return reference_map_instructions_;
+  }
+
+  // Returns a bitvector representing the virtual registers that were spilled.
+  BitVector& spilled_virtual_registers() { return spilled_virtual_registers_; }
+
+  // This zone is for data structures only needed during register allocation
+  // phases.
+  Zone* allocation_zone() const { return allocation_zone_; }
+
+  // This zone is for InstructionOperands and moves that live beyond register
+  // allocation.
+  Zone* code_zone() const { return code()->zone(); }
+
+  BlockState& block_state(RpoNumber rpo_number);
+
+  InstructionSequence* code() const { return code_; }
+  Frame* frame() const { return frame_; }
+  const char* debug_name() const { return debug_name_; }
+  const RegisterConfiguration* config() const { return config_; }
+  TickCounter* tick_counter() { return tick_counter_; }
+
+ private:
+  Zone* const allocation_zone_;
+  Frame* const frame_;
+  InstructionSequence* const code_;
+  const char* const debug_name_;
+  const RegisterConfiguration* const config_;
+
+  ZoneVector<VirtualRegisterData> virtual_register_data_;
+  ZoneVector<BlockState> block_states_;
+  ZoneVector<int> reference_map_instructions_;
+  BitVector spilled_virtual_registers_;
+
+  TickCounter* const tick_counter_;
+};
+
+// Phase 1: Process instruction outputs to determine how each virtual register
+// is defined.
+void DefineOutputs(MidTierRegisterAllocationData* data);
+
+// Phase 2: Allocate registers to instructions.
+void AllocateRegisters(MidTierRegisterAllocationData* data);
+
+// Phase 3: Assign spilled operands to specific spill slots.
+void AllocateSpillSlots(MidTierRegisterAllocationData* data);
+
+// Phase 4: Populate reference maps for spilled references.
+void PopulateReferenceMaps(MidTierRegisterAllocationData* data);
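+
+// A minimal usage sketch, assuming |config|, |allocation_zone|, |frame|,
+// |code| and |tick_counter| are supplied by the surrounding compilation
+// pipeline (the names below are illustrative only): construct the allocation
+// data once, then run the four phases in order.
+//
+//   MidTierRegisterAllocationData data(config, allocation_zone, frame, code,
+//                                      tick_counter);
+//   DefineOutputs(&data);          // Phase 1
+//   AllocateRegisters(&data);      // Phase 2
+//   AllocateSpillSlots(&data);     // Phase 3
+//   PopulateReferenceMaps(&data);  // Phase 4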
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_MID_TIER_REGISTER_ALLOCATOR_H_
diff --git a/src/compiler/backend/mips/code-generator-mips.cc b/src/compiler/backend/mips/code-generator-mips.cc
new file mode 100644
index 0000000..c8265d7
--- /dev/null
+++ b/src/compiler/backend/mips/code-generator-mips.cc
@@ -0,0 +1,4423 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/callable.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/wasm/wasm-code-manager.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// TODO(plind): consider renaming these macros.
+#define TRACE_MSG(msg)                                                      \
+  PrintF("code_gen: \'%s\' in function %s at line %d\n", msg, __FUNCTION__, \
+         __LINE__)
+
+#define TRACE_UNIMPL()                                                       \
+  PrintF("UNIMPLEMENTED code_generator_mips: %s at line %d\n", __FUNCTION__, \
+         __LINE__)
+
+// Adds Mips-specific methods to convert InstructionOperands.
+class MipsOperandConverter final : public InstructionOperandConverter {
+ public:
+  MipsOperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  FloatRegister OutputSingleRegister(size_t index = 0) {
+    return ToSingleRegister(instr_->OutputAt(index));
+  }
+
+  FloatRegister InputSingleRegister(size_t index) {
+    return ToSingleRegister(instr_->InputAt(index));
+  }
+
+  FloatRegister ToSingleRegister(InstructionOperand* op) {
+    // The Single (Float) and Double register namespaces are the same on
+    // MIPS; both are typedefs of FPURegister.
+    return ToDoubleRegister(op);
+  }
+
+  Register InputOrZeroRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) {
+      DCHECK_EQ(0, InputInt32(index));
+      return zero_reg;
+    }
+    return InputRegister(index);
+  }
+
+  DoubleRegister InputOrZeroDoubleRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
+
+    return InputDoubleRegister(index);
+  }
+
+  DoubleRegister InputOrZeroSingleRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
+
+    return InputSingleRegister(index);
+  }
+
+  Operand InputImmediate(size_t index) {
+    Constant constant = ToConstant(instr_->InputAt(index));
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Operand(constant.ToInt32());
+      case Constant::kFloat32:
+        return Operand::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Operand::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kInt64:
+      case Constant::kExternalReference:
+      case Constant::kCompressedHeapObject:
+      case Constant::kHeapObject:
+        // TODO(plind): Maybe we should handle ExtRef & HeapObj here?
+        //    maybe not done on arm due to const pool ??
+        break;
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kRpoNumber:
+        UNREACHABLE();  // TODO(titzer): RPO immediates on mips?
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputOperand(size_t index) {
+    InstructionOperand* op = instr_->InputAt(index);
+    if (op->IsRegister()) {
+      return Operand(ToRegister(op));
+    }
+    return InputImmediate(index);
+  }
+
+  MemOperand MemoryOperand(size_t* first_index) {
+    const size_t index = *first_index;
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+        break;
+      case kMode_MRI:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_MRR:
+        // TODO(plind): r6 address mode, to be implemented ...
+        UNREACHABLE();
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(size_t index = 0) { return MemoryOperand(&index); }
+
+  MemOperand ToMemOperand(InstructionOperand* op) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand SlotToMemOperand(int slot) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+};
+
+static inline bool HasRegisterInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsRegister();
+}
+
+namespace {
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode)
+      : OutOfLineCode(gen),
+        object_(object),
+        index_(index),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
+    __ Addu(scratch1_, object_, index_);
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore ra if the frame was elided.
+      __ Push(ra);
+    }
+
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      __ Pop(ra);
+    }
+  }
+
+ private:
+  Register const object_;
+  Register const index_;
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode const stub_mode_;
+  bool must_save_lr_;
+  Zone* zone_;
+};
+
+#define CREATE_OOL_CLASS(ool_name, tasm_ool_name, T)                 \
+  class ool_name final : public OutOfLineCode {                      \
+   public:                                                           \
+    ool_name(CodeGenerator* gen, T dst, T src1, T src2)              \
+        : OutOfLineCode(gen), dst_(dst), src1_(src1), src2_(src2) {} \
+                                                                     \
+    void Generate() final { __ tasm_ool_name(dst_, src1_, src2_); }  \
+                                                                     \
+   private:                                                          \
+    T const dst_;                                                    \
+    T const src1_;                                                   \
+    T const src2_;                                                   \
+  }
+
+CREATE_OOL_CLASS(OutOfLineFloat32Max, Float32MaxOutOfLine, FPURegister);
+CREATE_OOL_CLASS(OutOfLineFloat32Min, Float32MinOutOfLine, FPURegister);
+CREATE_OOL_CLASS(OutOfLineFloat64Max, Float64MaxOutOfLine, DoubleRegister);
+CREATE_OOL_CLASS(OutOfLineFloat64Min, Float64MinOutOfLine, DoubleRegister);
+
+#undef CREATE_OOL_CLASS
+
+Condition FlagsConditionToConditionCmp(FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kSignedLessThan:
+      return lt;
+    case kSignedGreaterThanOrEqual:
+      return ge;
+    case kSignedLessThanOrEqual:
+      return le;
+    case kSignedGreaterThan:
+      return gt;
+    case kUnsignedLessThan:
+      return lo;
+    case kUnsignedGreaterThanOrEqual:
+      return hs;
+    case kUnsignedLessThanOrEqual:
+      return ls;
+    case kUnsignedGreaterThan:
+      return hi;
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      break;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+Condition FlagsConditionToConditionTst(FlagsCondition condition) {
+  switch (condition) {
+    case kNotEqual:
+      return ne;
+    case kEqual:
+      return eq;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate,
+                                             FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      *predicate = true;
+      return EQ;
+    case kNotEqual:
+      *predicate = false;
+      return EQ;
+    case kUnsignedLessThan:
+      *predicate = true;
+      return OLT;
+    case kUnsignedGreaterThanOrEqual:
+      *predicate = false;
+      return OLT;
+    case kUnsignedLessThanOrEqual:
+      *predicate = true;
+      return OLE;
+    case kUnsignedGreaterThan:
+      *predicate = false;
+      return OLE;
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      *predicate = true;
+      break;
+    default:
+      *predicate = true;
+      break;
+  }
+  UNREACHABLE();
+}
+
+#define UNSUPPORTED_COND(opcode, condition)                                    \
+  StdoutStream{} << "Unsupported " << #opcode << " condition: \"" << condition \
+                 << "\"";                                                      \
+  UNIMPLEMENTED();
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode, Instruction* instr,
+                                   MipsOperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->And(value, value, kSpeculationPoisonRegister);
+  }
+}
+
+}  // namespace
+
+#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr)          \
+  do {                                                   \
+    __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
+    __ sync();                                           \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr)               \
+  do {                                                         \
+    __ sync();                                                 \
+    __ asm_instr(i.InputOrZeroRegister(2), i.MemoryOperand()); \
+    __ sync();                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(bin_instr)                                \
+  do {                                                                  \
+    Label binop;                                                        \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
+    __ sync();                                                          \
+    __ bind(&binop);                                                    \
+    __ Ll(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));       \
+    __ bin_instr(i.TempRegister(1), i.OutputRegister(0),                \
+                 Operand(i.InputRegister(2)));                          \
+    __ Sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));         \
+    __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));   \
+    __ sync();                                                          \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC64_LOGIC_BINOP(bin_instr, external)                     \
+  do {                                                                         \
+    if (IsMipsArchVariant(kMips32r6)) {                                        \
+      Label binop;                                                             \
+      Register oldval_low =                                                    \
+          instr->OutputCount() >= 1 ? i.OutputRegister(0) : i.TempRegister(1); \
+      Register oldval_high =                                                   \
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(2); \
+      __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));      \
+      __ sync();                                                               \
+      __ bind(&binop);                                                         \
+      __ llx(oldval_high, MemOperand(i.TempRegister(0), 4));                   \
+      __ ll(oldval_low, MemOperand(i.TempRegister(0), 0));                     \
+      __ bin_instr(i.TempRegister(1), i.TempRegister(2), oldval_low,           \
+                   oldval_high, i.InputRegister(2), i.InputRegister(3));       \
+      __ scx(i.TempRegister(2), MemOperand(i.TempRegister(0), 4));             \
+      __ sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));              \
+      __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));        \
+      __ sync();                                                               \
+    } else {                                                                   \
+      FrameScope scope(tasm(), StackFrame::MANUAL);                            \
+      __ Addu(a0, i.InputRegister(0), i.InputRegister(1));                     \
+      __ PushCallerSaved(kDontSaveFPRegs, v0, v1);                             \
+      __ PrepareCallCFunction(3, 0, kScratchReg);                              \
+      __ CallCFunction(ExternalReference::external(), 3, 0);                   \
+      __ PopCallerSaved(kDontSaveFPRegs, v0, v1);                              \
+    }                                                                          \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC64_ARITH_BINOP(bin_instr, external)                     \
+  do {                                                                         \
+    if (IsMipsArchVariant(kMips32r6)) {                                        \
+      Label binop;                                                             \
+      Register oldval_low =                                                    \
+          instr->OutputCount() >= 1 ? i.OutputRegister(0) : i.TempRegister(1); \
+      Register oldval_high =                                                   \
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(2); \
+      __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));      \
+      __ sync();                                                               \
+      __ bind(&binop);                                                         \
+      __ llx(oldval_high, MemOperand(i.TempRegister(0), 4));                   \
+      __ ll(oldval_low, MemOperand(i.TempRegister(0), 0));                     \
+      __ bin_instr(i.TempRegister(1), i.TempRegister(2), oldval_low,           \
+                   oldval_high, i.InputRegister(2), i.InputRegister(3),        \
+                   kScratchReg, kScratchReg2);                                 \
+      __ scx(i.TempRegister(2), MemOperand(i.TempRegister(0), 4));             \
+      __ sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));              \
+      __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));        \
+      __ sync();                                                               \
+    } else {                                                                   \
+      FrameScope scope(tasm(), StackFrame::MANUAL);                            \
+      __ Addu(a0, i.InputRegister(0), i.InputRegister(1));                     \
+      __ PushCallerSaved(kDontSaveFPRegs, v0, v1);                             \
+      __ PrepareCallCFunction(3, 0, kScratchReg);                              \
+      __ CallCFunction(ExternalReference::external(), 3, 0);                   \
+      __ PopCallerSaved(kDontSaveFPRegs, v0, v1);                              \
+    }                                                                          \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP_EXT(sign_extend, size, bin_instr)                \
+  do {                                                                         \
+    Label binop;                                                               \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));        \
+    __ andi(i.TempRegister(3), i.TempRegister(0), 0x3);                        \
+    __ Subu(i.TempRegister(0), i.TempRegister(0), Operand(i.TempRegister(3))); \
+    __ sll(i.TempRegister(3), i.TempRegister(3), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&binop);                                                           \
+    __ Ll(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));                \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(1), i.TempRegister(3),  \
+                   size, sign_extend);                                         \
+    __ bin_instr(i.TempRegister(2), i.OutputRegister(0),                       \
+                 Operand(i.InputRegister(2)));                                 \
+    __ InsertBits(i.TempRegister(1), i.TempRegister(2), i.TempRegister(3),     \
+                  size);                                                       \
+    __ Sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));                \
+    __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));          \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER()                               \
+  do {                                                                   \
+    Label exchange;                                                      \
+    __ sync();                                                           \
+    __ bind(&exchange);                                                  \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));  \
+    __ Ll(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));        \
+    __ mov(i.TempRegister(1), i.InputRegister(2));                       \
+    __ Sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));          \
+    __ BranchShort(&exchange, eq, i.TempRegister(1), Operand(zero_reg)); \
+    __ sync();                                                           \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(sign_extend, size)                \
+  do {                                                                         \
+    Label exchange;                                                            \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));        \
+    __ andi(i.TempRegister(1), i.TempRegister(0), 0x3);                        \
+    __ Subu(i.TempRegister(0), i.TempRegister(0), Operand(i.TempRegister(1))); \
+    __ sll(i.TempRegister(1), i.TempRegister(1), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&exchange);                                                        \
+    __ Ll(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));                \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ InsertBits(i.TempRegister(2), i.InputRegister(2), i.TempRegister(1),    \
+                  size);                                                       \
+    __ Sc(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));                \
+    __ BranchShort(&exchange, eq, i.TempRegister(2), Operand(zero_reg));       \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER()                      \
+  do {                                                                  \
+    Label compareExchange;                                              \
+    Label exit;                                                         \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
+    __ sync();                                                          \
+    __ bind(&compareExchange);                                          \
+    __ Ll(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));       \
+    __ BranchShort(&exit, ne, i.InputRegister(2),                       \
+                   Operand(i.OutputRegister(0)));                       \
+    __ mov(i.TempRegister(2), i.InputRegister(3));                      \
+    __ Sc(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));         \
+    __ BranchShort(&compareExchange, eq, i.TempRegister(2),             \
+                   Operand(zero_reg));                                  \
+    __ bind(&exit);                                                     \
+    __ sync();                                                          \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(sign_extend, size)        \
+  do {                                                                         \
+    Label compareExchange;                                                     \
+    Label exit;                                                                \
+    __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));        \
+    __ andi(i.TempRegister(1), i.TempRegister(0), 0x3);                        \
+    __ Subu(i.TempRegister(0), i.TempRegister(0), Operand(i.TempRegister(1))); \
+    __ sll(i.TempRegister(1), i.TempRegister(1), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&compareExchange);                                                 \
+    __ Ll(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));                \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ ExtractBits(i.InputRegister(2), i.InputRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ BranchShort(&exit, ne, i.InputRegister(2),                              \
+                   Operand(i.OutputRegister(0)));                              \
+    __ InsertBits(i.TempRegister(2), i.InputRegister(3), i.TempRegister(1),    \
+                  size);                                                       \
+    __ Sc(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));                \
+    __ BranchShort(&compareExchange, eq, i.TempRegister(2),                    \
+                   Operand(zero_reg));                                         \
+    __ bind(&exit);                                                            \
+    __ sync();                                                                 \
+  } while (0)
+
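+// The IEEE754 helpers call out to V8's C implementations of the math
+// functions (ExternalReference::ieee754_*_function) instead of emitting
+// inline code; operands and the result travel through the C calling
+// convention's FP parameter and result registers.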
+#define ASSEMBLE_IEEE754_BINOP(name)                                        \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                             \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                       \
+                            i.InputDoubleRegister(1));                      \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
+    /* Move the result into the double result register. */                  \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                         \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 1, kScratchReg);                             \
+    __ MovToFloatParameter(i.InputDoubleRegister(0));                       \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
+    /* Move the result into the double result register. */                  \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+  } while (0)
+
+#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op)                     \
+  do {                                                          \
+    __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+          i.InputSimd128Register(1));                           \
+  } while (0)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ mov(sp, fp);
+  __ Pop(ra, fp);
+}
+
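+// If a frame has been built, reload ra and fp with the caller's return
+// address and frame pointer so the upcoming tail call runs in the caller's
+// frame; subsequent frame accesses go through sp.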
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ lw(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
+    __ lw(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ lw(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ Branch(&done, ne, scratch1,
+            Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ lw(caller_args_count_reg,
+        MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
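+// Adjusts sp before a tail call so that the requested number of slots lies
+// above it: the stack grows when more slots are needed and, when shrinkage is
+// allowed, shrinks when fewer are; the FrameAccessState's SP delta is kept in
+// sync with the adjustment.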
+void AdjustStackPointerForTailCall(TurboAssembler* tasm,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    tasm->Subu(sp, sp, stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    tasm->Addu(sp, sp, -stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  __ ComputeCodeStartAddress(kScratchReg);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart,
+            kJavaScriptCallCodeStartRegister, Operand(kScratchReg));
+}
+
+// Check if the code object is marked for deoptimization. If it is, jump to the
+// CompileLazyDeoptimizedCode builtin. In order to do this we need to:
+//    1. read from memory the word that contains that bit, which can be found
+//       in the flags of the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ lw(kScratchReg, MemOperand(kJavaScriptCallCodeStartRegister, offset));
+  __ lw(kScratchReg,
+        FieldMemOperand(kScratchReg,
+                        CodeDataContainer::kKindSpecificFlagsOffset));
+  __ And(kScratchReg, kScratchReg,
+         Operand(1 << Code::kMarkedForDeoptimizationBit));
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, ne, kScratchReg, Operand(zero_reg));
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  // Calculate a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  //    difference = (current - expected) | (expected - current)
+  //    poison = ~(difference >> (kBitsPerSystemPointer - 1))
+  __ ComputeCodeStartAddress(kScratchReg);
+  __ Move(kSpeculationPoisonRegister, kScratchReg);
+  __ subu(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+          kJavaScriptCallCodeStartRegister);
+  __ subu(kJavaScriptCallCodeStartRegister, kJavaScriptCallCodeStartRegister,
+          kScratchReg);
+  __ or_(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kJavaScriptCallCodeStartRegister);
+  __ sra(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kBitsPerSystemPointer - 1);
+  __ nor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kSpeculationPoisonRegister);
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ And(sp, sp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ Call(reg, reg, Code::kHeaderSize - kHeapObjectTag);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        __ Call(i.InputRegister(0));
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ Addu(reg, reg, Code::kHeaderSize - kHeapObjectTag);
+        __ Jump(reg);
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        __ Jump(i.InputRegister(0));
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      __ Jump(reg);
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ lw(kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ Assert(eq, AbortReason::kWrongFunctionContext, cp,
+                  Operand(kScratchReg));
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch");
+      __ lw(a2, FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ Addu(a2, a2, Code::kHeaderSize - kHeapObjectTag);
+      __ Call(a2);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters, kScratchReg);
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label start_call;
+      bool isWasmCapiFunction =
+          linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+      // Distance in bytes from start_call to the return address.
+      int offset = __ root_array_available() ? 68 : 80;
+#if V8_HOST_ARCH_MIPS
+      if (__ emit_debug_code()) {
+        offset += 16;
+      }
+#endif
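+      // For Wasm C-API calls, the nal/nop pair below materializes a pc-based
+      // anchor in ra; |offset| (verified by the CHECK_EQ after the call) then
+      // turns it into the return address of the CallCFunction, which is
+      // stored in the exit frame so the stack walker can recover the calling
+      // pc.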
+      if (isWasmCapiFunction) {
+        // Put the return address in a stack slot.
+        __ mov(kScratchReg, ra);
+        __ bind(&start_call);
+        __ nal();
+        __ nop();
+        __ Addu(ra, ra, offset - 8);  // 8 = nop + nal
+        __ sw(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+        __ mov(ra, kScratchReg);
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      if (isWasmCapiFunction) {
+        CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, on certain architectures
+      // (e.g. ARM) a stricter alignment requirement may cause the old SP to
+      // be saved on the stack, and in those cases we cannot compute the SP
+      // delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == a0);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ stop();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+      break;
+    case kArchNop:
+    case kArchThrowTerminator:
+      // don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ Branch(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchStackPointerGreaterThan:
+      // Pseudo-instruction used for cmp/branch. No opcode emitted here.
+      break;
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchFramePointer:
+      __ mov(i.OutputRegister(), fp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ lw(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ mov(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode());
+      break;
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, index, value,
+                                                   scratch0, scratch1, mode,
+                                                   DetermineStubCallMode());
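+      // Perform the store unconditionally, then branch to the out-of-line
+      // RecordWrite stub only if the object's page has the
+      // kPointersFromHereAreInterestingMask flag set.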
+      __ Addu(kScratchReg, object, index);
+      __ sw(value, MemOperand(kScratchReg));
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask, ne,
+                       ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base_reg = offset.from_stack_pointer() ? sp : fp;
+      __ Addu(i.OutputRegister(), base_reg, Operand(offset.offset()));
+      int alignment = i.InputInt32(1);
+      DCHECK(alignment == 0 || alignment == 4 || alignment == 8 ||
+             alignment == 16);
+      if (FLAG_debug_code && alignment > 0) {
+        // Verify that the output register is properly aligned.
+        __ And(kScratchReg, i.OutputRegister(),
+               Operand(kSystemPointerSize - 1));
+        __ Assert(eq, AbortReason::kAllocationIsNotDoubleAligned, kScratchReg,
+                  Operand(zero_reg));
+      }
+
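+      // When the slot needs stronger alignment than the frame guarantees,
+      // bump the computed address up to the next |alignment| boundary: by one
+      // pointer for 8-byte alignment, otherwise by the remaining distance to
+      // the boundary.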
+      if (alignment == 2 * kSystemPointerSize) {
+        Label done;
+        __ Addu(kScratchReg, base_reg, Operand(offset.offset()));
+        __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
+        __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
+        __ Addu(i.OutputRegister(), i.OutputRegister(), kSystemPointerSize);
+        __ bind(&done);
+      } else if (alignment > 2 * kSystemPointerSize) {
+        Label done;
+        __ Addu(kScratchReg, base_reg, Operand(offset.offset()));
+        __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
+        __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
+        __ li(kScratchReg2, alignment);
+        __ Subu(kScratchReg2, kScratchReg2, Operand(kScratchReg));
+        __ Addu(i.OutputRegister(), i.OutputRegister(), kScratchReg2);
+        __ bind(&done);
+      }
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      __ And(i.OutputRegister(), i.InputRegister(0),
+             kSpeculationPoisonRegister);
+      break;
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kMipsAdd:
+      __ Addu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsAddOvf:
+      __ AddOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                     kScratchReg);
+      break;
+    case kMipsSub:
+      __ Subu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsSubOvf:
+      __ SubOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                     kScratchReg);
+      break;
+    case kMipsMul:
+      __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsMulOvf:
+      __ MulOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                     kScratchReg);
+      break;
+    case kMipsMulHigh:
+      __ Mulh(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsMulHighU:
+      __ Mulhu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsDiv:
+      __ Div(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (IsMipsArchVariant(kMips32r6)) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMipsDivU:
+      __ Divu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (IsMipsArchVariant(kMips32r6)) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMipsMod:
+      __ Mod(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsModU:
+      __ Modu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsAnd:
+      __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsOr:
+      __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsNor:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      } else {
+        DCHECK_EQ(0, i.InputOperand(1).immediate());
+        __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg);
+      }
+      break;
+    case kMipsXor:
+      __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsClz:
+      __ Clz(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMipsCtz: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Ctz(dst, src);
+    } break;
+    case kMipsPopcnt: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Popcnt(dst, src);
+    } break;
+    case kMipsShl:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ sllv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int32_t imm = i.InputOperand(1).immediate();
+        __ sll(i.OutputRegister(), i.InputRegister(0), imm);
+      }
+      break;
+    case kMipsShr:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ srlv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int32_t imm = i.InputOperand(1).immediate();
+        __ srl(i.OutputRegister(), i.InputRegister(0), imm);
+      }
+      break;
+    case kMipsSar:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ srav(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int32_t imm = i.InputOperand(1).immediate();
+        __ sra(i.OutputRegister(), i.InputRegister(0), imm);
+      }
+      break;
+    case kMipsShlPair: {
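+      // Pair shifts produce a 64-bit result in two registers; when only one
+      // output is allocated (the high word is unused), a temp register stands
+      // in for the second half.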
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsRegister()) {
+        __ ShlPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2), kScratchReg,
+                   kScratchReg2);
+      } else {
+        uint32_t imm = i.InputOperand(2).immediate();
+        __ ShlPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), imm, kScratchReg);
+      }
+    } break;
+    case kMipsShrPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsRegister()) {
+        __ ShrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2), kScratchReg,
+                   kScratchReg2);
+      } else {
+        uint32_t imm = i.InputOperand(2).immediate();
+        __ ShrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), imm, kScratchReg);
+      }
+    } break;
+    case kMipsSarPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsRegister()) {
+        __ SarPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), i.InputRegister(2), kScratchReg,
+                   kScratchReg2);
+      } else {
+        uint32_t imm = i.InputOperand(2).immediate();
+        __ SarPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                   i.InputRegister(1), imm, kScratchReg);
+      }
+    } break;
+    case kMipsExt:
+      __ Ext(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+             i.InputInt8(2));
+      break;
+    case kMipsIns:
+      if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) {
+        __ Ins(i.OutputRegister(), zero_reg, i.InputInt8(1), i.InputInt8(2));
+      } else {
+        __ Ins(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+               i.InputInt8(2));
+      }
+      break;
+    case kMipsRor:
+      __ Ror(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsTst:
+      __ And(kScratchReg, i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMipsCmp:
+      // Pseudo-instruction used for cmp/branch. No opcode emitted here.
+      break;
+    case kMipsMov:
+      // TODO(plind): Should we combine mov/li like this, or use separate instr?
+      //    - Also see x64 ASSEMBLE_BINOP & RegisterOrOperandType
+      if (HasRegisterInput(instr, 0)) {
+        __ mov(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ li(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kMipsLsa:
+      DCHECK(instr->InputAt(2)->IsImmediate());
+      __ Lsa(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.InputInt8(2));
+      break;
+    case kMipsCmpS: {
+      FPURegister left = i.InputOrZeroSingleRegister(0);
+      FPURegister right = i.InputOrZeroSingleRegister(1);
+      bool predicate;
+      FPUCondition cc =
+          FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
+
+      if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+          !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+
+      __ CompareF32(cc, left, right);
+    } break;
+    case kMipsAddS:
+      // TODO(plind): add special case: combine mult & add.
+      __ add_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsSubS:
+      __ sub_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsMulS:
+      // TODO(plind): add special case: right op is -1.0, see arm port.
+      __ mul_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsDivS:
+      __ div_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsModS: {
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      __ PrepareCallCFunction(0, 2, kScratchReg);
+      __ MovToFloatParameters(i.InputDoubleRegister(0),
+                              i.InputDoubleRegister(1));
+      // TODO(balazs.kilvady): implement mod_two_floats_operation(isolate())
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      // Move the result into the double result register.
+      __ MovFromFloatResult(i.OutputSingleRegister());
+      break;
+    }
+    case kMipsAbsS:
+      if (IsMipsArchVariant(kMips32r6)) {
+        __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      } else {
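+        // Pre-r6: compute the absolute value by clearing the sign bit
+        // (bit 31) through the integer pipeline.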
+        __ mfc1(kScratchReg, i.InputSingleRegister(0));
+        __ Ins(kScratchReg, zero_reg, 31, 1);
+        __ mtc1(kScratchReg, i.OutputSingleRegister());
+      }
+      break;
+    case kMipsSqrtS: {
+      __ sqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsMaxS:
+      __ max_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsMinS:
+      __ min_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsCmpD: {
+      FPURegister left = i.InputOrZeroDoubleRegister(0);
+      FPURegister right = i.InputOrZeroDoubleRegister(1);
+      bool predicate;
+      FPUCondition cc =
+          FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
+      if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+          !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ CompareF64(cc, left, right);
+    } break;
+    case kMipsAddPair:
+      __ AddPair(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+                 i.InputRegister(1), i.InputRegister(2), i.InputRegister(3),
+                 kScratchReg, kScratchReg2);
+      break;
+    case kMipsSubPair:
+      __ SubPair(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+                 i.InputRegister(1), i.InputRegister(2), i.InputRegister(3),
+                 kScratchReg, kScratchReg2);
+      break;
+    case kMipsMulPair: {
+      __ MulPair(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
+                 i.InputRegister(1), i.InputRegister(2), i.InputRegister(3),
+                 kScratchReg, kScratchReg2);
+    } break;
+    case kMipsAddD:
+      // TODO(plind): add special case: combine mult & add.
+      __ add_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsSubD:
+      __ sub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsMaddS:
+      __ Madd_s(i.OutputFloatRegister(), i.InputFloatRegister(0),
+                i.InputFloatRegister(1), i.InputFloatRegister(2),
+                kScratchDoubleReg);
+      break;
+    case kMipsMaddD:
+      __ Madd_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputDoubleRegister(1), i.InputDoubleRegister(2),
+                kScratchDoubleReg);
+      break;
+    case kMipsMsubS:
+      __ Msub_s(i.OutputFloatRegister(), i.InputFloatRegister(0),
+                i.InputFloatRegister(1), i.InputFloatRegister(2),
+                kScratchDoubleReg);
+      break;
+    case kMipsMsubD:
+      __ Msub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+                i.InputDoubleRegister(1), i.InputDoubleRegister(2),
+                kScratchDoubleReg);
+      break;
+    case kMipsMulD:
+      // TODO(plind): add special case: right op is -1.0, see arm port.
+      __ mul_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsDivD:
+      __ div_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsModD: {
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      __ PrepareCallCFunction(0, 2, kScratchReg);
+      __ MovToFloatParameters(i.InputDoubleRegister(0),
+                              i.InputDoubleRegister(1));
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      // Move the result into the double result register.
+      __ MovFromFloatResult(i.OutputDoubleRegister());
+      break;
+    }
+    case kMipsAbsD: {
+      FPURegister src = i.InputDoubleRegister(0);
+      FPURegister dst = i.OutputDoubleRegister();
+      if (IsMipsArchVariant(kMips32r6)) {
+        __ abs_d(dst, src);
+      } else {
+        __ Move(dst, src);
+        __ mfhc1(kScratchReg, src);
+        __ Ins(kScratchReg, zero_reg, 31, 1);
+        __ mthc1(kScratchReg, dst);
+      }
+      break;
+    }
+    case kMipsNegS:
+      __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    case kMipsNegD:
+      __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMipsSqrtD: {
+      __ sqrt_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsMaxD:
+      __ max_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsMinD:
+      __ min_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMipsFloat64RoundDown: {
+      __ Floor_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsFloat32RoundDown: {
+      __ Floor_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMipsFloat64RoundTruncate: {
+      __ Trunc_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsFloat32RoundTruncate: {
+      __ Trunc_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMipsFloat64RoundUp: {
+      __ Ceil_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsFloat32RoundUp: {
+      __ Ceil_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMipsFloat64RoundTiesEven: {
+      __ Round_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsFloat32RoundTiesEven: {
+      __ Round_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMipsFloat32Max: {
+      FPURegister dst = i.OutputSingleRegister();
+      FPURegister src1 = i.InputSingleRegister(0);
+      FPURegister src2 = i.InputSingleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat32Max>(this, dst, src1, src2);
+      __ Float32Max(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMipsFloat64Max: {
+      DoubleRegister dst = i.OutputDoubleRegister();
+      DoubleRegister src1 = i.InputDoubleRegister(0);
+      DoubleRegister src2 = i.InputDoubleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat64Max>(this, dst, src1, src2);
+      __ Float64Max(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMipsFloat32Min: {
+      FPURegister dst = i.OutputSingleRegister();
+      FPURegister src1 = i.InputSingleRegister(0);
+      FPURegister src2 = i.InputSingleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat32Min>(this, dst, src1, src2);
+      __ Float32Min(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMipsFloat64Min: {
+      DoubleRegister dst = i.OutputDoubleRegister();
+      DoubleRegister src1 = i.InputDoubleRegister(0);
+      DoubleRegister src2 = i.InputDoubleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat64Min>(this, dst, src1, src2);
+      __ Float64Min(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMipsCvtSD: {
+      __ cvt_s_d(i.OutputSingleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMipsCvtDS: {
+      __ cvt_d_s(i.OutputDoubleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMipsCvtDW: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ mtc1(i.InputRegister(0), scratch);
+      __ cvt_d_w(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMipsCvtSW: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ mtc1(i.InputRegister(0), scratch);
+      __ cvt_s_w(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMipsCvtSUw: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Cvt_d_uw(i.OutputDoubleRegister(), i.InputRegister(0), scratch);
+      __ cvt_s_d(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    }
+    case kMipsCvtDUw: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Cvt_d_uw(i.OutputDoubleRegister(), i.InputRegister(0), scratch);
+      break;
+    }
+    case kMipsFloorWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Floor_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsCeilWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Ceil_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsRoundWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Round_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsTruncWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      // Other arches use round to zero here, so we follow.
+      __ Trunc_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsFloorWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ floor_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsCeilWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ ceil_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsRoundWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ round_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMipsTruncWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ trunc_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+      // because INT32_MIN allows easier out-of-bounds detection.
+      __ Addu(kScratchReg, i.OutputRegister(), 1);
+      __ Slt(kScratchReg2, kScratchReg, i.OutputRegister());
+      __ Movn(i.OutputRegister(), kScratchReg, kScratchReg2);
+      break;
+    }
+    case kMipsTruncUwD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Trunc_uw_d(i.OutputRegister(), i.InputDoubleRegister(0), scratch);
+      break;
+    }
+    case kMipsTruncUwS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Trunc_uw_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch);
+      // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+      // because 0 allows easier out-of-bounds detection.
+      __ Addu(kScratchReg, i.OutputRegister(), 1);
+      __ Movz(i.OutputRegister(), zero_reg, kScratchReg);
+      break;
+    }
+    case kMipsFloat64ExtractLowWord32:
+      __ FmoveLow(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMipsFloat64ExtractHighWord32:
+      __ FmoveHigh(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMipsFloat64InsertLowWord32:
+      __ FmoveLow(i.OutputDoubleRegister(), i.InputRegister(1));
+      break;
+    case kMipsFloat64InsertHighWord32:
+      __ FmoveHigh(i.OutputDoubleRegister(), i.InputRegister(1));
+      break;
+    case kMipsFloat64SilenceNaN:
+      __ FPUCanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+
+    // ... more basic instructions ...
+    case kMipsSeb:
+      __ Seb(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMipsSeh:
+      __ Seh(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMipsLbu:
+      __ lbu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsLb:
+      __ lb(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsSb:
+      __ sb(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMipsLhu:
+      __ lhu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsUlhu:
+      __ Ulhu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsLh:
+      __ lh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsUlh:
+      __ Ulh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsSh:
+      __ sh(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMipsUsh:
+      __ Ush(i.InputOrZeroRegister(2), i.MemoryOperand(), kScratchReg);
+      break;
+    case kMipsLw:
+      __ lw(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsUlw:
+      __ Ulw(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMipsSw:
+      __ sw(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMipsUsw:
+      __ Usw(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMipsLwc1: {
+      __ lwc1(i.OutputSingleRegister(), i.MemoryOperand());
+      break;
+    }
+    case kMipsUlwc1: {
+      __ Ulwc1(i.OutputSingleRegister(), i.MemoryOperand(), kScratchReg);
+      break;
+    }
+    case kMipsSwc1: {
+      size_t index = 0;
+      MemOperand operand = i.MemoryOperand(&index);
+      FPURegister ft = i.InputOrZeroSingleRegister(index);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ swc1(ft, operand);
+      break;
+    }
+    case kMipsUswc1: {
+      size_t index = 0;
+      MemOperand operand = i.MemoryOperand(&index);
+      FPURegister ft = i.InputOrZeroSingleRegister(index);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Uswc1(ft, operand, kScratchReg);
+      break;
+    }
+    case kMipsLdc1:
+      __ Ldc1(i.OutputDoubleRegister(), i.MemoryOperand());
+      break;
+    case kMipsUldc1:
+      __ Uldc1(i.OutputDoubleRegister(), i.MemoryOperand(), kScratchReg);
+      break;
+    case kMipsSdc1: {
+      FPURegister ft = i.InputOrZeroDoubleRegister(2);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Sdc1(ft, i.MemoryOperand());
+      break;
+    }
+    case kMipsUsdc1: {
+      FPURegister ft = i.InputOrZeroDoubleRegister(2);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Usdc1(ft, i.MemoryOperand(), kScratchReg);
+      break;
+    }
+    case kMipsSync: {
+      __ sync();
+      break;
+    }
+    case kMipsPush:
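+      // For FP values, store the value just below sp and then move sp down;
+      // the SP delta is updated so later frame accesses stay consistent.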
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            __ swc1(i.InputFloatRegister(0), MemOperand(sp, -kFloatSize));
+            __ Subu(sp, sp, Operand(kFloatSize));
+            frame_access_state()->IncreaseSPDelta(kFloatSize /
+                                                  kSystemPointerSize);
+            break;
+          case MachineRepresentation::kFloat64:
+            __ Sdc1(i.InputDoubleRegister(0), MemOperand(sp, -kDoubleSize));
+            __ Subu(sp, sp, Operand(kDoubleSize));
+            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+                                                  kSystemPointerSize);
+            break;
+          default: {
+            UNREACHABLE();
+            break;
+          }
+        }
+      } else {
+        __ Push(i.InputRegister(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      break;
+    case kMipsPeek: {
+      // The incoming value is 0-based, but we need a 1-based value.
+      int reverse_slot = i.InputInt32(0) + 1;
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Ldc1(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(op->representation(), MachineRepresentation::kFloat32);
+          __ lwc1(i.OutputSingleRegister(0), MemOperand(fp, offset));
+        }
+      } else {
+        __ lw(i.OutputRegister(0), MemOperand(fp, offset));
+      }
+      break;
+    }
+    case kMipsStackClaim: {
+      __ Subu(sp, sp, Operand(i.InputInt32(0)));
+      frame_access_state()->IncreaseSPDelta(i.InputInt32(0) /
+                                            kSystemPointerSize);
+      break;
+    }
+    case kMipsStoreToStackSlot: {
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Sdc1(i.InputDoubleRegister(0), MemOperand(sp, i.InputInt32(1)));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ swc1(i.InputSingleRegister(0), MemOperand(sp, i.InputInt32(1)));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+          __ st_b(i.InputSimd128Register(0), MemOperand(sp, i.InputInt32(1)));
+        }
+      } else {
+        __ sw(i.InputRegister(0), MemOperand(sp, i.InputInt32(1)));
+      }
+      break;
+    }
+    case kMipsByteSwap32: {
+      __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4);
+      break;
+    }
+    case kMipsS128Load8Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ lb(kScratchReg, i.MemoryOperand());
+      __ fill_b(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMipsS128Load16Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ lh(kScratchReg, i.MemoryOperand());
+      __ fill_h(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMipsS128Load32Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Lw(kScratchReg, i.MemoryOperand());
+      __ fill_w(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMipsS128Load64Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
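+      // There is no 64-bit GPR load on MIPS32, so the 64-bit lane is built
+      // from two word loads: each half is splatted with fill_w and the halves
+      // are interleaved with ilvr_w, replicating the value into both 64-bit
+      // lanes.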
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      break;
+    }
+    case kMipsS128Load8x8S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ clti_s_b(kSimd128ScratchReg, dst, 0);
+      __ ilvr_b(dst, kSimd128ScratchReg, dst);
+      break;
+    }
+    case kMipsS128Load8x8U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ ilvr_b(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kMipsS128Load16x4S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ clti_s_h(kSimd128ScratchReg, dst, 0);
+      __ ilvr_h(dst, kSimd128ScratchReg, dst);
+      break;
+    }
+    case kMipsS128Load16x4U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ ilvr_h(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kMipsS128Load32x2S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ clti_s_w(kSimd128ScratchReg, dst, 0);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      break;
+    }
+    case kMipsS128Load32x2U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      MemOperand memLow = i.MemoryOperand();
+      MemOperand memHigh = MemOperand(memLow.rm(), memLow.offset() + 4);
+      __ Lw(kScratchReg, memLow);
+      __ fill_w(dst, kScratchReg);
+      __ Lw(kScratchReg, memHigh);
+      __ fill_w(kSimd128ScratchReg, kScratchReg);
+      __ ilvr_w(dst, kSimd128ScratchReg, dst);
+      __ ilvr_w(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kWord32AtomicLoadInt8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(lb);
+      break;
+    case kWord32AtomicLoadUint8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(lbu);
+      break;
+    case kWord32AtomicLoadInt16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(lh);
+      break;
+    case kWord32AtomicLoadUint16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(lhu);
+      break;
+    case kWord32AtomicLoadWord32:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(lw);
+      break;
+    case kWord32AtomicStoreWord8:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(sb);
+      break;
+    case kWord32AtomicStoreWord16:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(sh);
+      break;
+    case kWord32AtomicStoreWord32:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(sw);
+      break;
+    case kWord32AtomicExchangeInt8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(true, 8);
+      break;
+    case kWord32AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(false, 8);
+      break;
+    case kWord32AtomicExchangeInt16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(true, 16);
+      break;
+    case kWord32AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(false, 16);
+      break;
+    case kWord32AtomicExchangeWord32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER();
+      break;
+    case kWord32AtomicCompareExchangeInt8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(true, 8);
+      break;
+    case kWord32AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(false, 8);
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(true, 16);
+      break;
+    case kWord32AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(false, 16);
+      break;
+    case kWord32AtomicCompareExchangeWord32:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER();
+      break;
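+// Expands one atomic binop into its five width/signedness case labels
+// (Int8/Uint8/Int16/Uint16/Word32); sub-word variants go through the _EXT
+// macro with the matching sign-extension flag.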
+#define ATOMIC_BINOP_CASE(op, inst)             \
+  case kWord32Atomic##op##Int8:                 \
+    ASSEMBLE_ATOMIC_BINOP_EXT(true, 8, inst);   \
+    break;                                      \
+  case kWord32Atomic##op##Uint8:                \
+    ASSEMBLE_ATOMIC_BINOP_EXT(false, 8, inst);  \
+    break;                                      \
+  case kWord32Atomic##op##Int16:                \
+    ASSEMBLE_ATOMIC_BINOP_EXT(true, 16, inst);  \
+    break;                                      \
+  case kWord32Atomic##op##Uint16:               \
+    ASSEMBLE_ATOMIC_BINOP_EXT(false, 16, inst); \
+    break;                                      \
+  case kWord32Atomic##op##Word32:               \
+    ASSEMBLE_ATOMIC_BINOP(inst);                \
+    break;
+      ATOMIC_BINOP_CASE(Add, Addu)
+      ATOMIC_BINOP_CASE(Sub, Subu)
+      ATOMIC_BINOP_CASE(And, And)
+      ATOMIC_BINOP_CASE(Or, Or)
+      ATOMIC_BINOP_CASE(Xor, Xor)
+#undef ATOMIC_BINOP_CASE
+    case kMipsWord32AtomicPairLoad: {
+      if (IsMipsArchVariant(kMips32r6)) {
+        if (instr->OutputCount() > 0) {
+          Register second_output = instr->OutputCount() == 2
+                                       ? i.OutputRegister(1)
+                                       : i.TempRegister(1);
+          __ Addu(a0, i.InputRegister(0), i.InputRegister(1));
+          __ llx(second_output, MemOperand(a0, 4));
+          __ ll(i.OutputRegister(0), MemOperand(a0, 0));
+          __ sync();
+        }
+      } else {
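+        // Without the r6 paired load-linked instructions, call the
+        // atomic_pair_load C helper instead.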
+        FrameScope scope(tasm(), StackFrame::MANUAL);
+        __ Addu(a0, i.InputRegister(0), i.InputRegister(1));
+        __ PushCallerSaved(kDontSaveFPRegs, v0, v1);
+        __ PrepareCallCFunction(1, 0, kScratchReg);
+        __ CallCFunction(ExternalReference::atomic_pair_load_function(), 1, 0);
+        __ PopCallerSaved(kDontSaveFPRegs, v0, v1);
+      }
+      break;
+    }
+    case kMipsWord32AtomicPairStore: {
+      if (IsMipsArchVariant(kMips32r6)) {
+        Label store;
+        __ Addu(a0, i.InputRegister(0), i.InputRegister(1));
+        __ sync();
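+        // ll/sc retry loop: reload the pair, conditionally store both halves,
+        // and retry until the store-conditional succeeds.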
+        __ bind(&store);
+        __ llx(i.TempRegister(2), MemOperand(a0, 4));
+        __ ll(i.TempRegister(1), MemOperand(a0, 0));
+        __ Move(i.TempRegister(1), i.InputRegister(2));
+        __ scx(i.InputRegister(3), MemOperand(a0, 4));
+        __ sc(i.TempRegister(1), MemOperand(a0, 0));
+        __ BranchShort(&store, eq, i.TempRegister(1), Operand(zero_reg));
+        __ sync();
+      } else {
+        FrameScope scope(tasm(), StackFrame::MANUAL);
+        __ Addu(a0, i.InputRegister(0), i.InputRegister(1));
+        __ PushCallerSaved(kDontSaveFPRegs);
+        __ PrepareCallCFunction(3, 0, kScratchReg);
+        __ CallCFunction(ExternalReference::atomic_pair_store_function(), 3, 0);
+        __ PopCallerSaved(kDontSaveFPRegs);
+      }
+      break;
+    }
+#define ATOMIC64_BINOP_ARITH_CASE(op, instr, external) \
+  case kMipsWord32AtomicPair##op:                      \
+    ASSEMBLE_ATOMIC64_ARITH_BINOP(instr, external);    \
+    break;
+      ATOMIC64_BINOP_ARITH_CASE(Add, AddPair, atomic_pair_add_function)
+      ATOMIC64_BINOP_ARITH_CASE(Sub, SubPair, atomic_pair_sub_function)
+#undef ATOMIC64_BINOP_ARITH_CASE
+#define ATOMIC64_BINOP_LOGIC_CASE(op, instr, external) \
+  case kMipsWord32AtomicPair##op:                      \
+    ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr, external);    \
+    break;
+      ATOMIC64_BINOP_LOGIC_CASE(And, AndPair, atomic_pair_and_function)
+      ATOMIC64_BINOP_LOGIC_CASE(Or, OrPair, atomic_pair_or_function)
+      ATOMIC64_BINOP_LOGIC_CASE(Xor, XorPair, atomic_pair_xor_function)
+#undef ATOMIC64_BINOP_LOGIC_CASE
+    case kMipsWord32AtomicPairExchange:
+      if (IsMipsArchVariant(kMips32r6)) {
+        Label binop;
+        Register oldval_low =
+            instr->OutputCount() >= 1 ? i.OutputRegister(0) : i.TempRegister(1);
+        Register oldval_high =
+            instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(2);
+        __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
+        __ sync();
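+        // Exchange via an ll/sc loop: capture the old pair and conditionally
+        // store the new one, retrying on failure.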
+        __ bind(&binop);
+        __ llx(oldval_high, MemOperand(i.TempRegister(0), 4));
+        __ ll(oldval_low, MemOperand(i.TempRegister(0), 0));
+        __ Move(i.TempRegister(1), i.InputRegister(2));
+        __ scx(i.InputRegister(3), MemOperand(i.TempRegister(0), 4));
+        __ sc(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));
+        __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));
+        __ sync();
+      } else {
+        FrameScope scope(tasm(), StackFrame::MANUAL);
+        __ PushCallerSaved(kDontSaveFPRegs, v0, v1);
+        __ PrepareCallCFunction(3, 0, kScratchReg);
+        __ Addu(a0, i.InputRegister(0), i.InputRegister(1));
+        __ CallCFunction(ExternalReference::atomic_pair_exchange_function(), 3,
+                         0);
+        __ PopCallerSaved(kDontSaveFPRegs, v0, v1);
+      }
+      break;
+    case kMipsWord32AtomicPairCompareExchange: {
+      if (IsMipsArchVariant(kMips32r6)) {
+        Label compareExchange, exit;
+        Register oldval_low =
+            instr->OutputCount() >= 1 ? i.OutputRegister(0) : kScratchReg;
+        Register oldval_high =
+            instr->OutputCount() >= 2 ? i.OutputRegister(1) : kScratchReg2;
+        __ Addu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
+        __ sync();
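+        // Only attempt the conditional store when both halves match the
+        // expected value; otherwise exit with the observed pair.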
+        __ bind(&compareExchange);
+        __ llx(oldval_high, MemOperand(i.TempRegister(0), 4));
+        __ ll(oldval_low, MemOperand(i.TempRegister(0), 0));
+        __ BranchShort(&exit, ne, i.InputRegister(2), Operand(oldval_low));
+        __ BranchShort(&exit, ne, i.InputRegister(3), Operand(oldval_high));
+        __ mov(kScratchReg, i.InputRegister(4));
+        __ scx(i.InputRegister(5), MemOperand(i.TempRegister(0), 4));
+        __ sc(kScratchReg, MemOperand(i.TempRegister(0), 0));
+        __ BranchShort(&compareExchange, eq, kScratchReg, Operand(zero_reg));
+        __ bind(&exit);
+        __ sync();
+      } else {
+        FrameScope scope(tasm(), StackFrame::MANUAL);
+        __ PushCallerSaved(kDontSaveFPRegs, v0, v1);
+        __ PrepareCallCFunction(5, 0, kScratchReg);
+        __ addu(a0, i.InputRegister(0), i.InputRegister(1));
+        __ sw(i.InputRegister(5), MemOperand(sp, 16));
+        __ CallCFunction(
+            ExternalReference::atomic_pair_compare_exchange_function(), 5, 0);
+        __ PopCallerSaved(kDontSaveFPRegs, v0, v1);
+      }
+      break;
+    }
+    case kMipsS128Zero: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(i.OutputSimd128Register(), i.OutputSimd128Register(),
+               i.OutputSimd128Register());
+      break;
+    }
+    case kMipsI32x4Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_w(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMipsI32x4ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_w(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMipsI32x4ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_w(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMipsI32x4Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF64x2Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bclri_d(i.OutputSimd128Register(), i.InputSimd128Register(0), 63);
+      break;
+    }
+    case kMipsF64x2Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bnegi_d(i.OutputSimd128Register(), i.InputSimd128Register(0), 63);
+      break;
+    }
+    case kMipsF64x2Sqrt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsqrt_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF64x2Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fadd_d);
+      break;
+    }
+    case kMipsF64x2Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fsub_d);
+      break;
+    }
+    case kMipsF64x2Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fmul_d);
+      break;
+    }
+    case kMipsF64x2Div: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fdiv_d);
+      break;
+    }
+    case kMipsF64x2Min: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fmin_d);
+      break;
+    }
+    case kMipsF64x2Max: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fmax_d);
+      break;
+    }
+    case kMipsF64x2Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fceq_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF64x2Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcne_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF64x2Lt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fclt_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF64x2Le: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcle_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF64x2Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
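+      // Duplicate the double into both 64-bit lanes via its 32-bit halves.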
+      __ FmoveLow(kScratchReg, i.InputDoubleRegister(0));
+      __ insert_w(dst, 0, kScratchReg);
+      __ insert_w(dst, 2, kScratchReg);
+      __ FmoveHigh(kScratchReg, i.InputDoubleRegister(0));
+      __ insert_w(dst, 1, kScratchReg);
+      __ insert_w(dst, 3, kScratchReg);
+      break;
+    }
+    case kMipsF64x2ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_w(kScratchReg, i.InputSimd128Register(0), i.InputInt8(1) * 2);
+      __ FmoveLow(i.OutputDoubleRegister(), kScratchReg);
+      __ copy_u_w(kScratchReg, i.InputSimd128Register(0),
+                  i.InputInt8(1) * 2 + 1);
+      __ FmoveHigh(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    }
+    case kMipsF64x2ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ FmoveLow(kScratchReg, i.InputDoubleRegister(2));
+      __ insert_w(dst, i.InputInt8(1) * 2, kScratchReg);
+      __ FmoveHigh(kScratchReg, i.InputDoubleRegister(2));
+      __ insert_w(dst, i.InputInt8(1) * 2 + 1, kScratchReg);
+      break;
+    }
+    case kMipsF64x2Pmin: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = rhs < lhs ? rhs : lhs
+      __ fclt_d(dst, rhs, lhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMipsF64x2Pmax: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = lhs < rhs ? rhs : lhs
+      __ fclt_d(dst, lhs, rhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMipsF64x2Ceil: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
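+      // Temporarily switch the MSA rounding mode, round, then restore MSACSR
+      // (Floor, Trunc and NearestInt below follow the same pattern).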
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToPlusInf);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF64x2Floor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToMinusInf);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF64x2Trunc: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToZero);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF64x2NearestInt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      // kRoundToNearest == 0
+      __ ctcmsa(MSACSR, zero_reg);
+      __ frint_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsI64x2Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI64x2Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI64x2Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI64x2Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_d(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI64x2Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ slli_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt6(1));
+      break;
+    }
+    case kMipsI64x2ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srai_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt6(1));
+      break;
+    }
+    case kMipsI64x2ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srli_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt6(1));
+      break;
+    }
+    case kMipsF32x4Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ FmoveLow(kScratchReg, i.InputSingleRegister(0));
+      __ fill_w(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMipsF32x4ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_w(kScratchReg, i.InputSimd128Register(0), i.InputInt8(1));
+      __ FmoveLow(i.OutputSingleRegister(), kScratchReg);
+      break;
+    }
+    case kMipsF32x4ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ FmoveLow(kScratchReg, i.InputSingleRegister(2));
+      __ insert_w(dst, i.InputInt8(1), kScratchReg);
+      break;
+    }
+    case kMipsF32x4SConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ffint_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF32x4UConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ffint_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_w(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMipsI32x4Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ slli_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt5(1));
+      break;
+    }
+    case kMipsI32x4ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srai_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt5(1));
+      break;
+    }
+    case kMipsI32x4ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srli_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt5(1));
+      break;
+    }
+    case kMipsI32x4MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI32x4MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsS128Select: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      DCHECK(i.OutputSimd128Register() == i.InputSimd128Register(0));
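+      // The mask is already in dst; bsel_v merges the two values under it.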
+      __ bsel_v(i.OutputSimd128Register(), i.InputSimd128Register(2),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsS128AndNot: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
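+      // dst = input0 & ~input1 (nor with itself computes the complement).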
+      __ nor_v(dst, i.InputSimd128Register(1), i.InputSimd128Register(1));
+      __ and_v(dst, dst, i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF32x4Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bclri_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
+      break;
+    }
+    case kMipsF32x4Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
+      break;
+    }
+    case kMipsF32x4Sqrt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF32x4RecipApprox: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF32x4RecipSqrtApprox: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ frsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsF32x4Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fadd_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsub_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fmul_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Div: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fdiv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Max: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fmax_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Min: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fmin_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fceq_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcne_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Lt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fclt_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Le: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcle_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsF32x4Pmin: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = rhs < lhs ? rhs : lhs
+      __ fclt_w(dst, rhs, lhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMipsF32x4Pmax: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = lhs < rhs ? rhs : lhs
+      __ fclt_w(dst, lhs, rhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMipsF32x4Ceil: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToPlusInf);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF32x4Floor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToMinusInf);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF32x4Trunc: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      __ li(kScratchReg2, kRoundToZero);
+      __ ctcmsa(MSACSR, kScratchReg2);
+      __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsF32x4NearestInt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cfcmsa(kScratchReg, MSACSR);
+      // kRoundToNearest == 0
+      __ ctcmsa(MSACSR, zero_reg);
+      __ frint_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ ctcmsa(MSACSR, kScratchReg);
+      break;
+    }
+    case kMipsI32x4SConvertF32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4UConvertF32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_w(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ asub_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMipsI32x4BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
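+      // Move each word's sign bit to bit 0, fold the four bits into the low
+      // byte, then extract it as the bitmask.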
+      __ srli_w(scratch0, src, 31);
+      __ srli_d(scratch1, scratch0, 31);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ slli_d(scratch1, scratch1, 2);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ copy_u_b(dst, scratch0, 0);
+      break;
+    }
+    case kMipsI32x4DotI16x8S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ dotp_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMipsI16x8ExtractLaneU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_h(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMipsI16x8ExtractLaneS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_h(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMipsI16x8ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_h(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMipsI16x8Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ slli_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt4(1));
+      break;
+    }
+    case kMipsI16x8ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srai_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt4(1));
+      break;
+    }
+    case kMipsI16x8ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srli_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt4(1));
+      break;
+    }
+    case kMipsI16x8Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8AddSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8SubSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_h(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMipsI16x8GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8AddSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8SubSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI16x8GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8RoundingAverageU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ aver_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                  i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ asub_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMipsI16x8BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
+      __ srli_h(scratch0, src, 15);
+      __ srli_w(scratch1, scratch0, 15);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_d(scratch1, scratch0, 30);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ slli_d(scratch1, scratch1, 4);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ copy_u_b(dst, scratch0, 0);
+      break;
+    }
+    case kMipsI8x16Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMipsI8x16ExtractLaneU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_b(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMipsI8x16ExtractLaneS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_b(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMipsI8x16ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_b(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMipsI8x16Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ slli_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt3(1));
+      break;
+    }
+    case kMipsI8x16ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srai_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt3(1));
+      break;
+    }
+    case kMipsI8x16Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16AddSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16SubSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_b(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMipsI8x16GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ srli_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputInt3(1));
+      break;
+    }
+    case kMipsI8x16AddSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16SubSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsI8x16GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16RoundingAverageU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ aver_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                  i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ asub_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMipsI8x16BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
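+      // Fold the sixteen sign bits down into the low halfword and extract the
+      // 16-bit mask.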
+      __ srli_b(scratch0, src, 7);
+      __ srli_h(scratch1, scratch0, 7);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_w(scratch1, scratch0, 14);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_d(scratch1, scratch0, 28);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ ilvev_b(scratch0, scratch1, scratch0);
+      __ copy_u_h(dst, scratch0, 0);
+      break;
+    }
+    case kMipsS128And: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsS128Or: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ or_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsS128Xor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMipsS128Not: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ nor_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsV32x4AnyTrue:
+    case kMipsV16x8AnyTrue:
+    case kMipsV8x16AnyTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_false;
+
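+      // The li in the delay slot runs on both paths, so dst defaults to 0 and
+      // is only set to -1 when some lane is non-zero.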
+      __ BranchMSA(&all_false, MSA_BRANCH_V, all_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, 0);  // branch delay slot
+      __ li(dst, -1);
+      __ bind(&all_false);
+      break;
+    }
+    case kMipsV32x4AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_W, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, -1);  // branch delay slot
+      __ li(dst, 0);
+      __ bind(&all_true);
+      break;
+    }
+    case kMipsV16x8AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_H, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, -1);  // branch delay slot
+      __ li(dst, 0);
+      __ bind(&all_true);
+      break;
+    }
+    case kMipsV8x16AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_B, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, -1);  // branch delay slot
+      __ li(dst, 0);
+      __ bind(&all_true);
+      break;
+    }
+    case kMipsMsaLd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ld_b(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kMipsMsaSt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ st_b(i.InputSimd128Register(2), i.MemoryOperand());
+      break;
+    }
+    case kMipsS32x4InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [5, 1, 4, 0]
+      __ ilvr_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 3, 6, 2]
+      __ ilvl_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [6, 4, 2, 0]
+      __ pckev_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 5, 3, 1]
+      __ pckod_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [6, 2, 4, 0]
+      __ ilvev_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 3, 5, 1]
+      __ ilvod_w(dst, src1, src0);
+      break;
+    }
+    case kMipsS32x4Shuffle: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+
+      int32_t shuffle = i.InputInt32(2);
+
+      if (src0 == src1) {
+        // Unary S32x4 shuffles are handled with the shf.w instruction.
+        unsigned lane = shuffle & 0xFF;
+        if (FLAG_debug_code) {
+          // For a unary shuffle all four lane indices must come from the same
+          // source, i.e. they must all lie in [0, 3] or all in [4, 7].
+          if (lane >= 4) {
+            int32_t shuffle_helper = shuffle;
+            for (int i = 0; i < 4; ++i) {
+              lane = shuffle_helper & 0xFF;
+              CHECK_GE(lane, 4);
+              shuffle_helper >>= 8;
+            }
+          }
+        }
+        uint32_t i8 = 0;
+        for (int i = 0; i < 4; i++) {
+          lane = shuffle & 0xFF;
+          if (lane >= 4) {
+            lane -= 4;
+          }
+          DCHECK_GT(4, lane);
+          i8 |= lane << (2 * i);
+          shuffle >>= 8;
+        }
+        __ shf_w(dst, src0, i8);
+      } else {
+        // Binary shuffles are handled with the vshf.w instruction.
+        if (dst == src0) {
+          __ move_v(kSimd128ScratchReg, src0);
+          src0 = kSimd128ScratchReg;
+        } else if (dst == src1) {
+          __ move_v(kSimd128ScratchReg, src1);
+          src1 = kSimd128ScratchReg;
+        }
+
+        __ li(kScratchReg, i.InputInt32(2));
+        __ insert_w(dst, 0, kScratchReg);
+        __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+        __ ilvr_b(dst, kSimd128RegZero, dst);
+        __ ilvr_h(dst, kSimd128RegZero, dst);
+        __ vshf_w(dst, src1, src0);
+      }
+      break;
+    }
+    case kMipsS16x8InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [11, 3, 10, 2, 9, 1, 8, 0]
+      __ ilvr_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x8InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 7, 14, 6, 13, 5, 12, 4]
+      __ ilvl_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x8PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [14, 12, 10, 8, 6, 4, 2, 0]
+      __ pckev_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x8PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 13, 11, 9, 7, 5, 3, 1]
+      __ pckod_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x8InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [14, 6, 12, 4, 10, 2, 8, 0]
+      __ ilvev_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x8InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 7, ... 11, 3, 9, 1]
+      __ ilvod_h(dst, src1, src0);
+      break;
+    }
+    case kMipsS16x4Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [7, 6, 5, 4, 3, 2, 1, 0], dst = [4, 5, 6, 7, 0, 1, 2, 3]
+      // shf.df imm field: 0 1 2 3 = 00011011 = 0x1B
+      __ shf_h(i.OutputSimd128Register(), i.InputSimd128Register(0), 0x1B);
+      break;
+    }
+    case kMipsS16x2Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [7, 6, 5, 4, 3, 2, 1, 0], dst = [6, 7, 4, 5, 3, 2, 0, 1]
+      // shf.df imm field: 2 3 0 1 = 10110001 = 0xB1
+      __ shf_h(i.OutputSimd128Register(), i.InputSimd128Register(0), 0xB1);
+      break;
+    }
+    case kMipsS8x16InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [23, 7, ... 17, 1, 16, 0]
+      __ ilvr_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 15, ... 25, 9, 24, 8]
+      __ ilvl_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [30, 28, ... 6, 4, 2, 0]
+      __ pckev_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 29, ... 7, 5, 3, 1]
+      __ pckod_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [30, 14, ... 18, 2, 16, 0]
+      __ ilvev_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 15, ... 19, 3, 17, 1]
+      __ ilvod_b(dst, src1, src0);
+      break;
+    }
+    case kMipsS8x16Concat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      DCHECK(dst == i.InputSimd128Register(0));
+      __ sldi_b(dst, i.InputSimd128Register(1), i.InputInt4(2));
+      break;
+    }
+    case kMipsI8x16Shuffle: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+
+      if (dst == src0) {
+        __ move_v(kSimd128ScratchReg, src0);
+        src0 = kSimd128ScratchReg;
+      } else if (dst == src1) {
+        __ move_v(kSimd128ScratchReg, src1);
+        src1 = kSimd128ScratchReg;
+      }
+
+      __ li(kScratchReg, i.InputInt32(2));
+      __ insert_w(dst, 0, kScratchReg);
+      __ li(kScratchReg, i.InputInt32(3));
+      __ insert_w(dst, 1, kScratchReg);
+      __ li(kScratchReg, i.InputInt32(4));
+      __ insert_w(dst, 2, kScratchReg);
+      __ li(kScratchReg, i.InputInt32(5));
+      __ insert_w(dst, 3, kScratchReg);
+      __ vshf_b(dst, src1, src0);
+      break;
+    }
+    case kMipsI8x16Swizzle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      tbl = i.InputSimd128Register(0),
+                      ctl = i.InputSimd128Register(1);
+      DCHECK(dst != ctl && dst != tbl);
+      Simd128Register zeroReg = i.TempSimd128Register(0);
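+      // dst holds the byte-selection control for vshf_b over {tbl, zeroReg}.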
+      __ fill_w(zeroReg, zero_reg);
+      __ move_v(dst, ctl);
+      __ vshf_b(dst, tbl, zeroReg);
+      break;
+    }
+    case kMipsS8x8Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+      // dst = [8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7]
+      // [A B C D] => [B A D C]: shf.w imm: 2 3 0 1 = 10110001 = 0xB1
+      // C: [7, 6, 5, 4] => A': [4, 5, 6, 7]: shf.b imm: 00011011 = 0x1B
+      __ shf_w(kSimd128ScratchReg, i.InputSimd128Register(0), 0xB1);
+      __ shf_b(i.OutputSimd128Register(), kSimd128ScratchReg, 0x1B);
+      break;
+    }
+    case kMipsS8x4Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, ... 3, 2, 1, 0], dst = [12, 13, 14, 15, ... 0, 1, 2, 3]
+      // shf.df imm field: 0 1 2 3 = 00011011 = 0x1B
+      __ shf_b(i.OutputSimd128Register(), i.InputSimd128Register(0), 0x1B);
+      break;
+    }
+    case kMipsS8x2Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, ... 3, 2, 1, 0], dst = [14, 15, 12, 13, ... 2, 3, 0, 1]
+      // shf.df imm field: 2 3 0 1 = 10110001 = 0xB1
+      __ shf_b(i.OutputSimd128Register(), i.InputSimd128Register(0), 0xB1);
+      break;
+    }
+    case kMipsI32x4SConvertI16x8Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
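+      // ilvr_h pairs each low halfword with itself; the shift pair then
+      // sign-extends it to a full word.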
+      __ ilvr_h(kSimd128ScratchReg, src, src);
+      __ slli_w(dst, kSimd128ScratchReg, 16);
+      __ srai_w(dst, dst, 16);
+      break;
+    }
+    case kMipsI32x4SConvertI16x8High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvl_h(kSimd128ScratchReg, src, src);
+      __ slli_w(dst, kSimd128ScratchReg, 16);
+      __ srai_w(dst, dst, 16);
+      break;
+    }
+    case kMipsI32x4UConvertI16x8Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvr_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI32x4UConvertI16x8High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvl_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8SConvertI8x16Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvr_b(kSimd128ScratchReg, src, src);
+      __ slli_h(dst, kSimd128ScratchReg, 8);
+      __ srai_h(dst, dst, 8);
+      break;
+    }
+    case kMipsI16x8SConvertI8x16High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvl_b(kSimd128ScratchReg, src, src);
+      __ slli_h(dst, kSimd128ScratchReg, 8);
+      __ srai_h(dst, dst, 8);
+      break;
+    }
+    case kMipsI16x8SConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
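+      // Saturate each 32-bit lane of both inputs to the signed 16-bit range,
+      // then pack the even (low) half-words of the two results into dst.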
+      __ sat_s_w(kSimd128ScratchReg, src0, 15);
+      __ sat_s_w(kSimd128RegZero, src1, 15);  // kSimd128RegZero as scratch
+      __ pckev_h(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsI16x8UConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ sat_u_w(kSimd128ScratchReg, src0, 15);
+      __ sat_u_w(kSimd128RegZero, src1, 15);  // kSimd128RegZero as scratch
+      __ pckev_h(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsI16x8UConvertI8x16Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvr_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI16x8UConvertI8x16High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvl_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMipsI8x16SConvertI16x8: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ sat_s_h(kSimd128ScratchReg, src0, 7);
+      __ sat_s_h(kSimd128RegZero, src1, 7);  // kSimd128RegZero as scratch
+      __ pckev_b(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsI8x16UConvertI16x8: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ sat_u_h(kSimd128ScratchReg, src0, 7);
+      __ sat_u_h(kSimd128RegZero, src1, 7);  // kSimd128RegZero as scratch
+      __ pckev_b(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsF32x4AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
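+      // Swapping adjacent lanes and adding puts each pairwise sum into both
+      // lanes of its pair; pckev_w then packs the even lanes of the two
+      // partial results into dst.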
+      __ shf_w(kSimd128ScratchReg, src0, 0xB1);  // 2 3 0 1 : 10110001 : 0xB1
+      __ shf_w(kSimd128RegZero, src1, 0xB1);     // kSimd128RegZero as scratch
+      __ fadd_w(kSimd128ScratchReg, kSimd128ScratchReg, src0);
+      __ fadd_w(kSimd128RegZero, kSimd128RegZero, src1);
+      __ pckev_w(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsI32x4AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ hadd_s_d(kSimd128ScratchReg, src0, src0);
+      __ hadd_s_d(kSimd128RegZero, src1, src1);  // kSimd128RegZero as scratch
+      __ pckev_w(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMipsI16x8AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ hadd_s_w(kSimd128ScratchReg, src0, src0);
+      __ hadd_s_w(kSimd128RegZero, src1, src1);  // kSimd128RegZero as scratch
+      __ pckev_h(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
+                            Instruction* instr, FlagsCondition condition,
+                            Label* tlabel, Label* flabel, bool fallthru) {
+#undef __
+#define __ tasm->
+
+  Condition cc = kNoCondition;
+  // MIPS does not have condition code flags, so compare and branch are
+  // implemented differently than on other architectures. The compare
+  // operations emit MIPS pseudo-instructions, which are handled here by
+  // branch instructions that do the actual comparison. It is essential that
+  // the input registers to the compare pseudo-op are not modified before this
+  // branch op, as they are tested here.
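+  // For kMipsCmp, for instance, the comparison happens entirely in the branch
+  // emitted below, using the still-untouched inputs:
+  //   Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1))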
+
+  MipsOperandConverter i(gen, instr);
+  if (instr->arch_opcode() == kMipsTst) {
+    cc = FlagsConditionToConditionTst(condition);
+    __ Branch(tlabel, cc, kScratchReg, Operand(zero_reg));
+  } else if (instr->arch_opcode() == kMipsAddOvf ||
+             instr->arch_opcode() == kMipsSubOvf) {
+    // Overflow occurs if overflow register is negative
+    switch (condition) {
+      case kOverflow:
+        __ Branch(tlabel, lt, kScratchReg, Operand(zero_reg));
+        break;
+      case kNotOverflow:
+        __ Branch(tlabel, ge, kScratchReg, Operand(zero_reg));
+        break;
+      default:
+        UNSUPPORTED_COND(instr->arch_opcode(), condition);
+        break;
+    }
+  } else if (instr->arch_opcode() == kMipsMulOvf) {
+    // Overflow occurs if overflow register is not zero
+    switch (condition) {
+      case kOverflow:
+        __ Branch(tlabel, ne, kScratchReg, Operand(zero_reg));
+        break;
+      case kNotOverflow:
+        __ Branch(tlabel, eq, kScratchReg, Operand(zero_reg));
+        break;
+      default:
+        UNSUPPORTED_COND(kMipsMulOvf, condition);
+        break;
+    }
+  } else if (instr->arch_opcode() == kMipsCmp) {
+    cc = FlagsConditionToConditionCmp(condition);
+    __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1));
+  } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) {
+    cc = FlagsConditionToConditionCmp(condition);
+    Register lhs_register = sp;
+    uint32_t offset;
+    if (gen->ShouldApplyOffsetToStackCheck(instr, &offset)) {
+      lhs_register = i.TempRegister(0);
+      __ Subu(lhs_register, sp, offset);
+    }
+    __ Branch(tlabel, cc, lhs_register, Operand(i.InputRegister(0)));
+  } else if (instr->arch_opcode() == kMipsCmpS ||
+             instr->arch_opcode() == kMipsCmpD) {
+    bool predicate;
+    FlagsConditionToConditionCmpFPU(&predicate, condition);
+    if (predicate) {
+      __ BranchTrueF(tlabel);
+    } else {
+      __ BranchFalseF(tlabel);
+    }
+  } else {
+    PrintF("AssembleArchBranch Unimplemented arch_opcode: %d\n",
+           instr->arch_opcode());
+    UNIMPLEMENTED();
+  }
+  if (!fallthru) __ Branch(flabel);  // no fallthru to flabel.
+#undef __
+#define __ tasm()->
+}
+
+// Assembles branches after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  AssembleBranchToLabels(this, tasm(), instr, branch->condition, tlabel, flabel,
+                         branch->fallthru);
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
+    return;
+  }
+
+  MipsOperandConverter i(this, instr);
+  condition = NegateFlagsCondition(condition);
+
+  switch (instr->arch_opcode()) {
+    case kMipsCmp: {
+      __ LoadZeroOnCondition(kSpeculationPoisonRegister, i.InputRegister(0),
+                             i.InputOperand(1),
+                             FlagsConditionToConditionCmp(condition));
+    }
+      return;
+    case kMipsTst: {
+      switch (condition) {
+        case kEqual:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg);
+          break;
+        case kNotEqual:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg);
+          break;
+        default:
+          UNREACHABLE();
+      }
+    }
+      return;
+    case kMipsAddOvf:
+    case kMipsSubOvf: {
+      // Overflow occurs if overflow register is negative
+      __ Slt(kScratchReg2, kScratchReg, zero_reg);
+      switch (condition) {
+        case kOverflow:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg2);
+          break;
+        case kNotOverflow:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2);
+          break;
+        default:
+          UNSUPPORTED_COND(instr->arch_opcode(), condition);
+      }
+    }
+      return;
+    case kMipsMulOvf: {
+      // Overflow occurs if overflow register is not zero
+      switch (condition) {
+        case kOverflow:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg);
+          break;
+        case kNotOverflow:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg);
+          break;
+        default:
+          UNSUPPORTED_COND(instr->arch_opcode(), condition);
+      }
+    }
+      return;
+    case kMipsCmpS:
+    case kMipsCmpD: {
+      bool predicate;
+      FlagsConditionToConditionCmpFPU(&predicate, condition);
+      if (predicate) {
+        __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister);
+      } else {
+        __ LoadZeroIfNotFPUCondition(kSpeculationPoisonRegister);
+      }
+    }
+      return;
+    default:
+      UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ Branch(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+    void Generate() final {
+      MipsOperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        // We use the context register as the scratch register, because we do
+        // not have a context here.
+        __ PrepareCallCFunction(0, 0, cp);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          __ stop();
+        }
+      }
+    }
+
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  AssembleBranchToLabels(this, tasm(), instr, condition, tlabel, nullptr, true);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  MipsOperandConverter i(this, instr);
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register result = i.OutputRegister(instr->OutputCount() - 1);
+  Condition cc = kNoCondition;
+  // MIPS does not have condition code flags, so compare and branch are
+  // implemented differently than on other architectures. The compare
+  // operations emit MIPS pseudo-instructions, which are checked and handled
+  // here.
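+  // For kMipsCmp with an eq condition, for example, the inputs are XOR-ed and
+  // the boolean is then materialized with Sltu(result, result, 1), as in the
+  // cases below.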
+
+  if (instr->arch_opcode() == kMipsTst) {
+    cc = FlagsConditionToConditionTst(condition);
+    if (cc == eq) {
+      __ Sltu(result, kScratchReg, 1);
+    } else {
+      __ Sltu(result, zero_reg, kScratchReg);
+    }
+    return;
+  } else if (instr->arch_opcode() == kMipsAddOvf ||
+             instr->arch_opcode() == kMipsSubOvf) {
+    // Overflow occurs if overflow register is negative
+    __ slt(result, kScratchReg, zero_reg);
+  } else if (instr->arch_opcode() == kMipsMulOvf) {
+    // Overflow occurs if overflow register is not zero
+    __ Sgtu(result, kScratchReg, zero_reg);
+  } else if (instr->arch_opcode() == kMipsCmp) {
+    cc = FlagsConditionToConditionCmp(condition);
+    switch (cc) {
+      case eq:
+      case ne: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
+        if (instr->InputAt(1)->IsImmediate()) {
+          if (is_int16(-right.immediate())) {
+            if (right.immediate() == 0) {
+              if (cc == eq) {
+                __ Sltu(result, left, 1);
+              } else {
+                __ Sltu(result, zero_reg, left);
+              }
+            } else {
+              __ Addu(result, left, -right.immediate());
+              if (cc == eq) {
+                __ Sltu(result, result, 1);
+              } else {
+                __ Sltu(result, zero_reg, result);
+              }
+            }
+          } else {
+            if (is_uint16(right.immediate())) {
+              __ Xor(result, left, right);
+            } else {
+              __ li(kScratchReg, right);
+              __ Xor(result, left, kScratchReg);
+            }
+            if (cc == eq) {
+              __ Sltu(result, result, 1);
+            } else {
+              __ Sltu(result, zero_reg, result);
+            }
+          }
+        } else {
+          __ Xor(result, left, right);
+          if (cc == eq) {
+            __ Sltu(result, result, 1);
+          } else {
+            __ Sltu(result, zero_reg, result);
+          }
+        }
+      } break;
+      case lt:
+      case ge: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
+        __ Slt(result, left, right);
+        if (cc == ge) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case gt:
+      case le: {
+        Register left = i.InputRegister(1);
+        Operand right = i.InputOperand(0);
+        __ Slt(result, left, right);
+        if (cc == le) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case lo:
+      case hs: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
+        __ Sltu(result, left, right);
+        if (cc == hs) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case hi:
+      case ls: {
+        Register left = i.InputRegister(1);
+        Operand right = i.InputOperand(0);
+        __ Sltu(result, left, right);
+        if (cc == ls) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      default:
+        UNREACHABLE();
+    }
+    return;
+  } else if (instr->arch_opcode() == kMipsCmpD ||
+             instr->arch_opcode() == kMipsCmpS) {
+    FPURegister left = i.InputOrZeroDoubleRegister(0);
+    FPURegister right = i.InputOrZeroDoubleRegister(1);
+    if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+        !__ IsDoubleZeroRegSet()) {
+      __ Move(kDoubleRegZero, 0.0);
+    }
+    bool predicate;
+    FlagsConditionToConditionCmpFPU(&predicate, condition);
+    if (!IsMipsArchVariant(kMips32r6)) {
+      __ li(result, Operand(1));
+      if (predicate) {
+        __ Movf(result, zero_reg);
+      } else {
+        __ Movt(result, zero_reg);
+      }
+    } else {
+      __ mfc1(result, kDoubleCompareReg);
+      if (predicate) {
+        __ And(result, result, 1);  // cmp returns all 1's/0's, use only LSB.
+      } else {
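+        // The cmp result is all ones (-1) or all zeros, so adding 1 yields 0
+        // or 1, i.e. the negated predicate.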
+        __ Addu(result, result, 1);  // Toggle result for not equal.
+      }
+    }
+    return;
+  } else {
+    PrintF("AssembleArchBoolean Unimplemented arch_opcode is : %d\n",
+           instr->arch_opcode());
+    TRACE_UNIMPL();
+    UNIMPLEMENTED();
+  }
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  size_t const case_count = instr->InputCount() - 2;
+  __ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count));
+  __ GenerateSwitchTable(input, case_count, [&i, this](size_t index) {
+    return GetLabel(i.InputRpo(index + 2));
+  });
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fpu != 0) {
+    frame->AlignSavedCalleeRegisterSlots();
+  }
+
+  if (saves_fpu != 0) {
+    int count = base::bits::CountPopulation(saves_fpu);
+    DCHECK_EQ(kNumCalleeSavedFPU, count);
+    frame->AllocateSavedCalleeRegisterSlots(count *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    int count = base::bits::CountPopulation(saves);
+    DCHECK_EQ(kNumCalleeSaved, count + 1);
+    frame->AllocateSavedCalleeRegisterSlots(count);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ StubPrologue(StackFrame::C_WASM_ENTRY);
+        // Reserve stack space for saving the c_entry_fp later.
+        __ Subu(sp, sp, Operand(kSystemPointerSize));
+      } else {
+        __ Push(ra, fp);
+        __ mov(fp, sp);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      __ StubPrologue(info()->GetOutputStackFrameType());
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ Push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ lw(kJSFunctionRegister,
+              FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ lw(kWasmInstanceRegister,
+              FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        __ Push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ Subu(sp, sp, Operand(kSystemPointerSize));
+        }
+      }
+    }
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+
+  if (required_slots > 0) {
+    DCHECK(frame_access_state()->has_frame());
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
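+        // Compare sp against the real stack limit plus the frame size; if
+        // there is enough room, skip the stack overflow call below.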
+        __ Lw(
+             kScratchReg,
+             FieldMemOperand(kWasmInstanceRegister,
+                             WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ Lw(kScratchReg, MemOperand(kScratchReg));
+        __ Addu(kScratchReg, kScratchReg,
+                Operand(required_slots * kSystemPointerSize));
+        __ Branch(&done, uge, sp, Operand(kScratchReg));
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly, there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ stop();
+      }
+
+      __ bind(&done);
+    }
+  }
+
+  const int returns = frame()->GetReturnSlotCount();
+
+  // Skip callee-saved and return slots, which are pushed below.
+  required_slots -= base::bits::CountPopulation(saves);
+  required_slots -= 2 * base::bits::CountPopulation(saves_fpu);
+  required_slots -= returns;
+  if (required_slots > 0) {
+    __ Subu(sp, sp, Operand(required_slots * kSystemPointerSize));
+  }
+
+  // Save callee-saved FPU registers.
+  if (saves_fpu != 0) {
+    __ MultiPushFPU(saves_fpu);
+  }
+
+  if (saves != 0) {
+    // Save callee-saved registers.
+    __ MultiPush(saves);
+    DCHECK_EQ(kNumCalleeSaved, base::bits::CountPopulation(saves) + 1);
+  }
+
+  if (returns != 0) {
+    // Create space for returns.
+    __ Subu(sp, sp, Operand(returns * kSystemPointerSize));
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    __ Addu(sp, sp, Operand(returns * kSystemPointerSize));
+  }
+
+  // Restore GP registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    __ MultiPop(saves);
+  }
+
+  // Restore FPU registers.
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fpu != 0) {
+    __ MultiPopFPU(saves_fpu);
+  }
+
+  MipsOperandConverter g(this, nullptr);
+  const int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} is
+  // zero. See RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue,
+                g.ToRegister(additional_pop_count),
+                Operand(static_cast<int64_t>(0)));
+    }
+  }
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, this is a builtin with
+  // kDontAdaptArgumentsSentinel, which takes care of popping its JS arguments
+  // itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now, unless they have a
+    // variable number of stack slot pops.
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ Branch(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count
+      __ Lw(t0, MemOperand(fp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    __ Addu(t0, t0, Operand(1));  // Also pop the receiver.
+    if (parameter_count > 1) {
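+      // Compute t0 = max(t0, parameter_count): slt sets kScratchReg2 when
+      // t0 < parameter_count, and movn then overwrites t0 with
+      // parameter_count.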
+      __ li(kScratchReg, parameter_count);
+      __ slt(kScratchReg2, t0, kScratchReg);
+      __ movn(t0, kScratchReg, kScratchReg2);
+    }
+    __ sll(t0, t0, kSystemPointerSizeLog2);
+    __ Addu(sp, sp, t0);
+  } else if (additional_pop_count->IsImmediate()) {
+    DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    __ Drop(parameter_count + additional_count);
+  } else {
+    Register pop_reg = g.ToRegister(additional_pop_count);
+    __ Drop(parameter_count);
+    __ sll(pop_reg, pop_reg, kSystemPointerSizeLog2);
+    __ Addu(sp, sp, pop_reg);
+  }
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  MipsOperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ mov(g.ToRegister(destination), src);
+    } else {
+      __ sw(src, g.ToMemOperand(destination));
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsRegister()) {
+      __ lw(g.ToRegister(destination), src);
+    } else {
+      Register temp = kScratchReg;
+      __ lw(temp, src);
+      __ sw(temp, g.ToMemOperand(destination));
+    }
+  } else if (source->IsConstant()) {
+    Constant src = g.ToConstant(source);
+    if (destination->IsRegister() || destination->IsStackSlot()) {
+      Register dst =
+          destination->IsRegister() ? g.ToRegister(destination) : kScratchReg;
+      switch (src.type()) {
+        case Constant::kInt32:
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+            __ li(dst, Operand(src.ToInt32(), src.rmode()));
+          } else {
+            __ li(dst, Operand(src.ToInt32()));
+          }
+          break;
+        case Constant::kFloat32:
+          __ li(dst, Operand::EmbeddedNumber(src.ToFloat32()));
+          break;
+        case Constant::kInt64:
+          UNREACHABLE();
+          break;
+        case Constant::kFloat64:
+          __ li(dst, Operand::EmbeddedNumber(src.ToFloat64().value()));
+          break;
+        case Constant::kExternalReference:
+          __ li(dst, src.ToExternalReference());
+          break;
+        case Constant::kDelayedStringConstant:
+          __ li(dst, src.ToDelayedStringConstant());
+          break;
+        case Constant::kHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ li(dst, src_object);
+          }
+          break;
+        }
+        case Constant::kCompressedHeapObject:
+          UNREACHABLE();
+        case Constant::kRpoNumber:
+          UNREACHABLE();  // TODO(titzer): loading RPO numbers on mips.
+          break;
+      }
+      if (destination->IsStackSlot()) __ sw(dst, g.ToMemOperand(destination));
+    } else if (src.type() == Constant::kFloat32) {
+      if (destination->IsFPStackSlot()) {
+        MemOperand dst = g.ToMemOperand(destination);
+        if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
+          __ sw(zero_reg, dst);
+        } else {
+          __ li(kScratchReg, Operand(bit_cast<int32_t>(src.ToFloat32())));
+          __ sw(kScratchReg, dst);
+        }
+      } else {
+        DCHECK(destination->IsFPRegister());
+        FloatRegister dst = g.ToSingleRegister(destination);
+        __ Move(dst, src.ToFloat32());
+      }
+    } else {
+      DCHECK_EQ(Constant::kFloat64, src.type());
+      DoubleRegister dst = destination->IsFPRegister()
+                               ? g.ToDoubleRegister(destination)
+                               : kScratchDoubleReg;
+      __ Move(dst, src.ToFloat64().value());
+      if (destination->IsFPStackSlot()) {
+        __ Sdc1(dst, g.ToMemOperand(destination));
+      }
+    }
+  } else if (source->IsFPRegister()) {
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      MSARegister src = g.ToSimd128Register(source);
+      if (destination->IsSimd128Register()) {
+        MSARegister dst = g.ToSimd128Register(destination);
+        __ move_v(dst, src);
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        __ st_b(src, g.ToMemOperand(destination));
+      }
+    } else {
+      FPURegister src = g.ToDoubleRegister(source);
+      if (destination->IsFPRegister()) {
+        FPURegister dst = g.ToDoubleRegister(destination);
+        __ Move(dst, src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep == MachineRepresentation::kFloat64) {
+          __ Sdc1(src, g.ToMemOperand(destination));
+        } else if (rep == MachineRepresentation::kFloat32) {
+          __ swc1(src, g.ToMemOperand(destination));
+        } else {
+          UNREACHABLE();
+        }
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (destination->IsFPRegister()) {
+      if (rep == MachineRepresentation::kFloat64) {
+        __ Ldc1(g.ToDoubleRegister(destination), src);
+      } else if (rep == MachineRepresentation::kFloat32) {
+        __ lwc1(g.ToDoubleRegister(destination), src);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+        CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+        __ ld_b(g.ToSimd128Register(destination), src);
+      }
+    } else {
+      FPURegister temp = kScratchDoubleReg;
+      if (rep == MachineRepresentation::kFloat64) {
+        __ Ldc1(temp, src);
+        __ Sdc1(temp, g.ToMemOperand(destination));
+      } else if (rep == MachineRepresentation::kFloat32) {
+        __ lwc1(temp, src);
+        __ swc1(temp, g.ToMemOperand(destination));
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+        CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+        MSARegister temp = kSimd128ScratchReg;
+        __ ld_b(temp, src);
+        __ st_b(temp, g.ToMemOperand(destination));
+      }
+    }
+  } else {
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  MipsOperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    // Register-register.
+    Register temp = kScratchReg;
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      Register dst = g.ToRegister(destination);
+      __ Move(temp, src);
+      __ Move(src, dst);
+      __ Move(dst, temp);
+    } else {
+      DCHECK(destination->IsStackSlot());
+      MemOperand dst = g.ToMemOperand(destination);
+      __ mov(temp, src);
+      __ lw(src, dst);
+      __ sw(temp, dst);
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsStackSlot());
+    Register temp_0 = kScratchReg;
+    Register temp_1 = kScratchReg2;
+    MemOperand src = g.ToMemOperand(source);
+    MemOperand dst = g.ToMemOperand(destination);
+    __ lw(temp_0, src);
+    __ lw(temp_1, dst);
+    __ sw(temp_0, dst);
+    __ sw(temp_1, src);
+  } else if (source->IsFPRegister()) {
+    if (destination->IsFPRegister()) {
+      MachineRepresentation rep =
+          LocationOperand::cast(source)->representation();
+      if (rep == MachineRepresentation::kSimd128) {
+        CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+        MSARegister temp = kSimd128ScratchReg;
+        MSARegister src = g.ToSimd128Register(source);
+        MSARegister dst = g.ToSimd128Register(destination);
+        __ move_v(temp, src);
+        __ move_v(src, dst);
+        __ move_v(dst, temp);
+      } else {
+        FPURegister temp = kScratchDoubleReg;
+        FPURegister src = g.ToDoubleRegister(source);
+        FPURegister dst = g.ToDoubleRegister(destination);
+        __ Move(temp, src);
+        __ Move(src, dst);
+        __ Move(dst, temp);
+      }
+    } else {
+      DCHECK(destination->IsFPStackSlot());
+      MemOperand dst = g.ToMemOperand(destination);
+      MachineRepresentation rep =
+          LocationOperand::cast(source)->representation();
+      if (rep == MachineRepresentation::kFloat64) {
+        FPURegister temp = kScratchDoubleReg;
+        FPURegister src = g.ToDoubleRegister(source);
+        __ Move(temp, src);
+        __ Ldc1(src, dst);
+        __ Sdc1(temp, dst);
+      } else if (rep == MachineRepresentation::kFloat32) {
+        FPURegister temp = kScratchDoubleReg;
+        FPURegister src = g.ToFloatRegister(source);
+        __ Move(temp, src);
+        __ lwc1(src, dst);
+        __ swc1(temp, dst);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+        CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+        MSARegister temp = kSimd128ScratchReg;
+        MSARegister src = g.ToSimd128Register(source);
+        __ move_v(temp, src);
+        __ ld_b(src, dst);
+        __ st_b(temp, dst);
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPStackSlot());
+    Register temp_0 = kScratchReg;
+    FPURegister temp_1 = kScratchDoubleReg;
+    MemOperand src0 = g.ToMemOperand(source);
+    MemOperand dst0 = g.ToMemOperand(destination);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kFloat64) {
+      MemOperand src1(src0.rm(), src0.offset() + kIntSize);
+      MemOperand dst1(dst0.rm(), dst0.offset() + kIntSize);
+      __ Ldc1(temp_1, dst0);  // Save destination in temp_1.
+      __ lw(temp_0, src0);    // Then use temp_0 to copy source to destination.
+      __ sw(temp_0, dst0);
+      __ lw(temp_0, src1);
+      __ sw(temp_0, dst1);
+      __ Sdc1(temp_1, src0);
+    } else if (rep == MachineRepresentation::kFloat32) {
+      __ lwc1(temp_1, dst0);  // Save destination in temp_1.
+      __ lw(temp_0, src0);    // Then use temp_0 to copy source to destination.
+      __ sw(temp_0, dst0);
+      __ swc1(temp_1, src0);
+    } else {
+      DCHECK_EQ(MachineRepresentation::kSimd128, rep);
+      MemOperand src1(src0.rm(), src0.offset() + kIntSize);
+      MemOperand dst1(dst0.rm(), dst0.offset() + kIntSize);
+      MemOperand src2(src0.rm(), src0.offset() + 2 * kIntSize);
+      MemOperand dst2(dst0.rm(), dst0.offset() + 2 * kIntSize);
+      MemOperand src3(src0.rm(), src0.offset() + 3 * kIntSize);
+      MemOperand dst3(dst0.rm(), dst0.offset() + 3 * kIntSize);
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      MSARegister temp_1 = kSimd128ScratchReg;
+      __ ld_b(temp_1, dst0);  // Save destination in temp_1.
+      __ lw(temp_0, src0);    // Then use temp_0 to copy source to destination.
+      __ sw(temp_0, dst0);
+      __ lw(temp_0, src1);
+      __ sw(temp_0, dst1);
+      __ lw(temp_0, src2);
+      __ sw(temp_0, dst2);
+      __ lw(temp_0, src3);
+      __ sw(temp_0, dst3);
+      __ st_b(temp_1, src0);
+    }
+  } else {
+    // No other combinations are possible.
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  // On 32-bit MIPS we emit the jump tables inline.
+  UNREACHABLE();
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/mips/instruction-codes-mips.h b/src/compiler/backend/mips/instruction-codes-mips.h
new file mode 100644
index 0000000..47d439a
--- /dev/null
+++ b/src/compiler/backend/mips/instruction-codes-mips.h
@@ -0,0 +1,373 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_MIPS_INSTRUCTION_CODES_MIPS_H_
+#define V8_COMPILER_BACKEND_MIPS_INSTRUCTION_CODES_MIPS_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// MIPS-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
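+//
+// Each V(Name) entry below is expected to expand to a kName value of the
+// architecture-independent ArchOpcode enumeration (defined outside this file).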
+#define TARGET_ARCH_OPCODE_LIST(V) \
+  V(MipsAdd)                       \
+  V(MipsAddOvf)                    \
+  V(MipsSub)                       \
+  V(MipsSubOvf)                    \
+  V(MipsMul)                       \
+  V(MipsMulOvf)                    \
+  V(MipsMulHigh)                   \
+  V(MipsMulHighU)                  \
+  V(MipsDiv)                       \
+  V(MipsDivU)                      \
+  V(MipsMod)                       \
+  V(MipsModU)                      \
+  V(MipsAnd)                       \
+  V(MipsOr)                        \
+  V(MipsNor)                       \
+  V(MipsXor)                       \
+  V(MipsClz)                       \
+  V(MipsCtz)                       \
+  V(MipsPopcnt)                    \
+  V(MipsLsa)                       \
+  V(MipsShl)                       \
+  V(MipsShr)                       \
+  V(MipsSar)                       \
+  V(MipsShlPair)                   \
+  V(MipsShrPair)                   \
+  V(MipsSarPair)                   \
+  V(MipsExt)                       \
+  V(MipsIns)                       \
+  V(MipsRor)                       \
+  V(MipsMov)                       \
+  V(MipsTst)                       \
+  V(MipsCmp)                       \
+  V(MipsCmpS)                      \
+  V(MipsAddS)                      \
+  V(MipsSubS)                      \
+  V(MipsMulS)                      \
+  V(MipsDivS)                      \
+  V(MipsModS)                      \
+  V(MipsAbsS)                      \
+  V(MipsSqrtS)                     \
+  V(MipsMaxS)                      \
+  V(MipsMinS)                      \
+  V(MipsCmpD)                      \
+  V(MipsAddD)                      \
+  V(MipsSubD)                      \
+  V(MipsMulD)                      \
+  V(MipsDivD)                      \
+  V(MipsModD)                      \
+  V(MipsAbsD)                      \
+  V(MipsSqrtD)                     \
+  V(MipsMaxD)                      \
+  V(MipsMinD)                      \
+  V(MipsNegS)                      \
+  V(MipsNegD)                      \
+  V(MipsAddPair)                   \
+  V(MipsSubPair)                   \
+  V(MipsMulPair)                   \
+  V(MipsMaddS)                     \
+  V(MipsMaddD)                     \
+  V(MipsMsubS)                     \
+  V(MipsMsubD)                     \
+  V(MipsFloat32RoundDown)          \
+  V(MipsFloat32RoundTruncate)      \
+  V(MipsFloat32RoundUp)            \
+  V(MipsFloat32RoundTiesEven)      \
+  V(MipsFloat64RoundDown)          \
+  V(MipsFloat64RoundTruncate)      \
+  V(MipsFloat64RoundUp)            \
+  V(MipsFloat64RoundTiesEven)      \
+  V(MipsCvtSD)                     \
+  V(MipsCvtDS)                     \
+  V(MipsTruncWD)                   \
+  V(MipsRoundWD)                   \
+  V(MipsFloorWD)                   \
+  V(MipsCeilWD)                    \
+  V(MipsTruncWS)                   \
+  V(MipsRoundWS)                   \
+  V(MipsFloorWS)                   \
+  V(MipsCeilWS)                    \
+  V(MipsTruncUwD)                  \
+  V(MipsTruncUwS)                  \
+  V(MipsCvtDW)                     \
+  V(MipsCvtDUw)                    \
+  V(MipsCvtSW)                     \
+  V(MipsCvtSUw)                    \
+  V(MipsLb)                        \
+  V(MipsLbu)                       \
+  V(MipsSb)                        \
+  V(MipsLh)                        \
+  V(MipsUlh)                       \
+  V(MipsLhu)                       \
+  V(MipsUlhu)                      \
+  V(MipsSh)                        \
+  V(MipsUsh)                       \
+  V(MipsLw)                        \
+  V(MipsUlw)                       \
+  V(MipsSw)                        \
+  V(MipsUsw)                       \
+  V(MipsLwc1)                      \
+  V(MipsUlwc1)                     \
+  V(MipsSwc1)                      \
+  V(MipsUswc1)                     \
+  V(MipsLdc1)                      \
+  V(MipsUldc1)                     \
+  V(MipsSdc1)                      \
+  V(MipsUsdc1)                     \
+  V(MipsFloat64ExtractLowWord32)   \
+  V(MipsFloat64ExtractHighWord32)  \
+  V(MipsFloat64InsertLowWord32)    \
+  V(MipsFloat64InsertHighWord32)   \
+  V(MipsFloat64SilenceNaN)         \
+  V(MipsFloat32Max)                \
+  V(MipsFloat64Max)                \
+  V(MipsFloat32Min)                \
+  V(MipsFloat64Min)                \
+  V(MipsPush)                      \
+  V(MipsPeek)                      \
+  V(MipsStoreToStackSlot)          \
+  V(MipsByteSwap32)                \
+  V(MipsStackClaim)                \
+  V(MipsSeb)                       \
+  V(MipsSeh)                       \
+  V(MipsSync)                      \
+  V(MipsS128Zero)                  \
+  V(MipsI32x4Splat)                \
+  V(MipsI32x4ExtractLane)          \
+  V(MipsI32x4ReplaceLane)          \
+  V(MipsI32x4Add)                  \
+  V(MipsI32x4AddHoriz)             \
+  V(MipsI32x4Sub)                  \
+  V(MipsF64x2Abs)                  \
+  V(MipsF64x2Neg)                  \
+  V(MipsF64x2Sqrt)                 \
+  V(MipsF64x2Add)                  \
+  V(MipsF64x2Sub)                  \
+  V(MipsF64x2Mul)                  \
+  V(MipsF64x2Div)                  \
+  V(MipsF64x2Min)                  \
+  V(MipsF64x2Max)                  \
+  V(MipsF64x2Eq)                   \
+  V(MipsF64x2Ne)                   \
+  V(MipsF64x2Lt)                   \
+  V(MipsF64x2Le)                   \
+  V(MipsF64x2Pmin)                 \
+  V(MipsF64x2Pmax)                 \
+  V(MipsF64x2Ceil)                 \
+  V(MipsF64x2Floor)                \
+  V(MipsF64x2Trunc)                \
+  V(MipsF64x2NearestInt)           \
+  V(MipsI64x2Add)                  \
+  V(MipsI64x2Sub)                  \
+  V(MipsI64x2Mul)                  \
+  V(MipsI64x2Neg)                  \
+  V(MipsI64x2Shl)                  \
+  V(MipsI64x2ShrS)                 \
+  V(MipsI64x2ShrU)                 \
+  V(MipsF32x4Splat)                \
+  V(MipsF32x4ExtractLane)          \
+  V(MipsF32x4ReplaceLane)          \
+  V(MipsF32x4SConvertI32x4)        \
+  V(MipsF32x4UConvertI32x4)        \
+  V(MipsI32x4Mul)                  \
+  V(MipsI32x4MaxS)                 \
+  V(MipsI32x4MinS)                 \
+  V(MipsI32x4Eq)                   \
+  V(MipsI32x4Ne)                   \
+  V(MipsI32x4Shl)                  \
+  V(MipsI32x4ShrS)                 \
+  V(MipsI32x4ShrU)                 \
+  V(MipsI32x4MaxU)                 \
+  V(MipsI32x4MinU)                 \
+  V(MipsF64x2Splat)                \
+  V(MipsF64x2ExtractLane)          \
+  V(MipsF64x2ReplaceLane)          \
+  V(MipsF32x4Abs)                  \
+  V(MipsF32x4Neg)                  \
+  V(MipsF32x4Sqrt)                 \
+  V(MipsF32x4RecipApprox)          \
+  V(MipsF32x4RecipSqrtApprox)      \
+  V(MipsF32x4Add)                  \
+  V(MipsF32x4AddHoriz)             \
+  V(MipsF32x4Sub)                  \
+  V(MipsF32x4Mul)                  \
+  V(MipsF32x4Div)                  \
+  V(MipsF32x4Max)                  \
+  V(MipsF32x4Min)                  \
+  V(MipsF32x4Eq)                   \
+  V(MipsF32x4Ne)                   \
+  V(MipsF32x4Lt)                   \
+  V(MipsF32x4Le)                   \
+  V(MipsF32x4Pmin)                 \
+  V(MipsF32x4Pmax)                 \
+  V(MipsF32x4Ceil)                 \
+  V(MipsF32x4Floor)                \
+  V(MipsF32x4Trunc)                \
+  V(MipsF32x4NearestInt)           \
+  V(MipsI32x4SConvertF32x4)        \
+  V(MipsI32x4UConvertF32x4)        \
+  V(MipsI32x4Neg)                  \
+  V(MipsI32x4GtS)                  \
+  V(MipsI32x4GeS)                  \
+  V(MipsI32x4GtU)                  \
+  V(MipsI32x4GeU)                  \
+  V(MipsI32x4Abs)                  \
+  V(MipsI32x4BitMask)              \
+  V(MipsI32x4DotI16x8S)            \
+  V(MipsI16x8Splat)                \
+  V(MipsI16x8ExtractLaneU)         \
+  V(MipsI16x8ExtractLaneS)         \
+  V(MipsI16x8ReplaceLane)          \
+  V(MipsI16x8Neg)                  \
+  V(MipsI16x8Shl)                  \
+  V(MipsI16x8ShrS)                 \
+  V(MipsI16x8ShrU)                 \
+  V(MipsI16x8Add)                  \
+  V(MipsI16x8AddSatS)              \
+  V(MipsI16x8AddHoriz)             \
+  V(MipsI16x8Sub)                  \
+  V(MipsI16x8SubSatS)              \
+  V(MipsI16x8Mul)                  \
+  V(MipsI16x8MaxS)                 \
+  V(MipsI16x8MinS)                 \
+  V(MipsI16x8Eq)                   \
+  V(MipsI16x8Ne)                   \
+  V(MipsI16x8GtS)                  \
+  V(MipsI16x8GeS)                  \
+  V(MipsI16x8AddSatU)              \
+  V(MipsI16x8SubSatU)              \
+  V(MipsI16x8MaxU)                 \
+  V(MipsI16x8MinU)                 \
+  V(MipsI16x8GtU)                  \
+  V(MipsI16x8GeU)                  \
+  V(MipsI16x8RoundingAverageU)     \
+  V(MipsI16x8Abs)                  \
+  V(MipsI16x8BitMask)              \
+  V(MipsI8x16Splat)                \
+  V(MipsI8x16ExtractLaneU)         \
+  V(MipsI8x16ExtractLaneS)         \
+  V(MipsI8x16ReplaceLane)          \
+  V(MipsI8x16Neg)                  \
+  V(MipsI8x16Shl)                  \
+  V(MipsI8x16ShrS)                 \
+  V(MipsI8x16Add)                  \
+  V(MipsI8x16AddSatS)              \
+  V(MipsI8x16Sub)                  \
+  V(MipsI8x16SubSatS)              \
+  V(MipsI8x16Mul)                  \
+  V(MipsI8x16MaxS)                 \
+  V(MipsI8x16MinS)                 \
+  V(MipsI8x16Eq)                   \
+  V(MipsI8x16Ne)                   \
+  V(MipsI8x16GtS)                  \
+  V(MipsI8x16GeS)                  \
+  V(MipsI8x16ShrU)                 \
+  V(MipsI8x16AddSatU)              \
+  V(MipsI8x16SubSatU)              \
+  V(MipsI8x16MaxU)                 \
+  V(MipsI8x16MinU)                 \
+  V(MipsI8x16GtU)                  \
+  V(MipsI8x16GeU)                  \
+  V(MipsI8x16RoundingAverageU)     \
+  V(MipsI8x16Abs)                  \
+  V(MipsI8x16BitMask)              \
+  V(MipsS128And)                   \
+  V(MipsS128Or)                    \
+  V(MipsS128Xor)                   \
+  V(MipsS128Not)                   \
+  V(MipsS128Select)                \
+  V(MipsS128AndNot)                \
+  V(MipsV32x4AnyTrue)              \
+  V(MipsV32x4AllTrue)              \
+  V(MipsV16x8AnyTrue)              \
+  V(MipsV16x8AllTrue)              \
+  V(MipsV8x16AnyTrue)              \
+  V(MipsV8x16AllTrue)              \
+  V(MipsS32x4InterleaveRight)      \
+  V(MipsS32x4InterleaveLeft)       \
+  V(MipsS32x4PackEven)             \
+  V(MipsS32x4PackOdd)              \
+  V(MipsS32x4InterleaveEven)       \
+  V(MipsS32x4InterleaveOdd)        \
+  V(MipsS32x4Shuffle)              \
+  V(MipsS16x8InterleaveRight)      \
+  V(MipsS16x8InterleaveLeft)       \
+  V(MipsS16x8PackEven)             \
+  V(MipsS16x8PackOdd)              \
+  V(MipsS16x8InterleaveEven)       \
+  V(MipsS16x8InterleaveOdd)        \
+  V(MipsS16x4Reverse)              \
+  V(MipsS16x2Reverse)              \
+  V(MipsS8x16InterleaveRight)      \
+  V(MipsS8x16InterleaveLeft)       \
+  V(MipsS8x16PackEven)             \
+  V(MipsS8x16PackOdd)              \
+  V(MipsS8x16InterleaveEven)       \
+  V(MipsS8x16InterleaveOdd)        \
+  V(MipsI8x16Shuffle)              \
+  V(MipsI8x16Swizzle)              \
+  V(MipsS8x16Concat)               \
+  V(MipsS8x8Reverse)               \
+  V(MipsS8x4Reverse)               \
+  V(MipsS8x2Reverse)               \
+  V(MipsS128Load8Splat)            \
+  V(MipsS128Load16Splat)           \
+  V(MipsS128Load32Splat)           \
+  V(MipsS128Load64Splat)           \
+  V(MipsS128Load8x8S)              \
+  V(MipsS128Load8x8U)              \
+  V(MipsS128Load16x4S)             \
+  V(MipsS128Load16x4U)             \
+  V(MipsS128Load32x2S)             \
+  V(MipsS128Load32x2U)             \
+  V(MipsMsaLd)                     \
+  V(MipsMsaSt)                     \
+  V(MipsI32x4SConvertI16x8Low)     \
+  V(MipsI32x4SConvertI16x8High)    \
+  V(MipsI32x4UConvertI16x8Low)     \
+  V(MipsI32x4UConvertI16x8High)    \
+  V(MipsI16x8SConvertI8x16Low)     \
+  V(MipsI16x8SConvertI8x16High)    \
+  V(MipsI16x8SConvertI32x4)        \
+  V(MipsI16x8UConvertI32x4)        \
+  V(MipsI16x8UConvertI8x16Low)     \
+  V(MipsI16x8UConvertI8x16High)    \
+  V(MipsI8x16SConvertI16x8)        \
+  V(MipsI8x16UConvertI16x8)        \
+  V(MipsWord32AtomicPairLoad)      \
+  V(MipsWord32AtomicPairStore)     \
+  V(MipsWord32AtomicPairAdd)       \
+  V(MipsWord32AtomicPairSub)       \
+  V(MipsWord32AtomicPairAnd)       \
+  V(MipsWord32AtomicPairOr)        \
+  V(MipsWord32AtomicPairXor)       \
+  V(MipsWord32AtomicPairExchange)  \
+  V(MipsWord32AtomicPairCompareExchange)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// R = register
+// O = register or stack slot
+// D = double register
+// I = immediate (handle, external, int32)
+// MRI = [register + immediate]
+// MRR = [register + register]
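+//
+// For example, a load with mode MRI reads from [base register + immediate
+// offset], while one with mode MRR reads from [base register + index register].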
+// TODO(plind): Add the new r6 address modes.
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MRI) /* [%r0 + K] */               \
+  V(MRR) /* [%r0 + %r1] */
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_MIPS_INSTRUCTION_CODES_MIPS_H_
diff --git a/src/compiler/backend/mips/instruction-scheduler-mips.cc b/src/compiler/backend/mips/instruction-scheduler-mips.cc
new file mode 100644
index 0000000..bf28eec
--- /dev/null
+++ b/src/compiler/backend/mips/instruction-scheduler-mips.cc
@@ -0,0 +1,1795 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kMipsAbsD:
+    case kMipsAbsS:
+    case kMipsAdd:
+    case kMipsAddD:
+    case kMipsAddOvf:
+    case kMipsAddPair:
+    case kMipsAddS:
+    case kMipsAnd:
+    case kMipsByteSwap32:
+    case kMipsCeilWD:
+    case kMipsCeilWS:
+    case kMipsClz:
+    case kMipsCmp:
+    case kMipsCmpD:
+    case kMipsCmpS:
+    case kMipsCtz:
+    case kMipsCvtDS:
+    case kMipsCvtDUw:
+    case kMipsCvtDW:
+    case kMipsCvtSD:
+    case kMipsCvtSUw:
+    case kMipsCvtSW:
+    case kMipsDiv:
+    case kMipsDivD:
+    case kMipsDivS:
+    case kMipsDivU:
+    case kMipsExt:
+    case kMipsF64x2Abs:
+    case kMipsF64x2Neg:
+    case kMipsF64x2Sqrt:
+    case kMipsF64x2Add:
+    case kMipsF64x2Sub:
+    case kMipsF64x2Mul:
+    case kMipsF64x2Div:
+    case kMipsF64x2Min:
+    case kMipsF64x2Max:
+    case kMipsF64x2Eq:
+    case kMipsF64x2Ne:
+    case kMipsF64x2Lt:
+    case kMipsF64x2Le:
+    case kMipsF64x2Splat:
+    case kMipsF64x2ExtractLane:
+    case kMipsF64x2ReplaceLane:
+    case kMipsF64x2Pmin:
+    case kMipsF64x2Pmax:
+    case kMipsF64x2Ceil:
+    case kMipsF64x2Floor:
+    case kMipsF64x2Trunc:
+    case kMipsF64x2NearestInt:
+    case kMipsI64x2Add:
+    case kMipsI64x2Sub:
+    case kMipsI64x2Mul:
+    case kMipsI64x2Neg:
+    case kMipsI64x2Shl:
+    case kMipsI64x2ShrS:
+    case kMipsI64x2ShrU:
+    case kMipsF32x4Abs:
+    case kMipsF32x4Add:
+    case kMipsF32x4AddHoriz:
+    case kMipsF32x4Eq:
+    case kMipsF32x4ExtractLane:
+    case kMipsF32x4Le:
+    case kMipsF32x4Lt:
+    case kMipsF32x4Max:
+    case kMipsF32x4Min:
+    case kMipsF32x4Mul:
+    case kMipsF32x4Div:
+    case kMipsF32x4Ne:
+    case kMipsF32x4Neg:
+    case kMipsF32x4Sqrt:
+    case kMipsF32x4RecipApprox:
+    case kMipsF32x4RecipSqrtApprox:
+    case kMipsF32x4ReplaceLane:
+    case kMipsF32x4SConvertI32x4:
+    case kMipsF32x4Splat:
+    case kMipsF32x4Sub:
+    case kMipsF32x4UConvertI32x4:
+    case kMipsF32x4Pmin:
+    case kMipsF32x4Pmax:
+    case kMipsF32x4Ceil:
+    case kMipsF32x4Floor:
+    case kMipsF32x4Trunc:
+    case kMipsF32x4NearestInt:
+    case kMipsFloat32Max:
+    case kMipsFloat32Min:
+    case kMipsFloat32RoundDown:
+    case kMipsFloat32RoundTiesEven:
+    case kMipsFloat32RoundTruncate:
+    case kMipsFloat32RoundUp:
+    case kMipsFloat64ExtractHighWord32:
+    case kMipsFloat64ExtractLowWord32:
+    case kMipsFloat64InsertHighWord32:
+    case kMipsFloat64InsertLowWord32:
+    case kMipsFloat64Max:
+    case kMipsFloat64Min:
+    case kMipsFloat64RoundDown:
+    case kMipsFloat64RoundTiesEven:
+    case kMipsFloat64RoundTruncate:
+    case kMipsFloat64RoundUp:
+    case kMipsFloat64SilenceNaN:
+    case kMipsFloorWD:
+    case kMipsFloorWS:
+    case kMipsI16x8Add:
+    case kMipsI16x8AddHoriz:
+    case kMipsI16x8AddSatS:
+    case kMipsI16x8AddSatU:
+    case kMipsI16x8Eq:
+    case kMipsI16x8ExtractLaneU:
+    case kMipsI16x8ExtractLaneS:
+    case kMipsI16x8GeS:
+    case kMipsI16x8GeU:
+    case kMipsI16x8RoundingAverageU:
+    case kMipsI16x8GtS:
+    case kMipsI16x8GtU:
+    case kMipsI16x8MaxS:
+    case kMipsI16x8MaxU:
+    case kMipsI16x8MinS:
+    case kMipsI16x8MinU:
+    case kMipsI16x8Mul:
+    case kMipsI16x8Ne:
+    case kMipsI16x8Neg:
+    case kMipsI16x8ReplaceLane:
+    case kMipsI16x8SConvertI32x4:
+    case kMipsI16x8SConvertI8x16High:
+    case kMipsI16x8SConvertI8x16Low:
+    case kMipsI16x8Shl:
+    case kMipsI16x8ShrS:
+    case kMipsI16x8ShrU:
+    case kMipsI16x8Splat:
+    case kMipsI16x8Sub:
+    case kMipsI16x8SubSatS:
+    case kMipsI16x8SubSatU:
+    case kMipsI16x8UConvertI32x4:
+    case kMipsI16x8UConvertI8x16High:
+    case kMipsI16x8UConvertI8x16Low:
+    case kMipsI16x8Abs:
+    case kMipsI16x8BitMask:
+    case kMipsI32x4Add:
+    case kMipsI32x4AddHoriz:
+    case kMipsI32x4Eq:
+    case kMipsI32x4ExtractLane:
+    case kMipsI32x4GeS:
+    case kMipsI32x4GeU:
+    case kMipsI32x4GtS:
+    case kMipsI32x4GtU:
+    case kMipsI32x4MaxS:
+    case kMipsI32x4MaxU:
+    case kMipsI32x4MinS:
+    case kMipsI32x4MinU:
+    case kMipsI32x4Mul:
+    case kMipsI32x4Ne:
+    case kMipsI32x4Neg:
+    case kMipsI32x4ReplaceLane:
+    case kMipsI32x4SConvertF32x4:
+    case kMipsI32x4SConvertI16x8High:
+    case kMipsI32x4SConvertI16x8Low:
+    case kMipsI32x4Shl:
+    case kMipsI32x4ShrS:
+    case kMipsI32x4ShrU:
+    case kMipsI32x4Splat:
+    case kMipsI32x4Sub:
+    case kMipsI32x4UConvertF32x4:
+    case kMipsI32x4UConvertI16x8High:
+    case kMipsI32x4UConvertI16x8Low:
+    case kMipsI32x4Abs:
+    case kMipsI32x4BitMask:
+    case kMipsI32x4DotI16x8S:
+    case kMipsI8x16Add:
+    case kMipsI8x16AddSatS:
+    case kMipsI8x16AddSatU:
+    case kMipsI8x16Eq:
+    case kMipsI8x16ExtractLaneU:
+    case kMipsI8x16ExtractLaneS:
+    case kMipsI8x16GeS:
+    case kMipsI8x16GeU:
+    case kMipsI8x16RoundingAverageU:
+    case kMipsI8x16GtS:
+    case kMipsI8x16GtU:
+    case kMipsI8x16MaxS:
+    case kMipsI8x16MaxU:
+    case kMipsI8x16MinS:
+    case kMipsI8x16MinU:
+    case kMipsI8x16Mul:
+    case kMipsI8x16Ne:
+    case kMipsI8x16Neg:
+    case kMipsI8x16ReplaceLane:
+    case kMipsI8x16SConvertI16x8:
+    case kMipsI8x16Shl:
+    case kMipsI8x16ShrS:
+    case kMipsI8x16ShrU:
+    case kMipsI8x16Splat:
+    case kMipsI8x16Sub:
+    case kMipsI8x16SubSatS:
+    case kMipsI8x16SubSatU:
+    case kMipsI8x16UConvertI16x8:
+    case kMipsI8x16Abs:
+    case kMipsI8x16BitMask:
+    case kMipsIns:
+    case kMipsLsa:
+    case kMipsMaddD:
+    case kMipsMaddS:
+    case kMipsMaxD:
+    case kMipsMaxS:
+    case kMipsMinD:
+    case kMipsMinS:
+    case kMipsMod:
+    case kMipsModU:
+    case kMipsMov:
+    case kMipsMsubD:
+    case kMipsMsubS:
+    case kMipsMul:
+    case kMipsMulD:
+    case kMipsMulHigh:
+    case kMipsMulHighU:
+    case kMipsMulOvf:
+    case kMipsMulPair:
+    case kMipsMulS:
+    case kMipsNegD:
+    case kMipsNegS:
+    case kMipsNor:
+    case kMipsOr:
+    case kMipsPopcnt:
+    case kMipsRor:
+    case kMipsRoundWD:
+    case kMipsRoundWS:
+    case kMipsS128And:
+    case kMipsS128Not:
+    case kMipsS128Or:
+    case kMipsS128Select:
+    case kMipsS128Xor:
+    case kMipsS128Zero:
+    case kMipsS128AndNot:
+    case kMipsS16x2Reverse:
+    case kMipsS16x4Reverse:
+    case kMipsS16x8InterleaveEven:
+    case kMipsS16x8InterleaveLeft:
+    case kMipsS16x8InterleaveOdd:
+    case kMipsS16x8InterleaveRight:
+    case kMipsS16x8PackEven:
+    case kMipsS16x8PackOdd:
+    case kMipsV8x16AllTrue:
+    case kMipsV8x16AnyTrue:
+    case kMipsV32x4AllTrue:
+    case kMipsV32x4AnyTrue:
+    case kMipsV16x8AllTrue:
+    case kMipsV16x8AnyTrue:
+    case kMipsS32x4InterleaveEven:
+    case kMipsS32x4InterleaveLeft:
+    case kMipsS32x4InterleaveOdd:
+    case kMipsS32x4InterleaveRight:
+    case kMipsS32x4PackEven:
+    case kMipsS32x4PackOdd:
+    case kMipsS32x4Shuffle:
+    case kMipsS8x16Concat:
+    case kMipsS8x16InterleaveEven:
+    case kMipsS8x16InterleaveLeft:
+    case kMipsS8x16InterleaveOdd:
+    case kMipsS8x16InterleaveRight:
+    case kMipsS8x16PackEven:
+    case kMipsS8x16PackOdd:
+    case kMipsI8x16Shuffle:
+    case kMipsI8x16Swizzle:
+    case kMipsS8x2Reverse:
+    case kMipsS8x4Reverse:
+    case kMipsS8x8Reverse:
+    case kMipsSar:
+    case kMipsSarPair:
+    case kMipsSeb:
+    case kMipsSeh:
+    case kMipsShl:
+    case kMipsShlPair:
+    case kMipsShr:
+    case kMipsShrPair:
+    case kMipsSqrtD:
+    case kMipsSqrtS:
+    case kMipsSub:
+    case kMipsSubD:
+    case kMipsSubOvf:
+    case kMipsSubPair:
+    case kMipsSubS:
+    case kMipsTruncUwD:
+    case kMipsTruncUwS:
+    case kMipsTruncWD:
+    case kMipsTruncWS:
+    case kMipsTst:
+    case kMipsXor:
+      return kNoOpcodeFlags;
+
+    case kMipsLb:
+    case kMipsLbu:
+    case kMipsLdc1:
+    case kMipsLh:
+    case kMipsLhu:
+    case kMipsLw:
+    case kMipsLwc1:
+    case kMipsMsaLd:
+    case kMipsPeek:
+    case kMipsUldc1:
+    case kMipsUlh:
+    case kMipsUlhu:
+    case kMipsUlw:
+    case kMipsUlwc1:
+    case kMipsS128Load8Splat:
+    case kMipsS128Load16Splat:
+    case kMipsS128Load32Splat:
+    case kMipsS128Load64Splat:
+    case kMipsS128Load8x8S:
+    case kMipsS128Load8x8U:
+    case kMipsS128Load16x4S:
+    case kMipsS128Load16x4U:
+    case kMipsS128Load32x2S:
+    case kMipsS128Load32x2U:
+    case kMipsWord32AtomicPairLoad:
+      return kIsLoadOperation;
+
+    case kMipsModD:
+    case kMipsModS:
+    case kMipsMsaSt:
+    case kMipsPush:
+    case kMipsSb:
+    case kMipsSdc1:
+    case kMipsSh:
+    case kMipsStackClaim:
+    case kMipsStoreToStackSlot:
+    case kMipsSw:
+    case kMipsSwc1:
+    case kMipsUsdc1:
+    case kMipsUsh:
+    case kMipsUsw:
+    case kMipsUswc1:
+    case kMipsSync:
+    case kMipsWord32AtomicPairStore:
+    case kMipsWord32AtomicPairAdd:
+    case kMipsWord32AtomicPairSub:
+    case kMipsWord32AtomicPairAnd:
+    case kMipsWord32AtomicPairOr:
+    case kMipsWord32AtomicPairXor:
+    case kMipsWord32AtomicPairExchange:
+    case kMipsWord32AtomicPairCompareExchange:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+enum Latency {
+  BRANCH = 4,  // Estimated max.
+  RINT_S = 4,  // Estimated.
+  RINT_D = 4,  // Estimated.
+
+  MULT = 4,
+  MULTU = 4,
+  MADD = 4,
+  MADDU = 4,
+  MSUB = 4,
+  MSUBU = 4,
+
+  MUL = 7,
+  MULU = 7,
+  MUH = 7,
+  MUHU = 7,
+
+  DIV = 50,  // Min:11 Max:50
+  DIVU = 50,
+
+  ABS_S = 4,
+  ABS_D = 4,
+  NEG_S = 4,
+  NEG_D = 4,
+  ADD_S = 4,
+  ADD_D = 4,
+  SUB_S = 4,
+  SUB_D = 4,
+  MAX_S = 4,  // Estimated.
+  MAX_D = 4,  // Estimated.
+  C_cond_S = 4,
+  C_cond_D = 4,
+  MUL_S = 4,
+
+  MADD_S = 4,
+  MSUB_S = 4,
+  NMADD_S = 4,
+  NMSUB_S = 4,
+
+  CABS_cond_S = 4,
+  CABS_cond_D = 4,
+
+  CVT_D_S = 4,
+  CVT_PS_PW = 4,
+
+  CVT_S_W = 4,
+  CVT_S_L = 4,
+  CVT_D_W = 4,
+  CVT_D_L = 4,
+
+  CVT_S_D = 4,
+
+  CVT_W_S = 4,
+  CVT_W_D = 4,
+  CVT_L_S = 4,
+  CVT_L_D = 4,
+
+  CEIL_W_S = 4,
+  CEIL_W_D = 4,
+  CEIL_L_S = 4,
+  CEIL_L_D = 4,
+
+  FLOOR_W_S = 4,
+  FLOOR_W_D = 4,
+  FLOOR_L_S = 4,
+  FLOOR_L_D = 4,
+
+  ROUND_W_S = 4,
+  ROUND_W_D = 4,
+  ROUND_L_S = 4,
+  ROUND_L_D = 4,
+
+  TRUNC_W_S = 4,
+  TRUNC_W_D = 4,
+  TRUNC_L_S = 4,
+  TRUNC_L_D = 4,
+
+  MOV_S = 4,
+  MOV_D = 4,
+
+  MOVF_S = 4,
+  MOVF_D = 4,
+
+  MOVN_S = 4,
+  MOVN_D = 4,
+
+  MOVT_S = 4,
+  MOVT_D = 4,
+
+  MOVZ_S = 4,
+  MOVZ_D = 4,
+
+  MUL_D = 5,
+  MADD_D = 5,
+  MSUB_D = 5,
+  NMADD_D = 5,
+  NMSUB_D = 5,
+
+  RECIP_S = 13,
+  RECIP_D = 26,
+
+  RSQRT_S = 17,
+  RSQRT_D = 36,
+
+  DIV_S = 17,
+  SQRT_S = 17,
+
+  DIV_D = 32,
+  SQRT_D = 32,
+
+  MTC1 = 4,
+  MTHC1 = 4,
+  DMTC1 = 4,
+  LWC1 = 4,
+  LDC1 = 4,
+  LDXC1 = 4,
+  LUXC1 = 4,
+  LWXC1 = 4,
+
+  MFC1 = 1,
+  MFHC1 = 1,
+  MFHI = 1,
+  MFLO = 1,
+  DMFC1 = 1,
+  SWC1 = 1,
+  SDC1 = 1,
+  SDXC1 = 1,
+  SUXC1 = 1,
+  SWXC1 = 1,
+};
+
+int ClzLatency() {
+  if (IsMipsArchVariant(kLoongson)) {
+    return (6 + 2 * Latency::BRANCH);
+  } else {
+    return 1;
+  }
+}
+
+int RorLatency(bool is_operand_register = true) {
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    if (is_operand_register) {
+      return 4;
+    } else {
+      return 3;  // Estimated max.
+    }
+  }
+}
+
+int AdduLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;  // Estimated max.
+  }
+}
+
+int XorLatency(bool is_operand_register = true) {
+  return AdduLatency(is_operand_register);
+}
+
+int AndLatency(bool is_operand_register = true) {
+  return AdduLatency(is_operand_register);
+}
+
+int OrLatency(bool is_operand_register = true) {
+  return AdduLatency(is_operand_register);
+}
+
+int SubuLatency(bool is_operand_register = true) {
+  return AdduLatency(is_operand_register);
+}
+
+int MulLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (IsMipsArchVariant(kLoongson)) {
+      return Latency::MULT + 1;
+    } else {
+      return Latency::MUL + 1;
+    }
+  } else {
+    if (IsMipsArchVariant(kLoongson)) {
+      return Latency::MULT + 2;
+    } else {
+      return Latency::MUL + 2;
+    }
+  }
+}
+
+int NorLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
+int InsLatency() {
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return SubuLatency(false) + 7;
+  }
+}
+
+int ShlPairLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    int latency =
+        AndLatency(false) + NorLatency() + OrLatency() + AndLatency(false) + 4;
+    if (IsMipsArchVariant(kLoongson) || IsMipsArchVariant(kMips32r6)) {
+      return latency + Latency::BRANCH + 2;
+    } else {
+      return latency + 2;
+    }
+  } else {
+    return 2;
+  }
+}
+
+int ShrPairLatency(bool is_operand_register = true, uint32_t shift = 0) {
+  if (is_operand_register) {
+    int latency =
+        AndLatency(false) + NorLatency() + OrLatency() + AndLatency(false) + 4;
+    if (IsMipsArchVariant(kLoongson) || IsMipsArchVariant(kMips32r6)) {
+      return latency + Latency::BRANCH + 2;
+    } else {
+      return latency + 2;
+    }
+  } else {
+    // Estimated max.
+    return (InsLatency() + 2 > OrLatency() + 3) ? InsLatency() + 2
+                                                : OrLatency() + 3;
+  }
+}
+
+int SarPairLatency(bool is_operand_register = true, uint32_t shift = 0) {
+  if (is_operand_register) {
+    return AndLatency(false) + NorLatency() + OrLatency() + AndLatency(false) +
+           Latency::BRANCH + 6;
+  } else {
+    shift = shift & 0x3F;
+    if (shift == 0) {
+      return 2;
+    } else if (shift < 32) {
+      if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+        return InsLatency() + 2;
+      } else {
+        return OrLatency() + 3;
+      }
+    } else if (shift == 32) {
+      return 2;
+    } else {
+      return 2;
+    }
+  }
+}
+
+int ExtLatency() {
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    // Estimated max.
+    return 2;
+  }
+}
+
+int LsaLatency() {
+  // Estimated max.
+  return AdduLatency() + 1;
+}
+
+int SltLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;  // Estimated max.
+  }
+}
+
+int SltuLatency(bool is_operand_register = true) {
+  return SltLatency(is_operand_register);
+}
+
+int AddPairLatency() { return 3 * AdduLatency() + SltLatency(); }
+
+int SubPairLatency() { return SltuLatency() + 3 * SubuLatency(); }
+
+int MuluLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (!is_operand_register) latency++;
+  if (!IsMipsArchVariant(kMips32r6)) {
+    return latency + Latency::MULTU + 2;
+  } else {
+    return latency + Latency::MULU + Latency::MUHU;
+  }
+}
+
+int MulPairLatency() {
+  return MuluLatency() + 2 * MulLatency() + 2 * AdduLatency();
+}
+
+int MaddSLatency() {
+  if (IsMipsArchVariant(kMips32r2)) {
+    return Latency::MADD_D;
+  } else {
+    return Latency::MUL_D + Latency::ADD_D;
+  }
+}
+
+int MaddDLatency() {
+  if (IsMipsArchVariant(kMips32r2)) {
+    return Latency::MADD_D;
+  } else {
+    return Latency::MUL_D + Latency::ADD_D;
+  }
+}
+
+int MsubSLatency() {
+  if (IsMipsArchVariant(kMips32r2)) {
+    return Latency::MSUB_S;
+  } else {
+    return Latency::MUL_S + Latency::SUB_S;
+  }
+}
+
+int MsubDLatency() {
+  if (IsMipsArchVariant(kMips32r2)) {
+    return Latency::MSUB_D;
+  } else {
+    return Latency::MUL_D + Latency::SUB_D;
+  }
+}
+
+int Mfhc1Latency() {
+  if (IsFp32Mode()) {
+    return Latency::MFC1;
+  } else {
+    return 1;
+  }
+}
+
+int Mthc1Latency() {
+  if (IsFp32Mode()) {
+    return Latency::MTC1;
+  } else {
+    return 1;
+  }
+}
+
+int MoveLatency(bool is_double_register = true) {
+  if (!is_double_register) {
+    return Latency::MTC1 + 1;
+  } else {
+    return Mthc1Latency() + 1;  // Estimated.
+  }
+}
+
+int Float64RoundLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::RINT_D + 4;
+  } else {
+    // For ceil_l_d, floor_l_d, round_l_d, trunc_l_d latency is 4.
+    return Mfhc1Latency() + ExtLatency() + Latency::BRANCH + Latency::MOV_D +
+           4 + MoveLatency() + 1 + Latency::BRANCH + Latency::CVT_D_L;
+  }
+}
+
+int Float32RoundLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::RINT_S + 4;
+  } else {
+    // For ceil_w_s, floor_w_s, round_w_s, trunc_w_s latency is 4.
+    return Latency::MFC1 + ExtLatency() + Latency::BRANCH + Latency::MOV_S + 4 +
+           Latency::MFC1 + Latency::BRANCH + Latency::CVT_S_W;
+  }
+}
+
+int CvtDUwLatency() {
+  if (IsFp64Mode()) {
+    return Latency::MTC1 + Mthc1Latency() + Latency::CVT_D_L;
+  } else {
+    return Latency::BRANCH + Latency::MTC1 + 1 + Latency::MTC1 +
+           Mthc1Latency() + Latency::CVT_D_W + Latency::BRANCH +
+           Latency::ADD_D + Latency::CVT_D_W;
+  }
+}
+
+int CvtSUwLatency() { return CvtDUwLatency() + Latency::CVT_S_D; }
+
+int Floor_w_dLatency() {
+  if (IsMipsArchVariant(kLoongson)) {
+    return Mfhc1Latency() + Latency::FLOOR_W_D + Mthc1Latency();
+  } else {
+    return Latency::FLOOR_W_D;
+  }
+}
+
+int FloorWDLatency() { return Floor_w_dLatency() + Latency::MFC1; }
+
+int Ceil_w_dLatency() {
+  if (IsMipsArchVariant(kLoongson)) {
+    return Mfhc1Latency() + Latency::CEIL_W_D + Mthc1Latency();
+  } else {
+    return Latency::CEIL_W_D;
+  }
+}
+
+int CeilWDLatency() { return Ceil_w_dLatency() + Latency::MFC1; }
+
+int Round_w_dLatency() {
+  if (IsMipsArchVariant(kLoongson)) {
+    return Mfhc1Latency() + Latency::ROUND_W_D + Mthc1Latency();
+  } else {
+    return Latency::ROUND_W_D;
+  }
+}
+
+int RoundWDLatency() { return Round_w_dLatency() + Latency::MFC1; }
+
+int Trunc_w_dLatency() {
+  if (IsMipsArchVariant(kLoongson)) {
+    return Mfhc1Latency() + Latency::TRUNC_W_D + Mthc1Latency();
+  } else {
+    return Latency::TRUNC_W_D;
+  }
+}
+
+int MovnLatency() {
+  if (IsMipsArchVariant(kLoongson) || IsMipsArchVariant(kMips32r6)) {
+    return Latency::BRANCH + 1;
+  } else {
+    return 1;
+  }
+}
+
+int Trunc_uw_dLatency() {
+  return 1 + Latency::MTC1 + Mthc1Latency() + Latency::BRANCH + Latency::SUB_D +
+         Latency::TRUNC_W_D + Latency::MFC1 + OrLatency(false) +
+         Latency::BRANCH + Latency::TRUNC_W_D + Latency::MFC1;
+}
+
+int Trunc_uw_sLatency() {
+  return 1 + Latency::MTC1 + Latency::BRANCH + Latency::SUB_S +
+         Latency::TRUNC_W_S + Latency::MFC1 + OrLatency(false) +
+         Latency::TRUNC_W_S + Latency::MFC1;
+}
+
+int MovzLatency() {
+  if (IsMipsArchVariant(kLoongson) || IsMipsArchVariant(kMips32r6)) {
+    return Latency::BRANCH + 1;
+  } else {
+    return 1;
+  }
+}
+
+int FmoveLowLatency() {
+  if (IsFp32Mode()) {
+    return Latency::MTC1;
+  } else {
+    return Latency::MFHC1 + Latency::MTC1 + Latency::MTHC1;
+  }
+}
+
+int SebLatency() {
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
+int SehLatency() {
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return 2;
+  }
+}
+
+int UlhuLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return 4;
+  }
+}
+
+int UlhLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return 4;
+  }
+}
+
+int AdjustBaseAndOffsetLatency() {
+  return 3;  // Estimated max.
+}
+
+int UshLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return AdjustBaseAndOffsetLatency() + 4;  // Estimated max.
+  }
+}
+
+int UlwLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return AdjustBaseAndOffsetLatency() + 3;  // Estimated max.
+  }
+}
+
+int UswLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return 1;
+  } else {
+    return AdjustBaseAndOffsetLatency() + 2;
+  }
+}
+
+int Ulwc1Latency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::LWC1;
+  } else {
+    return UlwLatency() + Latency::MTC1;
+  }
+}
+
+int Uswc1Latency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::SWC1;
+  } else {
+    return Latency::MFC1 + UswLatency();
+  }
+}
+
+int Ldc1Latency() {
+  int latency = AdjustBaseAndOffsetLatency() + Latency::LWC1;
+  if (IsFp32Mode()) {
+    return latency + Latency::LWC1;
+  } else {
+    return latency + 1 + Mthc1Latency();
+  }
+}
+
+int Uldc1Latency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Ldc1Latency();
+  } else {
+    return 2 * UlwLatency() + Latency::MTC1 + Mthc1Latency();
+  }
+}
+
+int Sdc1Latency() {
+  int latency = AdjustBaseAndOffsetLatency() + Latency::SWC1;
+  if (IsFp32Mode()) {
+    return latency + Latency::SWC1;
+  } else {
+    return latency + Mfhc1Latency() + 1;
+  }
+}
+
+int Usdc1Latency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Sdc1Latency();
+  } else {
+    return Latency::MFC1 + 2 * UswLatency() + Mfhc1Latency();
+  }
+}
+
+int PushRegisterLatency() { return AdduLatency(false) + 1; }
+
+int ByteSwapSignedLatency() {
+  // operand_size == 4
+  if (IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) {
+    return 2;
+  } else if (IsMipsArchVariant(kMips32r1) || IsMipsArchVariant(kLoongson)) {
+    return 10;
+  }
+  UNREACHABLE();
+}
+
+int LlLatency(int offset) {
+  bool is_one_instruction =
+      IsMipsArchVariant(kMips32r6) ? is_int9(offset) : is_int16(offset);
+  if (is_one_instruction) {
+    return 1;
+  } else {
+    return 3;
+  }
+}
+
+int ExtractBitsLatency(int size, bool sign_extend) {
+  int latency = 1 + ExtLatency();
+  if (size == 8) {
+    if (sign_extend) {
+      return latency + SebLatency();
+    } else {
+      return 0;
+    }
+  } else if (size == 16) {
+    if (sign_extend) {
+      return latency + SehLatency();
+    } else {
+      return 0;
+    }
+  } else {
+    UNREACHABLE();
+  }
+}
+
+int NegLatency() { return 1; }
+
+int InsertBitsLatency() {
+  return RorLatency() + InsLatency() + SubuLatency(false) + NegLatency() +
+         RorLatency();
+}
+
+int ScLatency(int offset) {
+  bool is_one_instruction =
+      IsMipsArchVariant(kMips32r6) ? is_int9(offset) : is_int16(offset);
+  if (is_one_instruction) {
+    return 1;
+  } else {
+    return 3;
+  }
+}
+
+int BranchShortHelperR6Latency() {
+  return 2;  // Estimated max.
+}
+
+int BranchShortHelperLatency() {
+  return SltLatency() + 2;  // Estimated max.
+}
+
+int BranchShortLatency(BranchDelaySlot bdslot = PROTECT) {
+  if (IsMipsArchVariant(kMips32r6) && bdslot == PROTECT) {
+    return BranchShortHelperR6Latency();
+  } else {
+    return BranchShortHelperLatency();
+  }
+}
+
+int Word32AtomicExchangeLatency(bool sign_extend, int size) {
+  return AdduLatency() + 1 + SubuLatency() + 2 + LlLatency(0) +
+         ExtractBitsLatency(size, sign_extend) + InsertBitsLatency() +
+         ScLatency(0) + BranchShortLatency() + 1;
+}
+
+int Word32AtomicCompareExchangeLatency(bool sign_extend, int size) {
+  return AdduLatency() + 1 + SubuLatency() + 2 + LlLatency(0) +
+         ExtractBitsLatency(size, sign_extend) + BranchShortLatency() + 1;
+}
+
+int AddOverflowLatency() {
+  return 6;  // Estimated max.
+}
+
+int SubOverflowLatency() {
+  return 6;  // Estimated max.
+}
+
+int MulhLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return Latency::MULT + Latency::MFHI;
+    } else {
+      return Latency::MUH;
+    }
+  } else {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return 1 + Latency::MULT + Latency::MFHI;
+    } else {
+      return 1 + Latency::MUH;
+    }
+  }
+}
+
+int MulhuLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return Latency::MULTU + Latency::MFHI;
+    } else {
+      return Latency::MUHU;
+    }
+  } else {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return 1 + Latency::MULTU + Latency::MFHI;
+    } else {
+      return 1 + Latency::MUHU;
+    }
+  }
+}
+
+int MulOverflowLatency() {
+  return MulLatency() + 4;  // Estimated max.
+}
+
+int ModLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return Latency::DIV + Latency::MFHI;
+    } else {
+      return 1;
+    }
+  } else {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return 1 + Latency::DIV + Latency::MFHI;
+    } else {
+      return 2;
+    }
+  }
+}
+
+int ModuLatency(bool is_operand_register = true) {
+  return ModLatency(is_operand_register);
+}
+
+int DivLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return Latency::DIV + Latency::MFLO;
+    } else {
+      return Latency::DIV;
+    }
+  } else {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return 1 + Latency::DIV + Latency::MFLO;
+    } else {
+      return 1 + Latency::DIV;
+    }
+  }
+}
+
+int DivuLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return Latency::DIVU + Latency::MFLO;
+    } else {
+      return Latency::DIVU;
+    }
+  } else {
+    if (!IsMipsArchVariant(kMips32r6)) {
+      return 1 + Latency::DIVU + Latency::MFLO;
+    } else {
+      return 1 + Latency::DIVU;
+    }
+  }
+}
+
+int CtzLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return RorLatency(false) + 2 + ClzLatency();
+  } else {
+    return AdduLatency(false) + XorLatency() + AndLatency() + ClzLatency() + 1 +
+           SubuLatency();
+  }
+}
+
+int PopcntLatency() {
+  return 4 * AndLatency() + SubuLatency() + 2 * AdduLatency() + MulLatency() +
+         8;
+}
+
+int CompareFLatency() { return Latency::C_cond_S; }
+
+int CompareIsNanFLatency() { return CompareFLatency(); }
+
+int CompareIsNanF32Latency() { return CompareIsNanFLatency(); }
+
+int Neg_sLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::NEG_S;
+  } else {
+    // Estimated.
+    return CompareIsNanF32Latency() + 2 * Latency::BRANCH + Latency::NEG_S +
+           Latency::MFC1 + 1 + XorLatency() + Latency::MTC1;
+  }
+}
+
+int CompareIsNanF64Latency() { return CompareIsNanFLatency(); }
+
+int Neg_dLatency() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return Latency::NEG_D;
+  } else {
+    // Estimated.
+    return CompareIsNanF64Latency() + 2 * Latency::BRANCH + Latency::NEG_D +
+           Mfhc1Latency() + 1 + XorLatency() + Mthc1Latency();
+  }
+}
+
+int CompareF32Latency() { return CompareFLatency(); }
+
+int Move_sLatency() {
+  return Latency::MOV_S;  // Estimated max.
+}
+
+int Float32MaxLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF32Latency() + Latency::BRANCH;
+  if (IsMipsArchVariant(kMips32r6)) {
+    return latency + Latency::MAX_S;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
+           Latency::MFC1 + Move_sLatency();
+  }
+}
+
+int CompareF64Latency() { return CompareF32Latency(); }
+
+int Move_dLatency() {
+  return Latency::MOV_D;  // Estimated max.
+}
+
+int Float64MaxLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF64Latency() + Latency::BRANCH;
+  if (IsMipsArchVariant(kMips32r6)) {
+    return latency + Latency::MAX_D;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF64Latency() +
+           Latency::MFHC1 + 2 * Move_dLatency();
+  }
+}
+
+int PrepareCallCFunctionLatency() {
+  int frame_alignment = TurboAssembler::ActivationFrameAlignment();
+  if (frame_alignment > kSystemPointerSize) {
+    return 1 + SubuLatency(false) + AndLatency(false) + 1;
+  } else {
+    return SubuLatency(false);
+  }
+}
+
+int MovToFloatParametersLatency() { return 2 * MoveLatency(); }
+
+int CallLatency() {
+  // Estimated.
+  return AdduLatency(false) + Latency::BRANCH + 3;
+}
+
+int CallCFunctionHelperLatency() {
+  // Estimated.
+  int latency = AndLatency(false) + Latency::BRANCH + 2 + CallLatency();
+  if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
+    latency++;
+  } else {
+    latency += AdduLatency(false);
+  }
+  return latency;
+}
+
+int CallCFunctionLatency() { return 1 + CallCFunctionHelperLatency(); }
+
+int MovFromFloatResultLatency() { return MoveLatency(); }
+
+int Float32MinLatency() {
+  // Estimated max.
+  return CompareIsNanF32Latency() + Latency::BRANCH +
+         2 * (CompareF32Latency() + Latency::BRANCH) + Latency::MFC1 +
+         2 * Latency::BRANCH + Move_sLatency();
+}
+
+int Float64MinLatency() {
+  // Estimated max.
+  return CompareIsNanF64Latency() + Latency::BRANCH +
+         2 * (CompareF64Latency() + Latency::BRANCH) + Mfhc1Latency() +
+         2 * Latency::BRANCH + Move_dLatency();
+}
+
+int SmiUntagLatency() { return 1; }
+
+int PrepareForTailCallLatency() {
+  // Estimated max.
+  return 2 * (LsaLatency() + AdduLatency(false)) + 2 + Latency::BRANCH +
+         Latency::BRANCH + 2 * SubuLatency(false) + 2 + Latency::BRANCH + 1;
+}
+
+int AssemblePopArgumentsAdaptorFrameLatency() {
+  return 1 + Latency::BRANCH + 1 + SmiUntagLatency() +
+         PrepareForTailCallLatency();
+}
+
+int JumpLatency() {
+  // Estimated max.
+  return 1 + AdduLatency(false) + Latency::BRANCH + 2;
+}
+
+int AssertLatency() { return 1; }
+
+int MultiPushLatency() {
+  int latency = SubuLatency(false);
+  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
+    latency++;
+  }
+  return latency;
+}
+
+int MultiPushFPULatency() {
+  int latency = SubuLatency(false);
+  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
+    latency += Sdc1Latency();
+  }
+  return latency;
+}
+
+int PushCallerSavedLatency(SaveFPRegsMode fp_mode) {
+  int latency = MultiPushLatency();
+  if (fp_mode == kSaveFPRegs) {
+    latency += MultiPushFPULatency();
+  }
+  return latency;
+}
+
+int MultiPopFPULatency() {
+  int latency = 0;
+  for (int16_t i = 0; i < kNumRegisters; i++) {
+    latency += Ldc1Latency();
+  }
+  return latency++;
+}
+
+int MultiPopLatency() {
+  int latency = 0;
+  for (int16_t i = 0; i < kNumRegisters; i++) {
+    latency++;
+  }
+  return latency++;
+}
+
+int PopCallerSavedLatency(SaveFPRegsMode fp_mode) {
+  int latency = 0;
+  if (fp_mode == kSaveFPRegs) {
+    latency += MultiPopFPULatency();
+  }
+  return latency + MultiPopLatency();
+}
+
+int AssembleArchJumpLatency() {
+  // Estimated max.
+  return Latency::BRANCH;
+}
+
+int AssembleArchBinarySearchSwitchLatency(int cases) {
+  if (cases < CodeGenerator::kBinarySearchSwitchMinimalCases) {
+    return cases * (1 + Latency::BRANCH) + AssembleArchJumpLatency();
+  }
+  return 1 + Latency::BRANCH + AssembleArchBinarySearchSwitchLatency(cases / 2);
+}
+
+int GenerateSwitchTableLatency() {
+  int latency = 0;
+  if (kArchVariant >= kMips32r6) {
+    latency = LsaLatency() + 2;
+  } else {
+    latency = 6;
+  }
+  latency += 2;
+  return latency;
+}
+
+int AssembleArchTableSwitchLatency() {
+  return Latency::BRANCH + GenerateSwitchTableLatency();
+}
+
+int AssembleReturnLatency() {
+  // Estimated max.
+  return AdduLatency(false) + MultiPopLatency() + MultiPopFPULatency() +
+         Latency::BRANCH + 1 + AdduLatency() + 8;
+}
+
+int TryInlineTruncateDoubleToILatency() {
+  return 2 + Latency::TRUNC_W_D + Latency::MFC1 + 2 + AndLatency(false) +
+         Latency::BRANCH;
+}
+
+int CallStubDelayedLatency() { return 1 + CallLatency(); }
+
+int TruncateDoubleToIDelayedLatency() {
+  // TODO(mips): This no longer reflects how TruncateDoubleToI is called.
+  return TryInlineTruncateDoubleToILatency() + 1 + SubuLatency(false) +
+         Sdc1Latency() + CallStubDelayedLatency() + AdduLatency(false) + 1;
+}
+
+int CheckPageFlagLatency() {
+  return 2 * AndLatency(false) + 1 + Latency::BRANCH;
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // Basic latency modeling for MIPS32 instructions. The values below were
+  // determined empirically.
+  switch (instr->arch_opcode()) {
+    case kArchCallCodeObject:
+    case kArchCallWasmFunction:
+      return CallLatency();
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      int latency = 0;
+      if (instr->arch_opcode() == kArchTailCallCodeObjectFromJSFunction) {
+        latency = AssemblePopArgumentsAdaptorFrameLatency();
+      }
+      return latency + JumpLatency();
+    }
+    case kArchTailCallWasm:
+    case kArchTailCallAddress:
+      return JumpLatency();
+    case kArchCallJSFunction: {
+      int latency = 0;
+      if (FLAG_debug_code) {
+        latency = 1 + AssertLatency();
+      }
+      return latency + 1 + AdduLatency(false) + CallLatency();
+    }
+    case kArchPrepareCallCFunction:
+      return PrepareCallCFunctionLatency();
+    case kArchSaveCallerRegisters: {
+      auto fp_mode =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      return PushCallerSavedLatency(fp_mode);
+    }
+    case kArchRestoreCallerRegisters: {
+      auto fp_mode =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      return PopCallerSavedLatency(fp_mode);
+    }
+    case kArchPrepareTailCall:
+      return 2;  // Estimated max.
+    case kArchCallCFunction:
+      return CallCFunctionLatency();
+    case kArchJmp:
+      return AssembleArchJumpLatency();
+    case kArchBinarySearchSwitch:
+      return AssembleArchBinarySearchSwitchLatency((instr->InputCount() - 2) /
+                                                   2);
+    case kArchTableSwitch:
+      return AssembleArchTableSwitchLatency();
+    case kArchAbortCSAAssert:
+      return CallLatency() + 1;
+    case kArchComment:
+    case kArchDeoptimize:
+      return 0;
+    case kArchRet:
+      return AssembleReturnLatency();
+    case kArchTruncateDoubleToI:
+      return TruncateDoubleToIDelayedLatency();
+    case kArchStoreWithWriteBarrier:
+      return AdduLatency() + 1 + CheckPageFlagLatency();
+    case kArchStackSlot: {
+      // Estimated max.
+      return AdduLatency(false) + AndLatency(false) + AssertLatency() +
+             AdduLatency(false) + AndLatency(false) + BranchShortLatency() + 1 +
+             SubuLatency() + AdduLatency();
+    }
+    case kArchWordPoisonOnSpeculation:
+      return AndLatency();
+    case kIeee754Float64Acos:
+    case kIeee754Float64Acosh:
+    case kIeee754Float64Asin:
+    case kIeee754Float64Asinh:
+    case kIeee754Float64Atan:
+    case kIeee754Float64Atanh:
+    case kIeee754Float64Atan2:
+    case kIeee754Float64Cos:
+    case kIeee754Float64Cosh:
+    case kIeee754Float64Cbrt:
+    case kIeee754Float64Exp:
+    case kIeee754Float64Expm1:
+    case kIeee754Float64Log:
+    case kIeee754Float64Log1p:
+    case kIeee754Float64Log10:
+    case kIeee754Float64Log2:
+    case kIeee754Float64Pow:
+    case kIeee754Float64Sin:
+    case kIeee754Float64Sinh:
+    case kIeee754Float64Tan:
+    case kIeee754Float64Tanh:
+      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
+             CallCFunctionLatency() + MovFromFloatResultLatency();
+    case kMipsAdd:
+      return AdduLatency(instr->InputAt(1)->IsRegister());
+    case kMipsAnd:
+      return AndLatency(instr->InputAt(1)->IsRegister());
+    case kMipsOr:
+      return OrLatency(instr->InputAt(1)->IsRegister());
+    case kMipsXor:
+      return XorLatency(instr->InputAt(1)->IsRegister());
+    case kMipsSub:
+      return SubuLatency(instr->InputAt(1)->IsRegister());
+    case kMipsNor:
+      return NorLatency(instr->InputAt(1)->IsRegister());
+    case kMipsAddOvf:
+      return AddOverflowLatency();
+    case kMipsSubOvf:
+      return SubOverflowLatency();
+    case kMipsMul:
+      return MulLatency(false);
+    case kMipsMulHigh:
+      return MulhLatency(instr->InputAt(1)->IsRegister());
+    case kMipsMulHighU:
+      return MulhuLatency(instr->InputAt(1)->IsRegister());
+    case kMipsMulOvf:
+      return MulOverflowLatency();
+    case kMipsMod:
+      return ModLatency(instr->InputAt(1)->IsRegister());
+    case kMipsModU:
+      return ModuLatency(instr->InputAt(1)->IsRegister());
+    case kMipsDiv: {
+      int latency = DivLatency(instr->InputAt(1)->IsRegister());
+      if (IsMipsArchVariant(kMips32r6)) {
+        return latency++;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMipsDivU: {
+      int latency = DivuLatency(instr->InputAt(1)->IsRegister());
+      if (IsMipsArchVariant(kMips32r6)) {
+        return latency++;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMipsClz:
+      return ClzLatency();
+    case kMipsCtz:
+      return CtzLatency();
+    case kMipsPopcnt:
+      return PopcntLatency();
+    case kMipsShlPair: {
+      if (instr->InputAt(2)->IsRegister()) {
+        return ShlPairLatency();
+      } else {
+        return ShlPairLatency(false);
+      }
+    }
+    case kMipsShrPair: {
+      if (instr->InputAt(2)->IsRegister()) {
+        return ShrPairLatency();
+      } else {
+        // auto immediate_operand = ImmediateOperand::cast(instr->InputAt(2));
+        // return ShrPairLatency(false, immediate_operand->inline_value());
+        return 1;
+      }
+    }
+    case kMipsSarPair: {
+      if (instr->InputAt(2)->IsRegister()) {
+        return SarPairLatency();
+      } else {
+        return SarPairLatency(false);
+      }
+    }
+    case kMipsExt:
+      return ExtLatency();
+    case kMipsIns:
+      return InsLatency();
+    case kMipsRor:
+      return RorLatency(instr->InputAt(1)->IsRegister());
+    case kMipsLsa:
+      return LsaLatency();
+    case kMipsModS:
+    case kMipsModD:
+      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
+             CallCFunctionLatency() + MovFromFloatResultLatency();
+    case kMipsAddPair:
+      return AddPairLatency();
+    case kMipsSubPair:
+      return SubPairLatency();
+    case kMipsMulPair:
+      return MulPairLatency();
+    case kMipsMaddS:
+      return MaddSLatency();
+    case kMipsMaddD:
+      return MaddDLatency();
+    case kMipsMsubS:
+      return MsubSLatency();
+    case kMipsMsubD:
+      return MsubDLatency();
+    case kMipsNegS:
+      return Neg_sLatency();
+    case kMipsNegD:
+      return Neg_dLatency();
+    case kMipsFloat64RoundDown:
+    case kMipsFloat64RoundTruncate:
+    case kMipsFloat64RoundUp:
+    case kMipsFloat64RoundTiesEven:
+      return Float64RoundLatency();
+    case kMipsFloat32RoundDown:
+    case kMipsFloat32RoundTruncate:
+    case kMipsFloat32RoundUp:
+    case kMipsFloat32RoundTiesEven:
+      return Float32RoundLatency();
+    case kMipsFloat32Max:
+      return Float32MaxLatency();
+    case kMipsFloat64Max:
+      return Float64MaxLatency();
+    case kMipsFloat32Min:
+      return Float32MinLatency();
+    case kMipsFloat64Min:
+      return Float64MinLatency();
+    case kMipsCvtSUw:
+      return CvtSUwLatency();
+    case kMipsCvtDUw:
+      return CvtDUwLatency();
+    case kMipsFloorWD:
+      return FloorWDLatency();
+    case kMipsCeilWD:
+      return CeilWDLatency();
+    case kMipsRoundWD:
+      return RoundWDLatency();
+    case kMipsTruncWD:
+      return Trunc_w_dLatency() + Latency::MFC1;
+    case kMipsTruncWS:
+      return Latency::TRUNC_W_S + Latency::MFC1 + AdduLatency(false) +
+             SltLatency() + MovnLatency();
+    case kMipsTruncUwD:
+      return Trunc_uw_dLatency();
+    case kMipsTruncUwS:
+      return Trunc_uw_sLatency() + AdduLatency(false) + MovzLatency();
+    case kMipsFloat64ExtractLowWord32:
+      return Latency::MFC1;
+    case kMipsFloat64ExtractHighWord32:
+      return Mfhc1Latency();
+    case kMipsFloat64InsertLowWord32: {
+      if (IsFp32Mode()) {
+        return Latency::MTC1;
+      } else {
+        return Latency::MFHC1 + Latency::MTC1 + Latency::MTHC1;
+      }
+    }
+    case kMipsFloat64InsertHighWord32:
+      return Mthc1Latency();
+    case kMipsFloat64SilenceNaN:
+      return Latency::SUB_D;
+    case kMipsSeb:
+      return SebLatency();
+    case kMipsSeh:
+      return SehLatency();
+    case kMipsUlhu:
+      return UlhuLatency();
+    case kMipsUlh:
+      return UlhLatency();
+    case kMipsUsh:
+      return UshLatency();
+    case kMipsUlw:
+      return UlwLatency();
+    case kMipsUsw:
+      return UswLatency();
+    case kMipsUlwc1:
+      return Ulwc1Latency();
+    case kMipsSwc1:
+      return MoveLatency(false) + Latency::SWC1;  // Estimated max.
+    case kMipsUswc1:
+      return MoveLatency(false) + Uswc1Latency();  // Estimated max.
+    case kMipsLdc1:
+      return Ldc1Latency();
+    case kMipsUldc1:
+      return Uldc1Latency();
+    case kMipsSdc1:
+      return MoveLatency(false) + Sdc1Latency();  // Estimated max.
+    case kMipsUsdc1:
+      return MoveLatency(false) + Usdc1Latency();  // Estimated max.
+    case kMipsPush: {
+      if (instr->InputAt(0)->IsFPRegister()) {
+        auto op = LocationOperand::cast(instr->InputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            return Latency::SWC1 + SubuLatency(false);
+            break;
+          case MachineRepresentation::kFloat64:
+            return Sdc1Latency() + SubuLatency(false);
+            break;
+          default: {
+            UNREACHABLE();
+            break;
+          }
+        }
+      } else {
+        return PushRegisterLatency();
+      }
+      break;
+    }
+    case kMipsPeek: {
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        auto op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          return Ldc1Latency();
+        } else {
+          return Latency::LWC1;
+        }
+      } else {
+        return 1;
+      }
+      break;
+    }
+    case kMipsStackClaim:
+      return SubuLatency(false);
+    case kMipsStoreToStackSlot: {
+      if (instr->InputAt(0)->IsFPRegister()) {
+        auto op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          return Sdc1Latency();
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          return Latency::SWC1;
+        } else {
+          return 1;  // Estimated value.
+        }
+      } else {
+        return 1;
+      }
+      break;
+    }
+    case kMipsByteSwap32:
+      return ByteSwapSignedLatency();
+    case kWord32AtomicLoadInt8:
+    case kWord32AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kWord32AtomicLoadUint16:
+    case kWord32AtomicLoadWord32:
+      return 2;
+    case kWord32AtomicStoreWord8:
+    case kWord32AtomicStoreWord16:
+    case kWord32AtomicStoreWord32:
+      return 3;
+    case kWord32AtomicExchangeInt8:
+      return Word32AtomicExchangeLatency(true, 8);
+    case kWord32AtomicExchangeUint8:
+      return Word32AtomicExchangeLatency(false, 8);
+    case kWord32AtomicExchangeInt16:
+      return Word32AtomicExchangeLatency(true, 16);
+    case kWord32AtomicExchangeUint16:
+      return Word32AtomicExchangeLatency(false, 16);
+    case kWord32AtomicExchangeWord32: {
+      return 1 + AdduLatency() + Ldc1Latency() + 1 + ScLatency(0) +
+             BranchShortLatency() + 1;
+    }
+    case kWord32AtomicCompareExchangeInt8:
+      return Word32AtomicCompareExchangeLatency(true, 8);
+    case kWord32AtomicCompareExchangeUint8:
+      return Word32AtomicCompareExchangeLatency(false, 8);
+    case kWord32AtomicCompareExchangeInt16:
+      return Word32AtomicCompareExchangeLatency(true, 16);
+    case kWord32AtomicCompareExchangeUint16:
+      return Word32AtomicCompareExchangeLatency(false, 16);
+    case kWord32AtomicCompareExchangeWord32:
+      return AdduLatency() + 1 + LlLatency(0) + BranchShortLatency() + 1;
+    case kMipsTst:
+      return AndLatency(instr->InputAt(1)->IsRegister());
+    case kMipsCmpS:
+      return MoveLatency() + CompareF32Latency();
+    case kMipsCmpD:
+      return MoveLatency() + CompareF64Latency();
+    case kArchNop:
+    case kArchThrowTerminator:
+    case kMipsCmp:
+      return 0;
+    case kArchDebugBreak:
+    case kArchFramePointer:
+    case kArchParentFramePointer:
+    case kMipsShl:
+    case kMipsShr:
+    case kMipsSar:
+    case kMipsMov:
+    case kMipsMaxS:
+    case kMipsMinS:
+    case kMipsMaxD:
+    case kMipsMinD:
+    case kMipsLbu:
+    case kMipsLb:
+    case kMipsSb:
+    case kMipsLhu:
+    case kMipsLh:
+    case kMipsSh:
+    case kMipsLw:
+    case kMipsSw:
+    case kMipsLwc1:
+      return 1;
+    case kMipsAddS:
+      return Latency::ADD_S;
+    case kMipsSubS:
+      return Latency::SUB_S;
+    case kMipsMulS:
+      return Latency::MUL_S;
+    case kMipsAbsS:
+      return Latency::ABS_S;
+    case kMipsAddD:
+      return Latency::ADD_D;
+    case kMipsSubD:
+      return Latency::SUB_D;
+    case kMipsAbsD:
+      return Latency::ABS_D;
+    case kMipsCvtSD:
+      return Latency::CVT_S_D;
+    case kMipsCvtDS:
+      return Latency::CVT_D_S;
+    case kMipsMulD:
+      return Latency::MUL_D;
+    case kMipsFloorWS:
+      return Latency::FLOOR_W_S;
+    case kMipsCeilWS:
+      return Latency::CEIL_W_S;
+    case kMipsRoundWS:
+      return Latency::ROUND_W_S;
+    case kMipsCvtDW:
+      return Latency::CVT_D_W;
+    case kMipsCvtSW:
+      return Latency::CVT_S_W;
+    case kMipsDivS:
+      return Latency::DIV_S;
+    case kMipsSqrtS:
+      return Latency::SQRT_S;
+    case kMipsDivD:
+      return Latency::DIV_D;
+    case kMipsSqrtD:
+      return Latency::SQRT_D;
+    default:
+      return 1;
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
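
The latency model above works by composing small per-instruction costs into costs for multi-instruction macro sequences; for example, MulPairLatency() is MuluLatency() + 2 * MulLatency() + 2 * AdduLatency(). Below is a minimal standalone sketch of that composition idea, not part of the patch: the *Sketch names and the simplified numbers are illustrative and do not use the real scheduler API.

#include <iostream>

namespace latency_sketch {

// Simplified per-instruction costs, loosely mirroring the Addu/Mul/Multu
// estimates in the scheduler above; the exact numbers are illustrative only.
int AdduLatencySketch(bool operand_is_register) { return operand_is_register ? 1 : 2; }
int MulLatencySketch() { return 7 + 1; }
int MuluLatencySketch() { return 4 + 2; }

// Composite cost for a 32x32->64 pair multiply, in the same spirit as
// MulPairLatency() above: one widening multiply, two 32-bit multiplies,
// and two additions to combine the partial products.
int MulPairLatencySketch() {
  return MuluLatencySketch() + 2 * MulLatencySketch() + 2 * AdduLatencySketch(true);
}

}  // namespace latency_sketch

int main() {
  std::cout << latency_sketch::MulPairLatencySketch() << "\n";  // prints 24
  return 0;
}
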
diff --git a/src/compiler/backend/mips/instruction-selector-mips.cc b/src/compiler/backend/mips/instruction-selector-mips.cc
new file mode 100644
index 0000000..9b6abc8
--- /dev/null
+++ b/src/compiler/backend/mips/instruction-selector-mips.cc
@@ -0,0 +1,2499 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/bits.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define TRACE_UNIMPL() \
+  PrintF("UNIMPLEMENTED instr_sel: %s at line %d\n", __FUNCTION__, __LINE__)
+
+#define TRACE() PrintF("instr_sel: %s at line %d\n", __FUNCTION__, __LINE__)
+
+// Adds Mips-specific methods for generating InstructionOperands.
+class MipsOperandGenerator final : public OperandGenerator {
+ public:
+  explicit MipsOperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseOperand(Node* node, InstructionCode opcode) {
+    if (CanBeImmediate(node, opcode)) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  // Use the zero register if the node has the immediate value zero; otherwise
+  // assign a register.
+  InstructionOperand UseRegisterOrImmediateZero(Node* node) {
+    if ((IsIntegerConstant(node) && (GetIntegerConstantValue(node) == 0)) ||
+        (IsFloatConstant(node) &&
+         (bit_cast<int64_t>(GetFloatConstantValue(node)) == 0))) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  bool IsIntegerConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kInt32Constant);
+  }
+
+  int64_t GetIntegerConstantValue(Node* node) {
+    DCHECK_EQ(IrOpcode::kInt32Constant, node->opcode());
+    return OpParameter<int32_t>(node->op());
+  }
+
+  bool IsFloatConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kFloat32Constant) ||
+           (node->opcode() == IrOpcode::kFloat64Constant);
+  }
+
+  double GetFloatConstantValue(Node* node) {
+    if (node->opcode() == IrOpcode::kFloat32Constant) {
+      return OpParameter<float>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kFloat64Constant, node->opcode());
+    return OpParameter<double>(node->op());
+  }
+
+  bool CanBeImmediate(Node* node, InstructionCode opcode) {
+    Int32Matcher m(node);
+    if (!m.HasResolvedValue()) return false;
+    int32_t value = m.ResolvedValue();
+    switch (ArchOpcodeField::decode(opcode)) {
+      case kMipsShl:
+      case kMipsSar:
+      case kMipsShr:
+        return is_uint5(value);
+      case kMipsAdd:
+      case kMipsAnd:
+      case kMipsOr:
+      case kMipsTst:
+      case kMipsSub:
+      case kMipsXor:
+        return is_uint16(value);
+      case kMipsLb:
+      case kMipsLbu:
+      case kMipsSb:
+      case kMipsLh:
+      case kMipsLhu:
+      case kMipsSh:
+      case kMipsLw:
+      case kMipsSw:
+      case kMipsLwc1:
+      case kMipsSwc1:
+      case kMipsLdc1:
+      case kMipsSdc1:
+        // True even for 32-bit values; offsets wider than 16 bits
+        // are handled in assembler-mips.cc.
+        return is_int32(value);
+      default:
+        return is_int16(value);
+    }
+  }
+
+ private:
+  bool ImmediateFitsAddrMode1Instruction(int32_t imm) const {
+    TRACE_UNIMPL();
+    return false;
+  }
+};
+
+static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  MipsOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
+                           Node* node) {
+  MipsOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  MipsOperandGenerator g(selector);
+  selector->Emit(
+      opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+      g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
+}
+
+static void VisitRR(InstructionSelector* selector, ArchOpcode opcode,
+                    Node* node) {
+  MipsOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+static void VisitRRI(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  MipsOperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm));
+}
+
+static void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode,
+                      Node* node) {
+  MipsOperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+static void VisitRRO(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  MipsOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseOperand(node->InputAt(1), opcode));
+}
+
+bool TryMatchImmediate(InstructionSelector* selector,
+                       InstructionCode* opcode_return, Node* node,
+                       size_t* input_count_return, InstructionOperand* inputs) {
+  MipsOperandGenerator g(selector);
+  if (g.CanBeImmediate(node, *opcode_return)) {
+    *opcode_return |= AddressingModeField::encode(kMode_MRI);
+    inputs[0] = g.UseImmediate(node);
+    *input_count_return = 1;
+    return true;
+  }
+  return false;
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, bool has_reverse_opcode,
+                       InstructionCode reverse_opcode,
+                       FlagsContinuation* cont) {
+  MipsOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand inputs[2];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  if (TryMatchImmediate(selector, &opcode, m.right().node(), &input_count,
+                        &inputs[1])) {
+    inputs[0] = g.UseRegister(m.left().node());
+    input_count++;
+  } else if (has_reverse_opcode &&
+             TryMatchImmediate(selector, &reverse_opcode, m.left().node(),
+                               &input_count, &inputs[1])) {
+    inputs[0] = g.UseRegister(m.right().node());
+    opcode = reverse_opcode;
+    input_count++;
+  } else {
+    inputs[input_count++] = g.UseRegister(m.left().node());
+    inputs[input_count++] = g.UseOperand(m.right().node(), opcode);
+  }
+
+  if (cont->IsDeoptimize()) {
+    // If we can deoptimize as a result of the binop, we need to make sure that
+    // the deopt inputs are not overwritten by the binop result. One way
+    // to achieve that is to declare the output register as same-as-first.
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+  } else {
+    outputs[output_count++] = g.DefineAsRegister(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_EQ(1u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, bool has_reverse_opcode,
+                       InstructionCode reverse_opcode) {
+  FlagsContinuation cont;
+  VisitBinop(selector, node, opcode, has_reverse_opcode, reverse_opcode, &cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, FlagsContinuation* cont) {
+  VisitBinop(selector, node, opcode, false, kArchNop, cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode) {
+  VisitBinop(selector, node, opcode, false, kArchNop);
+}
+
+static void VisitPairAtomicBinop(InstructionSelector* selector, Node* node,
+                                 ArchOpcode opcode) {
+  MipsOperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+  AddressingMode addressing_mode = kMode_None;
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseFixed(value, a1),
+                                 g.UseFixed(value_high, a2)};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[3];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister(a0);
+
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, v0);
+  } else {
+    temps[temp_count++] = g.TempRegister(v0);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, v1);
+  } else {
+    temps[temp_count++] = g.TempRegister(v1);
+  }
+  selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
+                 temp_count, temps);
+}
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int alignment = rep.alignment();
+  int slot = frame_->AllocateSpillSlot(rep.size(), alignment);
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)),
+       sequence()->AddImmediate(Constant(alignment)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), a0));
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  InstructionCode opcode = kArchNop;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kMipsS128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kMipsS128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kMipsS128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kMipsS128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kMipsS128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kMipsS128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kMipsS128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kMipsS128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kMipsS128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kMipsS128Load32x2U;
+      break;
+    default:
+      UNIMPLEMENTED();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  InstructionCode opcode = kArchNop;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kMipsLwc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMipsLdc1;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsUnsigned() ? kMipsLbu : kMipsLb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsUnsigned() ? kMipsLhu : kMipsLh;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord32:
+      opcode = kMipsLw;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMipsMsaLd;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kWord64:             // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  // TODO(mips): I guess this could be done in a better way.
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedPointer(rep));
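+    // Keep base, index and value in unique registers and reserve two temps
+    // for the write-barrier code emitted by kArchStoreWithWriteBarrier.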
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    inputs[input_count++] = g.UseUniqueRegister(index);
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
+  } else {
+    ArchOpcode opcode;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kMipsSwc1;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kMipsSdc1;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kMipsSb;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kMipsSh;
+        break;
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:         // Fall through.
+      case MachineRepresentation::kWord32:
+        opcode = kMipsSw;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kMipsMsaSt;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:         // Fall through.
+      case MachineRepresentation::kWord64:             // Fall through.
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    if (g.CanBeImmediate(index, opcode)) {
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           g.UseRegister(base), g.UseImmediate(index),
+           g.UseRegisterOrImmediateZero(value));
+    } else {
+      InstructionOperand addr_reg = g.TempRegister();
+      Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+           g.UseRegister(index), g.UseRegister(base));
+      // Emit desired store opcode, using temp addr_reg.
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value));
+    }
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Shr() && CanCover(node, m.left().node()) &&
+      m.right().HasResolvedValue()) {
+    uint32_t mask = m.right().ResolvedValue();
+    uint32_t mask_width = base::bits::CountPopulation(mask);
+    uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+    if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
+      // The mask must be contiguous, and occupy the least-significant bits.
+      DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+
+      // Select Ext for And(Shr(x, imm), mask) where the mask is in the least
+      // significant bits.
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue()) {
+        // Any shift value can match; int32 shifts use `value % 32`.
+        uint32_t lsb = mleft.right().ResolvedValue() & 0x1F;
+
+        // Ext cannot extract bits past the register size; however, since
+        // shifting the original value would have introduced some zeros, we
+        // can still use Ext with a smaller mask and the remaining bits will
+        // be zeros.
+        if (lsb + mask_width > 32) mask_width = 32 - lsb;
+
+        if (lsb == 0 && mask_width == 32) {
+          Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(mleft.left().node()));
+        } else {
+          Emit(kMipsExt, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+               g.TempImmediate(mask_width));
+        }
+        return;
+      }
+      // Other cases fall through to the normal And operation.
+    }
+  }
+  if (m.right().HasResolvedValue()) {
+    uint32_t mask = m.right().ResolvedValue();
+    uint32_t shift = base::bits::CountPopulation(~mask);
+    uint32_t msb = base::bits::CountLeadingZeros32(~mask);
+    if (shift != 0 && shift != 32 && msb + shift == 32) {
+      // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction
+      // and remove constant loading of the inverted mask.
+      Emit(kMipsIns, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(0), g.TempImmediate(shift));
+      return;
+    }
+  }
+  VisitBinop(this, node, kMipsAnd, true, kMipsAnd);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  VisitBinop(this, node, kMipsOr, true, kMipsOr);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  Int32BinopMatcher m(node);
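+  // Fold Xor(Or(x, y), -1) into a single Nor(x, y).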
+  if (m.left().IsWord32Or() && CanCover(node, m.left().node()) &&
+      m.right().Is(-1)) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (!mleft.right().HasResolvedValue()) {
+      MipsOperandGenerator g(this);
+      Emit(kMipsNor, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+  }
+  if (m.right().Is(-1)) {
+    // Use Nor for bit negation and eliminate constant loading for xori.
+    MipsOperandGenerator g(this);
+    Emit(kMipsNor, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+         g.TempImmediate(0));
+    return;
+  }
+  VisitBinop(this, node, kMipsXor, true, kMipsXor);
+}
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && CanCover(node, m.left().node()) &&
+      m.right().IsInRange(1, 31)) {
+    MipsOperandGenerator g(this);
+    Int32BinopMatcher mleft(m.left().node());
+    // Match Word32Shl(Word32And(x, mask), imm) to Shl where the mask is
+    // contiguous, and the shift immediate non-zero.
+    if (mleft.right().HasResolvedValue()) {
+      uint32_t mask = mleft.right().ResolvedValue();
+      uint32_t mask_width = base::bits::CountPopulation(mask);
+      uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
+        uint32_t shift = m.right().ResolvedValue();
+        DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+        DCHECK_NE(0u, shift);
+        if ((shift + mask_width) >= 32) {
+          // If the mask is contiguous and reaches or extends beyond the top
+          // bit, only the shift is needed.
+          Emit(kMipsShl, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()),
+               g.UseImmediate(m.right().node()));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kMipsShl, node);
+}
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && m.right().HasResolvedValue()) {
+    uint32_t lsb = m.right().ResolvedValue() & 0x1F;
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() &&
+        mleft.right().ResolvedValue() != 0) {
+      // Select Ext for Shr(And(x, mask), imm) where the result of the mask is
+      // shifted into the least-significant bits.
+      uint32_t mask = (mleft.right().ResolvedValue() >> lsb) << lsb;
+      unsigned mask_width = base::bits::CountPopulation(mask);
+      unsigned mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_msb + mask_width + lsb) == 32) {
+        MipsOperandGenerator g(this);
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask));
+        Emit(kMipsExt, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kMipsShr, node);
+}
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  Int32BinopMatcher m(node);
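+  // On r2/r6, match (x << 16) >> 16 to Seh (sign-extend halfword) and
+  // (x << 24) >> 24 to Seb (sign-extend byte).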
+  if ((IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) &&
+      m.left().IsWord32Shl() && CanCover(node, m.left().node())) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (m.right().HasResolvedValue() && mleft.right().HasResolvedValue()) {
+      MipsOperandGenerator g(this);
+      uint32_t sar = m.right().ResolvedValue();
+      uint32_t shl = mleft.right().ResolvedValue();
+      if ((sar == shl) && (sar == 16)) {
+        Emit(kMipsSeh, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()));
+        return;
+      } else if ((sar == shl) && (sar == 24)) {
+        Emit(kMipsSeb, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kMipsSar, node);
+}
+
+static void VisitInt32PairBinop(InstructionSelector* selector,
+                                InstructionCode pair_opcode,
+                                InstructionCode single_opcode, Node* node) {
+  MipsOperandGenerator g(selector);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the output
+    // register.
+    InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                   g.UseUniqueRegister(node->InputAt(1)),
+                                   g.UseUniqueRegister(node->InputAt(2)),
+                                   g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+    selector->Emit(pair_opcode, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    selector->Emit(single_opcode, g.DefineSameAsFirst(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairAdd(Node* node) {
+  VisitInt32PairBinop(this, kMipsAddPair, kMipsAdd, node);
+}
+
+void InstructionSelector::VisitInt32PairSub(Node* node) {
+  VisitInt32PairBinop(this, kMipsSubPair, kMipsSub, node);
+}
+
+void InstructionSelector::VisitInt32PairMul(Node* node) {
+  VisitInt32PairBinop(this, kMipsMulPair, kMipsMul, node);
+}
+
+// Shared routine for multiple shift operations.
+static void VisitWord32PairShift(InstructionSelector* selector,
+                                 InstructionCode opcode, Node* node) {
+  MipsOperandGenerator g(selector);
+  Int32Matcher m(node->InputAt(2));
+  InstructionOperand shift_operand;
+  if (m.HasResolvedValue()) {
+    shift_operand = g.UseImmediate(m.node());
+  } else {
+    shift_operand = g.UseUniqueRegister(m.node());
+  }
+
+  // We use UseUniqueRegister here to avoid register sharing with the output
+  // register.
+  InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                 g.UseUniqueRegister(node->InputAt(1)),
+                                 shift_operand};
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+
+  InstructionOperand outputs[2];
+  InstructionOperand temps[1];
+  int32_t output_count = 0;
+  int32_t temp_count = 0;
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsRegister(projection1);
+  } else {
+    temps[temp_count++] = g.TempRegister();
+  }
+
+  selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
+}
+
+void InstructionSelector::VisitWord32PairShl(Node* node) {
+  VisitWord32PairShift(this, kMipsShlPair, node);
+}
+
+void InstructionSelector::VisitWord32PairShr(Node* node) {
+  VisitWord32PairShift(this, kMipsShrPair, node);
+}
+
+void InstructionSelector::VisitWord32PairSar(Node* node) {
+  VisitWord32PairShift(this, kMipsSarPair, node);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitRRO(this, kMipsRor, node);
+}
+
+void InstructionSelector::VisitWord32Clz(Node* node) {
+  VisitRR(this, kMipsClz, node);
+}
+
+void InstructionSelector::VisitWord32AtomicPairLoad(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  ArchOpcode opcode = kMipsWord32AtomicPairLoad;
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index)};
+  InstructionOperand temps[3];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister(a0);
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+
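+  // The result pair is produced in the fixed registers {v0, v1} and a0 is
+  // reserved as scratch; an unused projection keeps its register as a temp.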
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, v0);
+  } else {
+    temps[temp_count++] = g.TempRegister(v0);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, v1);
+  } else {
+    temps[temp_count++] = g.TempRegister(v1);
+  }
+  Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
+       temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairStore(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value_low = node->InputAt(2);
+  Node* value_high = node->InputAt(3);
+
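+  // The low/high words are passed in the fixed registers a1/a2; a0 and two
+  // scratch registers are reserved as temps.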
+  InstructionOperand inputs[] = {g.UseRegister(base), g.UseRegister(index),
+                                 g.UseFixed(value_low, a1),
+                                 g.UseFixed(value_high, a2)};
+  InstructionOperand temps[] = {g.TempRegister(a0), g.TempRegister(),
+                                g.TempRegister()};
+  Emit(kMipsWord32AtomicPairStore | AddressingModeField::encode(kMode_MRI), 0,
+       nullptr, arraysize(inputs), inputs, arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAdd(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairAdd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairSub(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairSub);
+}
+
+void InstructionSelector::VisitWord32AtomicPairAnd(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairAnd);
+}
+
+void InstructionSelector::VisitWord32AtomicPairOr(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairOr);
+}
+
+void InstructionSelector::VisitWord32AtomicPairXor(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairXor);
+}
+
+void InstructionSelector::VisitWord32AtomicPairExchange(Node* node) {
+  VisitPairAtomicBinop(this, node, kMipsWord32AtomicPairExchange);
+}
+
+void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
+  MipsOperandGenerator g(this);
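+  // The expected pair arrives in {a1, a2}, the replacement pair in a3 and a
+  // unique register; the old value is returned in {v0, v1} and a0 is
+  // reserved as a temp.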
+  InstructionOperand inputs[] = {
+      g.UseRegister(node->InputAt(0)),  g.UseRegister(node->InputAt(1)),
+      g.UseFixed(node->InputAt(2), a1), g.UseFixed(node->InputAt(3), a2),
+      g.UseFixed(node->InputAt(4), a3), g.UseUniqueRegister(node->InputAt(5))};
+
+  InstructionCode code = kMipsWord32AtomicPairCompareExchange |
+                         AddressingModeField::encode(kMode_MRI);
+  Node* projection0 = NodeProperties::FindProjection(node, 0);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  InstructionOperand temps[3];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister(a0);
+  if (projection0) {
+    outputs[output_count++] = g.DefineAsFixed(projection0, v0);
+  } else {
+    temps[temp_count++] = g.TempRegister(v0);
+  }
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsFixed(projection1, v1);
+  } else {
+    temps[temp_count++] = g.TempRegister(v1);
+  }
+  Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
+       temps);
+}
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsByteSwap32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitWord32Ctz(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsCtz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord32Popcnt(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsPopcnt, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+
+  if (IsMipsArchVariant(kMips32r6)) {
+    // Select Lsa for (left + (left_of_right << imm)).
+    if (m.right().opcode() == IrOpcode::kWord32Shl &&
+        CanCover(node, m.left().node()) && CanCover(node, m.right().node())) {
+      Int32BinopMatcher mright(m.right().node());
+      if (mright.right().HasResolvedValue() && !m.left().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mright.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMipsLsa, g.DefineAsRegister(node),
+               g.UseRegister(m.left().node()),
+               g.UseRegister(mright.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+
+    // Select Lsa for ((left_of_left << imm) + right).
+    if (m.left().opcode() == IrOpcode::kWord32Shl &&
+        CanCover(node, m.right().node()) && CanCover(node, m.left().node())) {
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue() && !m.right().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mleft.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMipsLsa, g.DefineAsRegister(node),
+               g.UseRegister(m.right().node()),
+               g.UseRegister(mleft.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+  }
+
+  VisitBinop(this, node, kMipsAdd, true, kMipsAdd);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  VisitBinop(this, node, kMipsSub);
+}
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
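+  // Strength-reduce multiplication by a positive constant: 2^k becomes a
+  // shift, 2^k + 1 becomes Lsa on r6, and 2^k - 1 becomes a shift followed
+  // by a subtract.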
+  if (m.right().HasResolvedValue() && m.right().ResolvedValue() > 0) {
+    uint32_t value = static_cast<uint32_t>(m.right().ResolvedValue());
+    if (base::bits::IsPowerOfTwo(value)) {
+      Emit(kMipsShl | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value - 1) && IsMipsArchVariant(kMips32r6) &&
+        value - 1 > 0 && value - 1 <= 31) {
+      Emit(kMipsLsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value + 1)) {
+      InstructionOperand temp = g.TempRegister();
+      Emit(kMipsShl | AddressingModeField::encode(kMode_None), temp,
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1)));
+      Emit(kMipsSub | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), temp, g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+  VisitRRR(this, kMipsMul, node);
+}
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  VisitRRR(this, kMipsMulHigh, node);
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsMulHighU, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMipsDiv, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMipsDivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMipsMod, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  MipsOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMipsModU, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitChangeFloat32ToFloat64(Node* node) {
+  VisitRR(this, kMipsCvtDS, node);
+}
+
+void InstructionSelector::VisitRoundInt32ToFloat32(Node* node) {
+  VisitRR(this, kMipsCvtSW, node);
+}
+
+void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
+  VisitRR(this, kMipsCvtSUw, node);
+}
+
+void InstructionSelector::VisitChangeInt32ToFloat64(Node* node) {
+  VisitRR(this, kMipsCvtDW, node);
+}
+
+void InstructionSelector::VisitChangeUint32ToFloat64(Node* node) {
+  VisitRR(this, kMipsCvtDUw, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  VisitRR(this, kMipsTruncWS, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  VisitRR(this, kMipsTruncUwS, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt32(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  // Match ChangeFloat64ToInt32(Float64Round##OP) to corresponding instruction
+  // which does rounding and conversion to integer format.
+  if (CanCover(node, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kFloat64RoundDown:
+        Emit(kMipsFloorWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundUp:
+        Emit(kMipsCeilWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundTiesEven:
+        Emit(kMipsRoundWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundTruncate:
+        Emit(kMipsTruncWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      default:
+        break;
+    }
+    if (value->opcode() == IrOpcode::kChangeFloat32ToFloat64) {
+      Node* next = value->InputAt(0);
+      if (CanCover(value, next)) {
+        // Match ChangeFloat64ToInt32(ChangeFloat32ToFloat64(Float64Round##OP))
+        switch (next->opcode()) {
+          case IrOpcode::kFloat32RoundDown:
+            Emit(kMipsFloorWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundUp:
+            Emit(kMipsCeilWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundTiesEven:
+            Emit(kMipsRoundWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundTruncate:
+            Emit(kMipsTruncWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          default:
+            Emit(kMipsTruncWS, g.DefineAsRegister(node),
+                 g.UseRegister(value->InputAt(0)));
+            return;
+        }
+      } else {
+        // Match float32 -> float64 -> int32 representation change path.
+        Emit(kMipsTruncWS, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      }
+    }
+  }
+  VisitRR(this, kMipsTruncWD, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint32(Node* node) {
+  VisitRR(this, kMipsTruncUwD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToUint32(Node* node) {
+  VisitRR(this, kMipsTruncUwD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToFloat32(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  // Match TruncateFloat64ToFloat32(ChangeInt32ToFloat64) to corresponding
+  // instruction.
+  if (CanCover(node, value) &&
+      value->opcode() == IrOpcode::kChangeInt32ToFloat64) {
+    Emit(kMipsCvtSW, g.DefineAsRegister(node),
+         g.UseRegister(value->InputAt(0)));
+    return;
+  }
+  VisitRR(this, kMipsCvtSD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) {
+  VisitRR(this, kArchTruncateDoubleToI, node);
+}
+
+void InstructionSelector::VisitRoundFloat64ToInt32(Node* node) {
+  VisitRR(this, kMipsTruncWD, node);
+}
+
+void InstructionSelector::VisitBitcastFloat32ToInt32(Node* node) {
+  VisitRR(this, kMipsFloat64ExtractLowWord32, node);
+}
+
+void InstructionSelector::VisitBitcastInt32ToFloat32(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat64InsertLowWord32, g.DefineAsRegister(node),
+       ImmediateOperand(ImmediateOperand::INLINE, 0),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitFloat32Add(Node* node) {
+  MipsOperandGenerator g(this);
+  if (IsMipsArchVariant(kMips32r2)) {  // Select Madd.S(z, x, y).
+    Float32BinopMatcher m(node);
+    if (m.left().IsFloat32Mul() && CanCover(node, m.left().node())) {
+      // For Add.S(Mul.S(x, y), z):
+      Float32BinopMatcher mleft(m.left().node());
+      Emit(kMipsMaddS, g.DefineAsRegister(node),
+           g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+    if (m.right().IsFloat32Mul() && CanCover(node, m.right().node())) {
+      // For Add.S(x, Mul.S(y, z)):
+      Float32BinopMatcher mright(m.right().node());
+      Emit(kMipsMaddS, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()));
+      return;
+    }
+  }
+  VisitRRR(this, kMipsAddS, node);
+}
+
+void InstructionSelector::VisitFloat64Add(Node* node) {
+  MipsOperandGenerator g(this);
+  if (IsMipsArchVariant(kMips32r2)) {  // Select Madd.D(z, x, y).
+    Float64BinopMatcher m(node);
+    if (m.left().IsFloat64Mul() && CanCover(node, m.left().node())) {
+      // For Add.D(Mul.D(x, y), z):
+      Float64BinopMatcher mleft(m.left().node());
+      Emit(kMipsMaddD, g.DefineAsRegister(node),
+           g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+    if (m.right().IsFloat64Mul() && CanCover(node, m.right().node())) {
+      // For Add.D(x, Mul.D(y, z)):
+      Float64BinopMatcher mright(m.right().node());
+      Emit(kMipsMaddD, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(mright.left().node()),
+           g.UseRegister(mright.right().node()));
+      return;
+    }
+  }
+  VisitRRR(this, kMipsAddD, node);
+}
+
+void InstructionSelector::VisitFloat32Sub(Node* node) {
+  MipsOperandGenerator g(this);
+  if (IsMipsArchVariant(kMips32r2)) {  // Select Msub.S(z, x, y).
+    Float32BinopMatcher m(node);
+    if (m.left().IsFloat32Mul() && CanCover(node, m.left().node())) {
+      // For Sub.S(Mul.S(x,y), z) select Msub.S(z, x, y).
+      Float32BinopMatcher mleft(m.left().node());
+      Emit(kMipsMsubS, g.DefineAsRegister(node),
+           g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+  }
+  VisitRRR(this, kMipsSubS, node);
+}
+
+void InstructionSelector::VisitFloat64Sub(Node* node) {
+  MipsOperandGenerator g(this);
+  if (IsMipsArchVariant(kMips32r2)) {  // Select Msub.D(z, x, y).
+    Float64BinopMatcher m(node);
+    if (m.left().IsFloat64Mul() && CanCover(node, m.left().node())) {
+      // For Sub.D(Mul.D(x,y), z) select Msub.D(z, x, y).
+      Float64BinopMatcher mleft(m.left().node());
+      Emit(kMipsMsubD, g.DefineAsRegister(node),
+           g.UseRegister(m.right().node()), g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+  }
+  VisitRRR(this, kMipsSubD, node);
+}
+
+void InstructionSelector::VisitFloat32Mul(Node* node) {
+  VisitRRR(this, kMipsMulS, node);
+}
+
+void InstructionSelector::VisitFloat64Mul(Node* node) {
+  VisitRRR(this, kMipsMulD, node);
+}
+
+void InstructionSelector::VisitFloat32Div(Node* node) {
+  VisitRRR(this, kMipsDivS, node);
+}
+
+void InstructionSelector::VisitFloat64Div(Node* node) {
+  VisitRRR(this, kMipsDivD, node);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsModD, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f12),
+       g.UseFixed(node->InputAt(1), f14))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat32Max(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat32Max, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat64Max(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat64Max, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat32Min(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat32Min, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat64Min(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat64Min, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat32Abs(Node* node) {
+  VisitRR(this, kMipsAbsS, node);
+}
+
+void InstructionSelector::VisitFloat64Abs(Node* node) {
+  VisitRR(this, kMipsAbsD, node);
+}
+
+void InstructionSelector::VisitFloat32Sqrt(Node* node) {
+  VisitRR(this, kMipsSqrtS, node);
+}
+
+void InstructionSelector::VisitFloat64Sqrt(Node* node) {
+  VisitRR(this, kMipsSqrtD, node);
+}
+
+void InstructionSelector::VisitFloat32RoundDown(Node* node) {
+  VisitRR(this, kMipsFloat32RoundDown, node);
+}
+
+void InstructionSelector::VisitFloat64RoundDown(Node* node) {
+  VisitRR(this, kMipsFloat64RoundDown, node);
+}
+
+void InstructionSelector::VisitFloat32RoundUp(Node* node) {
+  VisitRR(this, kMipsFloat32RoundUp, node);
+}
+
+void InstructionSelector::VisitFloat64RoundUp(Node* node) {
+  VisitRR(this, kMipsFloat64RoundUp, node);
+}
+
+void InstructionSelector::VisitFloat32RoundTruncate(Node* node) {
+  VisitRR(this, kMipsFloat32RoundTruncate, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTruncate(Node* node) {
+  VisitRR(this, kMipsFloat64RoundTruncate, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
+  VisitRR(this, kMipsFloat32RoundTiesEven, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
+  VisitRR(this, kMipsFloat64RoundTiesEven, node);
+}
+
+void InstructionSelector::VisitFloat32Neg(Node* node) {
+  VisitRR(this, kMipsNegS, node);
+}
+
+void InstructionSelector::VisitFloat64Neg(Node* node) {
+  VisitRR(this, kMipsNegD, node);
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  MipsOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f2),
+       g.UseFixed(node->InputAt(1), f4))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  MipsOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f12))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  MipsOperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    int slot = kCArgSlotCount;
+    for (PushParameter input : (*arguments)) {
+      if (input.node) {
+        Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+             g.TempImmediate(slot << kSystemPointerSizeLog2));
+        ++slot;
+      }
+    }
+  } else {
+    // Possibly align stack here for functions.
+    int push_count = static_cast<int>(call_descriptor->StackParameterCount());
+    if (push_count > 0) {
+      // Calculate the needed stack space.
+      int stack_size = 0;
+      for (size_t n = 0; n < arguments->size(); ++n) {
+        PushParameter input = (*arguments)[n];
+        if (input.node) {
+          stack_size += input.location.GetSizeInPointers();
+        }
+      }
+      Emit(kMipsStackClaim, g.NoOutput(),
+           g.TempImmediate(stack_size << kSystemPointerSizeLog2));
+    }
+    for (size_t n = 0; n < arguments->size(); ++n) {
+      PushParameter input = (*arguments)[n];
+      if (input.node) {
+        Emit(kMipsStoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+             g.TempImmediate(n << kSystemPointerSizeLog2));
+      }
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  MipsOperandGenerator g(this);
+
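+  // Read stack-returned results back with kMipsPeek; reverse_slot tracks the
+  // caller-frame slot offset.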
+  int reverse_slot = 0;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      }
+      Emit(kMipsPeek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+void InstructionSelector::VisitUnalignedLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
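+    // Byte accesses cannot be misaligned, so the regular Lb/Lbu opcodes are
+    // used for kWord8.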
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsUnsigned() ? kMipsLbu : kMipsLb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsUnsigned() ? kMipsUlhu : kMipsUlh;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord32:
+      opcode = kMipsUlw;
+      break;
+    case MachineRepresentation::kFloat32:
+      opcode = kMipsUlwc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMipsUldc1;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMipsMsaLd;
+      break;
+    case MachineRepresentation::kBit:                // Fall through.
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kWord64:             // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitUnalignedStore(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  UnalignedStoreRepresentation rep = UnalignedStoreRepresentationOf(node->op());
+
+  // TODO(mips): I guess this could be done in a better way.
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+      opcode = kMipsUswc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMipsUsdc1;
+      break;
+    case MachineRepresentation::kWord8:
+      opcode = kMipsSb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kMipsUsh;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord32:
+      opcode = kMipsUsw;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMipsMsaSt;
+      break;
+    case MachineRepresentation::kBit:                // Fall through.
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kWord64:             // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         g.UseRegister(base), g.UseImmediate(index),
+         g.UseRegisterOrImmediateZero(value));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired store opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value));
+  }
+}
+
+namespace {
+// Shared routine for multiple compare operations.
+static void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                         InstructionOperand left, InstructionOperand right,
+                         FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  MipsOperandGenerator g(selector);
+  Float32BinopMatcher m(node);
+  InstructionOperand lhs, rhs;
+
+  lhs = m.left().IsZero() ? g.UseImmediate(m.left().node())
+                          : g.UseRegister(m.left().node());
+  rhs = m.right().IsZero() ? g.UseImmediate(m.right().node())
+                           : g.UseRegister(m.right().node());
+  VisitCompare(selector, kMipsCmpS, lhs, rhs, cont);
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  MipsOperandGenerator g(selector);
+  Float64BinopMatcher m(node);
+  InstructionOperand lhs, rhs;
+
+  lhs = m.left().IsZero() ? g.UseImmediate(m.left().node())
+                          : g.UseRegister(m.left().node());
+  rhs = m.right().IsZero() ? g.UseImmediate(m.right().node())
+                           : g.UseRegister(m.right().node());
+  VisitCompare(selector, kMipsCmpD, lhs, rhs, cont);
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont,
+                      bool commutative) {
+  MipsOperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  // Match immediates on left or right side of comparison.
+  if (g.CanBeImmediate(right, opcode)) {
+    if (opcode == kMipsTst) {
+      VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
+                   cont);
+    } else {
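+      // Equality tested for a branch keeps both operands in registers; the
+      // set-flag form and the ordering comparisons use the immediate
+      // directly.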
+      switch (cont->condition()) {
+        case kEqual:
+        case kNotEqual:
+          if (cont->IsSet()) {
+            VisitCompare(selector, opcode, g.UseRegister(left),
+                         g.UseImmediate(right), cont);
+          } else {
+            VisitCompare(selector, opcode, g.UseRegister(left),
+                         g.UseRegister(right), cont);
+          }
+          break;
+        case kSignedLessThan:
+        case kSignedGreaterThanOrEqual:
+        case kUnsignedLessThan:
+        case kUnsignedGreaterThanOrEqual:
+          VisitCompare(selector, opcode, g.UseRegister(left),
+                       g.UseImmediate(right), cont);
+          break;
+        default:
+          VisitCompare(selector, opcode, g.UseRegister(left),
+                       g.UseRegister(right), cont);
+      }
+    }
+  } else if (g.CanBeImmediate(left, opcode)) {
+    if (!commutative) cont->Commute();
+    if (opcode == kMipsTst) {
+      VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left),
+                   cont);
+    } else {
+      switch (cont->condition()) {
+        case kEqual:
+        case kNotEqual:
+          if (cont->IsSet()) {
+            VisitCompare(selector, opcode, g.UseRegister(right),
+                         g.UseImmediate(left), cont);
+          } else {
+            VisitCompare(selector, opcode, g.UseRegister(right),
+                         g.UseRegister(left), cont);
+          }
+          break;
+        case kSignedLessThan:
+        case kSignedGreaterThanOrEqual:
+        case kUnsignedLessThan:
+        case kUnsignedGreaterThanOrEqual:
+          VisitCompare(selector, opcode, g.UseRegister(right),
+                       g.UseImmediate(left), cont);
+          break;
+        default:
+          VisitCompare(selector, opcode, g.UseRegister(right),
+                       g.UseRegister(left), cont);
+      }
+    }
+  } else {
+    VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
+                 cont);
+  }
+}
+
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      FlagsContinuation* cont) {
+  VisitWordCompare(selector, node, kMipsCmp, cont, false);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  MipsOperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry ? 1 : 0);
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+// Shared routine for word comparisons against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, cont);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of the
+          // actual value, or was already defined, which means it is scheduled
+          // *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (!result || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMipsAddOvf, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMipsSubOvf, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMipsMulOvf, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kWord32And:
+        return VisitWordCompare(this, value, kMipsTst, cont, true);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Continuation could not be combined with a compare, emit compare against 0.
+  MipsOperandGenerator g(this);
+  InstructionOperand const value_operand = g.UseRegister(value);
+  EmitWithContinuation(kMipsCmp, value_operand, g.TempImmediate(0), cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  MipsOperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
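+    // Rough space/time cost model: emit a jump table only when it is no more
+    // costly than a binary-search tree and the value range is small enough.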
+    size_t table_space_cost = 9 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 2 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
+    if (sw.case_count() > 0 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kMipsSub, index_operand, value_operand,
+             g.TempImmediate(sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(std::move(sw), value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMipsAddOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMipsAddOvf, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMipsSubOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMipsSubOvf, &cont);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMipsMulOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMipsMulOvf, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64ExtractLowWord32(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat64ExtractLowWord32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsFloat64ExtractHighWord32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kMipsFloat64InsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kMipsFloat64InsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  InstructionOperand temps[] = {g.TempRegister()};
+  Emit(kMipsFloat64SilenceNaN, g.DefineSameAsFirst(node), g.UseRegister(left),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsSync, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode =
+          load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kWord32AtomicLoadInt16
+                                   : kWord32AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicLoadWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicStoreWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         g.UseRegister(base), g.UseImmediate(index),
+         g.UseRegisterOrImmediateZero(value));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMipsAdd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired store opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value));
+  }
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temp[3];
+  temp[0] = g.TempRegister();
+  temp[1] = g.TempRegister();
+  temp[2] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, 3, temp);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(old_value);
+  inputs[input_count++] = g.UseUniqueRegister(new_value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temp[3];
+  temp[0] = g.TempRegister();
+  temp[1] = g.TempRegister();
+  temp[2] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, 3, temp);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  MipsOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temps[4];
+  temps[0] = g.TempRegister();
+  temps[1] = g.TempRegister();
+  temps[2] = g.TempRegister();
+  temps[3] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  Emit(code, 1, outputs, input_count, inputs, 4, temps);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
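+// For example, VISIT_ATOMIC_BINOP(Add) below expands to a
+// VisitWord32AtomicAdd() method that forwards to
+// VisitWord32AtomicBinaryOperation() with kWord32AtomicAddInt8,
+// kWord32AtomicAddUint8, kWord32AtomicAddInt16, kWord32AtomicAddUint16 and
+// kWord32AtomicAddWord32.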
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+#define SIMD_TYPE_LIST(V) \
+  V(F32x4)                \
+  V(I32x4)                \
+  V(I16x8)                \
+  V(I8x16)
+
+#define SIMD_UNOP_LIST(V)                                \
+  V(F64x2Abs, kMipsF64x2Abs)                             \
+  V(F64x2Neg, kMipsF64x2Neg)                             \
+  V(F64x2Sqrt, kMipsF64x2Sqrt)                           \
+  V(F64x2Ceil, kMipsF64x2Ceil)                           \
+  V(F64x2Floor, kMipsF64x2Floor)                         \
+  V(F64x2Trunc, kMipsF64x2Trunc)                         \
+  V(F64x2NearestInt, kMipsF64x2NearestInt)               \
+  V(I64x2Neg, kMipsI64x2Neg)                             \
+  V(F32x4SConvertI32x4, kMipsF32x4SConvertI32x4)         \
+  V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4)         \
+  V(F32x4Abs, kMipsF32x4Abs)                             \
+  V(F32x4Neg, kMipsF32x4Neg)                             \
+  V(F32x4Sqrt, kMipsF32x4Sqrt)                           \
+  V(F32x4RecipApprox, kMipsF32x4RecipApprox)             \
+  V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox)     \
+  V(F32x4Ceil, kMipsF32x4Ceil)                           \
+  V(F32x4Floor, kMipsF32x4Floor)                         \
+  V(F32x4Trunc, kMipsF32x4Trunc)                         \
+  V(F32x4NearestInt, kMipsF32x4NearestInt)               \
+  V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4)         \
+  V(I32x4UConvertF32x4, kMipsI32x4UConvertF32x4)         \
+  V(I32x4Neg, kMipsI32x4Neg)                             \
+  V(I32x4SConvertI16x8Low, kMipsI32x4SConvertI16x8Low)   \
+  V(I32x4SConvertI16x8High, kMipsI32x4SConvertI16x8High) \
+  V(I32x4UConvertI16x8Low, kMipsI32x4UConvertI16x8Low)   \
+  V(I32x4UConvertI16x8High, kMipsI32x4UConvertI16x8High) \
+  V(I16x8Neg, kMipsI16x8Neg)                             \
+  V(I16x8SConvertI8x16Low, kMipsI16x8SConvertI8x16Low)   \
+  V(I16x8SConvertI8x16High, kMipsI16x8SConvertI8x16High) \
+  V(I16x8UConvertI8x16Low, kMipsI16x8UConvertI8x16Low)   \
+  V(I16x8UConvertI8x16High, kMipsI16x8UConvertI8x16High) \
+  V(I8x16Neg, kMipsI8x16Neg)                             \
+  V(S128Not, kMipsS128Not)                               \
+  V(V32x4AnyTrue, kMipsV32x4AnyTrue)                     \
+  V(V32x4AllTrue, kMipsV32x4AllTrue)                     \
+  V(V16x8AnyTrue, kMipsV16x8AnyTrue)                     \
+  V(V16x8AllTrue, kMipsV16x8AllTrue)                     \
+  V(V8x16AnyTrue, kMipsV8x16AnyTrue)                     \
+  V(V8x16AllTrue, kMipsV8x16AllTrue)
+
+#define SIMD_SHIFT_OP_LIST(V) \
+  V(I64x2Shl)                 \
+  V(I64x2ShrS)                \
+  V(I64x2ShrU)                \
+  V(I32x4Shl)                 \
+  V(I32x4ShrS)                \
+  V(I32x4ShrU)                \
+  V(I16x8Shl)                 \
+  V(I16x8ShrS)                \
+  V(I16x8ShrU)                \
+  V(I8x16Shl)                 \
+  V(I8x16ShrS)                \
+  V(I8x16ShrU)
+
+#define SIMD_BINOP_LIST(V)                             \
+  V(F64x2Add, kMipsF64x2Add)                           \
+  V(F64x2Sub, kMipsF64x2Sub)                           \
+  V(F64x2Mul, kMipsF64x2Mul)                           \
+  V(F64x2Div, kMipsF64x2Div)                           \
+  V(F64x2Min, kMipsF64x2Min)                           \
+  V(F64x2Max, kMipsF64x2Max)                           \
+  V(F64x2Eq, kMipsF64x2Eq)                             \
+  V(F64x2Ne, kMipsF64x2Ne)                             \
+  V(F64x2Lt, kMipsF64x2Lt)                             \
+  V(F64x2Le, kMipsF64x2Le)                             \
+  V(I64x2Add, kMipsI64x2Add)                           \
+  V(I64x2Sub, kMipsI64x2Sub)                           \
+  V(I64x2Mul, kMipsI64x2Mul)                           \
+  V(F32x4Add, kMipsF32x4Add)                           \
+  V(F32x4AddHoriz, kMipsF32x4AddHoriz)                 \
+  V(F32x4Sub, kMipsF32x4Sub)                           \
+  V(F32x4Mul, kMipsF32x4Mul)                           \
+  V(F32x4Div, kMipsF32x4Div)                           \
+  V(F32x4Max, kMipsF32x4Max)                           \
+  V(F32x4Min, kMipsF32x4Min)                           \
+  V(F32x4Eq, kMipsF32x4Eq)                             \
+  V(F32x4Ne, kMipsF32x4Ne)                             \
+  V(F32x4Lt, kMipsF32x4Lt)                             \
+  V(F32x4Le, kMipsF32x4Le)                             \
+  V(I32x4Add, kMipsI32x4Add)                           \
+  V(I32x4AddHoriz, kMipsI32x4AddHoriz)                 \
+  V(I32x4Sub, kMipsI32x4Sub)                           \
+  V(I32x4Mul, kMipsI32x4Mul)                           \
+  V(I32x4MaxS, kMipsI32x4MaxS)                         \
+  V(I32x4MinS, kMipsI32x4MinS)                         \
+  V(I32x4MaxU, kMipsI32x4MaxU)                         \
+  V(I32x4MinU, kMipsI32x4MinU)                         \
+  V(I32x4Eq, kMipsI32x4Eq)                             \
+  V(I32x4Ne, kMipsI32x4Ne)                             \
+  V(I32x4GtS, kMipsI32x4GtS)                           \
+  V(I32x4GeS, kMipsI32x4GeS)                           \
+  V(I32x4GtU, kMipsI32x4GtU)                           \
+  V(I32x4GeU, kMipsI32x4GeU)                           \
+  V(I32x4Abs, kMipsI32x4Abs)                           \
+  V(I32x4BitMask, kMipsI32x4BitMask)                   \
+  V(I32x4DotI16x8S, kMipsI32x4DotI16x8S)               \
+  V(I16x8Add, kMipsI16x8Add)                           \
+  V(I16x8AddSatS, kMipsI16x8AddSatS)                   \
+  V(I16x8AddSatU, kMipsI16x8AddSatU)                   \
+  V(I16x8AddHoriz, kMipsI16x8AddHoriz)                 \
+  V(I16x8Sub, kMipsI16x8Sub)                           \
+  V(I16x8SubSatS, kMipsI16x8SubSatS)                   \
+  V(I16x8SubSatU, kMipsI16x8SubSatU)                   \
+  V(I16x8Mul, kMipsI16x8Mul)                           \
+  V(I16x8MaxS, kMipsI16x8MaxS)                         \
+  V(I16x8MinS, kMipsI16x8MinS)                         \
+  V(I16x8MaxU, kMipsI16x8MaxU)                         \
+  V(I16x8MinU, kMipsI16x8MinU)                         \
+  V(I16x8Eq, kMipsI16x8Eq)                             \
+  V(I16x8Ne, kMipsI16x8Ne)                             \
+  V(I16x8GtS, kMipsI16x8GtS)                           \
+  V(I16x8GeS, kMipsI16x8GeS)                           \
+  V(I16x8GtU, kMipsI16x8GtU)                           \
+  V(I16x8GeU, kMipsI16x8GeU)                           \
+  V(I16x8SConvertI32x4, kMipsI16x8SConvertI32x4)       \
+  V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4)       \
+  V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
+  V(I16x8Abs, kMipsI16x8Abs)                           \
+  V(I16x8BitMask, kMipsI16x8BitMask)                   \
+  V(I8x16Add, kMipsI8x16Add)                           \
+  V(I8x16AddSatS, kMipsI8x16AddSatS)                   \
+  V(I8x16AddSatU, kMipsI8x16AddSatU)                   \
+  V(I8x16Sub, kMipsI8x16Sub)                           \
+  V(I8x16SubSatS, kMipsI8x16SubSatS)                   \
+  V(I8x16SubSatU, kMipsI8x16SubSatU)                   \
+  V(I8x16Mul, kMipsI8x16Mul)                           \
+  V(I8x16MaxS, kMipsI8x16MaxS)                         \
+  V(I8x16MinS, kMipsI8x16MinS)                         \
+  V(I8x16MaxU, kMipsI8x16MaxU)                         \
+  V(I8x16MinU, kMipsI8x16MinU)                         \
+  V(I8x16Eq, kMipsI8x16Eq)                             \
+  V(I8x16Ne, kMipsI8x16Ne)                             \
+  V(I8x16GtS, kMipsI8x16GtS)                           \
+  V(I8x16GeS, kMipsI8x16GeS)                           \
+  V(I8x16GtU, kMipsI8x16GtU)                           \
+  V(I8x16GeU, kMipsI8x16GeU)                           \
+  V(I8x16RoundingAverageU, kMipsI8x16RoundingAverageU) \
+  V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8)       \
+  V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8)       \
+  V(I8x16Abs, kMipsI8x16Abs)                           \
+  V(I8x16BitMask, kMipsI8x16BitMask)                   \
+  V(S128And, kMipsS128And)                             \
+  V(S128Or, kMipsS128Or)                               \
+  V(S128Xor, kMipsS128Xor)                             \
+  V(S128AndNot, kMipsS128AndNot)
+
+void InstructionSelector::VisitS128Const(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsS128Zero, g.DefineSameAsFirst(node));
+}
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    VisitRR(this, kMips##Type##Splat, node);                 \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
+SIMD_VISIT_SPLAT(F64x2)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    VisitRRI(this, kMips##Type##ExtractLane##Sign, node);                \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    VisitRRIR(this, kMips##Type##ReplaceLane, node);               \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
+SIMD_VISIT_REPLACE_LANE(F64x2)
+#undef SIMD_VISIT_REPLACE_LANE
+
+#define SIMD_VISIT_UNOP(Name, instruction)            \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, instruction, node);                 \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+
+#define SIMD_VISIT_SHIFT_OP(Name)                     \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRI(this, kMips##Name, node);                \
+  }
+SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
+#undef SIMD_VISIT_SHIFT_OP
+
+#define SIMD_VISIT_BINOP(Name, instruction)           \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, instruction, node);                \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  VisitRRRR(this, kMipsS128Select, node);
+}
+
+namespace {
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+};
+
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kMipsS32x4InterleaveRight},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kMipsS32x4InterleaveLeft},
+    {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
+     kMipsS32x4PackEven},
+    {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
+     kMipsS32x4PackOdd},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
+     kMipsS32x4InterleaveEven},
+    {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
+     kMipsS32x4InterleaveOdd},
+
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kMipsS16x8InterleaveRight},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kMipsS16x8InterleaveLeft},
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kMipsS16x8PackEven},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kMipsS16x8PackOdd},
+    {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
+     kMipsS16x8InterleaveEven},
+    {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
+     kMipsS16x8InterleaveOdd},
+    {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kMipsS16x4Reverse},
+    {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kMipsS16x2Reverse},
+
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kMipsS8x16InterleaveRight},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kMipsS8x16InterleaveLeft},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kMipsS8x16PackEven},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kMipsS8x16PackOdd},
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kMipsS8x16InterleaveEven},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kMipsS8x16InterleaveOdd},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kMipsS8x8Reverse},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kMipsS8x4Reverse},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kMipsS8x2Reverse}};
+
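+// Returns true if the shuffle matches one of the entries in the table above.
+// Lane indices are compared under a mask so that, for a swizzle (single
+// input), indices referring to the second input fold onto the first one. For
+// example, with is_swizzle == true the mask is 15 and the pattern
+// {0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7} matches the
+// kMipsS32x4InterleaveRight entry, whose indices reduce to the same values
+// once the second-input offset (16) is masked away.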
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         ArchOpcode* opcode) {
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
+  for (size_t i = 0; i < num_entries; ++i) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; ++j) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *opcode = entry.opcode;
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
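+// Lowering order for an 8x16 shuffle: first try a pattern from arch_shuffles
+// above, then a byte-wise concatenation of the two inputs, then a 32x4 lane
+// shuffle, and finally the generic kMipsI8x16Shuffle with the sixteen lane
+// indices packed into four 32-bit immediates.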
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  uint8_t shuffle32x4[4];
+  ArchOpcode opcode;
+  if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
+                          is_swizzle, &opcode)) {
+    VisitRRR(this, opcode, node);
+    return;
+  }
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  uint8_t offset;
+  MipsOperandGenerator g(this);
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    Emit(kMipsS8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input1),
+         g.UseRegister(input0), g.UseImmediate(offset));
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    Emit(kMipsS32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseRegister(input1),
+         g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4)));
+    return;
+  }
+  Emit(kMipsI8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
+       g.UseRegister(input1),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12)));
+}
+
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  MipsOperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  // We don't want input 0 or input 1 to be the same as the output, since we
+  // will modify the output before doing the calculation.
+  Emit(kMipsI8x16Swizzle, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsSeb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
+  MipsOperandGenerator g(this);
+  Emit(kMipsSeh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitUniqueRRR(this, kMipsF32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitUniqueRRR(this, kMipsF32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitUniqueRRR(this, kMipsF64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitUniqueRRR(this, kMipsF64x2Pmax, node);
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  MachineOperatorBuilder::Flags flags = MachineOperatorBuilder::kNoFlags;
+  if ((IsMipsArchVariant(kMips32r2) || IsMipsArchVariant(kMips32r6)) &&
+      IsFp64Mode()) {
+    flags |= MachineOperatorBuilder::kFloat64RoundDown |
+             MachineOperatorBuilder::kFloat64RoundUp |
+             MachineOperatorBuilder::kFloat64RoundTruncate |
+             MachineOperatorBuilder::kFloat64RoundTiesEven;
+  }
+
+  return flags | MachineOperatorBuilder::kWord32Ctz |
+         MachineOperatorBuilder::kWord32Popcnt |
+         MachineOperatorBuilder::kInt32DivIsSafe |
+         MachineOperatorBuilder::kUint32DivIsSafe |
+         MachineOperatorBuilder::kWord32ShiftIsSafe |
+         MachineOperatorBuilder::kFloat32RoundDown |
+         MachineOperatorBuilder::kFloat32RoundUp |
+         MachineOperatorBuilder::kFloat32RoundTruncate |
+         MachineOperatorBuilder::kFloat32RoundTiesEven;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  if (IsMipsArchVariant(kMips32r6)) {
+    return MachineOperatorBuilder::AlignmentRequirements::
+        FullUnalignedAccessSupport();
+  } else {
+    DCHECK(IsMipsArchVariant(kLoongson) || IsMipsArchVariant(kMips32r1) ||
+           IsMipsArchVariant(kMips32r2));
+    return MachineOperatorBuilder::AlignmentRequirements::
+        NoUnalignedAccessSupport();
+  }
+}
+
+#undef SIMD_BINOP_LIST
+#undef SIMD_SHIFT_OP_LIST
+#undef SIMD_UNOP_LIST
+#undef SIMD_TYPE_LIST
+#undef TRACE_UNIMPL
+#undef TRACE
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/mips64/code-generator-mips64.cc b/src/compiler/backend/mips64/code-generator-mips64.cc
new file mode 100644
index 0000000..887b7e5
--- /dev/null
+++ b/src/compiler/backend/mips64/code-generator-mips64.cc
@@ -0,0 +1,4684 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/callable.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/mips64/constants-mips64.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/wasm/wasm-code-manager.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// TODO(plind): consider renaming these macros.
+#define TRACE_MSG(msg)                                                      \
+  PrintF("code_gen: \'%s\' in function %s at line %d\n", msg, __FUNCTION__, \
+         __LINE__)
+
+#define TRACE_UNIMPL()                                                       \
+  PrintF("UNIMPLEMENTED code_generator_mips: %s at line %d\n", __FUNCTION__, \
+         __LINE__)
+
+// Adds Mips-specific methods to convert InstructionOperands.
+class MipsOperandConverter final : public InstructionOperandConverter {
+ public:
+  MipsOperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  FloatRegister OutputSingleRegister(size_t index = 0) {
+    return ToSingleRegister(instr_->OutputAt(index));
+  }
+
+  FloatRegister InputSingleRegister(size_t index) {
+    return ToSingleRegister(instr_->InputAt(index));
+  }
+
+  FloatRegister ToSingleRegister(InstructionOperand* op) {
+    // The Single (float) and Double register namespaces are the same on MIPS;
+    // both are typedefs of FPURegister.
+    return ToDoubleRegister(op);
+  }
+
+  Register InputOrZeroRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) {
+      DCHECK_EQ(0, InputInt32(index));
+      return zero_reg;
+    }
+    return InputRegister(index);
+  }
+
+  DoubleRegister InputOrZeroDoubleRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
+
+    return InputDoubleRegister(index);
+  }
+
+  DoubleRegister InputOrZeroSingleRegister(size_t index) {
+    if (instr_->InputAt(index)->IsImmediate()) return kDoubleRegZero;
+
+    return InputSingleRegister(index);
+  }
+
+  Operand InputImmediate(size_t index) {
+    Constant constant = ToConstant(instr_->InputAt(index));
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Operand(constant.ToInt32());
+      case Constant::kInt64:
+        return Operand(constant.ToInt64());
+      case Constant::kFloat32:
+        return Operand::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Operand::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kExternalReference:
+      case Constant::kCompressedHeapObject:
+      case Constant::kHeapObject:
+        // TODO(plind): Maybe we should handle ExtRef & HeapObj here?
+        //    Maybe this is not done on arm due to the constant pool?
+        break;
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kRpoNumber:
+        UNREACHABLE();  // TODO(titzer): RPO immediates on mips?
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputOperand(size_t index) {
+    InstructionOperand* op = instr_->InputAt(index);
+    if (op->IsRegister()) {
+      return Operand(ToRegister(op));
+    }
+    return InputImmediate(index);
+  }
+
+  MemOperand MemoryOperand(size_t* first_index) {
+    const size_t index = *first_index;
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+        break;
+      case kMode_MRI:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_MRR:
+        // TODO(plind): r6 address mode, to be implemented ...
+        UNREACHABLE();
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(size_t index = 0) { return MemoryOperand(&index); }
+
+  MemOperand ToMemOperand(InstructionOperand* op) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand SlotToMemOperand(int slot) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+};
+
+static inline bool HasRegisterInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsRegister();
+}
+
+namespace {
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode)
+      : OutOfLineCode(gen),
+        object_(object),
+        index_(index),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
+    __ Daddu(scratch1_, object_, index_);
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore ra if the frame was elided.
+      __ Push(ra);
+    }
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      __ Pop(ra);
+    }
+  }
+
+ private:
+  Register const object_;
+  Register const index_;
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode const stub_mode_;
+  bool must_save_lr_;
+  Zone* zone_;
+};
+
+#define CREATE_OOL_CLASS(ool_name, tasm_ool_name, T)                 \
+  class ool_name final : public OutOfLineCode {                      \
+   public:                                                           \
+    ool_name(CodeGenerator* gen, T dst, T src1, T src2)              \
+        : OutOfLineCode(gen), dst_(dst), src1_(src1), src2_(src2) {} \
+                                                                     \
+    void Generate() final { __ tasm_ool_name(dst_, src1_, src2_); }  \
+                                                                     \
+   private:                                                          \
+    T const dst_;                                                    \
+    T const src1_;                                                   \
+    T const src2_;                                                   \
+  }
+
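+// Each instantiation below provides an out-of-line fallback used by the
+// corresponding inline Float32/Float64 Max/Min sequence for the inputs the
+// fast path does not handle (e.g. NaNs or equal operands).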
+CREATE_OOL_CLASS(OutOfLineFloat32Max, Float32MaxOutOfLine, FPURegister);
+CREATE_OOL_CLASS(OutOfLineFloat32Min, Float32MinOutOfLine, FPURegister);
+CREATE_OOL_CLASS(OutOfLineFloat64Max, Float64MaxOutOfLine, FPURegister);
+CREATE_OOL_CLASS(OutOfLineFloat64Min, Float64MinOutOfLine, FPURegister);
+
+#undef CREATE_OOL_CLASS
+
+Condition FlagsConditionToConditionCmp(FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kSignedLessThan:
+      return lt;
+    case kSignedGreaterThanOrEqual:
+      return ge;
+    case kSignedLessThanOrEqual:
+      return le;
+    case kSignedGreaterThan:
+      return gt;
+    case kUnsignedLessThan:
+      return lo;
+    case kUnsignedGreaterThanOrEqual:
+      return hs;
+    case kUnsignedLessThanOrEqual:
+      return ls;
+    case kUnsignedGreaterThan:
+      return hi;
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      break;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+Condition FlagsConditionToConditionTst(FlagsCondition condition) {
+  switch (condition) {
+    case kNotEqual:
+      return ne;
+    case kEqual:
+      return eq;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+Condition FlagsConditionToConditionOvf(FlagsCondition condition) {
+  switch (condition) {
+    case kOverflow:
+      return ne;
+    case kNotOverflow:
+      return eq;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
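+// Negated conditions are returned as the corresponding FPU condition with
+// *predicate set to false (e.g. kNotEqual maps to EQ with *predicate ==
+// false), so the caller tests the same flag and branches on its inverse.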
+FPUCondition FlagsConditionToConditionCmpFPU(bool* predicate,
+                                             FlagsCondition condition) {
+  switch (condition) {
+    case kEqual:
+      *predicate = true;
+      return EQ;
+    case kNotEqual:
+      *predicate = false;
+      return EQ;
+    case kUnsignedLessThan:
+      *predicate = true;
+      return OLT;
+    case kUnsignedGreaterThanOrEqual:
+      *predicate = false;
+      return OLT;
+    case kUnsignedLessThanOrEqual:
+      *predicate = true;
+      return OLE;
+    case kUnsignedGreaterThan:
+      *predicate = false;
+      return OLE;
+    case kUnorderedEqual:
+    case kUnorderedNotEqual:
+      *predicate = true;
+      break;
+    default:
+      *predicate = true;
+      break;
+  }
+  UNREACHABLE();
+}
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode, Instruction* instr,
+                                   MipsOperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->And(value, value, kSpeculationPoisonRegister);
+  }
+}
+
+}  // namespace
+
+#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr)          \
+  do {                                                   \
+    __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
+    __ sync();                                           \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr)               \
+  do {                                                         \
+    __ sync();                                                 \
+    __ asm_instr(i.InputOrZeroRegister(2), i.MemoryOperand()); \
+    __ sync();                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(load_linked, store_conditional, bin_instr)       \
+  do {                                                                         \
+    Label binop;                                                               \
+    __ Daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    __ sync();                                                                 \
+    __ bind(&binop);                                                           \
+    __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));     \
+    __ bin_instr(i.TempRegister(1), i.OutputRegister(0),                       \
+                 Operand(i.InputRegister(2)));                                 \
+    __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));          \
+    __ sync();                                                                 \
+  } while (0)
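+// The macro above implements a read-modify-write retry loop: load_linked
+// reads the word and opens a linked access, bin_instr operates on the loaded
+// value, and store_conditional attempts the write, leaving a non-zero value
+// in its register operand only if the location was untouched in between; a
+// zero value restarts the loop.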
+
+#define ASSEMBLE_ATOMIC_BINOP_EXT(load_linked, store_conditional, sign_extend, \
+                                  size, bin_instr, representation)             \
+  do {                                                                         \
+    Label binop;                                                               \
+    __ daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    if (representation == 32) {                                                \
+      __ andi(i.TempRegister(3), i.TempRegister(0), 0x3);                      \
+    } else {                                                                   \
+      DCHECK_EQ(representation, 64);                                           \
+      __ andi(i.TempRegister(3), i.TempRegister(0), 0x7);                      \
+    }                                                                          \
+    __ Dsubu(i.TempRegister(0), i.TempRegister(0),                             \
+             Operand(i.TempRegister(3)));                                      \
+    __ sll(i.TempRegister(3), i.TempRegister(3), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&binop);                                                           \
+    __ load_linked(i.TempRegister(1), MemOperand(i.TempRegister(0), 0));       \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(1), i.TempRegister(3),  \
+                   size, sign_extend);                                         \
+    __ bin_instr(i.TempRegister(2), i.OutputRegister(0),                       \
+                 Operand(i.InputRegister(2)));                                 \
+    __ InsertBits(i.TempRegister(1), i.TempRegister(2), i.TempRegister(3),     \
+                  size);                                                       \
+    __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&binop, eq, i.TempRegister(1), Operand(zero_reg));          \
+    __ sync();                                                                 \
+  } while (0)
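+// The _EXT variant performs the same retry loop for sub-word widths: the low
+// address bits give the byte offset within the aligned word, TempRegister(3)
+// holds the corresponding bit position (offset * 8), and ExtractBits /
+// InsertBits confine the operation to that size-bit field. For example, on a
+// little-endian target an 8-bit operation at byte offset 3 of a 32-bit word
+// works on bits 24..31 of the loaded word.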
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_linked, store_conditional)       \
+  do {                                                                         \
+    Label exchange;                                                            \
+    __ sync();                                                                 \
+    __ bind(&exchange);                                                        \
+    __ daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));     \
+    __ mov(i.TempRegister(1), i.InputRegister(2));                             \
+    __ store_conditional(i.TempRegister(1), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&exchange, eq, i.TempRegister(1), Operand(zero_reg));       \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(                                  \
+    load_linked, store_conditional, sign_extend, size, representation)         \
+  do {                                                                         \
+    Label exchange;                                                            \
+    __ daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    if (representation == 32) {                                                \
+      __ andi(i.TempRegister(1), i.TempRegister(0), 0x3);                      \
+    } else {                                                                   \
+      DCHECK_EQ(representation, 64);                                           \
+      __ andi(i.TempRegister(1), i.TempRegister(0), 0x7);                      \
+    }                                                                          \
+    __ Dsubu(i.TempRegister(0), i.TempRegister(0),                             \
+             Operand(i.TempRegister(1)));                                      \
+    __ sll(i.TempRegister(1), i.TempRegister(1), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&exchange);                                                        \
+    __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));       \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ InsertBits(i.TempRegister(2), i.InputRegister(2), i.TempRegister(1),    \
+                  size);                                                       \
+    __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&exchange, eq, i.TempRegister(2), Operand(zero_reg));       \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_linked,                  \
+                                                 store_conditional)            \
+  do {                                                                         \
+    Label compareExchange;                                                     \
+    Label exit;                                                                \
+    __ daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    __ sync();                                                                 \
+    __ bind(&compareExchange);                                                 \
+    __ load_linked(i.OutputRegister(0), MemOperand(i.TempRegister(0), 0));     \
+    __ BranchShort(&exit, ne, i.InputRegister(2),                              \
+                   Operand(i.OutputRegister(0)));                              \
+    __ mov(i.TempRegister(2), i.InputRegister(3));                             \
+    __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&compareExchange, eq, i.TempRegister(2),                    \
+                   Operand(zero_reg));                                         \
+    __ bind(&exit);                                                            \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(                          \
+    load_linked, store_conditional, sign_extend, size, representation)         \
+  do {                                                                         \
+    Label compareExchange;                                                     \
+    Label exit;                                                                \
+    __ daddu(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
+    if (representation == 32) {                                                \
+      __ andi(i.TempRegister(1), i.TempRegister(0), 0x3);                      \
+    } else {                                                                   \
+      DCHECK_EQ(representation, 64);                                           \
+      __ andi(i.TempRegister(1), i.TempRegister(0), 0x7);                      \
+    }                                                                          \
+    __ Dsubu(i.TempRegister(0), i.TempRegister(0),                             \
+             Operand(i.TempRegister(1)));                                      \
+    __ sll(i.TempRegister(1), i.TempRegister(1), 3);                           \
+    __ sync();                                                                 \
+    __ bind(&compareExchange);                                                 \
+    __ load_linked(i.TempRegister(2), MemOperand(i.TempRegister(0), 0));       \
+    __ ExtractBits(i.OutputRegister(0), i.TempRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ ExtractBits(i.InputRegister(2), i.InputRegister(2), i.TempRegister(1),  \
+                   size, sign_extend);                                         \
+    __ BranchShort(&exit, ne, i.InputRegister(2),                              \
+                   Operand(i.OutputRegister(0)));                              \
+    __ InsertBits(i.TempRegister(2), i.InputRegister(3), i.TempRegister(1),    \
+                  size);                                                       \
+    __ store_conditional(i.TempRegister(2), MemOperand(i.TempRegister(0), 0)); \
+    __ BranchShort(&compareExchange, eq, i.TempRegister(2),                    \
+                   Operand(zero_reg));                                         \
+    __ bind(&exit);                                                            \
+    __ sync();                                                                 \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                        \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                             \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                       \
+                            i.InputDoubleRegister(1));                      \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
+    /* Move the result into the double result register. */                  \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                         \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 1, kScratchReg);                             \
+    __ MovToFloatParameter(i.InputDoubleRegister(0));                       \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
+    /* Move the result into the double result register. */                  \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+  } while (0)
+
+#define ASSEMBLE_F64X2_ARITHMETIC_BINOP(op)                     \
+  do {                                                          \
+    __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+          i.InputSimd128Register(1));                           \
+  } while (0)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ mov(sp, fp);
+  __ Pop(ra, fp);
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ Ld(ra, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
+    __ Ld(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ Ld(scratch3, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ Branch(&done, ne, scratch3,
+            Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ Ld(caller_args_count_reg,
+        MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
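+// Adjusts sp by the difference between new_slot_above_sp and the current
+// SP-to-FP slot distance (plus the fixed slots above fp): a positive delta
+// grows the stack with Dsubu, a negative one shrinks it when allow_shrinkage
+// is set, and the frame state records the change either way.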
+void AdjustStackPointerForTailCall(TurboAssembler* tasm,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    tasm->Dsubu(sp, sp, stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    tasm->Daddu(sp, sp, -stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  __ ComputeCodeStartAddress(kScratchReg);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart,
+            kJavaScriptCallCodeStartRegister, Operand(kScratchReg));
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+//    1. read from memory the word that contains that bit, which can be found in
+//       the flags in the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ Ld(kScratchReg, MemOperand(kJavaScriptCallCodeStartRegister, offset));
+  __ Lw(kScratchReg,
+        FieldMemOperand(kScratchReg,
+                        CodeDataContainer::kKindSpecificFlagsOffset));
+  __ And(kScratchReg, kScratchReg,
+         Operand(1 << Code::kMarkedForDeoptimizationBit));
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, ne, kScratchReg, Operand(zero_reg));
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  // Calculate a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  //    difference = (current - expected) | (expected - current)
+  //    poison = ~(difference >> (kBitsPerSystemPointer - 1))
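+  // If the PC is the expected one, the difference is zero, the shift yields
+  // zero, and the nor produces an all-ones mask (ANDing with it is a no-op).
+  // Otherwise the sign bit of the or-ed differences is set, the arithmetic
+  // shift smears it into all ones, and the nor clears the mask so that
+  // poisoned values become zero.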
+  __ ComputeCodeStartAddress(kScratchReg);
+  __ Move(kSpeculationPoisonRegister, kScratchReg);
+  __ subu(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+          kJavaScriptCallCodeStartRegister);
+  __ subu(kJavaScriptCallCodeStartRegister, kJavaScriptCallCodeStartRegister,
+          kScratchReg);
+  __ or_(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kJavaScriptCallCodeStartRegister);
+  __ sra(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kBitsPerSystemPointer - 1);
+  __ nor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
+         kSpeculationPoisonRegister);
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ And(sp, sp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ daddiu(reg, reg, Code::kHeaderSize - kHeapObjectTag);
+        __ Call(reg);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        __ daddiu(kScratchReg, i.InputRegister(0), 0);
+        __ Call(kScratchReg);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ daddiu(reg, reg, Code::kHeaderSize - kHeapObjectTag);
+        __ Jump(reg);
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        __ daddiu(kScratchReg, i.InputRegister(0), 0);
+        __ Jump(kScratchReg);
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      __ Jump(reg);
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ Ld(kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ Assert(eq, AbortReason::kWrongFunctionContext, cp,
+                  Operand(kScratchReg));
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == a2, "ABI mismatch");
+      __ Ld(a2, FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ Daddu(a2, a2, Operand(Code::kHeaderSize - kHeapObjectTag));
+      __ Call(a2);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters, kScratchReg);
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label start_call;
+      bool isWasmCapiFunction =
+          linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+      // from start_call to return address.
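+      // This constant is the number of bytes emitted between start_call and
+      // the return address of the CallCFunction below; the CHECK_EQ after
+      // the call verifies that it stays in sync with the generated code.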
+      int offset = __ root_array_available() ? 76 : 88;
+#if V8_HOST_ARCH_MIPS64
+      if (__ emit_debug_code()) {
+        offset += 16;
+      }
+#endif
+      if (isWasmCapiFunction) {
+        // Put the return address in a stack slot.
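+        // nal puts the address of the instruction after its delay slot into
+        // ra; adding offset - 8 turns that into the return address of the
+        // CallCFunction below, which is stored as the exit frame's calling
+        // PC before ra is restored.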
+        __ mov(kScratchReg, ra);
+        __ bind(&start_call);
+        __ nal();
+        __ nop();
+        __ Daddu(ra, ra, offset - 8);  // 8 = nop + nal
+        __ sd(ra, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+        __ mov(ra, kScratchReg);
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      if (isWasmCapiFunction) {
+        CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement SP delta to match the change of stack
+      // pointer in CallCFunction. However, for certain architectures (e.g.
+      // ARM), there may be more strict alignment requirement, causing old SP
+      // to be saved on the stack. In those cases, we can not calculate the SP
+      // delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
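+        // RequiredStackSizeForCallerSaved recomputes the bytes pushed by
+        // kArchSaveCallerRegisters, so the SP delta matches again before the
+        // matching kArchRestoreCallerRegisters pops them.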
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == a0);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ stop();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+      break;
+    case kArchNop:
+    case kArchThrowTerminator:
+      // don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ Branch(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchStackPointerGreaterThan:
+      // Pseudo-instruction used for cmp/branch. No opcode emitted here.
+      break;
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchFramePointer:
+      __ mov(i.OutputRegister(), fp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ Ld(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ mov(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode());
+      break;
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, index, value,
+                                                   scratch0, scratch1, mode,
+                                                   DetermineStubCallMode());
+      __ Daddu(kScratchReg, object, index);
+      __ Sd(value, MemOperand(kScratchReg));
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask, ne,
+                       ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base_reg = offset.from_stack_pointer() ? sp : fp;
+      __ Daddu(i.OutputRegister(), base_reg, Operand(offset.offset()));
+      int alignment = i.InputInt32(1);
+      DCHECK(alignment == 0 || alignment == 4 || alignment == 8 ||
+             alignment == 16);
+      if (FLAG_debug_code && alignment > 0) {
+        // Verify that the output_register is properly aligned
+        __ And(kScratchReg, i.OutputRegister(),
+               Operand(kSystemPointerSize - 1));
+        __ Assert(eq, AbortReason::kAllocationIsNotDoubleAligned, kScratchReg,
+                  Operand(zero_reg));
+      }
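+      // If the slot address is not aligned to the requested boundary, bump
+      // the returned address up to the next aligned address. For 16-byte
+      // alignment a single pointer-size bump is sufficient.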
+      if (alignment == 2 * kSystemPointerSize) {
+        Label done;
+        __ Daddu(kScratchReg, base_reg, Operand(offset.offset()));
+        __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
+        __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
+        __ Daddu(i.OutputRegister(), i.OutputRegister(), kSystemPointerSize);
+        __ bind(&done);
+      } else if (alignment > 2 * kSystemPointerSize) {
+        Label done;
+        __ Daddu(kScratchReg, base_reg, Operand(offset.offset()));
+        __ And(kScratchReg, kScratchReg, Operand(alignment - 1));
+        __ BranchShort(&done, eq, kScratchReg, Operand(zero_reg));
+        __ li(kScratchReg2, alignment);
+        __ Dsubu(kScratchReg2, kScratchReg2, Operand(kScratchReg));
+        __ Daddu(i.OutputRegister(), i.OutputRegister(), kScratchReg2);
+        __ bind(&done);
+      }
+
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      __ And(i.OutputRegister(), i.InputRegister(0),
+             kSpeculationPoisonRegister);
+      break;
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kMips64Add:
+      __ Addu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Dadd:
+      __ Daddu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64DaddOvf:
+      __ DaddOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                      kScratchReg);
+      break;
+    case kMips64Sub:
+      __ Subu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Dsub:
+      __ Dsubu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64DsubOvf:
+      __ DsubOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                      kScratchReg);
+      break;
+    case kMips64Mul:
+      __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64MulOvf:
+      __ MulOverflow(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1),
+                     kScratchReg);
+      break;
+    case kMips64MulHigh:
+      __ Mulh(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64MulHighU:
+      __ Mulhu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64DMulHigh:
+      __ Dmulh(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Div:
+      __ Div(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (kArchVariant == kMips64r6) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMips64DivU:
+      __ Divu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (kArchVariant == kMips64r6) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMips64Mod:
+      __ Mod(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64ModU:
+      __ Modu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Dmul:
+      __ Dmul(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Ddiv:
+      __ Ddiv(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (kArchVariant == kMips64r6) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMips64DdivU:
+      __ Ddivu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      if (kArchVariant == kMips64r6) {
+        __ selnez(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        __ Movz(i.OutputRegister(), i.InputRegister(1), i.InputRegister(1));
+      }
+      break;
+    case kMips64Dmod:
+      __ Dmod(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64DmodU:
+      __ Dmodu(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Dlsa:
+      DCHECK(instr->InputAt(2)->IsImmediate());
+      __ Dlsa(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+              i.InputInt8(2));
+      break;
+    case kMips64Lsa:
+      DCHECK(instr->InputAt(2)->IsImmediate());
+      __ Lsa(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.InputInt8(2));
+      break;
+    case kMips64And:
+      __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64And32:
+      __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      __ sll(i.OutputRegister(), i.OutputRegister(), 0x0);
+      break;
+    case kMips64Or:
+      __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Or32:
+      __ Or(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      __ sll(i.OutputRegister(), i.OutputRegister(), 0x0);
+      break;
+    case kMips64Nor:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      } else {
+        DCHECK_EQ(0, i.InputOperand(1).immediate());
+        __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg);
+      }
+      break;
+    case kMips64Nor32:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ Nor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+        __ sll(i.OutputRegister(), i.OutputRegister(), 0x0);
+      } else {
+        DCHECK_EQ(0, i.InputOperand(1).immediate());
+        __ Nor(i.OutputRegister(), i.InputRegister(0), zero_reg);
+        __ sll(i.OutputRegister(), i.OutputRegister(), 0x0);
+      }
+      break;
+    case kMips64Xor:
+      __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Xor32:
+      __ Xor(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      __ sll(i.OutputRegister(), i.OutputRegister(), 0x0);
+      break;
+    case kMips64Clz:
+      __ Clz(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMips64Dclz:
+      __ dclz(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMips64Ctz: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Ctz(dst, src);
+    } break;
+    case kMips64Dctz: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Dctz(dst, src);
+    } break;
+    case kMips64Popcnt: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Popcnt(dst, src);
+    } break;
+    case kMips64Dpopcnt: {
+      Register src = i.InputRegister(0);
+      Register dst = i.OutputRegister();
+      __ Dpopcnt(dst, src);
+    } break;
+    case kMips64Shl:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ sllv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        __ sll(i.OutputRegister(), i.InputRegister(0),
+               static_cast<uint16_t>(imm));
+      }
+      break;
+    case kMips64Shr:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ sll(i.InputRegister(0), i.InputRegister(0), 0x0);
+        __ srlv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        __ sll(i.OutputRegister(), i.InputRegister(0), 0x0);
+        __ srl(i.OutputRegister(), i.OutputRegister(),
+               static_cast<uint16_t>(imm));
+      }
+      break;
+    case kMips64Sar:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ sll(i.InputRegister(0), i.InputRegister(0), 0x0);
+        __ srav(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        __ sll(i.OutputRegister(), i.InputRegister(0), 0x0);
+        __ sra(i.OutputRegister(), i.OutputRegister(),
+               static_cast<uint16_t>(imm));
+      }
+      break;
+    case kMips64Ext:
+      __ Ext(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+             i.InputInt8(2));
+      break;
+    case kMips64Ins:
+      if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) {
+        __ Ins(i.OutputRegister(), zero_reg, i.InputInt8(1), i.InputInt8(2));
+      } else {
+        __ Ins(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+               i.InputInt8(2));
+      }
+      break;
+    case kMips64Dext: {
+      __ Dext(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+              i.InputInt8(2));
+      break;
+    }
+    case kMips64Dins:
+      if (instr->InputAt(1)->IsImmediate() && i.InputInt8(1) == 0) {
+        __ Dins(i.OutputRegister(), zero_reg, i.InputInt8(1), i.InputInt8(2));
+      } else {
+        __ Dins(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
+                i.InputInt8(2));
+      }
+      break;
+    case kMips64Dshl:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ dsllv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        if (imm < 32) {
+          __ dsll(i.OutputRegister(), i.InputRegister(0),
+                  static_cast<uint16_t>(imm));
+        } else {
+          __ dsll32(i.OutputRegister(), i.InputRegister(0),
+                    static_cast<uint16_t>(imm - 32));
+        }
+      }
+      break;
+    case kMips64Dshr:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ dsrlv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        if (imm < 32) {
+          __ dsrl(i.OutputRegister(), i.InputRegister(0),
+                  static_cast<uint16_t>(imm));
+        } else {
+          __ dsrl32(i.OutputRegister(), i.InputRegister(0),
+                    static_cast<uint16_t>(imm - 32));
+        }
+      }
+      break;
+    case kMips64Dsar:
+      if (instr->InputAt(1)->IsRegister()) {
+        __ dsrav(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        int64_t imm = i.InputOperand(1).immediate();
+        if (imm < 32) {
+          __ dsra(i.OutputRegister(), i.InputRegister(0), imm);
+        } else {
+          __ dsra32(i.OutputRegister(), i.InputRegister(0), imm - 32);
+        }
+      }
+      break;
+    case kMips64Ror:
+      __ Ror(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Dror:
+      __ Dror(i.OutputRegister(), i.InputRegister(0), i.InputOperand(1));
+      break;
+    case kMips64Tst:
+      __ And(kScratchReg, i.InputRegister(0), i.InputOperand(1));
+      // Pseudo-instruction used for cmp/branch. No opcode emitted here.
+      break;
+    case kMips64Cmp:
+      // Pseudo-instruction used for cmp/branch. No opcode emitted here.
+      break;
+    case kMips64Mov:
+      // TODO(plind): Should we combine mov/li like this, or use separate instr?
+      //    - Also see x64 ASSEMBLE_BINOP & RegisterOrOperandType
+      if (HasRegisterInput(instr, 0)) {
+        __ mov(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ li(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+
+    case kMips64CmpS: {
+      FPURegister left = i.InputOrZeroSingleRegister(0);
+      FPURegister right = i.InputOrZeroSingleRegister(1);
+      bool predicate;
+      FPUCondition cc =
+          FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
+
+      if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+          !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+
+      __ CompareF32(cc, left, right);
+    } break;
+    case kMips64AddS:
+      // TODO(plind): add special case: combine mult & add.
+      __ add_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64SubS:
+      __ sub_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64MulS:
+      // TODO(plind): add special case: right op is -1.0, see arm port.
+      __ mul_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64DivS:
+      __ div_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64ModS: {
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      __ PrepareCallCFunction(0, 2, kScratchReg);
+      __ MovToFloatParameters(i.InputDoubleRegister(0),
+                              i.InputDoubleRegister(1));
+      // TODO(balazs.kilvady): implement mod_two_floats_operation(isolate())
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      // Move the result from the FP result register to the output register.
+      __ MovFromFloatResult(i.OutputSingleRegister());
+      break;
+    }
+    case kMips64AbsS:
+      if (kArchVariant == kMips64r6) {
+        __ abs_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      } else {
+        __ mfc1(kScratchReg, i.InputSingleRegister(0));
+        __ Dins(kScratchReg, zero_reg, 31, 1);
+        __ mtc1(kScratchReg, i.OutputSingleRegister());
+      }
+      break;
+    case kMips64NegS:
+      __ Neg_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    case kMips64SqrtS: {
+      __ sqrt_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64MaxS:
+      __ max_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64MinS:
+      __ min_s(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64CmpD: {
+      FPURegister left = i.InputOrZeroDoubleRegister(0);
+      FPURegister right = i.InputOrZeroDoubleRegister(1);
+      bool predicate;
+      FPUCondition cc =
+          FlagsConditionToConditionCmpFPU(&predicate, instr->flags_condition());
+      if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+          !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ CompareF64(cc, left, right);
+    } break;
+    case kMips64AddD:
+      // TODO(plind): add special case: combine mult & add.
+      __ add_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64SubD:
+      __ sub_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64MulD:
+      // TODO(plind): add special case: right op is -1.0, see arm port.
+      __ mul_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64DivD:
+      __ div_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64ModD: {
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      FrameScope scope(tasm(), StackFrame::MANUAL);
+      __ PrepareCallCFunction(0, 2, kScratchReg);
+      __ MovToFloatParameters(i.InputDoubleRegister(0),
+                              i.InputDoubleRegister(1));
+      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
+      // Move the result from the FP result register to the output register.
+      __ MovFromFloatResult(i.OutputDoubleRegister());
+      break;
+    }
+    case kMips64AbsD:
+      if (kArchVariant == kMips64r6) {
+        __ abs_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ dmfc1(kScratchReg, i.InputDoubleRegister(0));
+        __ Dins(kScratchReg, zero_reg, 63, 1);
+        __ dmtc1(kScratchReg, i.OutputDoubleRegister());
+      }
+      break;
+    case kMips64NegD:
+      __ Neg_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64SqrtD: {
+      __ sqrt_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64MaxD:
+      __ max_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64MinD:
+      __ min_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
+               i.InputDoubleRegister(1));
+      break;
+    case kMips64Float64RoundDown: {
+      __ Floor_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64Float32RoundDown: {
+      __ Floor_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMips64Float64RoundTruncate: {
+      __ Trunc_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64Float32RoundTruncate: {
+      __ Trunc_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMips64Float64RoundUp: {
+      __ Ceil_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64Float32RoundUp: {
+      __ Ceil_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMips64Float64RoundTiesEven: {
+      __ Round_d_d(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    }
+    case kMips64Float32RoundTiesEven: {
+      __ Round_s_s(i.OutputSingleRegister(), i.InputSingleRegister(0));
+      break;
+    }
+    case kMips64Float32Max: {
+      FPURegister dst = i.OutputSingleRegister();
+      FPURegister src1 = i.InputSingleRegister(0);
+      FPURegister src2 = i.InputSingleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat32Max>(this, dst, src1, src2);
+      __ Float32Max(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMips64Float64Max: {
+      FPURegister dst = i.OutputDoubleRegister();
+      FPURegister src1 = i.InputDoubleRegister(0);
+      FPURegister src2 = i.InputDoubleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat64Max>(this, dst, src1, src2);
+      __ Float64Max(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMips64Float32Min: {
+      FPURegister dst = i.OutputSingleRegister();
+      FPURegister src1 = i.InputSingleRegister(0);
+      FPURegister src2 = i.InputSingleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat32Min>(this, dst, src1, src2);
+      __ Float32Min(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMips64Float64Min: {
+      FPURegister dst = i.OutputDoubleRegister();
+      FPURegister src1 = i.InputDoubleRegister(0);
+      FPURegister src2 = i.InputDoubleRegister(1);
+      auto ool = zone()->New<OutOfLineFloat64Min>(this, dst, src1, src2);
+      __ Float64Min(dst, src1, src2, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kMips64Float64SilenceNaN:
+      __ FPUCanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64CvtSD:
+      __ cvt_s_d(i.OutputSingleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64CvtDS:
+      __ cvt_d_s(i.OutputDoubleRegister(), i.InputSingleRegister(0));
+      break;
+    case kMips64CvtDW: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ mtc1(i.InputRegister(0), scratch);
+      __ cvt_d_w(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMips64CvtSW: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ mtc1(i.InputRegister(0), scratch);
+      __ cvt_s_w(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMips64CvtSUw: {
+      __ Cvt_s_uw(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    }
+    case kMips64CvtSL: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ dmtc1(i.InputRegister(0), scratch);
+      __ cvt_s_l(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMips64CvtDL: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ dmtc1(i.InputRegister(0), scratch);
+      __ cvt_d_l(i.OutputDoubleRegister(), scratch);
+      break;
+    }
+    case kMips64CvtDUw: {
+      __ Cvt_d_uw(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    }
+    case kMips64CvtDUl: {
+      __ Cvt_d_ul(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    }
+    case kMips64CvtSUl: {
+      __ Cvt_s_ul(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    }
+    case kMips64FloorWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ floor_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64CeilWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ ceil_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64RoundWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ round_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64TruncWD: {
+      FPURegister scratch = kScratchDoubleReg;
+      // Other arches use round to zero here, so we follow.
+      __ trunc_w_d(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64FloorWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ floor_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64CeilWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ ceil_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64RoundWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ round_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      break;
+    }
+    case kMips64TruncWS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ trunc_w_s(scratch, i.InputDoubleRegister(0));
+      __ mfc1(i.OutputRegister(), scratch);
+      // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+      // because INT32_MIN allows easier out-of-bounds detection.
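+      // The FPU result is INT32_MAX in that case; adding 1 wraps it to
+      // INT32_MIN, slt detects the wrap-around, and Movn then replaces the
+      // saturated result with INT32_MIN.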
+      __ addiu(kScratchReg, i.OutputRegister(), 1);
+      __ slt(kScratchReg2, kScratchReg, i.OutputRegister());
+      __ Movn(i.OutputRegister(), kScratchReg, kScratchReg2);
+      break;
+    }
+    case kMips64TruncLS: {
+      FPURegister scratch = kScratchDoubleReg;
+      Register tmp_fcsr = kScratchReg;
+      Register result = kScratchReg2;
+
+      bool load_status = instr->OutputCount() > 1;
+      if (load_status) {
+        // Save FCSR.
+        __ cfc1(tmp_fcsr, FCSR);
+        // Clear FPU flags.
+        __ ctc1(zero_reg, FCSR);
+      }
+      // Other arches use round to zero here, so we follow.
+      __ trunc_l_s(scratch, i.InputDoubleRegister(0));
+      __ dmfc1(i.OutputRegister(), scratch);
+      if (load_status) {
+        __ cfc1(result, FCSR);
+        // Check for overflow and NaNs.
+        __ andi(result, result,
+                (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask));
+        __ Slt(result, zero_reg, result);
+        __ xori(result, result, 1);
+        __ mov(i.OutputRegister(1), result);
+        // Restore FCSR
+        __ ctc1(tmp_fcsr, FCSR);
+      }
+      break;
+    }
+    case kMips64TruncLD: {
+      FPURegister scratch = kScratchDoubleReg;
+      Register tmp_fcsr = kScratchReg;
+      Register result = kScratchReg2;
+
+      bool load_status = instr->OutputCount() > 1;
+      if (load_status) {
+        // Save FCSR.
+        __ cfc1(tmp_fcsr, FCSR);
+        // Clear FPU flags.
+        __ ctc1(zero_reg, FCSR);
+      }
+      // Other arches use round to zero here, so we follow.
+      __ trunc_l_d(scratch, i.InputDoubleRegister(0));
+      __ dmfc1(i.OutputRegister(0), scratch);
+      if (load_status) {
+        __ cfc1(result, FCSR);
+        // Check for overflow and NaNs.
+        __ andi(result, result,
+                (kFCSROverflowFlagMask | kFCSRInvalidOpFlagMask));
+        __ Slt(result, zero_reg, result);
+        __ xori(result, result, 1);
+        __ mov(i.OutputRegister(1), result);
+        // Restore FCSR
+        __ ctc1(tmp_fcsr, FCSR);
+      }
+      break;
+    }
+    case kMips64TruncUwD: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Trunc_uw_d(i.OutputRegister(), i.InputDoubleRegister(0), scratch);
+      break;
+    }
+    case kMips64TruncUwS: {
+      FPURegister scratch = kScratchDoubleReg;
+      __ Trunc_uw_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch);
+      // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+      // because 0 allows easier out-of-bounds detection.
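+      // Adding 1 to a saturated result of UINT32_MAX wraps kScratchReg to
+      // zero, and Movz then replaces the result with 0.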
+      __ addiu(kScratchReg, i.OutputRegister(), 1);
+      __ Movz(i.OutputRegister(), zero_reg, kScratchReg);
+      break;
+    }
+    case kMips64TruncUlS: {
+      FPURegister scratch = kScratchDoubleReg;
+      Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
+      __ Trunc_ul_s(i.OutputRegister(), i.InputDoubleRegister(0), scratch,
+                    result);
+      break;
+    }
+    case kMips64TruncUlD: {
+      FPURegister scratch = kScratchDoubleReg;
+      Register result = instr->OutputCount() > 1 ? i.OutputRegister(1) : no_reg;
+      __ Trunc_ul_d(i.OutputRegister(0), i.InputDoubleRegister(0), scratch,
+                    result);
+      break;
+    }
+    case kMips64BitcastDL:
+      __ dmfc1(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64BitcastLD:
+      __ dmtc1(i.InputRegister(0), i.OutputDoubleRegister());
+      break;
+    case kMips64Float64ExtractLowWord32:
+      __ FmoveLow(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64Float64ExtractHighWord32:
+      __ FmoveHigh(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kMips64Float64InsertLowWord32:
+      __ FmoveLow(i.OutputDoubleRegister(), i.InputRegister(1));
+      break;
+    case kMips64Float64InsertHighWord32:
+      __ FmoveHigh(i.OutputDoubleRegister(), i.InputRegister(1));
+      break;
+    // ... more basic instructions ...
+
+    case kMips64Seb:
+      __ seb(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMips64Seh:
+      __ seh(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kMips64Lbu:
+      __ Lbu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Lb:
+      __ Lb(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Sb:
+      __ Sb(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Lhu:
+      __ Lhu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Ulhu:
+      __ Ulhu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Lh:
+      __ Lh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Ulh:
+      __ Ulh(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Sh:
+      __ Sh(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Ush:
+      __ Ush(i.InputOrZeroRegister(2), i.MemoryOperand(), kScratchReg);
+      break;
+    case kMips64Lw:
+      __ Lw(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Ulw:
+      __ Ulw(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Lwu:
+      __ Lwu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Ulwu:
+      __ Ulwu(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Ld:
+      __ Ld(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Uld:
+      __ Uld(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kMips64Sw:
+      __ Sw(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Usw:
+      __ Usw(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Sd:
+      __ Sd(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Usd:
+      __ Usd(i.InputOrZeroRegister(2), i.MemoryOperand());
+      break;
+    case kMips64Lwc1: {
+      __ Lwc1(i.OutputSingleRegister(), i.MemoryOperand());
+      break;
+    }
+    case kMips64Ulwc1: {
+      __ Ulwc1(i.OutputSingleRegister(), i.MemoryOperand(), kScratchReg);
+      break;
+    }
+    case kMips64Swc1: {
+      size_t index = 0;
+      MemOperand operand = i.MemoryOperand(&index);
+      FPURegister ft = i.InputOrZeroSingleRegister(index);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Swc1(ft, operand);
+      break;
+    }
+    case kMips64Uswc1: {
+      size_t index = 0;
+      MemOperand operand = i.MemoryOperand(&index);
+      FPURegister ft = i.InputOrZeroSingleRegister(index);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Uswc1(ft, operand, kScratchReg);
+      break;
+    }
+    case kMips64Ldc1:
+      __ Ldc1(i.OutputDoubleRegister(), i.MemoryOperand());
+      break;
+    case kMips64Uldc1:
+      __ Uldc1(i.OutputDoubleRegister(), i.MemoryOperand(), kScratchReg);
+      break;
+    case kMips64Sdc1: {
+      FPURegister ft = i.InputOrZeroDoubleRegister(2);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Sdc1(ft, i.MemoryOperand());
+      break;
+    }
+    case kMips64Usdc1: {
+      FPURegister ft = i.InputOrZeroDoubleRegister(2);
+      if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
+        __ Move(kDoubleRegZero, 0.0);
+      }
+      __ Usdc1(ft, i.MemoryOperand(), kScratchReg);
+      break;
+    }
+    case kMips64Sync: {
+      __ sync();
+      break;
+    }
+    case kMips64Push:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Sdc1(i.InputDoubleRegister(0), MemOperand(sp, -kDoubleSize));
+        __ Subu(sp, sp, Operand(kDoubleSize));
+        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+      } else {
+        __ Push(i.InputRegister(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      break;
+    case kMips64Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Ldc1(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ Lwc1(
+              i.OutputSingleRegister(0),
+              MemOperand(fp, offset + kLessSignificantWordInDoublewordOffset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ ld_b(i.OutputSimd128Register(), MemOperand(fp, offset));
+        }
+      } else {
+        __ Ld(i.OutputRegister(0), MemOperand(fp, offset));
+      }
+      break;
+    }
+    case kMips64StackClaim: {
+      __ Dsubu(sp, sp, Operand(i.InputInt32(0)));
+      frame_access_state()->IncreaseSPDelta(i.InputInt32(0) /
+                                            kSystemPointerSize);
+      break;
+    }
+    case kMips64StoreToStackSlot: {
+      if (instr->InputAt(0)->IsFPRegister()) {
+        if (instr->InputAt(0)->IsSimd128Register()) {
+          CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+          __ st_b(i.InputSimd128Register(0), MemOperand(sp, i.InputInt32(1)));
+        } else {
+          __ Sdc1(i.InputDoubleRegister(0), MemOperand(sp, i.InputInt32(1)));
+        }
+      } else {
+        __ Sd(i.InputRegister(0), MemOperand(sp, i.InputInt32(1)));
+      }
+      break;
+    }
+    case kMips64ByteSwap64: {
+      __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 8);
+      break;
+    }
+    case kMips64ByteSwap32: {
+      __ ByteSwapSigned(i.OutputRegister(0), i.InputRegister(0), 4);
+      break;
+    }
+    case kMips64S128Load8Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Lb(kScratchReg, i.MemoryOperand());
+      __ fill_b(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64S128Load16Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Lh(kScratchReg, i.MemoryOperand());
+      __ fill_h(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64S128Load32Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Lw(kScratchReg, i.MemoryOperand());
+      __ fill_w(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64S128Load64Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64S128Load8x8S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register scratch = kSimd128ScratchReg;
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ clti_s_b(scratch, dst, 0);
+      __ ilvr_b(dst, scratch, dst);
+      break;
+    }
+    case kMips64S128Load8x8U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ ilvr_b(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kMips64S128Load16x4S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register scratch = kSimd128ScratchReg;
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ clti_s_h(scratch, dst, 0);
+      __ ilvr_h(dst, scratch, dst);
+      break;
+    }
+    case kMips64S128Load16x4U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ ilvr_h(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kMips64S128Load32x2S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register scratch = kSimd128ScratchReg;
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ clti_s_w(scratch, dst, 0);
+      __ ilvr_w(dst, scratch, dst);
+      break;
+    }
+    case kMips64S128Load32x2U: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ fill_d(dst, kScratchReg);
+      __ ilvr_w(dst, kSimd128RegZero, dst);
+      break;
+    }
+    case kMips64S128Load32Zero: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(dst, dst, dst);
+      __ Lwu(kScratchReg, i.MemoryOperand());
+      __ insert_w(dst, 0, kScratchReg);
+      break;
+    }
+    case kMips64S128Load64Zero: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(dst, dst, dst);
+      __ Ld(kScratchReg, i.MemoryOperand());
+      __ insert_d(dst, 0, kScratchReg);
+      break;
+    }
+    case kWord32AtomicLoadInt8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lb);
+      break;
+    case kWord32AtomicLoadUint8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lbu);
+      break;
+    case kWord32AtomicLoadInt16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lh);
+      break;
+    case kWord32AtomicLoadUint16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lhu);
+      break;
+    case kWord32AtomicLoadWord32:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lw);
+      break;
+    case kMips64Word64AtomicLoadUint8:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lbu);
+      break;
+    case kMips64Word64AtomicLoadUint16:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lhu);
+      break;
+    case kMips64Word64AtomicLoadUint32:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Lwu);
+      break;
+    case kMips64Word64AtomicLoadUint64:
+      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ld);
+      break;
+    case kWord32AtomicStoreWord8:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sb);
+      break;
+    case kWord32AtomicStoreWord16:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sh);
+      break;
+    case kWord32AtomicStoreWord32:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sw);
+      break;
+    case kMips64Word64AtomicStoreWord8:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sb);
+      break;
+    case kMips64Word64AtomicStoreWord16:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sh);
+      break;
+    case kMips64Word64AtomicStoreWord32:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sw);
+      break;
+    case kMips64Word64AtomicStoreWord64:
+      ASSEMBLE_ATOMIC_STORE_INTEGER(Sd);
+      break;
+    case kWord32AtomicExchangeInt8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 8, 32);
+      break;
+    case kWord32AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 8, 32);
+      break;
+    case kWord32AtomicExchangeInt16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 16, 32);
+      break;
+    case kWord32AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 16, 32);
+      break;
+    case kWord32AtomicExchangeWord32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(Ll, Sc);
+      break;
+    case kMips64Word64AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 8, 64);
+      break;
+    case kMips64Word64AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 16, 64);
+      break;
+    case kMips64Word64AtomicExchangeUint32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 32, 64);
+      break;
+    case kMips64Word64AtomicExchangeUint64:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(Lld, Scd);
+      break;
+    case kWord32AtomicCompareExchangeInt8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 8, 32);
+      break;
+    case kWord32AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 8, 32);
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, true, 16, 32);
+      break;
+    case kWord32AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Ll, Sc, false, 16, 32);
+      break;
+    case kWord32AtomicCompareExchangeWord32:
+      __ sll(i.InputRegister(2), i.InputRegister(2), 0);
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Ll, Sc);
+      break;
+    case kMips64Word64AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 8, 64);
+      break;
+    case kMips64Word64AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 16, 64);
+      break;
+    case kMips64Word64AtomicCompareExchangeUint32:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT(Lld, Scd, false, 32, 64);
+      break;
+    case kMips64Word64AtomicCompareExchangeUint64:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(Lld, Scd);
+      break;
+#define ATOMIC_BINOP_CASE(op, inst)                         \
+  case kWord32Atomic##op##Int8:                             \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, true, 8, inst, 32);   \
+    break;                                                  \
+  case kWord32Atomic##op##Uint8:                            \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, false, 8, inst, 32);  \
+    break;                                                  \
+  case kWord32Atomic##op##Int16:                            \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, true, 16, inst, 32);  \
+    break;                                                  \
+  case kWord32Atomic##op##Uint16:                           \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Ll, Sc, false, 16, inst, 32); \
+    break;                                                  \
+  case kWord32Atomic##op##Word32:                           \
+    ASSEMBLE_ATOMIC_BINOP(Ll, Sc, inst);                    \
+    break;
+      ATOMIC_BINOP_CASE(Add, Addu)
+      ATOMIC_BINOP_CASE(Sub, Subu)
+      ATOMIC_BINOP_CASE(And, And)
+      ATOMIC_BINOP_CASE(Or, Or)
+      ATOMIC_BINOP_CASE(Xor, Xor)
+#undef ATOMIC_BINOP_CASE
+#define ATOMIC_BINOP_CASE(op, inst)                           \
+  case kMips64Word64Atomic##op##Uint8:                        \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 8, inst, 64);  \
+    break;                                                    \
+  case kMips64Word64Atomic##op##Uint16:                       \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 16, inst, 64); \
+    break;                                                    \
+  case kMips64Word64Atomic##op##Uint32:                       \
+    ASSEMBLE_ATOMIC_BINOP_EXT(Lld, Scd, false, 32, inst, 64); \
+    break;                                                    \
+  case kMips64Word64Atomic##op##Uint64:                       \
+    ASSEMBLE_ATOMIC_BINOP(Lld, Scd, inst);                    \
+    break;
+      ATOMIC_BINOP_CASE(Add, Daddu)
+      ATOMIC_BINOP_CASE(Sub, Dsubu)
+      ATOMIC_BINOP_CASE(And, And)
+      ATOMIC_BINOP_CASE(Or, Or)
+      ATOMIC_BINOP_CASE(Xor, Xor)
+#undef ATOMIC_BINOP_CASE
+    case kMips64AssertEqual:
+      __ Assert(eq, static_cast<AbortReason>(i.InputOperand(2).immediate()),
+                i.InputRegister(0), Operand(i.InputRegister(1)));
+      break;
+    case kMips64S128Const: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      uint64_t imm1 = make_uint64(i.InputUint32(1), i.InputUint32(0));
+      uint64_t imm2 = make_uint64(i.InputUint32(3), i.InputUint32(2));
+      __ li(kScratchReg, imm1);
+      __ insert_d(dst, 0, kScratchReg);
+      __ li(kScratchReg, imm2);
+      __ insert_d(dst, 1, kScratchReg);
+      break;
+    }
+    case kMips64S128Zero: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ xor_v(dst, dst, dst);
+      break;
+    }
+    case kMips64S128AllOnes: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_d(dst, dst, dst);
+      break;
+    }
+    case kMips64I32x4Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_w(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMips64I32x4ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_w(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64I32x4ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_w(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMips64I32x4Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F64x2Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bclri_d(i.OutputSimd128Register(), i.InputSimd128Register(0), 63);
+      break;
+    }
+    case kMips64F64x2Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bnegi_d(i.OutputSimd128Register(), i.InputSimd128Register(0), 63);
+      break;
+    }
+    case kMips64F64x2Sqrt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsqrt_d(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64F64x2Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fadd_d);
+      break;
+    }
+    case kMips64F64x2Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fsub_d);
+      break;
+    }
+    case kMips64F64x2Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fmul_d);
+      break;
+    }
+    case kMips64F64x2Div: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      ASSEMBLE_F64X2_ARITHMETIC_BINOP(fdiv_d);
+      break;
+    }
+    case kMips64F64x2Min: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
+
+      // If inputs are -0.0 and +0.0, then write -0.0 to scratch1.
+      // scratch1 = (src0 == src1) ?  (src0 | src1) : (src1 | src1).
+      __ fseq_d(scratch0, src0, src1);
+      __ bsel_v(scratch0, src1, src0);
+      __ or_v(scratch1, scratch0, src1);
+      // scratch0 = isNaN(src0) ? src0 : scratch1.
+      __ fseq_d(scratch0, src0, src0);
+      __ bsel_v(scratch0, src0, scratch1);
+      // scratch1 = (src0 < scratch0) ? src0 : scratch0.
+      __ fslt_d(scratch1, src0, scratch0);
+      __ bsel_v(scratch1, scratch0, src0);
+      // Canonicalize the result.
+      __ fmin_d(dst, scratch1, scratch1);
+      break;
+    }
+    case kMips64F64x2Max: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
+
+      // If inputs are -0.0 and +0.0, then write +0.0 to scratch1.
+      // scratch1 = (src0 == src1) ?  (src0 & src1) : (src1 & src1).
+      __ fseq_d(scratch0, src0, src1);
+      __ bsel_v(scratch0, src1, src0);
+      __ and_v(scratch1, scratch0, src1);
+      // scratch0 = isNaN(src0) ? src0 : scratch1.
+      __ fseq_d(scratch0, src0, src0);
+      __ bsel_v(scratch0, src0, scratch1);
+      // scratch1 = (scratch0 < src0) ? src0 : scratch0.
+      __ fslt_d(scratch1, scratch0, src0);
+      __ bsel_v(scratch1, scratch0, src0);
+      // Canonicalize the result.
+      __ fmax_d(dst, scratch1, scratch1);
+      break;
+    }
+    case kMips64F64x2Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fceq_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F64x2Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcune_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F64x2Lt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fclt_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F64x2Le: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcle_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F64x2Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ Move(kScratchReg, i.InputDoubleRegister(0));
+      __ fill_d(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64F64x2ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_d(kScratchReg, i.InputSimd128Register(0), i.InputInt8(1));
+      __ Move(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    }
+    case kMips64F64x2ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ Move(kScratchReg, i.InputDoubleRegister(2));
+      if (dst != src) {
+        __ move_v(dst, src);
+      }
+      __ insert_d(dst, i.InputInt8(1), kScratchReg);
+      break;
+    }
+    case kMips64I64x2Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_d(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMips64I64x2ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_d(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64F64x2Pmin: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = rhs < lhs ? rhs : lhs
+      __ fclt_d(dst, rhs, lhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMips64F64x2Pmax: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = lhs < rhs ? rhs : lhs
+      __ fclt_d(dst, lhs, rhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMips64F64x2Ceil: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToPlusInf);
+      break;
+    }
+    case kMips64F64x2Floor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToMinusInf);
+      break;
+    }
+    case kMips64F64x2Trunc: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToZero);
+      break;
+    }
+    case kMips64F64x2NearestInt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundD(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToNearest);
+      break;
+    }
+    case kMips64I64x2ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_d(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMips64I64x2Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I64x2Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I64x2Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I64x2Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_d(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I64x2Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_d(kSimd128ScratchReg, i.InputRegister(1));
+        __ sll_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ slli_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt6(1));
+      }
+      break;
+    }
+    case kMips64I64x2ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_d(kSimd128ScratchReg, i.InputRegister(1));
+        __ sra_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srai_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt6(1));
+      }
+      break;
+    }
+    case kMips64I64x2ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_d(kSimd128ScratchReg, i.InputRegister(1));
+        __ srl_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srli_d(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt6(1));
+      }
+      break;
+    }
+    case kMips64F32x4Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ FmoveLow(kScratchReg, i.InputSingleRegister(0));
+      __ fill_w(i.OutputSimd128Register(), kScratchReg);
+      break;
+    }
+    case kMips64F32x4ExtractLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_w(kScratchReg, i.InputSimd128Register(0), i.InputInt8(1));
+      __ FmoveLow(i.OutputSingleRegister(), kScratchReg);
+      break;
+    }
+    case kMips64F32x4ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ FmoveLow(kScratchReg, i.InputSingleRegister(2));
+      if (dst != src) {
+        __ move_v(dst, src);
+      }
+      __ insert_w(dst, i.InputInt8(1), kScratchReg);
+      break;
+    }
+    case kMips64F32x4SConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ffint_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64F32x4UConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ffint_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_w(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMips64I32x4Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_w(kSimd128ScratchReg, i.InputRegister(1));
+        __ sll_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ slli_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt5(1));
+      }
+      break;
+    }
+    case kMips64I32x4ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_w(kSimd128ScratchReg, i.InputRegister(1));
+        __ sra_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srai_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt5(1));
+      }
+      break;
+    }
+    case kMips64I32x4ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_w(kSimd128ScratchReg, i.InputRegister(1));
+        __ srl_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srli_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt5(1));
+      }
+      break;
+    }
+    case kMips64I32x4MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I32x4MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64S128Select: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      DCHECK(i.OutputSimd128Register() == i.InputSimd128Register(0));
+      __ bsel_v(i.OutputSimd128Register(), i.InputSimd128Register(2),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64S128AndNot: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register scratch = kSimd128ScratchReg,
+                      dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      __ nor_v(scratch, src1, src1);
+      __ and_v(dst, scratch, src0);
+      break;
+    }
+    case kMips64F32x4Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bclri_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
+      break;
+    }
+    case kMips64F32x4Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
+      break;
+    }
+    case kMips64F32x4RecipApprox: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64F32x4RecipSqrtApprox: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ frsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64F32x4Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fadd_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsub_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fmul_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Div: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fdiv_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Max: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
+
+      // If inputs are -0.0 and +0.0, then write +0.0 to scratch1.
+      // scratch1 = (src0 == src1) ?  (src0 & src1) : (src1 & src1).
+      __ fseq_w(scratch0, src0, src1);
+      __ bsel_v(scratch0, src1, src0);
+      __ and_v(scratch1, scratch0, src1);
+      // scratch0 = isNaN(src0) ? src0 : scratch1.
+      __ fseq_w(scratch0, src0, src0);
+      __ bsel_v(scratch0, src0, scratch1);
+      // scratch1 = (scratch0 < src0) ? src0 : scratch0.
+      __ fslt_w(scratch1, scratch0, src0);
+      __ bsel_v(scratch1, scratch0, src0);
+      // Canonicalize the result.
+      __ fmax_w(dst, scratch1, scratch1);
+      break;
+    }
+    case kMips64F32x4Min: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
+
+      // If inputs are -0.0 and +0.0, then write -0.0 to scratch1.
+      // scratch1 = (src0 == src1) ?  (src0 | src1) : (src1 | src1).
+      __ fseq_w(scratch0, src0, src1);
+      __ bsel_v(scratch0, src1, src0);
+      __ or_v(scratch1, scratch0, src1);
+      // scratch0 = isNaN(src0) ? src0 : scratch1.
+      __ fseq_w(scratch0, src0, src0);
+      __ bsel_v(scratch0, src0, scratch1);
+      // scratch1 = (src0 < scratch0) ? src0 : scratch0.
+      __ fslt_w(scratch1, src0, scratch0);
+      __ bsel_v(scratch1, scratch0, src0);
+      // Canonicalize the result.
+      __ fmin_w(dst, scratch1, scratch1);
+      break;
+    }
+    case kMips64F32x4Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fceq_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcune_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Lt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fclt_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Le: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fcle_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64F32x4Pmin: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = rhs < lhs ? rhs : lhs
+      __ fclt_w(dst, rhs, lhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMips64F32x4Pmax: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register lhs = i.InputSimd128Register(0);
+      Simd128Register rhs = i.InputSimd128Register(1);
+      // dst = lhs < rhs ? rhs : lhs
+      __ fclt_w(dst, lhs, rhs);
+      __ bsel_v(dst, lhs, rhs);
+      break;
+    }
+    case kMips64F32x4Ceil: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToPlusInf);
+      break;
+    }
+    case kMips64F32x4Floor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToMinusInf);
+      break;
+    }
+    case kMips64F32x4Trunc: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToZero);
+      break;
+    }
+    case kMips64F32x4NearestInt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ MSARoundW(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   kRoundToNearest);
+      break;
+    }
+    case kMips64I32x4SConvertF32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ftrunc_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4UConvertF32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64F32x4Sqrt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_w(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_w(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ asub_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMips64I32x4BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
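+      // Shift each lane's sign bit down to bit 0, fold the four lanes into
+      // the low nibble of byte 0, then extract that byte as the bitmask.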
+      __ srli_w(scratch0, src, 31);
+      __ srli_d(scratch1, scratch0, 31);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ slli_d(scratch1, scratch1, 2);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ copy_u_b(dst, scratch0, 0);
+      break;
+    }
+    case kMips64I32x4DotI16x8S: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ dotp_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMips64I16x8ExtractLaneU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_h(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64I16x8ExtractLaneS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_h(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64I16x8ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_h(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMips64I16x8Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_h(kSimd128ScratchReg, i.InputRegister(1));
+        __ sll_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ slli_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt4(1));
+      }
+      break;
+    }
+    case kMips64I16x8ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_h(kSimd128ScratchReg, i.InputRegister(1));
+        __ sra_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srai_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt4(1));
+      }
+      break;
+    }
+    case kMips64I16x8ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_h(kSimd128ScratchReg, i.InputRegister(1));
+        __ srl_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srli_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt4(1));
+      }
+      break;
+    }
+    case kMips64I16x8Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8AddSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8SubSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_h(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMips64I16x8GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8AddSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8SubSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I16x8GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8RoundingAverageU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ aver_u_h(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                  i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ asub_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMips64I16x8BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
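+      // Fold the eight per-lane sign bits into the low byte of the vector,
+      // then extract that byte as the bitmask.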
+      __ srli_h(scratch0, src, 15);
+      __ srli_w(scratch1, scratch0, 15);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_d(scratch1, scratch0, 30);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ slli_d(scratch1, scratch1, 4);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ copy_u_b(dst, scratch0, 0);
+      break;
+    }
+    case kMips64I8x16Splat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
+      break;
+    }
+    case kMips64I8x16ExtractLaneU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_u_b(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64I8x16ExtractLaneS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ copy_s_b(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputInt8(1));
+      break;
+    }
+    case kMips64I8x16ReplaceLane: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ move_v(dst, src);
+      }
+      __ insert_b(dst, i.InputInt8(1), i.InputRegister(2));
+      break;
+    }
+    case kMips64I8x16Neg: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ subv_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16Shl: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_b(kSimd128ScratchReg, i.InputRegister(1));
+        __ sll_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ slli_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt3(1));
+      }
+      break;
+    }
+    case kMips64I8x16ShrS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_b(kSimd128ScratchReg, i.InputRegister(1));
+        __ sra_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srai_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt3(1));
+      }
+      break;
+    }
+    case kMips64I8x16Add: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ addv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16AddSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16Sub: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16SubSatS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16Mul: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ mulv_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16MaxS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16MinS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16Eq: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ceq_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16Ne: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ ceq_b(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
+      __ nor_v(dst, dst, dst);
+      break;
+    }
+    case kMips64I8x16GtS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_s_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16GeS: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_s_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16ShrU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (instr->InputAt(1)->IsRegister()) {
+        __ fill_b(kSimd128ScratchReg, i.InputRegister(1));
+        __ srl_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 kSimd128ScratchReg);
+      } else {
+        __ srli_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputInt3(1));
+      }
+      break;
+    }
+    case kMips64I8x16AddSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ adds_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16SubSatU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ subs_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16MaxU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ max_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16MinU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ min_u_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64I8x16GtU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ clt_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16GeU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ cle_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                 i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16RoundingAverageU: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ aver_u_b(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                  i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16Abs: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ asub_s_b(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  kSimd128RegZero);
+      break;
+    }
+    case kMips64I8x16BitMask: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register scratch0 = kSimd128RegZero;
+      Simd128Register scratch1 = kSimd128ScratchReg;
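+      // Fold the sixteen per-lane sign bits into the low two bytes, then
+      // extract them as a halfword bitmask.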
+      __ srli_b(scratch0, src, 7);
+      __ srli_h(scratch1, scratch0, 7);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_w(scratch1, scratch0, 14);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ srli_d(scratch1, scratch0, 28);
+      __ or_v(scratch0, scratch0, scratch1);
+      __ shf_w(scratch1, scratch0, 0x0E);
+      __ ilvev_b(scratch0, scratch1, scratch0);
+      __ copy_u_h(dst, scratch0, 0);
+      break;
+    }
+    case kMips64S128And: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64S128Or: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ or_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64S128Xor: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1));
+      break;
+    }
+    case kMips64S128Not: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ nor_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64V32x4AnyTrue:
+    case kMips64V16x8AnyTrue:
+    case kMips64V8x16AnyTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_false;
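+      // The li in the branch delay slot runs on both paths: dst stays 0 when
+      // the all-zero branch is taken and is overwritten with 1 otherwise.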
+      __ BranchMSA(&all_false, MSA_BRANCH_V, all_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, 0l);  // branch delay slot
+      __ li(dst, 1);
+      __ bind(&all_false);
+      break;
+    }
+    case kMips64V32x4AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_W, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, 1);  // branch delay slot
+      __ li(dst, 0l);
+      __ bind(&all_true);
+      break;
+    }
+    case kMips64V16x8AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_H, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, 1);  // branch delay slot
+      __ li(dst, 0l);
+      __ bind(&all_true);
+      break;
+    }
+    case kMips64V8x16AllTrue: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Register dst = i.OutputRegister();
+      Label all_true;
+      __ BranchMSA(&all_true, MSA_BRANCH_B, all_not_zero,
+                   i.InputSimd128Register(0), USE_DELAY_SLOT);
+      __ li(dst, 1);  // branch delay slot
+      __ li(dst, 0l);
+      __ bind(&all_true);
+      break;
+    }
+    case kMips64MsaLd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ ld_b(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kMips64MsaSt: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ st_b(i.InputSimd128Register(2), i.MemoryOperand());
+      break;
+    }
+    case kMips64S32x4InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [5, 1, 4, 0]
+      __ ilvr_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 3, 6, 2]
+      __ ilvl_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [6, 4, 2, 0]
+      __ pckev_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 5, 3, 1]
+      __ pckod_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [6, 2, 4, 0]
+      __ ilvev_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [7, 6, 5, 4], src0 = [3, 2, 1, 0]
+      // dst = [7, 3, 5, 1]
+      __ ilvod_w(dst, src1, src0);
+      break;
+    }
+    case kMips64S32x4Shuffle: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+
+      int32_t shuffle = i.InputInt32(2);
+
+      if (src0 == src1) {
+        // Unary S32x4 shuffles are handled with the shf.w instruction.
+        unsigned lane = shuffle & 0xFF;
+        if (FLAG_debug_code) {
+          // For a unary shuffle, all four lanes must come from the same
+          // source range: either [0, 3] or [4, 7].
+          if (lane >= 4) {
+            int32_t shuffle_helper = shuffle;
+            for (int i = 0; i < 4; ++i) {
+              lane = shuffle_helper & 0xFF;
+              CHECK_GE(lane, 4);
+              shuffle_helper >>= 8;
+            }
+          }
+        }
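+        // Pack the four 2-bit lane selectors into the 8-bit shf.w immediate.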
+        uint32_t i8 = 0;
+        for (int i = 0; i < 4; i++) {
+          lane = shuffle & 0xFF;
+          if (lane >= 4) {
+            lane -= 4;
+          }
+          DCHECK_GT(4, lane);
+          i8 |= lane << (2 * i);
+          shuffle >>= 8;
+        }
+        __ shf_w(dst, src0, i8);
+      } else {
+        // For binary shuffles, use the vshf.w instruction.
+        if (dst == src0) {
+          __ move_v(kSimd128ScratchReg, src0);
+          src0 = kSimd128ScratchReg;
+        } else if (dst == src1) {
+          __ move_v(kSimd128ScratchReg, src1);
+          src1 = kSimd128ScratchReg;
+        }
+
+        __ li(kScratchReg, i.InputInt32(2));
+        __ insert_w(dst, 0, kScratchReg);
+        __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+        __ ilvr_b(dst, kSimd128RegZero, dst);
+        __ ilvr_h(dst, kSimd128RegZero, dst);
+        __ vshf_w(dst, src1, src0);
+      }
+      break;
+    }
+    case kMips64S16x8InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [11, 3, 10, 2, 9, 1, 8, 0]
+      __ ilvr_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x8InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 7, 14, 6, 13, 5, 12, 4]
+      __ ilvl_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x8PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [14, 12, 10, 8, 6, 4, 2, 0]
+      __ pckev_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x8PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 13, 11, 9, 7, 5, 3, 1]
+      __ pckod_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x8InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [14, 6, 12, 4, 10, 2, 8, 0]
+      __ ilvev_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x8InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [15, ... 11, 10, 9, 8], src0 = [7, ... 3, 2, 1, 0]
+      // dst = [15, 7, ... 11, 3, 9, 1]
+      __ ilvod_h(dst, src1, src0);
+      break;
+    }
+    case kMips64S16x4Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [7, 6, 5, 4, 3, 2, 1, 0], dst = [4, 5, 6, 7, 0, 1, 2, 3]
+      // shf.df imm field: 0 1 2 3 = 00011011 = 0x1B
+      __ shf_h(i.OutputSimd128Register(), i.InputSimd128Register(0), 0x1B);
+      break;
+    }
+    case kMips64S16x2Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [7, 6, 5, 4, 3, 2, 1, 0], dst = [6, 7, 4, 5, 3, 2, 0, 1]
+      // shf.df imm field: 2 3 0 1 = 10110001 = 0xB1
+      __ shf_h(i.OutputSimd128Register(), i.InputSimd128Register(0), 0xB1);
+      break;
+    }
+    case kMips64S8x16InterleaveRight: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [23, 7, ... 17, 1, 16, 0]
+      __ ilvr_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16InterleaveLeft: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 15, ... 25, 9, 24, 8]
+      __ ilvl_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16PackEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [30, 28, ... 6, 4, 2, 0]
+      __ pckev_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16PackOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 29, ... 7, 5, 3, 1]
+      __ pckod_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16InterleaveEven: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [30, 14, ... 18, 2, 16, 0]
+      __ ilvev_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16InterleaveOdd: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      // src1 = [31, ... 19, 18, 17, 16], src0 = [15, ... 3, 2, 1, 0]
+      // dst = [31, 15, ... 19, 3, 17, 1]
+      __ ilvod_b(dst, src1, src0);
+      break;
+    }
+    case kMips64S8x16Concat: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      DCHECK(dst == i.InputSimd128Register(0));
+      __ sldi_b(dst, i.InputSimd128Register(1), i.InputInt4(2));
+      break;
+    }
+    case kMips64I8x16Shuffle: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+
+      if (dst == src0) {
+        __ move_v(kSimd128ScratchReg, src0);
+        src0 = kSimd128ScratchReg;
+      } else if (dst == src1) {
+        __ move_v(kSimd128ScratchReg, src1);
+        src1 = kSimd128ScratchReg;
+      }
+
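+      // Materialize the 16-byte shuffle control in dst from the four 32-bit
+      // immediates; vshf.b then selects bytes from the src1:src0 pair.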
+      int64_t control_low =
+          static_cast<int64_t>(i.InputInt32(3)) << 32 | i.InputInt32(2);
+      int64_t control_hi =
+          static_cast<int64_t>(i.InputInt32(5)) << 32 | i.InputInt32(4);
+      __ li(kScratchReg, control_low);
+      __ insert_d(dst, 0, kScratchReg);
+      __ li(kScratchReg, control_hi);
+      __ insert_d(dst, 1, kScratchReg);
+      __ vshf_b(dst, src1, src0);
+      break;
+    }
+    case kMips64I8x16Swizzle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      tbl = i.InputSimd128Register(0),
+                      ctl = i.InputSimd128Register(1);
+      DCHECK(dst != ctl && dst != tbl);
+      Simd128Register zeroReg = i.TempSimd128Register(0);
+      __ xor_v(zeroReg, zeroReg, zeroReg);
+      __ move_v(dst, ctl);
+      __ vshf_b(dst, zeroReg, tbl);
+      break;
+    }
+    case kMips64S8x8Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+      // dst = [8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7]
+      // [A B C D] => [B A D C]: shf.w imm: 2 3 0 1 = 10110001 = 0xB1
+      // C: [7, 6, 5, 4] => A': [4, 5, 6, 7]: shf.b imm: 00011011 = 0x1B
+      __ shf_w(kSimd128ScratchReg, i.InputSimd128Register(0), 0xB1);
+      __ shf_b(i.OutputSimd128Register(), kSimd128ScratchReg, 0x1B);
+      break;
+    }
+    case kMips64S8x4Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, ... 3, 2, 1, 0], dst = [12, 13, 14, 15, ... 0, 1, 2, 3]
+      // shf.df imm field: 0 1 2 3 = 00011011 = 0x1B
+      __ shf_b(i.OutputSimd128Register(), i.InputSimd128Register(0), 0x1B);
+      break;
+    }
+    case kMips64S8x2Reverse: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      // src = [15, 14, ... 3, 2, 1, 0], dst = [14, 15, 12, 13, ... 2, 3, 0, 1]
+      // shf.df imm field: 2 3 0 1 = 10110001 = 0xB1
+      __ shf_b(i.OutputSimd128Register(), i.InputSimd128Register(0), 0xB1);
+      break;
+    }
+    case kMips64I32x4SConvertI16x8Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
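+      // Duplicate each of the low four halfwords into a word lane, then use
+      // the shift pair to sign-extend the 16-bit values to 32 bits.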
+      __ ilvr_h(kSimd128ScratchReg, src, src);
+      __ slli_w(dst, kSimd128ScratchReg, 16);
+      __ srai_w(dst, dst, 16);
+      break;
+    }
+    case kMips64I32x4SConvertI16x8High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvl_h(kSimd128ScratchReg, src, src);
+      __ slli_w(dst, kSimd128ScratchReg, 16);
+      __ srai_w(dst, dst, 16);
+      break;
+    }
+    case kMips64I32x4UConvertI16x8Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvr_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I32x4UConvertI16x8High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvl_h(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8SConvertI8x16Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvr_b(kSimd128ScratchReg, src, src);
+      __ slli_h(dst, kSimd128ScratchReg, 8);
+      __ srai_h(dst, dst, 8);
+      break;
+    }
+    case kMips64I16x8SConvertI8x16High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ ilvl_b(kSimd128ScratchReg, src, src);
+      __ slli_h(dst, kSimd128ScratchReg, 8);
+      __ srai_h(dst, dst, 8);
+      break;
+    }
+    case kMips64I16x8SConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
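+      // Saturate each signed word to the 16-bit range, then pack the low
+      // halfword of every lane from both inputs.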
+      __ sat_s_w(kSimd128ScratchReg, src0, 15);
+      __ sat_s_w(kSimd128RegZero, src1, 15);  // kSimd128RegZero as scratch
+      __ pckev_h(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64I16x8UConvertI32x4: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ max_s_w(kSimd128ScratchReg, kSimd128RegZero, src0);
+      __ sat_u_w(kSimd128ScratchReg, kSimd128ScratchReg, 15);
+      __ max_s_w(dst, kSimd128RegZero, src1);
+      __ sat_u_w(dst, dst, 15);
+      __ pckev_h(dst, dst, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64I16x8UConvertI8x16Low: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvr_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I16x8UConvertI8x16High: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ ilvl_b(i.OutputSimd128Register(), kSimd128RegZero,
+                i.InputSimd128Register(0));
+      break;
+    }
+    case kMips64I8x16SConvertI16x8: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ sat_s_h(kSimd128ScratchReg, src0, 7);
+      __ sat_s_h(kSimd128RegZero, src1, 7);  // kSimd128RegZero as scratch
+      __ pckev_b(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64I8x16UConvertI16x8: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);
+      __ max_s_h(kSimd128ScratchReg, kSimd128RegZero, src0);
+      __ sat_u_h(kSimd128ScratchReg, kSimd128ScratchReg, 7);
+      __ max_s_h(dst, kSimd128RegZero, src1);
+      __ sat_u_h(dst, dst, 7);
+      __ pckev_b(dst, dst, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64F32x4AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
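+      // Pairwise add: swap adjacent lanes (shf 0xB1) and add to the original
+      // so both lanes of each pair hold that pair's sum, then pack the even
+      // lanes of both intermediate results into dst.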
+      __ shf_w(kSimd128ScratchReg, src0, 0xB1);  // 2 3 0 1 : 10110001 : 0xB1
+      __ shf_w(kSimd128RegZero, src1, 0xB1);     // kSimd128RegZero as scratch
+      __ fadd_w(kSimd128ScratchReg, kSimd128ScratchReg, src0);
+      __ fadd_w(kSimd128RegZero, kSimd128RegZero, src1);
+      __ pckev_w(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64I32x4AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ hadd_s_d(kSimd128ScratchReg, src0, src0);
+      __ hadd_s_d(kSimd128RegZero, src1, src1);  // kSimd128RegZero as scratch
+      __ pckev_w(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+    case kMips64I16x8AddHoriz: {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ hadd_s_w(kSimd128ScratchReg, src0, src0);
+      __ hadd_s_w(kSimd128RegZero, src1, src1);  // kSimd128RegZero as scratch
+      __ pckev_h(dst, kSimd128RegZero, kSimd128ScratchReg);
+      break;
+    }
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+#define UNSUPPORTED_COND(opcode, condition)                                    \
+  StdoutStream{} << "Unsupported " << #opcode << " condition: \"" << condition \
+                 << "\"";                                                      \
+  UNIMPLEMENTED();
+
+void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
+                            Instruction* instr, FlagsCondition condition,
+                            Label* tlabel, Label* flabel, bool fallthru) {
+#undef __
+#define __ tasm->
+  MipsOperandConverter i(gen, instr);
+
+  Condition cc = kNoCondition;
+  // MIPS does not have condition code flags, so compare and branch are
+  // implemented differently than on other architectures. The compare
+  // operations emit MIPS pseudo-instructions, which are handled here by
+  // branch instructions that do the actual comparison. It is essential that
+  // the input registers of the compare pseudo-op are not modified before this
+  // branch op, as they are tested here.
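+  // For example, the kMips64Tst case below assumes that the preceding compare
+  // pseudo-instruction left its result in kScratchReg; the branch then simply
+  // compares that scratch value against zero.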
+
+  if (instr->arch_opcode() == kMips64Tst) {
+    cc = FlagsConditionToConditionTst(condition);
+    __ Branch(tlabel, cc, kScratchReg, Operand(zero_reg));
+  } else if (instr->arch_opcode() == kMips64Dadd ||
+             instr->arch_opcode() == kMips64Dsub) {
+    cc = FlagsConditionToConditionOvf(condition);
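+    // The 32-bit result did not overflow iff the upper 32 bits of the 64-bit
+    // result equal the sign-extension of the low word: dsra32 extracts the
+    // upper word, sra replicates bit 31 of the low word, and the branch
+    // compares the two.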
+    __ dsra32(kScratchReg, i.OutputRegister(), 0);
+    __ sra(kScratchReg2, i.OutputRegister(), 31);
+    __ Branch(tlabel, cc, kScratchReg2, Operand(kScratchReg));
+  } else if (instr->arch_opcode() == kMips64DaddOvf ||
+             instr->arch_opcode() == kMips64DsubOvf) {
+    switch (condition) {
+      // Overflow occurs if overflow register is negative
+      case kOverflow:
+        __ Branch(tlabel, lt, kScratchReg, Operand(zero_reg));
+        break;
+      case kNotOverflow:
+        __ Branch(tlabel, ge, kScratchReg, Operand(zero_reg));
+        break;
+      default:
+        UNSUPPORTED_COND(instr->arch_opcode(), condition);
+        break;
+    }
+  } else if (instr->arch_opcode() == kMips64MulOvf) {
+    // Overflow occurs if overflow register is not zero
+    switch (condition) {
+      case kOverflow:
+        __ Branch(tlabel, ne, kScratchReg, Operand(zero_reg));
+        break;
+      case kNotOverflow:
+        __ Branch(tlabel, eq, kScratchReg, Operand(zero_reg));
+        break;
+      default:
+        UNSUPPORTED_COND(kMips64MulOvf, condition);
+        break;
+    }
+  } else if (instr->arch_opcode() == kMips64Cmp) {
+    cc = FlagsConditionToConditionCmp(condition);
+    __ Branch(tlabel, cc, i.InputRegister(0), i.InputOperand(1));
+  } else if (instr->arch_opcode() == kArchStackPointerGreaterThan) {
+    cc = FlagsConditionToConditionCmp(condition);
+    Register lhs_register = sp;
+    uint32_t offset;
+    if (gen->ShouldApplyOffsetToStackCheck(instr, &offset)) {
+      lhs_register = i.TempRegister(0);
+      __ Dsubu(lhs_register, sp, offset);
+    }
+    __ Branch(tlabel, cc, lhs_register, Operand(i.InputRegister(0)));
+  } else if (instr->arch_opcode() == kMips64CmpS ||
+             instr->arch_opcode() == kMips64CmpD) {
+    bool predicate;
+    FlagsConditionToConditionCmpFPU(&predicate, condition);
+    if (predicate) {
+      __ BranchTrueF(tlabel);
+    } else {
+      __ BranchFalseF(tlabel);
+    }
+  } else {
+    PrintF("AssembleArchBranch Unimplemented arch_opcode: %d\n",
+           instr->arch_opcode());
+    UNIMPLEMENTED();
+  }
+  if (!fallthru) __ Branch(flabel);  // no fallthru to flabel.
+#undef __
+#define __ tasm()->
+}
+
+// Assembles branches after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+
+  AssembleBranchToLabels(this, tasm(), instr, branch->condition, tlabel, flabel,
+                         branch->fallthru);
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
+    return;
+  }
+
+  MipsOperandConverter i(this, instr);
+  condition = NegateFlagsCondition(condition);
+
+  switch (instr->arch_opcode()) {
+    case kMips64Cmp: {
+      __ LoadZeroOnCondition(kSpeculationPoisonRegister, i.InputRegister(0),
+                             i.InputOperand(1),
+                             FlagsConditionToConditionCmp(condition));
+    }
+      return;
+    case kMips64Tst: {
+      switch (condition) {
+        case kEqual:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg);
+          break;
+        case kNotEqual:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg);
+          break;
+        default:
+          UNREACHABLE();
+      }
+    }
+      return;
+    case kMips64Dadd:
+    case kMips64Dsub: {
+      // Check for overflow creates 1 or 0 for result.
+      __ dsrl32(kScratchReg, i.OutputRegister(), 31);
+      __ srl(kScratchReg2, i.OutputRegister(), 31);
+      __ xor_(kScratchReg2, kScratchReg, kScratchReg2);
+      switch (condition) {
+        case kOverflow:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg2);
+          break;
+        case kNotOverflow:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2);
+          break;
+        default:
+          UNSUPPORTED_COND(instr->arch_opcode(), condition);
+      }
+    }
+      return;
+    case kMips64DaddOvf:
+    case kMips64DsubOvf: {
+      // Overflow occurs if overflow register is negative
+      __ Slt(kScratchReg2, kScratchReg, zero_reg);
+      switch (condition) {
+        case kOverflow:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg2);
+          break;
+        case kNotOverflow:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg2);
+          break;
+        default:
+          UNSUPPORTED_COND(instr->arch_opcode(), condition);
+      }
+    }
+      return;
+    case kMips64MulOvf: {
+      // Overflow occurs if overflow register is not zero
+      switch (condition) {
+        case kOverflow:
+          __ LoadZeroIfConditionNotZero(kSpeculationPoisonRegister,
+                                        kScratchReg);
+          break;
+        case kNotOverflow:
+          __ LoadZeroIfConditionZero(kSpeculationPoisonRegister, kScratchReg);
+          break;
+        default:
+          UNSUPPORTED_COND(instr->arch_opcode(), condition);
+      }
+    }
+      return;
+    case kMips64CmpS:
+    case kMips64CmpD: {
+      bool predicate;
+      FlagsConditionToConditionCmpFPU(&predicate, condition);
+      if (predicate) {
+        __ LoadZeroIfFPUCondition(kSpeculationPoisonRegister);
+      } else {
+        __ LoadZeroIfNotFPUCondition(kSpeculationPoisonRegister);
+      }
+    }
+      return;
+    default:
+      UNREACHABLE();
+  }
+}
+
+#undef UNSUPPORTED_COND
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ Branch(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+    void Generate() final {
+      MipsOperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        // We use the context register as the scratch register, because we do
+        // not have a context here.
+        __ PrepareCallCFunction(0, 0, cp);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        pop_count += (pop_count & 1);  // align
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          __ stop();
+        }
+      }
+    }
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  AssembleBranchToLabels(this, tasm(), instr, condition, tlabel, nullptr, true);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  MipsOperandConverter i(this, instr);
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register result = i.OutputRegister(instr->OutputCount() - 1);
+  Condition cc = kNoCondition;
+  // MIPS does not have condition code flags, so compare and branch are
+  // implemented differently than on other architectures. The compare
+  // operations emit MIPS pseudo-instructions, which are checked and handled
+  // here.
+
+  if (instr->arch_opcode() == kMips64Tst) {
+    cc = FlagsConditionToConditionTst(condition);
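+    // Sltu(result, kScratchReg, 1) yields 1 iff kScratchReg == 0, while
+    // Sltu(result, zero_reg, kScratchReg) yields 1 iff kScratchReg != 0.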
+    if (cc == eq) {
+      __ Sltu(result, kScratchReg, 1);
+    } else {
+      __ Sltu(result, zero_reg, kScratchReg);
+    }
+    return;
+  } else if (instr->arch_opcode() == kMips64Dadd ||
+             instr->arch_opcode() == kMips64Dsub) {
+    cc = FlagsConditionToConditionOvf(condition);
+    // Check for overflow creates 1 or 0 for result.
+    __ dsrl32(kScratchReg, i.OutputRegister(), 31);
+    __ srl(kScratchReg2, i.OutputRegister(), 31);
+    __ xor_(result, kScratchReg, kScratchReg2);
+    if (cc == eq)  // Toggle result for not overflow.
+      __ xori(result, result, 1);
+    return;
+  } else if (instr->arch_opcode() == kMips64DaddOvf ||
+             instr->arch_opcode() == kMips64DsubOvf) {
+    // Overflow occurs if overflow register is negative
+    __ slt(result, kScratchReg, zero_reg);
+  } else if (instr->arch_opcode() == kMips64MulOvf) {
+    // Overflow occurs if overflow register is not zero
+    __ Sgtu(result, kScratchReg, zero_reg);
+  } else if (instr->arch_opcode() == kMips64Cmp) {
+    cc = FlagsConditionToConditionCmp(condition);
+    switch (cc) {
+      case eq:
+      case ne: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
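+        // For immediate operands, prefer folding the comparison into a single
+        // Daddu with the negated value when it fits in 16 bits; otherwise XOR
+        // the operands and test the difference for zero with Sltu.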
+        if (instr->InputAt(1)->IsImmediate()) {
+          if (is_int16(-right.immediate())) {
+            if (right.immediate() == 0) {
+              if (cc == eq) {
+                __ Sltu(result, left, 1);
+              } else {
+                __ Sltu(result, zero_reg, left);
+              }
+            } else {
+              __ Daddu(result, left, Operand(-right.immediate()));
+              if (cc == eq) {
+                __ Sltu(result, result, 1);
+              } else {
+                __ Sltu(result, zero_reg, result);
+              }
+            }
+          } else {
+            if (is_uint16(right.immediate())) {
+              __ Xor(result, left, right);
+            } else {
+              __ li(kScratchReg, right);
+              __ Xor(result, left, kScratchReg);
+            }
+            if (cc == eq) {
+              __ Sltu(result, result, 1);
+            } else {
+              __ Sltu(result, zero_reg, result);
+            }
+          }
+        } else {
+          __ Xor(result, left, right);
+          if (cc == eq) {
+            __ Sltu(result, result, 1);
+          } else {
+            __ Sltu(result, zero_reg, result);
+          }
+        }
+      } break;
+      case lt:
+      case ge: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
+        __ Slt(result, left, right);
+        if (cc == ge) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case gt:
+      case le: {
+        Register left = i.InputRegister(1);
+        Operand right = i.InputOperand(0);
+        __ Slt(result, left, right);
+        if (cc == le) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case lo:
+      case hs: {
+        Register left = i.InputRegister(0);
+        Operand right = i.InputOperand(1);
+        __ Sltu(result, left, right);
+        if (cc == hs) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      case hi:
+      case ls: {
+        Register left = i.InputRegister(1);
+        Operand right = i.InputOperand(0);
+        __ Sltu(result, left, right);
+        if (cc == ls) {
+          __ xori(result, result, 1);
+        }
+      } break;
+      default:
+        UNREACHABLE();
+    }
+    return;
+  } else if (instr->arch_opcode() == kMips64CmpD ||
+             instr->arch_opcode() == kMips64CmpS) {
+    FPURegister left = i.InputOrZeroDoubleRegister(0);
+    FPURegister right = i.InputOrZeroDoubleRegister(1);
+    if ((left == kDoubleRegZero || right == kDoubleRegZero) &&
+        !__ IsDoubleZeroRegSet()) {
+      __ Move(kDoubleRegZero, 0.0);
+    }
+    bool predicate;
+    FlagsConditionToConditionCmpFPU(&predicate, condition);
+    if (kArchVariant != kMips64r6) {
+      __ li(result, Operand(1));
+      if (predicate) {
+        __ Movf(result, zero_reg);
+      } else {
+        __ Movt(result, zero_reg);
+      }
+    } else {
+      if (instr->arch_opcode() == kMips64CmpD) {
+        __ dmfc1(result, kDoubleCompareReg);
+      } else {
+        DCHECK_EQ(kMips64CmpS, instr->arch_opcode());
+        __ mfc1(result, kDoubleCompareReg);
+      }
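+      // On r6, cmp.<fmt> produces all ones or all zeros, so the LSB is the
+      // predicate and adding 1 maps the mask to the negated 0/1 predicate.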
+      if (predicate) {
+        __ And(result, result, 1);  // cmp returns all 1's/0's, use only LSB.
+      } else {
+        __ Addu(result, result, 1);  // Toggle result for not equal.
+      }
+    }
+    return;
+  } else {
+    PrintF("AssembleArchBranch Unimplemented arch_opcode is : %d\n",
+           instr->arch_opcode());
+    TRACE_UNIMPL();
+    UNIMPLEMENTED();
+  }
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  MipsOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  size_t const case_count = instr->InputCount() - 2;
+
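+  // The unsigned (hs) compare doubles as the bounds check: any input outside
+  // [0, case_count) branches to the block supplied as InputRpo(1).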
+  __ Branch(GetLabel(i.InputRpo(1)), hs, input, Operand(case_count));
+  __ GenerateSwitchTable(input, case_count, [&i, this](size_t index) {
+    return GetLabel(i.InputRpo(index + 2));
+  });
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fpu != 0) {
+    int count = base::bits::CountPopulation(saves_fpu);
+    DCHECK_EQ(kNumCalleeSavedFPU, count);
+    frame->AllocateSavedCalleeRegisterSlots(count *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    int count = base::bits::CountPopulation(saves);
+    DCHECK_EQ(kNumCalleeSaved, count + 1);
+    frame->AllocateSavedCalleeRegisterSlots(count);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ StubPrologue(StackFrame::C_WASM_ENTRY);
+        // Reserve stack space for saving the c_entry_fp later.
+        __ Dsubu(sp, sp, Operand(kSystemPointerSize));
+      } else {
+        __ Push(ra, fp);
+        __ mov(fp, sp);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      __ StubPrologue(info()->GetOutputStackFrameType());
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ Push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ ld(kJSFunctionRegister,
+              FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ ld(kWasmInstanceRegister,
+              FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        __ Push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ Dsubu(sp, sp, Operand(kSystemPointerSize));
+        }
+      }
+    }
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+
+  if (required_slots > 0) {
+    DCHECK(frame_access_state()->has_frame());
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
+        __ Ld(
+             kScratchReg,
+             FieldMemOperand(kWasmInstanceRegister,
+                             WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ Ld(kScratchReg, MemOperand(kScratchReg));
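+        // kScratchReg now holds the real stack limit; add the frame size and
+        // require sp to stay above the sum so the whole frame fits.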
+        __ Daddu(kScratchReg, kScratchReg,
+                 Operand(required_slots * kSystemPointerSize));
+        __ Branch(&done, uge, sp, Operand(kScratchReg));
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly, there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ stop();
+      }
+
+      __ bind(&done);
+    }
+  }
+
+  const int returns = frame()->GetReturnSlotCount();
+
+  // Skip callee-saved and return slots, which are pushed below.
+  required_slots -= base::bits::CountPopulation(saves);
+  required_slots -= base::bits::CountPopulation(saves_fpu);
+  required_slots -= returns;
+  if (required_slots > 0) {
+    __ Dsubu(sp, sp, Operand(required_slots * kSystemPointerSize));
+  }
+
+  if (saves_fpu != 0) {
+    // Save callee-saved FPU registers.
+    __ MultiPushFPU(saves_fpu);
+    DCHECK_EQ(kNumCalleeSavedFPU, base::bits::CountPopulation(saves_fpu));
+  }
+
+  if (saves != 0) {
+    // Save callee-saved registers.
+    __ MultiPush(saves);
+    DCHECK_EQ(kNumCalleeSaved, base::bits::CountPopulation(saves) + 1);
+  }
+
+  if (returns != 0) {
+    // Create space for returns.
+    __ Dsubu(sp, sp, Operand(returns * kSystemPointerSize));
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    __ Daddu(sp, sp, Operand(returns * kSystemPointerSize));
+  }
+
+  // Restore GP registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    __ MultiPop(saves);
+  }
+
+  // Restore FPU registers.
+  const RegList saves_fpu = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fpu != 0) {
+    __ MultiPopFPU(saves_fpu);
+  }
+
+  MipsOperandConverter g(this, nullptr);
+
+  const int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} == 0.
+  // Check RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ Assert(eq, AbortReason::kUnexpectedAdditionalPopValue,
+                g.ToRegister(additional_pop_count),
+                Operand(static_cast<int64_t>(0)));
+    }
+  }
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, it means it is a builtin with
+  // kDontAdaptArgumentsSentinel, which takes care of popping the JS arguments
+  // itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now unless they have a
+    // variable number of stack slot pops.
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ Branch(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count
+      __ Ld(t0, MemOperand(fp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    __ Daddu(t0, t0, Operand(1));  // Also pop the receiver.
+    if (parameter_count > 1) {
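+      // Compute t0 = max(t0, parameter_count): movn copies parameter_count
+      // into t0 when the slt below finds t0 < parameter_count.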
+      __ li(kScratchReg, parameter_count);
+      __ slt(kScratchReg2, t0, kScratchReg);
+      __ movn(t0, kScratchReg, kScratchReg2);
+    }
+    __ dsll(t0, t0, kSystemPointerSizeLog2);
+    __ Daddu(sp, sp, t0);
+  } else if (additional_pop_count->IsImmediate()) {
+    DCHECK_EQ(Constant::kInt32, g.ToConstant(additional_pop_count).type());
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    __ Drop(parameter_count + additional_count);
+  } else {
+    Register pop_reg = g.ToRegister(additional_pop_count);
+    __ Drop(parameter_count);
+    __ dsll(pop_reg, pop_reg, kSystemPointerSizeLog2);
+    __ Daddu(sp, sp, pop_reg);
+  }
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  MipsOperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ mov(g.ToRegister(destination), src);
+    } else {
+      __ Sd(src, g.ToMemOperand(destination));
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsRegister()) {
+      __ Ld(g.ToRegister(destination), src);
+    } else {
+      Register temp = kScratchReg;
+      __ Ld(temp, src);
+      __ Sd(temp, g.ToMemOperand(destination));
+    }
+  } else if (source->IsConstant()) {
+    Constant src = g.ToConstant(source);
+    if (destination->IsRegister() || destination->IsStackSlot()) {
+      Register dst =
+          destination->IsRegister() ? g.ToRegister(destination) : kScratchReg;
+      switch (src.type()) {
+        case Constant::kInt32:
+          __ li(dst, Operand(src.ToInt32()));
+          break;
+        case Constant::kFloat32:
+          __ li(dst, Operand::EmbeddedNumber(src.ToFloat32()));
+          break;
+        case Constant::kInt64:
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+            __ li(dst, Operand(src.ToInt64(), src.rmode()));
+          } else {
+            __ li(dst, Operand(src.ToInt64()));
+          }
+          break;
+        case Constant::kFloat64:
+          __ li(dst, Operand::EmbeddedNumber(src.ToFloat64().value()));
+          break;
+        case Constant::kExternalReference:
+          __ li(dst, src.ToExternalReference());
+          break;
+        case Constant::kDelayedStringConstant:
+          __ li(dst, src.ToDelayedStringConstant());
+          break;
+        case Constant::kHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ li(dst, src_object);
+          }
+          break;
+        }
+        case Constant::kCompressedHeapObject:
+          UNREACHABLE();
+        case Constant::kRpoNumber:
+          UNREACHABLE();  // TODO(titzer): loading RPO numbers on mips64.
+          break;
+      }
+      if (destination->IsStackSlot()) __ Sd(dst, g.ToMemOperand(destination));
+    } else if (src.type() == Constant::kFloat32) {
+      if (destination->IsFPStackSlot()) {
+        MemOperand dst = g.ToMemOperand(destination);
+        if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
+          __ Sd(zero_reg, dst);
+        } else {
+          __ li(kScratchReg, Operand(bit_cast<int32_t>(src.ToFloat32())));
+          __ Sd(kScratchReg, dst);
+        }
+      } else {
+        DCHECK(destination->IsFPRegister());
+        FloatRegister dst = g.ToSingleRegister(destination);
+        __ Move(dst, src.ToFloat32());
+      }
+    } else {
+      DCHECK_EQ(Constant::kFloat64, src.type());
+      DoubleRegister dst = destination->IsFPRegister()
+                               ? g.ToDoubleRegister(destination)
+                               : kScratchDoubleReg;
+      __ Move(dst, src.ToFloat64().value());
+      if (destination->IsFPStackSlot()) {
+        __ Sdc1(dst, g.ToMemOperand(destination));
+      }
+    }
+  } else if (source->IsFPRegister()) {
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      MSARegister src = g.ToSimd128Register(source);
+      if (destination->IsSimd128Register()) {
+        MSARegister dst = g.ToSimd128Register(destination);
+        __ move_v(dst, src);
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        __ st_b(src, g.ToMemOperand(destination));
+      }
+    } else {
+      FPURegister src = g.ToDoubleRegister(source);
+      if (destination->IsFPRegister()) {
+        FPURegister dst = g.ToDoubleRegister(destination);
+        __ Move(dst, src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        __ Sdc1(src, g.ToMemOperand(destination));
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      if (destination->IsSimd128Register()) {
+        __ ld_b(g.ToSimd128Register(destination), src);
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        MSARegister temp = kSimd128ScratchReg;
+        __ ld_b(temp, src);
+        __ st_b(temp, g.ToMemOperand(destination));
+      }
+    } else {
+      if (destination->IsFPRegister()) {
+        __ Ldc1(g.ToDoubleRegister(destination), src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        FPURegister temp = kScratchDoubleReg;
+        __ Ldc1(temp, src);
+        __ Sdc1(temp, g.ToMemOperand(destination));
+      }
+    }
+  } else {
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  MipsOperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    // Register-register.
+    Register temp = kScratchReg;
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      Register dst = g.ToRegister(destination);
+      __ Move(temp, src);
+      __ Move(src, dst);
+      __ Move(dst, temp);
+    } else {
+      DCHECK(destination->IsStackSlot());
+      MemOperand dst = g.ToMemOperand(destination);
+      __ mov(temp, src);
+      __ Ld(src, dst);
+      __ Sd(temp, dst);
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsStackSlot());
+    Register temp_0 = kScratchReg;
+    Register temp_1 = kScratchReg2;
+    MemOperand src = g.ToMemOperand(source);
+    MemOperand dst = g.ToMemOperand(destination);
+    __ Ld(temp_0, src);
+    __ Ld(temp_1, dst);
+    __ Sd(temp_0, dst);
+    __ Sd(temp_1, src);
+  } else if (source->IsFPRegister()) {
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      MSARegister temp = kSimd128ScratchReg;
+      MSARegister src = g.ToSimd128Register(source);
+      if (destination->IsSimd128Register()) {
+        MSARegister dst = g.ToSimd128Register(destination);
+        __ move_v(temp, src);
+        __ move_v(src, dst);
+        __ move_v(dst, temp);
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        MemOperand dst = g.ToMemOperand(destination);
+        __ move_v(temp, src);
+        __ ld_b(src, dst);
+        __ st_b(temp, dst);
+      }
+    } else {
+      FPURegister temp = kScratchDoubleReg;
+      FPURegister src = g.ToDoubleRegister(source);
+      if (destination->IsFPRegister()) {
+        FPURegister dst = g.ToDoubleRegister(destination);
+        __ Move(temp, src);
+        __ Move(src, dst);
+        __ Move(dst, temp);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        MemOperand dst = g.ToMemOperand(destination);
+        __ Move(temp, src);
+        __ Ldc1(src, dst);
+        __ Sdc1(temp, dst);
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPStackSlot());
+    Register temp_0 = kScratchReg;
+    MemOperand src0 = g.ToMemOperand(source);
+    MemOperand src1(src0.rm(), src0.offset() + kIntSize);
+    MemOperand dst0 = g.ToMemOperand(destination);
+    MemOperand dst1(dst0.rm(), dst0.offset() + kIntSize);
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      MemOperand src2(src0.rm(), src0.offset() + 2 * kIntSize);
+      MemOperand src3(src0.rm(), src0.offset() + 3 * kIntSize);
+      MemOperand dst2(dst0.rm(), dst0.offset() + 2 * kIntSize);
+      MemOperand dst3(dst0.rm(), dst0.offset() + 3 * kIntSize);
+      CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
+      MSARegister temp_1 = kSimd128ScratchReg;
+      __ ld_b(temp_1, dst0);  // Save destination in temp_1.
+      __ Lw(temp_0, src0);    // Then use temp_0 to copy source to destination.
+      __ Sw(temp_0, dst0);
+      __ Lw(temp_0, src1);
+      __ Sw(temp_0, dst1);
+      __ Lw(temp_0, src2);
+      __ Sw(temp_0, dst2);
+      __ Lw(temp_0, src3);
+      __ Sw(temp_0, dst3);
+      __ st_b(temp_1, src0);
+    } else {
+      FPURegister temp_1 = kScratchDoubleReg;
+      __ Ldc1(temp_1, dst0);  // Save destination in temp_1.
+      __ Lw(temp_0, src0);    // Then use temp_0 to copy source to destination.
+      __ Sw(temp_0, dst0);
+      __ Lw(temp_0, src1);
+      __ Sw(temp_0, dst1);
+      __ Sdc1(temp_1, src0);
+    }
+  } else {
+    // No other combinations are possible.
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  // On 64-bit MIPS we emit the jump tables inline.
+  UNREACHABLE();
+}
+
+#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
+#undef ASSEMBLE_ATOMIC_STORE_INTEGER
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_ATOMIC_BINOP_EXT
+#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER_EXT
+#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
+#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER_EXT
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+
+#undef TRACE_MSG
+#undef TRACE_UNIMPL
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/mips64/instruction-codes-mips64.h b/src/compiler/backend/mips64/instruction-codes-mips64.h
new file mode 100644
index 0000000..18a8e61
--- /dev/null
+++ b/src/compiler/backend/mips64/instruction-codes-mips64.h
@@ -0,0 +1,437 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_MIPS64_INSTRUCTION_CODES_MIPS64_H_
+#define V8_COMPILER_BACKEND_MIPS64_INSTRUCTION_CODES_MIPS64_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// MIPS64-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V)           \
+  V(Mips64Add)                               \
+  V(Mips64Dadd)                              \
+  V(Mips64DaddOvf)                           \
+  V(Mips64Sub)                               \
+  V(Mips64Dsub)                              \
+  V(Mips64DsubOvf)                           \
+  V(Mips64Mul)                               \
+  V(Mips64MulOvf)                            \
+  V(Mips64MulHigh)                           \
+  V(Mips64DMulHigh)                          \
+  V(Mips64MulHighU)                          \
+  V(Mips64Dmul)                              \
+  V(Mips64Div)                               \
+  V(Mips64Ddiv)                              \
+  V(Mips64DivU)                              \
+  V(Mips64DdivU)                             \
+  V(Mips64Mod)                               \
+  V(Mips64Dmod)                              \
+  V(Mips64ModU)                              \
+  V(Mips64DmodU)                             \
+  V(Mips64And)                               \
+  V(Mips64And32)                             \
+  V(Mips64Or)                                \
+  V(Mips64Or32)                              \
+  V(Mips64Nor)                               \
+  V(Mips64Nor32)                             \
+  V(Mips64Xor)                               \
+  V(Mips64Xor32)                             \
+  V(Mips64Clz)                               \
+  V(Mips64Lsa)                               \
+  V(Mips64Dlsa)                              \
+  V(Mips64Shl)                               \
+  V(Mips64Shr)                               \
+  V(Mips64Sar)                               \
+  V(Mips64Ext)                               \
+  V(Mips64Ins)                               \
+  V(Mips64Dext)                              \
+  V(Mips64Dins)                              \
+  V(Mips64Dclz)                              \
+  V(Mips64Ctz)                               \
+  V(Mips64Dctz)                              \
+  V(Mips64Popcnt)                            \
+  V(Mips64Dpopcnt)                           \
+  V(Mips64Dshl)                              \
+  V(Mips64Dshr)                              \
+  V(Mips64Dsar)                              \
+  V(Mips64Ror)                               \
+  V(Mips64Dror)                              \
+  V(Mips64Mov)                               \
+  V(Mips64Tst)                               \
+  V(Mips64Cmp)                               \
+  V(Mips64CmpS)                              \
+  V(Mips64AddS)                              \
+  V(Mips64SubS)                              \
+  V(Mips64MulS)                              \
+  V(Mips64DivS)                              \
+  V(Mips64ModS)                              \
+  V(Mips64AbsS)                              \
+  V(Mips64NegS)                              \
+  V(Mips64SqrtS)                             \
+  V(Mips64MaxS)                              \
+  V(Mips64MinS)                              \
+  V(Mips64CmpD)                              \
+  V(Mips64AddD)                              \
+  V(Mips64SubD)                              \
+  V(Mips64MulD)                              \
+  V(Mips64DivD)                              \
+  V(Mips64ModD)                              \
+  V(Mips64AbsD)                              \
+  V(Mips64NegD)                              \
+  V(Mips64SqrtD)                             \
+  V(Mips64MaxD)                              \
+  V(Mips64MinD)                              \
+  V(Mips64Float64RoundDown)                  \
+  V(Mips64Float64RoundTruncate)              \
+  V(Mips64Float64RoundUp)                    \
+  V(Mips64Float64RoundTiesEven)              \
+  V(Mips64Float32RoundDown)                  \
+  V(Mips64Float32RoundTruncate)              \
+  V(Mips64Float32RoundUp)                    \
+  V(Mips64Float32RoundTiesEven)              \
+  V(Mips64CvtSD)                             \
+  V(Mips64CvtDS)                             \
+  V(Mips64TruncWD)                           \
+  V(Mips64RoundWD)                           \
+  V(Mips64FloorWD)                           \
+  V(Mips64CeilWD)                            \
+  V(Mips64TruncWS)                           \
+  V(Mips64RoundWS)                           \
+  V(Mips64FloorWS)                           \
+  V(Mips64CeilWS)                            \
+  V(Mips64TruncLS)                           \
+  V(Mips64TruncLD)                           \
+  V(Mips64TruncUwD)                          \
+  V(Mips64TruncUwS)                          \
+  V(Mips64TruncUlS)                          \
+  V(Mips64TruncUlD)                          \
+  V(Mips64CvtDW)                             \
+  V(Mips64CvtSL)                             \
+  V(Mips64CvtSW)                             \
+  V(Mips64CvtSUw)                            \
+  V(Mips64CvtSUl)                            \
+  V(Mips64CvtDL)                             \
+  V(Mips64CvtDUw)                            \
+  V(Mips64CvtDUl)                            \
+  V(Mips64Lb)                                \
+  V(Mips64Lbu)                               \
+  V(Mips64Sb)                                \
+  V(Mips64Lh)                                \
+  V(Mips64Ulh)                               \
+  V(Mips64Lhu)                               \
+  V(Mips64Ulhu)                              \
+  V(Mips64Sh)                                \
+  V(Mips64Ush)                               \
+  V(Mips64Ld)                                \
+  V(Mips64Uld)                               \
+  V(Mips64Lw)                                \
+  V(Mips64Ulw)                               \
+  V(Mips64Lwu)                               \
+  V(Mips64Ulwu)                              \
+  V(Mips64Sw)                                \
+  V(Mips64Usw)                               \
+  V(Mips64Sd)                                \
+  V(Mips64Usd)                               \
+  V(Mips64Lwc1)                              \
+  V(Mips64Ulwc1)                             \
+  V(Mips64Swc1)                              \
+  V(Mips64Uswc1)                             \
+  V(Mips64Ldc1)                              \
+  V(Mips64Uldc1)                             \
+  V(Mips64Sdc1)                              \
+  V(Mips64Usdc1)                             \
+  V(Mips64BitcastDL)                         \
+  V(Mips64BitcastLD)                         \
+  V(Mips64Float64ExtractLowWord32)           \
+  V(Mips64Float64ExtractHighWord32)          \
+  V(Mips64Float64InsertLowWord32)            \
+  V(Mips64Float64InsertHighWord32)           \
+  V(Mips64Float32Max)                        \
+  V(Mips64Float64Max)                        \
+  V(Mips64Float32Min)                        \
+  V(Mips64Float64Min)                        \
+  V(Mips64Float64SilenceNaN)                 \
+  V(Mips64Push)                              \
+  V(Mips64Peek)                              \
+  V(Mips64StoreToStackSlot)                  \
+  V(Mips64ByteSwap64)                        \
+  V(Mips64ByteSwap32)                        \
+  V(Mips64StackClaim)                        \
+  V(Mips64Seb)                               \
+  V(Mips64Seh)                               \
+  V(Mips64Sync)                              \
+  V(Mips64AssertEqual)                       \
+  V(Mips64S128Const)                         \
+  V(Mips64S128Zero)                          \
+  V(Mips64S128AllOnes)                       \
+  V(Mips64I32x4Splat)                        \
+  V(Mips64I32x4ExtractLane)                  \
+  V(Mips64I32x4ReplaceLane)                  \
+  V(Mips64I32x4Add)                          \
+  V(Mips64I32x4AddHoriz)                     \
+  V(Mips64I32x4Sub)                          \
+  V(Mips64F64x2Abs)                          \
+  V(Mips64F64x2Neg)                          \
+  V(Mips64F32x4Splat)                        \
+  V(Mips64F32x4ExtractLane)                  \
+  V(Mips64F32x4ReplaceLane)                  \
+  V(Mips64F32x4SConvertI32x4)                \
+  V(Mips64F32x4UConvertI32x4)                \
+  V(Mips64I32x4Mul)                          \
+  V(Mips64I32x4MaxS)                         \
+  V(Mips64I32x4MinS)                         \
+  V(Mips64I32x4Eq)                           \
+  V(Mips64I32x4Ne)                           \
+  V(Mips64I32x4Shl)                          \
+  V(Mips64I32x4ShrS)                         \
+  V(Mips64I32x4ShrU)                         \
+  V(Mips64I32x4MaxU)                         \
+  V(Mips64I32x4MinU)                         \
+  V(Mips64F64x2Sqrt)                         \
+  V(Mips64F64x2Add)                          \
+  V(Mips64F64x2Sub)                          \
+  V(Mips64F64x2Mul)                          \
+  V(Mips64F64x2Div)                          \
+  V(Mips64F64x2Min)                          \
+  V(Mips64F64x2Max)                          \
+  V(Mips64F64x2Eq)                           \
+  V(Mips64F64x2Ne)                           \
+  V(Mips64F64x2Lt)                           \
+  V(Mips64F64x2Le)                           \
+  V(Mips64F64x2Splat)                        \
+  V(Mips64F64x2ExtractLane)                  \
+  V(Mips64F64x2ReplaceLane)                  \
+  V(Mips64F64x2Pmin)                         \
+  V(Mips64F64x2Pmax)                         \
+  V(Mips64F64x2Ceil)                         \
+  V(Mips64F64x2Floor)                        \
+  V(Mips64F64x2Trunc)                        \
+  V(Mips64F64x2NearestInt)                   \
+  V(Mips64I64x2Splat)                        \
+  V(Mips64I64x2ExtractLane)                  \
+  V(Mips64I64x2ReplaceLane)                  \
+  V(Mips64I64x2Add)                          \
+  V(Mips64I64x2Sub)                          \
+  V(Mips64I64x2Mul)                          \
+  V(Mips64I64x2Neg)                          \
+  V(Mips64I64x2Shl)                          \
+  V(Mips64I64x2ShrS)                         \
+  V(Mips64I64x2ShrU)                         \
+  V(Mips64F32x4Abs)                          \
+  V(Mips64F32x4Neg)                          \
+  V(Mips64F32x4Sqrt)                         \
+  V(Mips64F32x4RecipApprox)                  \
+  V(Mips64F32x4RecipSqrtApprox)              \
+  V(Mips64F32x4Add)                          \
+  V(Mips64F32x4AddHoriz)                     \
+  V(Mips64F32x4Sub)                          \
+  V(Mips64F32x4Mul)                          \
+  V(Mips64F32x4Div)                          \
+  V(Mips64F32x4Max)                          \
+  V(Mips64F32x4Min)                          \
+  V(Mips64F32x4Eq)                           \
+  V(Mips64F32x4Ne)                           \
+  V(Mips64F32x4Lt)                           \
+  V(Mips64F32x4Le)                           \
+  V(Mips64F32x4Pmin)                         \
+  V(Mips64F32x4Pmax)                         \
+  V(Mips64F32x4Ceil)                         \
+  V(Mips64F32x4Floor)                        \
+  V(Mips64F32x4Trunc)                        \
+  V(Mips64F32x4NearestInt)                   \
+  V(Mips64I32x4SConvertF32x4)                \
+  V(Mips64I32x4UConvertF32x4)                \
+  V(Mips64I32x4Neg)                          \
+  V(Mips64I32x4GtS)                          \
+  V(Mips64I32x4GeS)                          \
+  V(Mips64I32x4GtU)                          \
+  V(Mips64I32x4GeU)                          \
+  V(Mips64I32x4Abs)                          \
+  V(Mips64I32x4BitMask)                      \
+  V(Mips64I32x4DotI16x8S)                    \
+  V(Mips64I16x8Splat)                        \
+  V(Mips64I16x8ExtractLaneU)                 \
+  V(Mips64I16x8ExtractLaneS)                 \
+  V(Mips64I16x8ReplaceLane)                  \
+  V(Mips64I16x8Neg)                          \
+  V(Mips64I16x8Shl)                          \
+  V(Mips64I16x8ShrS)                         \
+  V(Mips64I16x8ShrU)                         \
+  V(Mips64I16x8Add)                          \
+  V(Mips64I16x8AddSatS)                      \
+  V(Mips64I16x8AddHoriz)                     \
+  V(Mips64I16x8Sub)                          \
+  V(Mips64I16x8SubSatS)                      \
+  V(Mips64I16x8Mul)                          \
+  V(Mips64I16x8MaxS)                         \
+  V(Mips64I16x8MinS)                         \
+  V(Mips64I16x8Eq)                           \
+  V(Mips64I16x8Ne)                           \
+  V(Mips64I16x8GtS)                          \
+  V(Mips64I16x8GeS)                          \
+  V(Mips64I16x8AddSatU)                      \
+  V(Mips64I16x8SubSatU)                      \
+  V(Mips64I16x8MaxU)                         \
+  V(Mips64I16x8MinU)                         \
+  V(Mips64I16x8GtU)                          \
+  V(Mips64I16x8GeU)                          \
+  V(Mips64I16x8RoundingAverageU)             \
+  V(Mips64I16x8Abs)                          \
+  V(Mips64I16x8BitMask)                      \
+  V(Mips64I8x16Splat)                        \
+  V(Mips64I8x16ExtractLaneU)                 \
+  V(Mips64I8x16ExtractLaneS)                 \
+  V(Mips64I8x16ReplaceLane)                  \
+  V(Mips64I8x16Neg)                          \
+  V(Mips64I8x16Shl)                          \
+  V(Mips64I8x16ShrS)                         \
+  V(Mips64I8x16Add)                          \
+  V(Mips64I8x16AddSatS)                      \
+  V(Mips64I8x16Sub)                          \
+  V(Mips64I8x16SubSatS)                      \
+  V(Mips64I8x16Mul)                          \
+  V(Mips64I8x16MaxS)                         \
+  V(Mips64I8x16MinS)                         \
+  V(Mips64I8x16Eq)                           \
+  V(Mips64I8x16Ne)                           \
+  V(Mips64I8x16GtS)                          \
+  V(Mips64I8x16GeS)                          \
+  V(Mips64I8x16ShrU)                         \
+  V(Mips64I8x16AddSatU)                      \
+  V(Mips64I8x16SubSatU)                      \
+  V(Mips64I8x16MaxU)                         \
+  V(Mips64I8x16MinU)                         \
+  V(Mips64I8x16GtU)                          \
+  V(Mips64I8x16GeU)                          \
+  V(Mips64I8x16RoundingAverageU)             \
+  V(Mips64I8x16Abs)                          \
+  V(Mips64I8x16BitMask)                      \
+  V(Mips64S128And)                           \
+  V(Mips64S128Or)                            \
+  V(Mips64S128Xor)                           \
+  V(Mips64S128Not)                           \
+  V(Mips64S128Select)                        \
+  V(Mips64S128AndNot)                        \
+  V(Mips64V32x4AnyTrue)                      \
+  V(Mips64V32x4AllTrue)                      \
+  V(Mips64V16x8AnyTrue)                      \
+  V(Mips64V16x8AllTrue)                      \
+  V(Mips64V8x16AnyTrue)                      \
+  V(Mips64V8x16AllTrue)                      \
+  V(Mips64S32x4InterleaveRight)              \
+  V(Mips64S32x4InterleaveLeft)               \
+  V(Mips64S32x4PackEven)                     \
+  V(Mips64S32x4PackOdd)                      \
+  V(Mips64S32x4InterleaveEven)               \
+  V(Mips64S32x4InterleaveOdd)                \
+  V(Mips64S32x4Shuffle)                      \
+  V(Mips64S16x8InterleaveRight)              \
+  V(Mips64S16x8InterleaveLeft)               \
+  V(Mips64S16x8PackEven)                     \
+  V(Mips64S16x8PackOdd)                      \
+  V(Mips64S16x8InterleaveEven)               \
+  V(Mips64S16x8InterleaveOdd)                \
+  V(Mips64S16x4Reverse)                      \
+  V(Mips64S16x2Reverse)                      \
+  V(Mips64S8x16InterleaveRight)              \
+  V(Mips64S8x16InterleaveLeft)               \
+  V(Mips64S8x16PackEven)                     \
+  V(Mips64S8x16PackOdd)                      \
+  V(Mips64S8x16InterleaveEven)               \
+  V(Mips64S8x16InterleaveOdd)                \
+  V(Mips64I8x16Shuffle)                      \
+  V(Mips64I8x16Swizzle)                      \
+  V(Mips64S8x16Concat)                       \
+  V(Mips64S8x8Reverse)                       \
+  V(Mips64S8x4Reverse)                       \
+  V(Mips64S8x2Reverse)                       \
+  V(Mips64S128Load8Splat)                    \
+  V(Mips64S128Load16Splat)                   \
+  V(Mips64S128Load32Splat)                   \
+  V(Mips64S128Load64Splat)                   \
+  V(Mips64S128Load8x8S)                      \
+  V(Mips64S128Load8x8U)                      \
+  V(Mips64S128Load16x4S)                     \
+  V(Mips64S128Load16x4U)                     \
+  V(Mips64S128Load32x2S)                     \
+  V(Mips64S128Load32x2U)                     \
+  V(Mips64S128Load32Zero)                    \
+  V(Mips64S128Load64Zero)                    \
+  V(Mips64MsaLd)                             \
+  V(Mips64MsaSt)                             \
+  V(Mips64I32x4SConvertI16x8Low)             \
+  V(Mips64I32x4SConvertI16x8High)            \
+  V(Mips64I32x4UConvertI16x8Low)             \
+  V(Mips64I32x4UConvertI16x8High)            \
+  V(Mips64I16x8SConvertI8x16Low)             \
+  V(Mips64I16x8SConvertI8x16High)            \
+  V(Mips64I16x8SConvertI32x4)                \
+  V(Mips64I16x8UConvertI32x4)                \
+  V(Mips64I16x8UConvertI8x16Low)             \
+  V(Mips64I16x8UConvertI8x16High)            \
+  V(Mips64I8x16SConvertI16x8)                \
+  V(Mips64I8x16UConvertI16x8)                \
+  V(Mips64Word64AtomicLoadUint8)             \
+  V(Mips64Word64AtomicLoadUint16)            \
+  V(Mips64Word64AtomicLoadUint32)            \
+  V(Mips64Word64AtomicLoadUint64)            \
+  V(Mips64Word64AtomicStoreWord8)            \
+  V(Mips64Word64AtomicStoreWord16)           \
+  V(Mips64Word64AtomicStoreWord32)           \
+  V(Mips64Word64AtomicStoreWord64)           \
+  V(Mips64Word64AtomicAddUint8)              \
+  V(Mips64Word64AtomicAddUint16)             \
+  V(Mips64Word64AtomicAddUint32)             \
+  V(Mips64Word64AtomicAddUint64)             \
+  V(Mips64Word64AtomicSubUint8)              \
+  V(Mips64Word64AtomicSubUint16)             \
+  V(Mips64Word64AtomicSubUint32)             \
+  V(Mips64Word64AtomicSubUint64)             \
+  V(Mips64Word64AtomicAndUint8)              \
+  V(Mips64Word64AtomicAndUint16)             \
+  V(Mips64Word64AtomicAndUint32)             \
+  V(Mips64Word64AtomicAndUint64)             \
+  V(Mips64Word64AtomicOrUint8)               \
+  V(Mips64Word64AtomicOrUint16)              \
+  V(Mips64Word64AtomicOrUint32)              \
+  V(Mips64Word64AtomicOrUint64)              \
+  V(Mips64Word64AtomicXorUint8)              \
+  V(Mips64Word64AtomicXorUint16)             \
+  V(Mips64Word64AtomicXorUint32)             \
+  V(Mips64Word64AtomicXorUint64)             \
+  V(Mips64Word64AtomicExchangeUint8)         \
+  V(Mips64Word64AtomicExchangeUint16)        \
+  V(Mips64Word64AtomicExchangeUint32)        \
+  V(Mips64Word64AtomicExchangeUint64)        \
+  V(Mips64Word64AtomicCompareExchangeUint8)  \
+  V(Mips64Word64AtomicCompareExchangeUint16) \
+  V(Mips64Word64AtomicCompareExchangeUint32) \
+  V(Mips64Word64AtomicCompareExchangeUint64)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// R = register
+// O = register or stack slot
+// D = double register
+// I = immediate (handle, external, int32)
+// MRI = [register + immediate]
+// MRR = [register + register]
+// TODO(plind): Add the new r6 address modes.
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MRI) /* [%r0 + K] */               \
+  V(MRR) /* [%r0 + %r1] */
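+
+// Illustration only (not part of the upstream header): X-macro lists like the
+// one above are typically expanded by the shared backend code into
+// enumerators, e.g.
+//
+//   #define DECLARE_ADDRESSING_MODE(Name) kMode_##Name,
+//   TARGET_ADDRESSING_MODE_LIST(DECLARE_ADDRESSING_MODE)
+//   #undef DECLARE_ADDRESSING_MODE
+//
+// (the macro name here is hypothetical), which would yield kMode_MRI and
+// kMode_MRR next to the common addressing modes.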
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_MIPS64_INSTRUCTION_CODES_MIPS64_H_
diff --git a/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
new file mode 100644
index 0000000..0cbaf0c
--- /dev/null
+++ b/src/compiler/backend/mips64/instruction-scheduler-mips64.cc
@@ -0,0 +1,1780 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/macro-assembler.h"
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kMips64AbsD:
+    case kMips64AbsS:
+    case kMips64Add:
+    case kMips64AddD:
+    case kMips64AddS:
+    case kMips64And:
+    case kMips64And32:
+    case kMips64AssertEqual:
+    case kMips64BitcastDL:
+    case kMips64BitcastLD:
+    case kMips64ByteSwap32:
+    case kMips64ByteSwap64:
+    case kMips64CeilWD:
+    case kMips64CeilWS:
+    case kMips64Clz:
+    case kMips64Cmp:
+    case kMips64CmpD:
+    case kMips64CmpS:
+    case kMips64Ctz:
+    case kMips64CvtDL:
+    case kMips64CvtDS:
+    case kMips64CvtDUl:
+    case kMips64CvtDUw:
+    case kMips64CvtDW:
+    case kMips64CvtSD:
+    case kMips64CvtSL:
+    case kMips64CvtSUl:
+    case kMips64CvtSUw:
+    case kMips64CvtSW:
+    case kMips64DMulHigh:
+    case kMips64MulHighU:
+    case kMips64Dadd:
+    case kMips64DaddOvf:
+    case kMips64Dclz:
+    case kMips64Dctz:
+    case kMips64Ddiv:
+    case kMips64DdivU:
+    case kMips64Dext:
+    case kMips64Dins:
+    case kMips64Div:
+    case kMips64DivD:
+    case kMips64DivS:
+    case kMips64DivU:
+    case kMips64Dlsa:
+    case kMips64Dmod:
+    case kMips64DmodU:
+    case kMips64Dmul:
+    case kMips64Dpopcnt:
+    case kMips64Dror:
+    case kMips64Dsar:
+    case kMips64Dshl:
+    case kMips64Dshr:
+    case kMips64Dsub:
+    case kMips64DsubOvf:
+    case kMips64Ext:
+    case kMips64F64x2Abs:
+    case kMips64F64x2Neg:
+    case kMips64F64x2Sqrt:
+    case kMips64F64x2Add:
+    case kMips64F64x2Sub:
+    case kMips64F64x2Mul:
+    case kMips64F64x2Div:
+    case kMips64F64x2Min:
+    case kMips64F64x2Max:
+    case kMips64F64x2Eq:
+    case kMips64F64x2Ne:
+    case kMips64F64x2Lt:
+    case kMips64F64x2Le:
+    case kMips64F64x2Pmin:
+    case kMips64F64x2Pmax:
+    case kMips64F64x2Ceil:
+    case kMips64F64x2Floor:
+    case kMips64F64x2Trunc:
+    case kMips64F64x2NearestInt:
+    case kMips64I64x2Splat:
+    case kMips64I64x2ExtractLane:
+    case kMips64I64x2ReplaceLane:
+    case kMips64I64x2Add:
+    case kMips64I64x2Sub:
+    case kMips64I64x2Mul:
+    case kMips64I64x2Neg:
+    case kMips64I64x2Shl:
+    case kMips64I64x2ShrS:
+    case kMips64I64x2ShrU:
+    case kMips64F32x4Abs:
+    case kMips64F32x4Add:
+    case kMips64F32x4AddHoriz:
+    case kMips64F32x4Eq:
+    case kMips64F32x4ExtractLane:
+    case kMips64F32x4Lt:
+    case kMips64F32x4Le:
+    case kMips64F32x4Max:
+    case kMips64F32x4Min:
+    case kMips64F32x4Mul:
+    case kMips64F32x4Div:
+    case kMips64F32x4Ne:
+    case kMips64F32x4Neg:
+    case kMips64F32x4Sqrt:
+    case kMips64F32x4RecipApprox:
+    case kMips64F32x4RecipSqrtApprox:
+    case kMips64F32x4ReplaceLane:
+    case kMips64F32x4SConvertI32x4:
+    case kMips64F32x4Splat:
+    case kMips64F32x4Sub:
+    case kMips64F32x4UConvertI32x4:
+    case kMips64F32x4Pmin:
+    case kMips64F32x4Pmax:
+    case kMips64F32x4Ceil:
+    case kMips64F32x4Floor:
+    case kMips64F32x4Trunc:
+    case kMips64F32x4NearestInt:
+    case kMips64F64x2Splat:
+    case kMips64F64x2ExtractLane:
+    case kMips64F64x2ReplaceLane:
+    case kMips64Float32Max:
+    case kMips64Float32Min:
+    case kMips64Float32RoundDown:
+    case kMips64Float32RoundTiesEven:
+    case kMips64Float32RoundTruncate:
+    case kMips64Float32RoundUp:
+    case kMips64Float64ExtractLowWord32:
+    case kMips64Float64ExtractHighWord32:
+    case kMips64Float64InsertLowWord32:
+    case kMips64Float64InsertHighWord32:
+    case kMips64Float64Max:
+    case kMips64Float64Min:
+    case kMips64Float64RoundDown:
+    case kMips64Float64RoundTiesEven:
+    case kMips64Float64RoundTruncate:
+    case kMips64Float64RoundUp:
+    case kMips64Float64SilenceNaN:
+    case kMips64FloorWD:
+    case kMips64FloorWS:
+    case kMips64I16x8Add:
+    case kMips64I16x8AddHoriz:
+    case kMips64I16x8AddSatS:
+    case kMips64I16x8AddSatU:
+    case kMips64I16x8Eq:
+    case kMips64I16x8ExtractLaneU:
+    case kMips64I16x8ExtractLaneS:
+    case kMips64I16x8GeS:
+    case kMips64I16x8GeU:
+    case kMips64I16x8GtS:
+    case kMips64I16x8GtU:
+    case kMips64I16x8MaxS:
+    case kMips64I16x8MaxU:
+    case kMips64I16x8MinS:
+    case kMips64I16x8MinU:
+    case kMips64I16x8Mul:
+    case kMips64I16x8Ne:
+    case kMips64I16x8Neg:
+    case kMips64I16x8ReplaceLane:
+    case kMips64I8x16SConvertI16x8:
+    case kMips64I16x8SConvertI32x4:
+    case kMips64I16x8SConvertI8x16High:
+    case kMips64I16x8SConvertI8x16Low:
+    case kMips64I16x8Shl:
+    case kMips64I16x8ShrS:
+    case kMips64I16x8ShrU:
+    case kMips64I16x8Splat:
+    case kMips64I16x8Sub:
+    case kMips64I16x8SubSatS:
+    case kMips64I16x8SubSatU:
+    case kMips64I8x16UConvertI16x8:
+    case kMips64I16x8UConvertI32x4:
+    case kMips64I16x8UConvertI8x16High:
+    case kMips64I16x8UConvertI8x16Low:
+    case kMips64I16x8RoundingAverageU:
+    case kMips64I16x8Abs:
+    case kMips64I16x8BitMask:
+    case kMips64I32x4Add:
+    case kMips64I32x4AddHoriz:
+    case kMips64I32x4Eq:
+    case kMips64I32x4ExtractLane:
+    case kMips64I32x4GeS:
+    case kMips64I32x4GeU:
+    case kMips64I32x4GtS:
+    case kMips64I32x4GtU:
+    case kMips64I32x4MaxS:
+    case kMips64I32x4MaxU:
+    case kMips64I32x4MinS:
+    case kMips64I32x4MinU:
+    case kMips64I32x4Mul:
+    case kMips64I32x4Ne:
+    case kMips64I32x4Neg:
+    case kMips64I32x4ReplaceLane:
+    case kMips64I32x4SConvertF32x4:
+    case kMips64I32x4SConvertI16x8High:
+    case kMips64I32x4SConvertI16x8Low:
+    case kMips64I32x4Shl:
+    case kMips64I32x4ShrS:
+    case kMips64I32x4ShrU:
+    case kMips64I32x4Splat:
+    case kMips64I32x4Sub:
+    case kMips64I32x4UConvertF32x4:
+    case kMips64I32x4UConvertI16x8High:
+    case kMips64I32x4UConvertI16x8Low:
+    case kMips64I32x4Abs:
+    case kMips64I32x4BitMask:
+    case kMips64I32x4DotI16x8S:
+    case kMips64I8x16Add:
+    case kMips64I8x16AddSatS:
+    case kMips64I8x16AddSatU:
+    case kMips64I8x16Eq:
+    case kMips64I8x16ExtractLaneU:
+    case kMips64I8x16ExtractLaneS:
+    case kMips64I8x16GeS:
+    case kMips64I8x16GeU:
+    case kMips64I8x16GtS:
+    case kMips64I8x16GtU:
+    case kMips64I8x16MaxS:
+    case kMips64I8x16MaxU:
+    case kMips64I8x16MinS:
+    case kMips64I8x16MinU:
+    case kMips64I8x16Mul:
+    case kMips64I8x16Ne:
+    case kMips64I8x16Neg:
+    case kMips64I8x16ReplaceLane:
+    case kMips64I8x16Shl:
+    case kMips64I8x16ShrS:
+    case kMips64I8x16ShrU:
+    case kMips64I8x16Splat:
+    case kMips64I8x16Sub:
+    case kMips64I8x16SubSatS:
+    case kMips64I8x16SubSatU:
+    case kMips64I8x16RoundingAverageU:
+    case kMips64I8x16Abs:
+    case kMips64I8x16BitMask:
+    case kMips64Ins:
+    case kMips64Lsa:
+    case kMips64MaxD:
+    case kMips64MaxS:
+    case kMips64MinD:
+    case kMips64MinS:
+    case kMips64Mod:
+    case kMips64ModU:
+    case kMips64Mov:
+    case kMips64Mul:
+    case kMips64MulD:
+    case kMips64MulHigh:
+    case kMips64MulOvf:
+    case kMips64MulS:
+    case kMips64NegD:
+    case kMips64NegS:
+    case kMips64Nor:
+    case kMips64Nor32:
+    case kMips64Or:
+    case kMips64Or32:
+    case kMips64Popcnt:
+    case kMips64Ror:
+    case kMips64RoundWD:
+    case kMips64RoundWS:
+    case kMips64S128And:
+    case kMips64S128Or:
+    case kMips64S128Not:
+    case kMips64S128Select:
+    case kMips64S128AndNot:
+    case kMips64S128Xor:
+    case kMips64S128Const:
+    case kMips64S128Zero:
+    case kMips64S128AllOnes:
+    case kMips64S16x8InterleaveEven:
+    case kMips64S16x8InterleaveOdd:
+    case kMips64S16x8InterleaveLeft:
+    case kMips64S16x8InterleaveRight:
+    case kMips64S16x8PackEven:
+    case kMips64S16x8PackOdd:
+    case kMips64S16x2Reverse:
+    case kMips64S16x4Reverse:
+    case kMips64V8x16AllTrue:
+    case kMips64V8x16AnyTrue:
+    case kMips64V32x4AllTrue:
+    case kMips64V32x4AnyTrue:
+    case kMips64V16x8AllTrue:
+    case kMips64V16x8AnyTrue:
+    case kMips64S32x4InterleaveEven:
+    case kMips64S32x4InterleaveOdd:
+    case kMips64S32x4InterleaveLeft:
+    case kMips64S32x4InterleaveRight:
+    case kMips64S32x4PackEven:
+    case kMips64S32x4PackOdd:
+    case kMips64S32x4Shuffle:
+    case kMips64S8x16Concat:
+    case kMips64S8x16InterleaveEven:
+    case kMips64S8x16InterleaveOdd:
+    case kMips64S8x16InterleaveLeft:
+    case kMips64S8x16InterleaveRight:
+    case kMips64S8x16PackEven:
+    case kMips64S8x16PackOdd:
+    case kMips64S8x2Reverse:
+    case kMips64S8x4Reverse:
+    case kMips64S8x8Reverse:
+    case kMips64I8x16Shuffle:
+    case kMips64I8x16Swizzle:
+    case kMips64Sar:
+    case kMips64Seb:
+    case kMips64Seh:
+    case kMips64Shl:
+    case kMips64Shr:
+    case kMips64SqrtD:
+    case kMips64SqrtS:
+    case kMips64Sub:
+    case kMips64SubD:
+    case kMips64SubS:
+    case kMips64TruncLD:
+    case kMips64TruncLS:
+    case kMips64TruncUlD:
+    case kMips64TruncUlS:
+    case kMips64TruncUwD:
+    case kMips64TruncUwS:
+    case kMips64TruncWD:
+    case kMips64TruncWS:
+    case kMips64Tst:
+    case kMips64Xor:
+    case kMips64Xor32:
+      return kNoOpcodeFlags;
+
+    case kMips64Lb:
+    case kMips64Lbu:
+    case kMips64Ld:
+    case kMips64Ldc1:
+    case kMips64Lh:
+    case kMips64Lhu:
+    case kMips64Lw:
+    case kMips64Lwc1:
+    case kMips64Lwu:
+    case kMips64MsaLd:
+    case kMips64Peek:
+    case kMips64Uld:
+    case kMips64Uldc1:
+    case kMips64Ulh:
+    case kMips64Ulhu:
+    case kMips64Ulw:
+    case kMips64Ulwu:
+    case kMips64Ulwc1:
+    case kMips64S128Load8Splat:
+    case kMips64S128Load16Splat:
+    case kMips64S128Load32Splat:
+    case kMips64S128Load64Splat:
+    case kMips64S128Load8x8S:
+    case kMips64S128Load8x8U:
+    case kMips64S128Load16x4S:
+    case kMips64S128Load16x4U:
+    case kMips64S128Load32x2S:
+    case kMips64S128Load32x2U:
+    case kMips64S128Load32Zero:
+    case kMips64S128Load64Zero:
+    case kMips64Word64AtomicLoadUint8:
+    case kMips64Word64AtomicLoadUint16:
+    case kMips64Word64AtomicLoadUint32:
+    case kMips64Word64AtomicLoadUint64:
+      return kIsLoadOperation;
+
+    case kMips64ModD:
+    case kMips64ModS:
+    case kMips64MsaSt:
+    case kMips64Push:
+    case kMips64Sb:
+    case kMips64Sd:
+    case kMips64Sdc1:
+    case kMips64Sh:
+    case kMips64StackClaim:
+    case kMips64StoreToStackSlot:
+    case kMips64Sw:
+    case kMips64Swc1:
+    case kMips64Usd:
+    case kMips64Usdc1:
+    case kMips64Ush:
+    case kMips64Usw:
+    case kMips64Uswc1:
+    case kMips64Sync:
+    case kMips64Word64AtomicStoreWord8:
+    case kMips64Word64AtomicStoreWord16:
+    case kMips64Word64AtomicStoreWord32:
+    case kMips64Word64AtomicStoreWord64:
+    case kMips64Word64AtomicAddUint8:
+    case kMips64Word64AtomicAddUint16:
+    case kMips64Word64AtomicAddUint32:
+    case kMips64Word64AtomicAddUint64:
+    case kMips64Word64AtomicSubUint8:
+    case kMips64Word64AtomicSubUint16:
+    case kMips64Word64AtomicSubUint32:
+    case kMips64Word64AtomicSubUint64:
+    case kMips64Word64AtomicAndUint8:
+    case kMips64Word64AtomicAndUint16:
+    case kMips64Word64AtomicAndUint32:
+    case kMips64Word64AtomicAndUint64:
+    case kMips64Word64AtomicOrUint8:
+    case kMips64Word64AtomicOrUint16:
+    case kMips64Word64AtomicOrUint32:
+    case kMips64Word64AtomicOrUint64:
+    case kMips64Word64AtomicXorUint8:
+    case kMips64Word64AtomicXorUint16:
+    case kMips64Word64AtomicXorUint32:
+    case kMips64Word64AtomicXorUint64:
+    case kMips64Word64AtomicExchangeUint8:
+    case kMips64Word64AtomicExchangeUint16:
+    case kMips64Word64AtomicExchangeUint32:
+    case kMips64Word64AtomicExchangeUint64:
+    case kMips64Word64AtomicCompareExchangeUint8:
+    case kMips64Word64AtomicCompareExchangeUint16:
+    case kMips64Word64AtomicCompareExchangeUint32:
+    case kMips64Word64AtomicCompareExchangeUint64:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+enum Latency {
+  BRANCH = 4,  // Estimated max.
+  RINT_S = 4,  // Estimated.
+  RINT_D = 4,  // Estimated.
+
+  MULT = 4,
+  MULTU = 4,
+  DMULT = 4,
+  DMULTU = 4,
+
+  MUL = 7,
+  DMUL = 7,
+  MUH = 7,
+  MUHU = 7,
+  DMUH = 7,
+  DMUHU = 7,
+
+  DIV = 50,  // Min:11 Max:50
+  DDIV = 50,
+  DIVU = 50,
+  DDIVU = 50,
+
+  ABS_S = 4,
+  ABS_D = 4,
+  NEG_S = 4,
+  NEG_D = 4,
+  ADD_S = 4,
+  ADD_D = 4,
+  SUB_S = 4,
+  SUB_D = 4,
+  MAX_S = 4,  // Estimated.
+  MIN_S = 4,
+  MAX_D = 4,  // Estimated.
+  MIN_D = 4,
+  C_cond_S = 4,
+  C_cond_D = 4,
+  MUL_S = 4,
+
+  MADD_S = 4,
+  MSUB_S = 4,
+  NMADD_S = 4,
+  NMSUB_S = 4,
+
+  CABS_cond_S = 4,
+  CABS_cond_D = 4,
+
+  CVT_D_S = 4,
+  CVT_PS_PW = 4,
+
+  CVT_S_W = 4,
+  CVT_S_L = 4,
+  CVT_D_W = 4,
+  CVT_D_L = 4,
+
+  CVT_S_D = 4,
+
+  CVT_W_S = 4,
+  CVT_W_D = 4,
+  CVT_L_S = 4,
+  CVT_L_D = 4,
+
+  CEIL_W_S = 4,
+  CEIL_W_D = 4,
+  CEIL_L_S = 4,
+  CEIL_L_D = 4,
+
+  FLOOR_W_S = 4,
+  FLOOR_W_D = 4,
+  FLOOR_L_S = 4,
+  FLOOR_L_D = 4,
+
+  ROUND_W_S = 4,
+  ROUND_W_D = 4,
+  ROUND_L_S = 4,
+  ROUND_L_D = 4,
+
+  TRUNC_W_S = 4,
+  TRUNC_W_D = 4,
+  TRUNC_L_S = 4,
+  TRUNC_L_D = 4,
+
+  MOV_S = 4,
+  MOV_D = 4,
+
+  MOVF_S = 4,
+  MOVF_D = 4,
+
+  MOVN_S = 4,
+  MOVN_D = 4,
+
+  MOVT_S = 4,
+  MOVT_D = 4,
+
+  MOVZ_S = 4,
+  MOVZ_D = 4,
+
+  MUL_D = 5,
+  MADD_D = 5,
+  MSUB_D = 5,
+  NMADD_D = 5,
+  NMSUB_D = 5,
+
+  RECIP_S = 13,
+  RECIP_D = 26,
+
+  RSQRT_S = 17,
+  RSQRT_D = 36,
+
+  DIV_S = 17,
+  SQRT_S = 17,
+
+  DIV_D = 32,
+  SQRT_D = 32,
+
+  MTC1 = 4,
+  MTHC1 = 4,
+  DMTC1 = 4,
+  LWC1 = 4,
+  LDC1 = 4,
+
+  MFC1 = 1,
+  MFHC1 = 1,
+  DMFC1 = 1,
+  MFHI = 1,
+  MFLO = 1,
+  SWC1 = 1,
+  SDC1 = 1,
+};
+
+int DadduLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;  // Estimated max.
+  }
+}
+
+int DsubuLatency(bool is_operand_register = true) {
+  return DadduLatency(is_operand_register);
+}
+
+int AndLatency(bool is_operand_register = true) {
+  return DadduLatency(is_operand_register);
+}
+
+int OrLatency(bool is_operand_register = true) {
+  return DadduLatency(is_operand_register);
+}
+
+int NorLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;  // Estimated max.
+  }
+}
+
+int XorLatency(bool is_operand_register = true) {
+  return DadduLatency(is_operand_register);
+}
+
+int MulLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return Latency::MUL;
+  } else {
+    return Latency::MUL + 1;
+  }
+}
+
+int DmulLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::DMUL;
+  } else {
+    latency = Latency::DMULT + Latency::MFLO;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int MulhLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::MUH;
+  } else {
+    latency = Latency::MULT + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int MulhuLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::MUH;
+  } else {
+    latency = Latency::MULTU + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int DMulhLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::DMUH;
+  } else {
+    latency = Latency::DMULT + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int DivLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return Latency::DIV;
+  } else {
+    return Latency::DIV + 1;
+  }
+}
+
+int DivuLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return Latency::DIVU;
+  } else {
+    return Latency::DIVU + 1;
+  }
+}
+
+int DdivLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::DDIV;
+  } else {
+    latency = Latency::DDIV + Latency::MFLO;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int DdivuLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = Latency::DDIVU;
+  } else {
+    latency = Latency::DDIVU + Latency::MFLO;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int ModLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = 1;
+  } else {
+    latency = Latency::DIV + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int ModuLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = 1;
+  } else {
+    latency = Latency::DIVU + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int DmodLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = 1;
+  } else {
+    latency = Latency::DDIV + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int DmoduLatency(bool is_operand_register = true) {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = 1;
+  } else {
+    latency = Latency::DDIVU + Latency::MFHI;
+  }
+  if (!is_operand_register) {
+    latency += 1;
+  }
+  return latency;
+}
+
+int MovzLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::BRANCH + 1;
+  } else {
+    return 1;
+  }
+}
+
+int MovnLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::BRANCH + 1;
+  } else {
+    return 1;
+  }
+}
+
+int DlsaLatency() {
+  // Estimated max.
+  return DadduLatency() + 1;
+}
+
+int CallLatency() {
+  // Estimated.
+  return DadduLatency(false) + Latency::BRANCH + 5;
+}
+
+int JumpLatency() {
+  // Estimated max.
+  return 1 + DadduLatency() + Latency::BRANCH + 2;
+}
+
+int SmiUntagLatency() { return 1; }
+
+int PrepareForTailCallLatency() {
+  // Estimated max.
+  return 2 * (DlsaLatency() + DadduLatency(false)) + 2 + Latency::BRANCH +
+         Latency::BRANCH + 2 * DsubuLatency(false) + 2 + Latency::BRANCH + 1;
+}
+
+int AssemblePopArgumentsAdoptFrameLatency() {
+  return 1 + Latency::BRANCH + 1 + SmiUntagLatency() +
+         PrepareForTailCallLatency();
+}
+
+int AssertLatency() { return 1; }
+
+int PrepareCallCFunctionLatency() {
+  int frame_alignment = TurboAssembler::ActivationFrameAlignment();
+  if (frame_alignment > kSystemPointerSize) {
+    return 1 + DsubuLatency(false) + AndLatency(false) + 1;
+  } else {
+    return DsubuLatency(false);
+  }
+}
+
+int AdjustBaseAndOffsetLatency() {
+  return 3;  // Estimated max.
+}
+
+int AlignedMemoryLatency() { return AdjustBaseAndOffsetLatency() + 1; }
+
+int UlhuLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return AdjustBaseAndOffsetLatency() + 2 * AlignedMemoryLatency() + 2;
+  }
+}
+
+int UlwLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    // Estimated max.
+    return AdjustBaseAndOffsetLatency() + 3;
+  }
+}
+
+int UlwuLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return UlwLatency() + 1;
+  }
+}
+
+int UldLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    // Estimated max.
+    return AdjustBaseAndOffsetLatency() + 3;
+  }
+}
+
+int Ulwc1Latency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return UlwLatency() + Latency::MTC1;
+  }
+}
+
+int Uldc1Latency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return UldLatency() + Latency::DMTC1;
+  }
+}
+
+int UshLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    // Estimated max.
+    return AdjustBaseAndOffsetLatency() + 2 + 2 * AlignedMemoryLatency();
+  }
+}
+
+int UswLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return AdjustBaseAndOffsetLatency() + 2;
+  }
+}
+
+int UsdLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return AdjustBaseAndOffsetLatency() + 2;
+  }
+}
+
+int Uswc1Latency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return Latency::MFC1 + UswLatency();
+  }
+}
+
+int Usdc1Latency() {
+  if (kArchVariant >= kMips64r6) {
+    return AlignedMemoryLatency();
+  } else {
+    return Latency::DMFC1 + UsdLatency();
+  }
+}
+
+int Lwc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LWC1; }
+
+int Swc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SWC1; }
+
+int Sdc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::SDC1; }
+
+int Ldc1Latency() { return AdjustBaseAndOffsetLatency() + Latency::LDC1; }
+
+int MultiPushLatency() {
+  int latency = DsubuLatency(false);
+  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
+    latency++;
+  }
+  return latency;
+}
+
+int MultiPushFPULatency() {
+  int latency = DsubuLatency(false);
+  for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
+    latency += Sdc1Latency();
+  }
+  return latency;
+}
+
+int PushCallerSavedLatency(SaveFPRegsMode fp_mode) {
+  int latency = MultiPushLatency();
+  if (fp_mode == kSaveFPRegs) {
+    latency += MultiPushFPULatency();
+  }
+  return latency;
+}
+
+int MultiPopLatency() {
+  int latency = DadduLatency(false);
+  for (int16_t i = 0; i < kNumRegisters; i++) {
+    latency++;
+  }
+  return latency;
+}
+
+int MultiPopFPULatency() {
+  int latency = DadduLatency(false);
+  for (int16_t i = 0; i < kNumRegisters; i++) {
+    latency += Ldc1Latency();
+  }
+  return latency;
+}
+
+int PopCallerSavedLatency(SaveFPRegsMode fp_mode) {
+  int latency = MultiPopLatency();
+  if (fp_mode == kSaveFPRegs) {
+    latency += MultiPopFPULatency();
+  }
+  return latency;
+}
+
+int CallCFunctionHelperLatency() {
+  // Estimated.
+  int latency = AndLatency(false) + Latency::BRANCH + 2 + CallLatency();
+  if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
+    latency++;
+  } else {
+    latency += DadduLatency(false);
+  }
+  return latency;
+}
+
+int CallCFunctionLatency() { return 1 + CallCFunctionHelperLatency(); }
+
+int AssembleArchJumpLatency() {
+  // Estimated max.
+  return Latency::BRANCH;
+}
+
+int GenerateSwitchTableLatency() {
+  int latency = 0;
+  if (kArchVariant >= kMips64r6) {
+    latency = DlsaLatency() + 2;
+  } else {
+    latency = 6;
+  }
+  latency += 2;
+  return latency;
+}
+
+int AssembleArchTableSwitchLatency() {
+  return Latency::BRANCH + GenerateSwitchTableLatency();
+}
+
+int DropAndRetLatency() {
+  // Estimated max.
+  return DadduLatency(false) + JumpLatency();
+}
+
+int AssemblerReturnLatency() {
+  // Estimated max.
+  return DadduLatency(false) + MultiPopLatency() + MultiPopFPULatency() +
+         Latency::BRANCH + DadduLatency() + 1 + DropAndRetLatency();
+}
+
+int TryInlineTruncateDoubleToILatency() {
+  return 2 + Latency::TRUNC_W_D + Latency::MFC1 + 2 + AndLatency(false) +
+         Latency::BRANCH;
+}
+
+int CallStubDelayedLatency() { return 1 + CallLatency(); }
+
+int TruncateDoubleToIDelayedLatency() {
+  // TODO(mips): This no longer reflects how TruncateDoubleToI is called.
+  return TryInlineTruncateDoubleToILatency() + 1 + DsubuLatency(false) +
+         Sdc1Latency() + CallStubDelayedLatency() + DadduLatency(false) + 1;
+}
+
+int CheckPageFlagLatency() {
+  return AndLatency(false) + AlignedMemoryLatency() + AndLatency(false) +
+         Latency::BRANCH;
+}
+
+int SltuLatency(bool is_operand_register = true) {
+  if (is_operand_register) {
+    return 1;
+  } else {
+    return 2;  // Estimated max.
+  }
+}
+
+int BranchShortHelperR6Latency() {
+  return 2;  // Estimated max.
+}
+
+int BranchShortHelperLatency() {
+  return SltuLatency() + 2;  // Estimated max.
+}
+
+int BranchShortLatency(BranchDelaySlot bdslot = PROTECT) {
+  if (kArchVariant >= kMips64r6 && bdslot == PROTECT) {
+    return BranchShortHelperR6Latency();
+  } else {
+    return BranchShortHelperLatency();
+  }
+}
+
+int MoveLatency() { return 1; }
+
+int MovToFloatParametersLatency() { return 2 * MoveLatency(); }
+
+int MovFromFloatResultLatency() { return MoveLatency(); }
+
+int DaddOverflowLatency() {
+  // Estimated max.
+  return 6;
+}
+
+int DsubOverflowLatency() {
+  // Estimated max.
+  return 6;
+}
+
+int MulOverflowLatency() {
+  // Estimated max.
+  return MulLatency() + MulhLatency() + 2;
+}
+
+int DclzLatency() { return 1; }
+
+int CtzLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return 3 + DclzLatency();
+  } else {
+    return DadduLatency(false) + XorLatency() + AndLatency() + DclzLatency() +
+           1 + DsubuLatency();
+  }
+}
+
+int DctzLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return 4;
+  } else {
+    return DadduLatency(false) + XorLatency() + AndLatency() + 1 +
+           DsubuLatency();
+  }
+}
+
+int PopcntLatency() {
+  return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 +
+         AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() +
+         1 + MulLatency() + 1;
+}
+
+int DpopcntLatency() {
+  return 2 + AndLatency() + DsubuLatency() + 1 + AndLatency() + 1 +
+         AndLatency() + DadduLatency() + 1 + DadduLatency() + 1 + AndLatency() +
+         1 + DmulLatency() + 1;
+}
+
+int CompareFLatency() { return Latency::C_cond_S; }
+
+int CompareF32Latency() { return CompareFLatency(); }
+
+int CompareF64Latency() { return CompareFLatency(); }
+
+int CompareIsNanFLatency() { return CompareFLatency(); }
+
+int CompareIsNanF32Latency() { return CompareIsNanFLatency(); }
+
+int CompareIsNanF64Latency() { return CompareIsNanFLatency(); }
+
+int NegsLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::NEG_S;
+  } else {
+    // Estimated.
+    return CompareIsNanF32Latency() + 2 * Latency::BRANCH + Latency::NEG_S +
+           Latency::MFC1 + 1 + XorLatency() + Latency::MTC1;
+  }
+}
+
+int NegdLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::NEG_D;
+  } else {
+    // Estimated.
+    return CompareIsNanF64Latency() + 2 * Latency::BRANCH + Latency::NEG_D +
+           Latency::DMFC1 + 1 + XorLatency() + Latency::DMTC1;
+  }
+}
+
+int Float64RoundLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::RINT_D + 4;
+  } else {
+    // For ceil_l_d, floor_l_d, round_l_d, trunc_l_d latency is 4.
+    return Latency::DMFC1 + 1 + Latency::BRANCH + Latency::MOV_D + 4 +
+           Latency::DMFC1 + Latency::BRANCH + Latency::CVT_D_L + 2 +
+           Latency::MTHC1;
+  }
+}
+
+int Float32RoundLatency() {
+  if (kArchVariant >= kMips64r6) {
+    return Latency::RINT_S + 4;
+  } else {
+    // For ceil_w_s, floor_w_s, round_w_s, trunc_w_s latency is 4.
+    return Latency::MFC1 + 1 + Latency::BRANCH + Latency::MOV_S + 4 +
+           Latency::MFC1 + Latency::BRANCH + Latency::CVT_S_W + 2 +
+           Latency::MTC1;
+  }
+}
+
+int Float32MaxLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF32Latency() + Latency::BRANCH;
+  if (kArchVariant >= kMips64r6) {
+    return latency + Latency::MAX_S;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
+           Latency::MFC1 + 1 + Latency::MOV_S;
+  }
+}
+
+int Float64MaxLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF64Latency() + Latency::BRANCH;
+  if (kArchVariant >= kMips64r6) {
+    return latency + Latency::MAX_D;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF64Latency() +
+           Latency::DMFC1 + Latency::MOV_D;
+  }
+}
+
+int Float32MinLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF32Latency() + Latency::BRANCH;
+  if (kArchVariant >= kMips64r6) {
+    return latency + Latency::MIN_S;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
+           Latency::MFC1 + 1 + Latency::MOV_S;
+  }
+}
+
+int Float64MinLatency() {
+  // Estimated max.
+  int latency = CompareIsNanF64Latency() + Latency::BRANCH;
+  if (kArchVariant >= kMips64r6) {
+    return latency + Latency::MIN_D;
+  } else {
+    return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
+           Latency::DMFC1 + Latency::MOV_D;
+  }
+}
+
+int TruncLSLatency(bool load_status) {
+  int latency = Latency::TRUNC_L_S + Latency::DMFC1;
+  if (load_status) {
+    latency += SltuLatency() + 7;
+  }
+  return latency;
+}
+
+int TruncLDLatency(bool load_status) {
+  int latency = Latency::TRUNC_L_D + Latency::DMFC1;
+  if (load_status) {
+    latency += SltuLatency() + 7;
+  }
+  return latency;
+}
+
+int TruncUlSLatency() {
+  // Estimated max.
+  return 2 * CompareF32Latency() + CompareIsNanF32Latency() +
+         4 * Latency::BRANCH + Latency::SUB_S + 2 * Latency::TRUNC_L_S +
+         3 * Latency::DMFC1 + OrLatency() + Latency::MTC1 + Latency::MOV_S +
+         SltuLatency() + 4;
+}
+
+int TruncUlDLatency() {
+  // Estimated max.
+  return 2 * CompareF64Latency() + CompareIsNanF64Latency() +
+         4 * Latency::BRANCH + Latency::SUB_D + 2 * Latency::TRUNC_L_D +
+         3 * Latency::DMFC1 + OrLatency() + Latency::DMTC1 + Latency::MOV_D +
+         SltuLatency() + 4;
+}
+
+int PushLatency() { return DadduLatency() + AlignedMemoryLatency(); }
+
+int ByteSwapSignedLatency() { return 2; }
+
+int LlLatency(int offset) {
+  bool is_one_instruction =
+      (kArchVariant == kMips64r6) ? is_int9(offset) : is_int16(offset);
+  if (is_one_instruction) {
+    return 1;
+  } else {
+    return 3;
+  }
+}
+
+int ExtractBitsLatency(bool sign_extend, int size) {
+  int latency = 2;
+  if (sign_extend) {
+    switch (size) {
+      case 8:
+      case 16:
+      case 32:
+        latency += 1;
+        break;
+      default:
+        UNREACHABLE();
+    }
+  }
+  return latency;
+}
+
+int InsertBitsLatency() { return 2 + DsubuLatency(false) + 2; }
+
+int ScLatency(int offset) {
+  bool is_one_instruction =
+      (kArchVariant == kMips64r6) ? is_int9(offset) : is_int16(offset);
+  if (is_one_instruction) {
+    return 1;
+  } else {
+    return 3;
+  }
+}
+
+int Word32AtomicExchangeLatency(bool sign_extend, int size) {
+  return DadduLatency(false) + 1 + DsubuLatency() + 2 + LlLatency(0) +
+         ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() +
+         ScLatency(0) + BranchShortLatency() + 1;
+}
+
+int Word32AtomicCompareExchangeLatency(bool sign_extend, int size) {
+  return 2 + DsubuLatency() + 2 + LlLatency(0) +
+         ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() +
+         ScLatency(0) + BranchShortLatency() + 1;
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // Basic latency modeling for MIPS64 instructions. These values have been
+  // determined empirically.
+  switch (instr->arch_opcode()) {
+    case kArchCallCodeObject:
+    case kArchCallWasmFunction:
+      return CallLatency();
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      int latency = 0;
+      if (instr->arch_opcode() == kArchTailCallCodeObjectFromJSFunction) {
+        latency = AssemblePopArgumentsAdoptFrameLatency();
+      }
+      return latency + JumpLatency();
+    }
+    case kArchTailCallWasm:
+    case kArchTailCallAddress:
+      return JumpLatency();
+    case kArchCallJSFunction: {
+      int latency = 0;
+      if (FLAG_debug_code) {
+        latency = 1 + AssertLatency();
+      }
+      return latency + 1 + DadduLatency(false) + CallLatency();
+    }
+    case kArchPrepareCallCFunction:
+      return PrepareCallCFunctionLatency();
+    case kArchSaveCallerRegisters: {
+      auto fp_mode =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      return PushCallerSavedLatency(fp_mode);
+    }
+    case kArchRestoreCallerRegisters: {
+      auto fp_mode =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      return PopCallerSavedLatency(fp_mode);
+    }
+    case kArchPrepareTailCall:
+      return 2;
+    case kArchCallCFunction:
+      return CallCFunctionLatency();
+    case kArchJmp:
+      return AssembleArchJumpLatency();
+    case kArchTableSwitch:
+      return AssembleArchTableSwitchLatency();
+    case kArchAbortCSAAssert:
+      return CallLatency() + 1;
+    case kArchDebugBreak:
+      return 1;
+    case kArchComment:
+    case kArchNop:
+    case kArchThrowTerminator:
+    case kArchDeoptimize:
+      return 0;
+    case kArchRet:
+      return AssemblerReturnLatency();
+    case kArchFramePointer:
+      return 1;
+    case kArchParentFramePointer:
+      // Estimated max.
+      return AlignedMemoryLatency();
+    case kArchTruncateDoubleToI:
+      return TruncateDoubleToIDelayedLatency();
+    case kArchStoreWithWriteBarrier:
+      return DadduLatency() + 1 + CheckPageFlagLatency();
+    case kArchStackSlot:
+      // Estimated max.
+      return DadduLatency(false) + AndLatency(false) + AssertLatency() +
+             DadduLatency(false) + AndLatency(false) + BranchShortLatency() +
+             1 + DsubuLatency() + DadduLatency();
+    case kArchWordPoisonOnSpeculation:
+      return AndLatency();
+    case kIeee754Float64Acos:
+    case kIeee754Float64Acosh:
+    case kIeee754Float64Asin:
+    case kIeee754Float64Asinh:
+    case kIeee754Float64Atan:
+    case kIeee754Float64Atanh:
+    case kIeee754Float64Atan2:
+    case kIeee754Float64Cos:
+    case kIeee754Float64Cosh:
+    case kIeee754Float64Cbrt:
+    case kIeee754Float64Exp:
+    case kIeee754Float64Expm1:
+    case kIeee754Float64Log:
+    case kIeee754Float64Log1p:
+    case kIeee754Float64Log10:
+    case kIeee754Float64Log2:
+    case kIeee754Float64Pow:
+    case kIeee754Float64Sin:
+    case kIeee754Float64Sinh:
+    case kIeee754Float64Tan:
+    case kIeee754Float64Tanh:
+      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
+             CallCFunctionLatency() + MovFromFloatResultLatency();
+    case kMips64Add:
+    case kMips64Dadd:
+      return DadduLatency(instr->InputAt(1)->IsRegister());
+    case kMips64DaddOvf:
+      return DaddOverflowLatency();
+    case kMips64Sub:
+    case kMips64Dsub:
+      return DsubuLatency(instr->InputAt(1)->IsRegister());
+    case kMips64DsubOvf:
+      return DsubOverflowLatency();
+    case kMips64Mul:
+      return MulLatency();
+    case kMips64MulOvf:
+      return MulOverflowLatency();
+    case kMips64MulHigh:
+      return MulhLatency();
+    case kMips64MulHighU:
+      return MulhuLatency();
+    case kMips64DMulHigh:
+      return DMulhLatency();
+    case kMips64Div: {
+      int latency = DivLatency(instr->InputAt(1)->IsRegister());
+      if (kArchVariant >= kMips64r6) {
+        return latency + 1;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMips64DivU: {
+      int latency = DivuLatency(instr->InputAt(1)->IsRegister());
+      if (kArchVariant >= kMips64r6) {
+        return latency + 1;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMips64Mod:
+      return ModLatency();
+    case kMips64ModU:
+      return ModuLatency();
+    case kMips64Dmul:
+      return DmulLatency();
+    case kMips64Ddiv: {
+      int latency = DdivLatency();
+      if (kArchVariant >= kMips64r6) {
+        return latency + 1;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMips64DdivU: {
+      int latency = DdivuLatency();
+      if (kArchVariant >= kMips64r6) {
+        return latency + 1;
+      } else {
+        return latency + MovzLatency();
+      }
+    }
+    case kMips64Dmod:
+      return DmodLatency();
+    case kMips64DmodU:
+      return DmoduLatency();
+    case kMips64Dlsa:
+    case kMips64Lsa:
+      return DlsaLatency();
+    case kMips64And:
+      return AndLatency(instr->InputAt(1)->IsRegister());
+    case kMips64And32: {
+      bool is_operand_register = instr->InputAt(1)->IsRegister();
+      int latency = AndLatency(is_operand_register);
+      if (is_operand_register) {
+        return latency + 2;
+      } else {
+        return latency + 1;
+      }
+    }
+    case kMips64Or:
+      return OrLatency(instr->InputAt(1)->IsRegister());
+    case kMips64Or32: {
+      bool is_operand_register = instr->InputAt(1)->IsRegister();
+      int latency = OrLatency(is_operand_register);
+      if (is_operand_register) {
+        return latency + 2;
+      } else {
+        return latency + 1;
+      }
+    }
+    case kMips64Nor:
+      return NorLatency(instr->InputAt(1)->IsRegister());
+    case kMips64Nor32: {
+      bool is_operand_register = instr->InputAt(1)->IsRegister();
+      int latency = NorLatency(is_operand_register);
+      if (is_operand_register) {
+        return latency + 2;
+      } else {
+        return latency + 1;
+      }
+    }
+    case kMips64Xor:
+      return XorLatency(instr->InputAt(1)->IsRegister());
+    case kMips64Xor32: {
+      bool is_operand_register = instr->InputAt(1)->IsRegister();
+      int latency = XorLatency(is_operand_register);
+      if (is_operand_register) {
+        return latency + 2;
+      } else {
+        return latency + 1;
+      }
+    }
+    case kMips64Clz:
+    case kMips64Dclz:
+      return DclzLatency();
+    case kMips64Ctz:
+      return CtzLatency();
+    case kMips64Dctz:
+      return DctzLatency();
+    case kMips64Popcnt:
+      return PopcntLatency();
+    case kMips64Dpopcnt:
+      return DpopcntLatency();
+    case kMips64Shl:
+      return 1;
+    case kMips64Shr:
+    case kMips64Sar:
+      return 2;
+    case kMips64Ext:
+    case kMips64Ins:
+    case kMips64Dext:
+    case kMips64Dins:
+    case kMips64Dshl:
+    case kMips64Dshr:
+    case kMips64Dsar:
+    case kMips64Ror:
+    case kMips64Dror:
+      return 1;
+    case kMips64Tst:
+      return AndLatency(instr->InputAt(1)->IsRegister());
+    case kMips64Mov:
+      return 1;
+    case kMips64CmpS:
+      return MoveLatency() + CompareF32Latency();
+    case kMips64AddS:
+      return Latency::ADD_S;
+    case kMips64SubS:
+      return Latency::SUB_S;
+    case kMips64MulS:
+      return Latency::MUL_S;
+    case kMips64DivS:
+      return Latency::DIV_S;
+    case kMips64ModS:
+      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
+             CallCFunctionLatency() + MovFromFloatResultLatency();
+    case kMips64AbsS:
+      return Latency::ABS_S;
+    case kMips64NegS:
+      return NegsLatency();
+    case kMips64SqrtS:
+      return Latency::SQRT_S;
+    case kMips64MaxS:
+      return Latency::MAX_S;
+    case kMips64MinS:
+      return Latency::MIN_S;
+    case kMips64CmpD:
+      return MoveLatency() + CompareF64Latency();
+    case kMips64AddD:
+      return Latency::ADD_D;
+    case kMips64SubD:
+      return Latency::SUB_D;
+    case kMips64MulD:
+      return Latency::MUL_D;
+    case kMips64DivD:
+      return Latency::DIV_D;
+    case kMips64ModD:
+      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
+             CallCFunctionLatency() + MovFromFloatResultLatency();
+    case kMips64AbsD:
+      return Latency::ABS_D;
+    case kMips64NegD:
+      return NegdLatency();
+    case kMips64SqrtD:
+      return Latency::SQRT_D;
+    case kMips64MaxD:
+      return Latency::MAX_D;
+    case kMips64MinD:
+      return Latency::MIN_D;
+    case kMips64Float64RoundDown:
+    case kMips64Float64RoundTruncate:
+    case kMips64Float64RoundUp:
+    case kMips64Float64RoundTiesEven:
+      return Float64RoundLatency();
+    case kMips64Float32RoundDown:
+    case kMips64Float32RoundTruncate:
+    case kMips64Float32RoundUp:
+    case kMips64Float32RoundTiesEven:
+      return Float32RoundLatency();
+    case kMips64Float32Max:
+      return Float32MaxLatency();
+    case kMips64Float64Max:
+      return Float64MaxLatency();
+    case kMips64Float32Min:
+      return Float32MinLatency();
+    case kMips64Float64Min:
+      return Float64MinLatency();
+    case kMips64Float64SilenceNaN:
+      return Latency::SUB_D;
+    case kMips64CvtSD:
+      return Latency::CVT_S_D;
+    case kMips64CvtDS:
+      return Latency::CVT_D_S;
+    case kMips64CvtDW:
+      return Latency::MTC1 + Latency::CVT_D_W;
+    case kMips64CvtSW:
+      return Latency::MTC1 + Latency::CVT_S_W;
+    case kMips64CvtSUw:
+      return 1 + Latency::DMTC1 + Latency::CVT_S_L;
+    case kMips64CvtSL:
+      return Latency::DMTC1 + Latency::CVT_S_L;
+    case kMips64CvtDL:
+      return Latency::DMTC1 + Latency::CVT_D_L;
+    case kMips64CvtDUw:
+      return 1 + Latency::DMTC1 + Latency::CVT_D_L;
+    case kMips64CvtDUl:
+      return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 +
+             2 * Latency::CVT_D_L + Latency::ADD_D;
+    case kMips64CvtSUl:
+      return 2 * Latency::BRANCH + 3 + 2 * Latency::DMTC1 +
+             2 * Latency::CVT_S_L + Latency::ADD_S;
+    case kMips64FloorWD:
+      return Latency::FLOOR_W_D + Latency::MFC1;
+    case kMips64CeilWD:
+      return Latency::CEIL_W_D + Latency::MFC1;
+    case kMips64RoundWD:
+      return Latency::ROUND_W_D + Latency::MFC1;
+    case kMips64TruncWD:
+      return Latency::TRUNC_W_D + Latency::MFC1;
+    case kMips64FloorWS:
+      return Latency::FLOOR_W_S + Latency::MFC1;
+    case kMips64CeilWS:
+      return Latency::CEIL_W_S + Latency::MFC1;
+    case kMips64RoundWS:
+      return Latency::ROUND_W_S + Latency::MFC1;
+    case kMips64TruncWS:
+      return Latency::TRUNC_W_S + Latency::MFC1 + 2 + MovnLatency();
+    case kMips64TruncLS:
+      return TruncLSLatency(instr->OutputCount() > 1);
+    case kMips64TruncLD:
+      return TruncLDLatency(instr->OutputCount() > 1);
+    case kMips64TruncUwD:
+      // Estimated max.
+      return CompareF64Latency() + 2 * Latency::BRANCH +
+             2 * Latency::TRUNC_W_D + Latency::SUB_D + OrLatency() +
+             Latency::MTC1 + Latency::MFC1 + Latency::MTHC1 + 1;
+    case kMips64TruncUwS:
+      // Estimated max.
+      return CompareF32Latency() + 2 * Latency::BRANCH +
+             2 * Latency::TRUNC_W_S + Latency::SUB_S + OrLatency() +
+             Latency::MTC1 + 2 * Latency::MFC1 + 2 + MovzLatency();
+    case kMips64TruncUlS:
+      return TruncUlSLatency();
+    case kMips64TruncUlD:
+      return TruncUlDLatency();
+    case kMips64BitcastDL:
+      return Latency::DMFC1;
+    case kMips64BitcastLD:
+      return Latency::DMTC1;
+    case kMips64Float64ExtractLowWord32:
+      return Latency::MFC1;
+    case kMips64Float64InsertLowWord32:
+      return Latency::MFHC1 + Latency::MTC1 + Latency::MTHC1;
+    case kMips64Float64ExtractHighWord32:
+      return Latency::MFHC1;
+    case kMips64Float64InsertHighWord32:
+      return Latency::MTHC1;
+    case kMips64Seb:
+    case kMips64Seh:
+      return 1;
+    case kMips64Lbu:
+    case kMips64Lb:
+    case kMips64Lhu:
+    case kMips64Lh:
+    case kMips64Lwu:
+    case kMips64Lw:
+    case kMips64Ld:
+    case kMips64Sb:
+    case kMips64Sh:
+    case kMips64Sw:
+    case kMips64Sd:
+      return AlignedMemoryLatency();
+    case kMips64Lwc1:
+      return Lwc1Latency();
+    case kMips64Ldc1:
+      return Ldc1Latency();
+    case kMips64Swc1:
+      return Swc1Latency();
+    case kMips64Sdc1:
+      return Sdc1Latency();
+    case kMips64Ulhu:
+    case kMips64Ulh:
+      return UlhuLatency();
+    case kMips64Ulwu:
+      return UlwuLatency();
+    case kMips64Ulw:
+      return UlwLatency();
+    case kMips64Uld:
+      return UldLatency();
+    case kMips64Ulwc1:
+      return Ulwc1Latency();
+    case kMips64Uldc1:
+      return Uldc1Latency();
+    case kMips64Ush:
+      return UshLatency();
+    case kMips64Usw:
+      return UswLatency();
+    case kMips64Usd:
+      return UsdLatency();
+    case kMips64Uswc1:
+      return Uswc1Latency();
+    case kMips64Usdc1:
+      return Usdc1Latency();
+    case kMips64Push: {
+      int latency = 0;
+      if (instr->InputAt(0)->IsFPRegister()) {
+        latency = Sdc1Latency() + DsubuLatency(false);
+      } else {
+        latency = PushLatency();
+      }
+      return latency;
+    }
+    case kMips64Peek: {
+      int latency = 0;
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        auto op = LocationOperand::cast(instr->OutputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat64:
+            latency = Ldc1Latency();
+            break;
+          case MachineRepresentation::kFloat32:
+            latency = Latency::LWC1;
+            break;
+          default:
+            UNREACHABLE();
+        }
+      } else {
+        latency = AlignedMemoryLatency();
+      }
+      return latency;
+    }
+    case kMips64StackClaim:
+      return DsubuLatency(false);
+    case kMips64StoreToStackSlot: {
+      int latency = 0;
+      if (instr->InputAt(0)->IsFPRegister()) {
+        if (instr->InputAt(0)->IsSimd128Register()) {
+          latency = 1;  // Estimated value.
+        } else {
+          latency = Sdc1Latency();
+        }
+      } else {
+        latency = AlignedMemoryLatency();
+      }
+      return latency;
+    }
+    case kMips64ByteSwap64:
+      return ByteSwapSignedLatency();
+    case kMips64ByteSwap32:
+      return ByteSwapSignedLatency();
+    case kWord32AtomicLoadInt8:
+    case kWord32AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kWord32AtomicLoadUint16:
+    case kWord32AtomicLoadWord32:
+      return 2;
+    case kWord32AtomicStoreWord8:
+    case kWord32AtomicStoreWord16:
+    case kWord32AtomicStoreWord32:
+      return 3;
+    case kWord32AtomicExchangeInt8:
+      return Word32AtomicExchangeLatency(true, 8);
+    case kWord32AtomicExchangeUint8:
+      return Word32AtomicExchangeLatency(false, 8);
+    case kWord32AtomicExchangeInt16:
+      return Word32AtomicExchangeLatency(true, 16);
+    case kWord32AtomicExchangeUint16:
+      return Word32AtomicExchangeLatency(false, 16);
+    case kWord32AtomicExchangeWord32:
+      return 2 + LlLatency(0) + 1 + ScLatency(0) + BranchShortLatency() + 1;
+    case kWord32AtomicCompareExchangeInt8:
+      return Word32AtomicCompareExchangeLatency(true, 8);
+    case kWord32AtomicCompareExchangeUint8:
+      return Word32AtomicCompareExchangeLatency(false, 8);
+    case kWord32AtomicCompareExchangeInt16:
+      return Word32AtomicCompareExchangeLatency(true, 16);
+    case kWord32AtomicCompareExchangeUint16:
+      return Word32AtomicCompareExchangeLatency(false, 16);
+    case kWord32AtomicCompareExchangeWord32:
+      return 3 + LlLatency(0) + BranchShortLatency() + 1 + ScLatency(0) +
+             BranchShortLatency() + 1;
+    case kMips64AssertEqual:
+      return AssertLatency();
+    default:
+      return 1;
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/mips64/instruction-selector-mips64.cc b/src/compiler/backend/mips64/instruction-selector-mips64.cc
new file mode 100644
index 0000000..216b83c
--- /dev/null
+++ b/src/compiler/backend/mips64/instruction-selector-mips64.cc
@@ -0,0 +1,3220 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/bits.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define TRACE_UNIMPL() \
+  PrintF("UNIMPLEMENTED instr_sel: %s at line %d\n", __FUNCTION__, __LINE__)
+
+#define TRACE() PrintF("instr_sel: %s at line %d\n", __FUNCTION__, __LINE__)
+
+// Adds Mips-specific methods for generating InstructionOperands.
+class Mips64OperandGenerator final : public OperandGenerator {
+ public:
+  explicit Mips64OperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseOperand(Node* node, InstructionCode opcode) {
+    if (CanBeImmediate(node, opcode)) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  // Use the zero register if the node has the immediate value zero, otherwise
+  // assign a register.
+  InstructionOperand UseRegisterOrImmediateZero(Node* node) {
+    if ((IsIntegerConstant(node) && (GetIntegerConstantValue(node) == 0)) ||
+        (IsFloatConstant(node) &&
+         (bit_cast<int64_t>(GetFloatConstantValue(node)) == 0))) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  bool IsIntegerConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kInt32Constant) ||
+           (node->opcode() == IrOpcode::kInt64Constant);
+  }
+
+  int64_t GetIntegerConstantValue(Node* node) {
+    if (node->opcode() == IrOpcode::kInt32Constant) {
+      return OpParameter<int32_t>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode());
+    return OpParameter<int64_t>(node->op());
+  }
+
+  bool IsFloatConstant(Node* node) {
+    return (node->opcode() == IrOpcode::kFloat32Constant) ||
+           (node->opcode() == IrOpcode::kFloat64Constant);
+  }
+
+  double GetFloatConstantValue(Node* node) {
+    if (node->opcode() == IrOpcode::kFloat32Constant) {
+      return OpParameter<float>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kFloat64Constant, node->opcode());
+    return OpParameter<double>(node->op());
+  }
+
+  bool CanBeImmediate(Node* node, InstructionCode mode) {
+    return IsIntegerConstant(node) &&
+           CanBeImmediate(GetIntegerConstantValue(node), mode);
+  }
+
+  bool CanBeImmediate(int64_t value, InstructionCode opcode) {
+    switch (ArchOpcodeField::decode(opcode)) {
+      case kMips64Shl:
+      case kMips64Sar:
+      case kMips64Shr:
+        return is_uint5(value);
+      case kMips64Dshl:
+      case kMips64Dsar:
+      case kMips64Dshr:
+        return is_uint6(value);
+      case kMips64Add:
+      case kMips64And32:
+      case kMips64And:
+      case kMips64Dadd:
+      case kMips64Or32:
+      case kMips64Or:
+      case kMips64Tst:
+      case kMips64Xor:
+        return is_uint16(value);
+      case kMips64Lb:
+      case kMips64Lbu:
+      case kMips64Sb:
+      case kMips64Lh:
+      case kMips64Lhu:
+      case kMips64Sh:
+      case kMips64Lw:
+      case kMips64Sw:
+      case kMips64Ld:
+      case kMips64Sd:
+      case kMips64Lwc1:
+      case kMips64Swc1:
+      case kMips64Ldc1:
+      case kMips64Sdc1:
+        return is_int32(value);
+      default:
+        return is_int16(value);
+    }
+  }
+
+ private:
+  bool ImmediateFitsAddrMode1Instruction(int32_t imm) const {
+    TRACE_UNIMPL();
+    return false;
+  }
+};
+
+static void VisitRR(InstructionSelector* selector, ArchOpcode opcode,
+                    Node* node) {
+  Mips64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+static void VisitRRI(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  Mips64OperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm));
+}
+
+static void VisitSimdShift(InstructionSelector* selector, ArchOpcode opcode,
+                           Node* node) {
+  Mips64OperandGenerator g(selector);
+  if (g.IsIntegerConstant(node->InputAt(1))) {
+    selector->Emit(opcode, g.DefineAsRegister(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseImmediate(node->InputAt(1)));
+  } else {
+    selector->Emit(opcode, g.DefineAsRegister(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(1)));
+  }
+}
+
+static void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode,
+                      Node* node) {
+  Mips64OperandGenerator g(selector);
+  int32_t imm = OpParameter<int32_t>(node->op());
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+static void VisitRRR(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  Mips64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+static void VisitUniqueRRR(InstructionSelector* selector, ArchOpcode opcode,
+                           Node* node) {
+  Mips64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseUniqueRegister(node->InputAt(0)),
+                 g.UseUniqueRegister(node->InputAt(1)));
+}
+
+void VisitRRRR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
+  Mips64OperandGenerator g(selector);
+  selector->Emit(
+      opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+      g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
+}
+
+static void VisitRRO(InstructionSelector* selector, ArchOpcode opcode,
+                     Node* node) {
+  Mips64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseOperand(node->InputAt(1), opcode));
+}
+
+struct ExtendingLoadMatcher {
+  ExtendingLoadMatcher(Node* node, InstructionSelector* selector)
+      : matches_(false), selector_(selector), base_(nullptr), immediate_(0) {
+    Initialize(node);
+  }
+
+  bool Matches() const { return matches_; }
+
+  Node* base() const {
+    DCHECK(Matches());
+    return base_;
+  }
+  int64_t immediate() const {
+    DCHECK(Matches());
+    return immediate_;
+  }
+  ArchOpcode opcode() const {
+    DCHECK(Matches());
+    return opcode_;
+  }
+
+ private:
+  bool matches_;
+  InstructionSelector* selector_;
+  Node* base_;
+  int64_t immediate_;
+  ArchOpcode opcode_;
+
+  void Initialize(Node* node) {
+    Int64BinopMatcher m(node);
+    // When loading a 64-bit value and shifting by 32, we should
+    // just load and sign-extend the interesting 4 bytes instead.
+    // This happens, for example, when we're loading and untagging SMIs.
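+    // e.g. on little-endian targets, Word64Sar(Load64[base + k], 32) turns
+    // into a sign-extending Lw from base + k + 4 (the upper word).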
+    DCHECK(m.IsWord64Sar());
+    if (m.left().IsLoad() && m.right().Is(32) &&
+        selector_->CanCover(m.node(), m.left().node())) {
+      DCHECK_EQ(selector_->GetEffectLevel(node),
+                selector_->GetEffectLevel(m.left().node()));
+      MachineRepresentation rep =
+          LoadRepresentationOf(m.left().node()->op()).representation();
+      DCHECK_EQ(3, ElementSizeLog2Of(rep));
+      if (rep != MachineRepresentation::kTaggedSigned &&
+          rep != MachineRepresentation::kTaggedPointer &&
+          rep != MachineRepresentation::kTagged &&
+          rep != MachineRepresentation::kWord64) {
+        return;
+      }
+
+      Mips64OperandGenerator g(selector_);
+      Node* load = m.left().node();
+      Node* offset = load->InputAt(1);
+      base_ = load->InputAt(0);
+      opcode_ = kMips64Lw;
+      if (g.CanBeImmediate(offset, opcode_)) {
+#if defined(V8_TARGET_LITTLE_ENDIAN)
+        immediate_ = g.GetIntegerConstantValue(offset) + 4;
+#elif defined(V8_TARGET_BIG_ENDIAN)
+        immediate_ = g.GetIntegerConstantValue(offset);
+#endif
+        matches_ = g.CanBeImmediate(immediate_, kMips64Lw);
+      }
+    }
+  }
+};
+
+bool TryEmitExtendingLoad(InstructionSelector* selector, Node* node,
+                          Node* output_node) {
+  ExtendingLoadMatcher m(node, selector);
+  Mips64OperandGenerator g(selector);
+  if (m.Matches()) {
+    InstructionOperand inputs[2];
+    inputs[0] = g.UseRegister(m.base());
+    InstructionCode opcode =
+        m.opcode() | AddressingModeField::encode(kMode_MRI);
+    DCHECK(is_int32(m.immediate()));
+    inputs[1] = g.TempImmediate(static_cast<int32_t>(m.immediate()));
+    InstructionOperand outputs[] = {g.DefineAsRegister(output_node)};
+    selector->Emit(opcode, arraysize(outputs), outputs, arraysize(inputs),
+                   inputs);
+    return true;
+  }
+  return false;
+}
+
+bool TryMatchImmediate(InstructionSelector* selector,
+                       InstructionCode* opcode_return, Node* node,
+                       size_t* input_count_return, InstructionOperand* inputs) {
+  Mips64OperandGenerator g(selector);
+  if (g.CanBeImmediate(node, *opcode_return)) {
+    *opcode_return |= AddressingModeField::encode(kMode_MRI);
+    inputs[0] = g.UseImmediate(node);
+    *input_count_return = 1;
+    return true;
+  }
+  return false;
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, bool has_reverse_opcode,
+                       InstructionCode reverse_opcode,
+                       FlagsContinuation* cont) {
+  Mips64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand inputs[2];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  if (TryMatchImmediate(selector, &opcode, m.right().node(), &input_count,
+                        &inputs[1])) {
+    inputs[0] = g.UseRegister(m.left().node());
+    input_count++;
+  } else if (has_reverse_opcode &&
+             TryMatchImmediate(selector, &reverse_opcode, m.left().node(),
+                               &input_count, &inputs[1])) {
+    inputs[0] = g.UseRegister(m.right().node());
+    opcode = reverse_opcode;
+    input_count++;
+  } else {
+    inputs[input_count++] = g.UseRegister(m.left().node());
+    inputs[input_count++] = g.UseOperand(m.right().node(), opcode);
+  }
+
+  if (cont->IsDeoptimize()) {
+    // If we can deoptimize as a result of the binop, we need to make sure that
+    // the deopt inputs are not overwritten by the binop result. One way
+    // to achieve that is to declare the output register as same-as-first.
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+  } else {
+    outputs[output_count++] = g.DefineAsRegister(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_EQ(1u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, bool has_reverse_opcode,
+                       InstructionCode reverse_opcode) {
+  FlagsContinuation cont;
+  VisitBinop(selector, node, opcode, has_reverse_opcode, reverse_opcode, &cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, FlagsContinuation* cont) {
+  VisitBinop(selector, node, opcode, false, kArchNop, cont);
+}
+
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode) {
+  VisitBinop(selector, node, opcode, false, kArchNop);
+}
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int alignment = rep.alignment();
+  int slot = frame_->AllocateSpillSlot(rep.size(), alignment);
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)),
+       sequence()->AddImmediate(Constant(alignment)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), a0));
+}
+
+void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
+              Node* output = nullptr) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  if (g.CanBeImmediate(index, opcode)) {
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.DefineAsRegister(output == nullptr ? node : output),
+                   g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    selector->Emit(kMips64Dadd | AddressingModeField::encode(kMode_None),
+                   addr_reg, g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.DefineAsRegister(output == nullptr ? node : output),
+                   addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+
+  InstructionCode opcode = kArchNop;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kMips64S128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kMips64S128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kMips64S128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kMips64S128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kMips64S128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kMips64S128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kMips64S128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kMips64S128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kMips64S128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kMips64S128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kMips64S128Load32Zero;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kMips64S128Load64Zero;
+      break;
+    default:
+      UNIMPLEMENTED();
+  }
+
+  EmitLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+
+  InstructionCode opcode = kArchNop;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kMips64Lwc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMips64Ldc1;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsUnsigned() ? kMips64Lbu : kMips64Lb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsUnsigned() ? kMips64Lhu : kMips64Lh;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = load_rep.IsUnsigned() ? kMips64Lwu : kMips64Lw;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord64:
+      opcode = kMips64Ld;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMips64MsaLd;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+
+  EmitLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  // TODO(mips): I guess this could be done in a better way.
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedPointer(rep));
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    inputs[input_count++] = g.UseUniqueRegister(index);
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
+  } else {
+    ArchOpcode opcode;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kMips64Swc1;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kMips64Sdc1;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kMips64Sb;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kMips64Sh;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kMips64Sw;
+        break;
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:         // Fall through.
+      case MachineRepresentation::kWord64:
+        opcode = kMips64Sd;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kMips64MsaSt;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:         // Fall through.
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    if (g.CanBeImmediate(index, opcode)) {
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           g.UseRegister(base), g.UseImmediate(index),
+           g.UseRegisterOrImmediateZero(value));
+    } else {
+      InstructionOperand addr_reg = g.TempRegister();
+      Emit(kMips64Dadd | AddressingModeField::encode(kMode_None), addr_reg,
+           g.UseRegister(index), g.UseRegister(base));
+      // Emit desired store opcode, using temp addr_reg.
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value));
+    }
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Shr() && CanCover(node, m.left().node()) &&
+      m.right().HasResolvedValue()) {
+    uint32_t mask = m.right().ResolvedValue();
+    uint32_t mask_width = base::bits::CountPopulation(mask);
+    uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+    if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
+      // The mask must be contiguous, and occupy the least-significant bits.
+      DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+
+      // Select Ext for And(Shr(x, imm), mask) where the mask is in the least
+      // significant bits.
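+      // e.g. And(Shr(x, 8), 0xFF) becomes Ext(x, 8, 8), which extracts
+      // bits 8..15 of x.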
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue()) {
+        // Any shift value can match; int32 shifts use `value % 32`.
+        uint32_t lsb = mleft.right().ResolvedValue() & 0x1F;
+
+        // Ext cannot extract bits past the register size; however, since
+        // shifting the original value would have introduced some zeros, we
+        // can still use Ext with a smaller mask and the remaining bits will
+        // be zeros.
+        if (lsb + mask_width > 32) mask_width = 32 - lsb;
+
+        Emit(kMips64Ext, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+      // Other cases fall through to the normal And operation.
+    }
+  }
+  if (m.right().HasResolvedValue()) {
+    uint32_t mask = m.right().ResolvedValue();
+    uint32_t shift = base::bits::CountPopulation(~mask);
+    uint32_t msb = base::bits::CountLeadingZeros32(~mask);
+    if (shift != 0 && shift != 32 && msb + shift == 32) {
+      // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction
+      // and remove constant loading of inverted mask.
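+      // e.g. And(x, 0xFFFFFF00) zeroes the low 8 bits with a single Ins
+      // instead of loading the mask into a register.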
+      Emit(kMips64Ins, g.DefineSameAsFirst(node),
+           g.UseRegister(m.left().node()), g.TempImmediate(0),
+           g.TempImmediate(shift));
+      return;
+    }
+  }
+  VisitBinop(this, node, kMips64And32, true, kMips64And32);
+}
+
+void InstructionSelector::VisitWord64And(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64Shr() && CanCover(node, m.left().node()) &&
+      m.right().HasResolvedValue()) {
+    uint64_t mask = m.right().ResolvedValue();
+    uint32_t mask_width = base::bits::CountPopulation(mask);
+    uint32_t mask_msb = base::bits::CountLeadingZeros64(mask);
+    if ((mask_width != 0) && (mask_msb + mask_width == 64)) {
+      // The mask must be contiguous, and occupy the least-significant bits.
+      DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask));
+
+      // Select Dext for And(Shr(x, imm), mask) where the mask is in the least
+      // significant bits.
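+      // e.g. And(Shr(x, 16), 0xFFFF) becomes Dext(x, 16, 16).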
+      Int64BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue()) {
+        // Any shift value can match; int64 shifts use `value % 64`.
+        uint32_t lsb =
+            static_cast<uint32_t>(mleft.right().ResolvedValue() & 0x3F);
+
+        // Dext cannot extract bits past the register size; however, since
+        // shifting the original value would have introduced some zeros, we
+        // can still use Dext with a smaller mask and the remaining bits will
+        // be zeros.
+        if (lsb + mask_width > 64) mask_width = 64 - lsb;
+
+        if (lsb == 0 && mask_width == 64) {
+          Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(mleft.left().node()));
+        } else {
+          Emit(kMips64Dext, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+               g.TempImmediate(static_cast<int32_t>(mask_width)));
+        }
+        return;
+      }
+      // Other cases fall through to the normal And operation.
+    }
+  }
+  if (m.right().HasResolvedValue()) {
+    uint64_t mask = m.right().ResolvedValue();
+    uint32_t shift = base::bits::CountPopulation(~mask);
+    uint32_t msb = base::bits::CountLeadingZeros64(~mask);
+    if (shift != 0 && shift < 32 && msb + shift == 64) {
+      // Insert zeros for (x >> K) << K => x & ~(2^K - 1) expression reduction
+      // and remove constant loading of inverted mask. Dins cannot insert bits
+      // past word size, so shifts smaller than 32 are covered.
+      Emit(kMips64Dins, g.DefineSameAsFirst(node),
+           g.UseRegister(m.left().node()), g.TempImmediate(0),
+           g.TempImmediate(shift));
+      return;
+    }
+  }
+  VisitBinop(this, node, kMips64And, true, kMips64And);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  VisitBinop(this, node, kMips64Or32, true, kMips64Or32);
+}
+
+void InstructionSelector::VisitWord64Or(Node* node) {
+  VisitBinop(this, node, kMips64Or, true, kMips64Or);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Or() && CanCover(node, m.left().node()) &&
+      m.right().Is(-1)) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (!mleft.right().HasResolvedValue()) {
+      Mips64OperandGenerator g(this);
+      Emit(kMips64Nor32, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+  }
+  if (m.right().Is(-1)) {
+    // Use Nor for bit negation and eliminate constant loading for xori.
+    Mips64OperandGenerator g(this);
+    Emit(kMips64Nor32, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+         g.TempImmediate(0));
+    return;
+  }
+  VisitBinop(this, node, kMips64Xor32, true, kMips64Xor32);
+}
+
+void InstructionSelector::VisitWord64Xor(Node* node) {
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64Or() && CanCover(node, m.left().node()) &&
+      m.right().Is(-1)) {
+    Int64BinopMatcher mleft(m.left().node());
+    if (!mleft.right().HasResolvedValue()) {
+      Mips64OperandGenerator g(this);
+      Emit(kMips64Nor, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()),
+           g.UseRegister(mleft.right().node()));
+      return;
+    }
+  }
+  if (m.right().Is(-1)) {
+    // Use Nor for bit negation and eliminate constant loading for xori.
+    Mips64OperandGenerator g(this);
+    Emit(kMips64Nor, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+         g.TempImmediate(0));
+    return;
+  }
+  VisitBinop(this, node, kMips64Xor, true, kMips64Xor);
+}
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && CanCover(node, m.left().node()) &&
+      m.right().IsInRange(1, 31)) {
+    Mips64OperandGenerator g(this);
+    Int32BinopMatcher mleft(m.left().node());
+    // Match Word32Shl(Word32And(x, mask), imm) to Shl where the mask is
+    // contiguous, and the shift immediate non-zero.
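+    // e.g. Shl(And(x, 0xFFFF), 20) only needs Shl(x, 20): the bits cleared
+    // by the mask are shifted out anyway.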
+    if (mleft.right().HasResolvedValue()) {
+      uint32_t mask = mleft.right().ResolvedValue();
+      uint32_t mask_width = base::bits::CountPopulation(mask);
+      uint32_t mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_width != 0) && (mask_msb + mask_width == 32)) {
+        uint32_t shift = m.right().ResolvedValue();
+        DCHECK_EQ(0u, base::bits::CountTrailingZeros32(mask));
+        DCHECK_NE(0u, shift);
+        if ((shift + mask_width) >= 32) {
+          // If the mask is contiguous and reaches or extends beyond the top
+          // bit, only the shift is needed.
+          Emit(kMips64Shl, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()),
+               g.UseImmediate(m.right().node()));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kMips64Shl, node);
+}
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && m.right().HasResolvedValue()) {
+    uint32_t lsb = m.right().ResolvedValue() & 0x1F;
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() &&
+        mleft.right().ResolvedValue() != 0) {
+      // Select Ext for Shr(And(x, mask), imm) where the result of the mask is
+      // shifted into the least-significant bits.
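+      // e.g. Shr(And(x, 0xFF00), 8) becomes Ext(x, 8, 8).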
+      uint32_t mask = (mleft.right().ResolvedValue() >> lsb) << lsb;
+      unsigned mask_width = base::bits::CountPopulation(mask);
+      unsigned mask_msb = base::bits::CountLeadingZeros32(mask);
+      if ((mask_msb + mask_width + lsb) == 32) {
+        Mips64OperandGenerator g(this);
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros32(mask));
+        Emit(kMips64Ext, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kMips64Shr, node);
+}
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32Shl() && CanCover(node, m.left().node())) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (m.right().HasResolvedValue() && mleft.right().HasResolvedValue()) {
+      Mips64OperandGenerator g(this);
+      uint32_t sar = m.right().ResolvedValue();
+      uint32_t shl = mleft.right().ResolvedValue();
+      if ((sar == shl) && (sar == 16)) {
+        Emit(kMips64Seh, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()));
+        return;
+      } else if ((sar == shl) && (sar == 24)) {
+        Emit(kMips64Seb, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()));
+        return;
+      } else if ((sar == shl) && (sar == 32)) {
+        Emit(kMips64Shl, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(0));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kMips64Sar, node);
+}
+
+void InstructionSelector::VisitWord64Shl(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if ((m.left().IsChangeInt32ToInt64() || m.left().IsChangeUint32ToUint64()) &&
+      m.right().IsInRange(32, 63) && CanCover(node, m.left().node())) {
+    // There's no need to sign/zero-extend to 64-bit if we shift out the upper
+    // 32 bits anyway.
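+    // e.g. for Dshl(ChangeInt32ToInt64(x), 40) every bit produced by the
+    // extension is shifted out, so x's register can be used directly.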
+    Emit(kMips64Dshl, g.DefineSameAsFirst(node),
+         g.UseRegister(m.left().node()->InputAt(0)),
+         g.UseImmediate(m.right().node()));
+    return;
+  }
+  if (m.left().IsWord64And() && CanCover(node, m.left().node()) &&
+      m.right().IsInRange(1, 63)) {
+    // Match Word64Shl(Word64And(x, mask), imm) to Dshl where the mask is
+    // contiguous, and the shift immediate non-zero.
+    Int64BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue()) {
+      uint64_t mask = mleft.right().ResolvedValue();
+      uint32_t mask_width = base::bits::CountPopulation(mask);
+      uint32_t mask_msb = base::bits::CountLeadingZeros64(mask);
+      if ((mask_width != 0) && (mask_msb + mask_width == 64)) {
+        uint64_t shift = m.right().ResolvedValue();
+        DCHECK_EQ(0u, base::bits::CountTrailingZeros64(mask));
+        DCHECK_NE(0u, shift);
+
+        if ((shift + mask_width) >= 64) {
+          // If the mask is contiguous and reaches or extends beyond the top
+          // bit, only the shift is needed.
+          Emit(kMips64Dshl, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()),
+               g.UseImmediate(m.right().node()));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kMips64Dshl, node);
+}
+
+void InstructionSelector::VisitWord64Shr(Node* node) {
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64And() && m.right().HasResolvedValue()) {
+    uint32_t lsb = m.right().ResolvedValue() & 0x3F;
+    Int64BinopMatcher mleft(m.left().node());
+    if (mleft.right().HasResolvedValue() &&
+        mleft.right().ResolvedValue() != 0) {
+      // Select Dext for Shr(And(x, mask), imm) where the result of the mask is
+      // shifted into the least-significant bits.
+      uint64_t mask = (mleft.right().ResolvedValue() >> lsb) << lsb;
+      unsigned mask_width = base::bits::CountPopulation(mask);
+      unsigned mask_msb = base::bits::CountLeadingZeros64(mask);
+      if ((mask_msb + mask_width + lsb) == 64) {
+        Mips64OperandGenerator g(this);
+        DCHECK_EQ(lsb, base::bits::CountTrailingZeros64(mask));
+        Emit(kMips64Dext, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(lsb),
+             g.TempImmediate(mask_width));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kMips64Dshr, node);
+}
+
+void InstructionSelector::VisitWord64Sar(Node* node) {
+  if (TryEmitExtendingLoad(this, node, node)) return;
+  VisitRRO(this, kMips64Dsar, node);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitRRO(this, kMips64Ror, node);
+}
+
+void InstructionSelector::VisitWord32Clz(Node* node) {
+  VisitRR(this, kMips64Clz, node);
+}
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64ByteSwap64, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64ByteSwap32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitWord32Ctz(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord64Ctz(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Dctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord32Popcnt(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Popcnt, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord64Popcnt(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Dpopcnt, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord64Ror(Node* node) {
+  VisitRRO(this, kMips64Dror, node);
+}
+
+void InstructionSelector::VisitWord64Clz(Node* node) {
+  VisitRR(this, kMips64Dclz, node);
+}
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+
+  if (kArchVariant == kMips64r6) {
+    // Select Lsa for (left + (left_of_right << imm)).
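+    // e.g. Int32Add(a, Word32Shl(b, 2)) becomes a single Lsa computing
+    // a + (b << 2).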
+    if (m.right().opcode() == IrOpcode::kWord32Shl &&
+        CanCover(node, m.left().node()) && CanCover(node, m.right().node())) {
+      Int32BinopMatcher mright(m.right().node());
+      if (mright.right().HasResolvedValue() && !m.left().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mright.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMips64Lsa, g.DefineAsRegister(node),
+               g.UseRegister(m.left().node()),
+               g.UseRegister(mright.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+
+    // Select Lsa for ((left_of_left << imm) + right).
+    if (m.left().opcode() == IrOpcode::kWord32Shl &&
+        CanCover(node, m.right().node()) && CanCover(node, m.left().node())) {
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue() && !m.right().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mleft.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMips64Lsa, g.DefineAsRegister(node),
+               g.UseRegister(m.right().node()),
+               g.UseRegister(mleft.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+  }
+
+  VisitBinop(this, node, kMips64Add, true, kMips64Add);
+}
+
+void InstructionSelector::VisitInt64Add(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+
+  if (kArchVariant == kMips64r6) {
+    // Select Dlsa for (left + (left_of_right << imm)).
+    if (m.right().opcode() == IrOpcode::kWord64Shl &&
+        CanCover(node, m.left().node()) && CanCover(node, m.right().node())) {
+      Int64BinopMatcher mright(m.right().node());
+      if (mright.right().HasResolvedValue() && !m.left().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mright.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMips64Dlsa, g.DefineAsRegister(node),
+               g.UseRegister(m.left().node()),
+               g.UseRegister(mright.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+
+    // Select Dlsa for ((left_of_left << imm) + right).
+    if (m.left().opcode() == IrOpcode::kWord64Shl &&
+        CanCover(node, m.right().node()) && CanCover(node, m.left().node())) {
+      Int64BinopMatcher mleft(m.left().node());
+      if (mleft.right().HasResolvedValue() && !m.right().HasResolvedValue()) {
+        int32_t shift_value =
+            static_cast<int32_t>(mleft.right().ResolvedValue());
+        if (shift_value > 0 && shift_value <= 31) {
+          Emit(kMips64Dlsa, g.DefineAsRegister(node),
+               g.UseRegister(m.right().node()),
+               g.UseRegister(mleft.left().node()),
+               g.TempImmediate(shift_value));
+          return;
+        }
+      }
+    }
+  }
+
+  VisitBinop(this, node, kMips64Dadd, true, kMips64Dadd);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  VisitBinop(this, node, kMips64Sub);
+}
+
+void InstructionSelector::VisitInt64Sub(Node* node) {
+  VisitBinop(this, node, kMips64Dsub);
+}
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
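+  // Strength-reduce multiplication by a constant: x * 2^k => x << k,
+  // x * (2^k + 1) => x + (x << k) via Lsa (r6 only, small k), and
+  // x * (2^k - 1) => (x << k) - x.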
+  if (m.right().HasResolvedValue() && m.right().ResolvedValue() > 0) {
+    uint32_t value = static_cast<uint32_t>(m.right().ResolvedValue());
+    if (base::bits::IsPowerOfTwo(value)) {
+      Emit(kMips64Shl | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value - 1) && kArchVariant == kMips64r6 &&
+        value - 1 > 0 && value - 1 <= 31) {
+      Emit(kMips64Lsa, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value + 1)) {
+      InstructionOperand temp = g.TempRegister();
+      Emit(kMips64Shl | AddressingModeField::encode(kMode_None), temp,
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1)));
+      Emit(kMips64Sub | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), temp, g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (CanCover(node, left) && CanCover(node, right)) {
+    if (left->opcode() == IrOpcode::kWord64Sar &&
+        right->opcode() == IrOpcode::kWord64Sar) {
+      Int64BinopMatcher leftInput(left), rightInput(right);
+      if (leftInput.right().Is(32) && rightInput.right().Is(32)) {
+        // Combine untagging shifts with Dmul high.
+        Emit(kMips64DMulHigh, g.DefineSameAsFirst(node),
+             g.UseRegister(leftInput.left().node()),
+             g.UseRegister(rightInput.left().node()));
+        return;
+      }
+    }
+  }
+  VisitRRR(this, kMips64Mul, node);
+}
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  VisitRRR(this, kMips64MulHigh, node);
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  VisitRRR(this, kMips64MulHighU, node);
+}
+
+void InstructionSelector::VisitInt64Mul(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  // TODO(dusmil): Add optimization for shifts larger than 32.
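+  // Apply the same strength reductions as VisitInt32Mul, using the 64-bit
+  // Dshl/Dlsa/Dsub forms.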
+  if (m.right().HasResolvedValue() && m.right().ResolvedValue() > 0) {
+    uint32_t value = static_cast<uint32_t>(m.right().ResolvedValue());
+    if (base::bits::IsPowerOfTwo(value)) {
+      Emit(kMips64Dshl | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value - 1) && kArchVariant == kMips64r6 &&
+        value - 1 > 0 && value - 1 <= 31) {
+      // The Dlsa macro handles out-of-range shift amounts.
+      Emit(kMips64Dlsa, g.DefineAsRegister(node),
+           g.UseRegister(m.left().node()), g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value - 1)));
+      return;
+    }
+    if (base::bits::IsPowerOfTwo(value + 1)) {
+      InstructionOperand temp = g.TempRegister();
+      Emit(kMips64Dshl | AddressingModeField::encode(kMode_None), temp,
+           g.UseRegister(m.left().node()),
+           g.TempImmediate(base::bits::WhichPowerOfTwo(value + 1)));
+      Emit(kMips64Dsub | AddressingModeField::encode(kMode_None),
+           g.DefineAsRegister(node), temp, g.UseRegister(m.left().node()));
+      return;
+    }
+  }
+  Emit(kMips64Dmul, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (CanCover(node, left) && CanCover(node, right)) {
+    if (left->opcode() == IrOpcode::kWord64Sar &&
+        right->opcode() == IrOpcode::kWord64Sar) {
+      Int64BinopMatcher rightInput(right), leftInput(left);
+      if (rightInput.right().Is(32) && leftInput.right().Is(32)) {
+        // Combine both shifted operands with Ddiv.
+        Emit(kMips64Ddiv, g.DefineSameAsFirst(node),
+             g.UseRegister(leftInput.left().node()),
+             g.UseRegister(rightInput.left().node()));
+        return;
+      }
+    }
+  }
+  Emit(kMips64Div, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMips64DivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (CanCover(node, left) && CanCover(node, right)) {
+    if (left->opcode() == IrOpcode::kWord64Sar &&
+        right->opcode() == IrOpcode::kWord64Sar) {
+      Int64BinopMatcher rightInput(right), leftInput(left);
+      if (rightInput.right().Is(32) && leftInput.right().Is(32)) {
+        // Combine both shifted operands with Dmod.
+        Emit(kMips64Dmod, g.DefineSameAsFirst(node),
+             g.UseRegister(leftInput.left().node()),
+             g.UseRegister(rightInput.left().node()));
+        return;
+      }
+    }
+  }
+  Emit(kMips64Mod, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  Emit(kMips64ModU, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitInt64Div(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  Emit(kMips64Ddiv, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint64Div(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  Emit(kMips64DdivU, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitInt64Mod(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  Emit(kMips64Dmod, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitUint64Mod(Node* node) {
+  Mips64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  Emit(kMips64DmodU, g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+       g.UseRegister(m.right().node()));
+}
+
+void InstructionSelector::VisitChangeFloat32ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDS, node);
+}
+
+void InstructionSelector::VisitRoundInt32ToFloat32(Node* node) {
+  VisitRR(this, kMips64CvtSW, node);
+}
+
+void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
+  VisitRR(this, kMips64CvtSUw, node);
+}
+
+void InstructionSelector::VisitChangeInt32ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDW, node);
+}
+
+void InstructionSelector::VisitChangeInt64ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDL, node);
+}
+
+void InstructionSelector::VisitChangeUint32ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDUw, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  VisitRR(this, kMips64TruncWS, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  VisitRR(this, kMips64TruncUwS, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  // Match ChangeFloat64ToInt32(Float64Round##OP) to corresponding instruction
+  // which does rounding and conversion to integer format.
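+  // e.g. ChangeFloat64ToInt32(Float64RoundDown(x)) is emitted as a single
+  // kMips64FloorWD.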
+  if (CanCover(node, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kFloat64RoundDown:
+        Emit(kMips64FloorWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundUp:
+        Emit(kMips64CeilWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundTiesEven:
+        Emit(kMips64RoundWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      case IrOpcode::kFloat64RoundTruncate:
+        Emit(kMips64TruncWD, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      default:
+        break;
+    }
+    if (value->opcode() == IrOpcode::kChangeFloat32ToFloat64) {
+      Node* next = value->InputAt(0);
+      if (CanCover(value, next)) {
+        // Match ChangeFloat64ToInt32(ChangeFloat32ToFloat64(Float64Round##OP))
+        switch (next->opcode()) {
+          case IrOpcode::kFloat32RoundDown:
+            Emit(kMips64FloorWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundUp:
+            Emit(kMips64CeilWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundTiesEven:
+            Emit(kMips64RoundWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          case IrOpcode::kFloat32RoundTruncate:
+            Emit(kMips64TruncWS, g.DefineAsRegister(node),
+                 g.UseRegister(next->InputAt(0)));
+            return;
+          default:
+            Emit(kMips64TruncWS, g.DefineAsRegister(node),
+                 g.UseRegister(value->InputAt(0)));
+            return;
+        }
+      } else {
+        // Match float32 -> float64 -> int32 representation change path.
+        Emit(kMips64TruncWS, g.DefineAsRegister(node),
+             g.UseRegister(value->InputAt(0)));
+        return;
+      }
+    }
+  }
+  VisitRR(this, kMips64TruncWD, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt64(Node* node) {
+  VisitRR(this, kMips64TruncLD, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint32(Node* node) {
+  VisitRR(this, kMips64TruncUwD, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint64(Node* node) {
+  VisitRR(this, kMips64TruncUlD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToUint32(Node* node) {
+  VisitRR(this, kMips64TruncUwD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToInt64(Node* node) {
+  VisitRR(this, kMips64TruncLD, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  Mips64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  this->Emit(kMips64TruncLS, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  Mips64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kMips64TruncLD, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  Mips64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kMips64TruncUlS, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  Mips64OperandGenerator g(this);
+
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kMips64TruncUlD, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
+  Node* value = node->InputAt(0);
+  if (value->opcode() == IrOpcode::kLoad && CanCover(node, value)) {
+    // Generate sign-extending load.
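+    // e.g. an Int32 load feeding ChangeInt32ToInt64 becomes a single Lw,
+    // which already sign-extends to 64 bits on MIPS64.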
+    LoadRepresentation load_rep = LoadRepresentationOf(value->op());
+    InstructionCode opcode = kArchNop;
+    switch (load_rep.representation()) {
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = load_rep.IsUnsigned() ? kMips64Lbu : kMips64Lb;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = load_rep.IsUnsigned() ? kMips64Lhu : kMips64Lh;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kMips64Lw;
+        break;
+      default:
+        UNREACHABLE();
+    }
+    EmitLoad(this, value, opcode, node);
+  } else {
+    Mips64OperandGenerator g(this);
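+    // A 32-bit shift by zero sign-extends the low word into the full
+    // 64-bit register on MIPS64.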
+    Emit(kMips64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+         g.TempImmediate(0));
+  }
+}
+
+bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) {
+  DCHECK_NE(node->opcode(), IrOpcode::kPhi);
+  switch (node->opcode()) {
+    // 32-bit operations will write their result in a 64-bit register,
+    // clearing the top 32 bits of the destination register.
+    case IrOpcode::kUint32Div:
+    case IrOpcode::kUint32Mod:
+    case IrOpcode::kUint32MulHigh:
+      return true;
+    case IrOpcode::kLoad: {
+      LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+      if (load_rep.IsUnsigned()) {
+        switch (load_rep.representation()) {
+          case MachineRepresentation::kWord8:
+          case MachineRepresentation::kWord16:
+          case MachineRepresentation::kWord32:
+            return true;
+          default:
+            return false;
+        }
+      }
+      return false;
+    }
+    default:
+      return false;
+  }
+}
+
+void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  if (ZeroExtendsWord32ToWord64(value)) {
+    Emit(kArchNop, g.DefineSameAsFirst(node), g.Use(value));
+    return;
+  }
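+  // Zero-extend explicitly by extracting the low 32 bits with Dext.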
+  Emit(kMips64Dext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.TempImmediate(0), g.TempImmediate(32));
+}
+
+void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  if (CanCover(node, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord64Sar: {
+        if (CanCoverTransitively(node, value, value->InputAt(0)) &&
+            TryEmitExtendingLoad(this, value, node)) {
+          return;
+        } else {
+          Int64BinopMatcher m(value);
+          if (m.right().IsInRange(32, 63)) {
+            // After Smi untagging, no separate truncate is needed; combine
+            // the sequence into a single Dsar.
+            Emit(kMips64Dsar, g.DefineSameAsFirst(node),
+                 g.UseRegister(m.left().node()),
+                 g.UseImmediate(m.right().node()));
+            return;
+          }
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  Emit(kMips64Ext, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.TempImmediate(0), g.TempImmediate(32));
+}
+
+void InstructionSelector::VisitTruncateFloat64ToFloat32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  // Match TruncateFloat64ToFloat32(ChangeInt32ToFloat64) to corresponding
+  // instruction.
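+  // i.e. convert the int32 straight to float32 with kMips64CvtSW, skipping
+  // the intermediate float64.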
+  if (CanCover(node, value) &&
+      value->opcode() == IrOpcode::kChangeInt32ToFloat64) {
+    Emit(kMips64CvtSW, g.DefineAsRegister(node),
+         g.UseRegister(value->InputAt(0)));
+    return;
+  }
+  VisitRR(this, kMips64CvtSD, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) {
+  VisitRR(this, kArchTruncateDoubleToI, node);
+}
+
+void InstructionSelector::VisitRoundFloat64ToInt32(Node* node) {
+  VisitRR(this, kMips64TruncWD, node);
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat32(Node* node) {
+  VisitRR(this, kMips64CvtSL, node);
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDL, node);
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat32(Node* node) {
+  VisitRR(this, kMips64CvtSUl, node);
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat64(Node* node) {
+  VisitRR(this, kMips64CvtDUl, node);
+}
+
+void InstructionSelector::VisitBitcastFloat32ToInt32(Node* node) {
+  VisitRR(this, kMips64Float64ExtractLowWord32, node);
+}
+
+void InstructionSelector::VisitBitcastFloat64ToInt64(Node* node) {
+  VisitRR(this, kMips64BitcastDL, node);
+}
+
+void InstructionSelector::VisitBitcastInt32ToFloat32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Float64InsertLowWord32, g.DefineAsRegister(node),
+       ImmediateOperand(ImmediateOperand::INLINE, 0),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitBitcastInt64ToFloat64(Node* node) {
+  VisitRR(this, kMips64BitcastLD, node);
+}
+
+void InstructionSelector::VisitFloat32Add(Node* node) {
+  // Optimization with Madd.S(z, x, y) is intentionally removed.
+  // See explanation for madd_s in assembler-mips64.cc.
+  VisitRRR(this, kMips64AddS, node);
+}
+
+void InstructionSelector::VisitFloat64Add(Node* node) {
+  // Optimization with Madd.D(z, x, y) is intentionally removed.
+  // See explanation for madd_d in assembler-mips64.cc.
+  VisitRRR(this, kMips64AddD, node);
+}
+
+void InstructionSelector::VisitFloat32Sub(Node* node) {
+  // Optimization with Msub.S(z, x, y) is intentionally removed.
+  // See explanation for madd_s in assembler-mips64.cc.
+  VisitRRR(this, kMips64SubS, node);
+}
+
+void InstructionSelector::VisitFloat64Sub(Node* node) {
+  // Optimization with Msub.D(z, x, y) is intentionally removed.
+  // See explanation for madd_d in assembler-mips64.cc.
+  VisitRRR(this, kMips64SubD, node);
+}
+
+void InstructionSelector::VisitFloat32Mul(Node* node) {
+  VisitRRR(this, kMips64MulS, node);
+}
+
+void InstructionSelector::VisitFloat64Mul(Node* node) {
+  VisitRRR(this, kMips64MulD, node);
+}
+
+void InstructionSelector::VisitFloat32Div(Node* node) {
+  VisitRRR(this, kMips64DivS, node);
+}
+
+void InstructionSelector::VisitFloat64Div(Node* node) {
+  VisitRRR(this, kMips64DivD, node);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64ModD, g.DefineAsFixed(node, f0),
+       g.UseFixed(node->InputAt(0), f12), g.UseFixed(node->InputAt(1), f14))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat32Max(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Float32Max, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat64Max(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Float64Max, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat32Min(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Float32Min, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat64Min(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Float64Min, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitFloat32Abs(Node* node) {
+  VisitRR(this, kMips64AbsS, node);
+}
+
+void InstructionSelector::VisitFloat64Abs(Node* node) {
+  VisitRR(this, kMips64AbsD, node);
+}
+
+void InstructionSelector::VisitFloat32Sqrt(Node* node) {
+  VisitRR(this, kMips64SqrtS, node);
+}
+
+void InstructionSelector::VisitFloat64Sqrt(Node* node) {
+  VisitRR(this, kMips64SqrtD, node);
+}
+
+void InstructionSelector::VisitFloat32RoundDown(Node* node) {
+  VisitRR(this, kMips64Float32RoundDown, node);
+}
+
+void InstructionSelector::VisitFloat64RoundDown(Node* node) {
+  VisitRR(this, kMips64Float64RoundDown, node);
+}
+
+void InstructionSelector::VisitFloat32RoundUp(Node* node) {
+  VisitRR(this, kMips64Float32RoundUp, node);
+}
+
+void InstructionSelector::VisitFloat64RoundUp(Node* node) {
+  VisitRR(this, kMips64Float64RoundUp, node);
+}
+
+void InstructionSelector::VisitFloat32RoundTruncate(Node* node) {
+  VisitRR(this, kMips64Float32RoundTruncate, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTruncate(Node* node) {
+  VisitRR(this, kMips64Float64RoundTruncate, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
+  VisitRR(this, kMips64Float32RoundTiesEven, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
+  VisitRR(this, kMips64Float64RoundTiesEven, node);
+}
+
+void InstructionSelector::VisitFloat32Neg(Node* node) {
+  VisitRR(this, kMips64NegS, node);
+}
+
+void InstructionSelector::VisitFloat64Neg(Node* node) {
+  VisitRR(this, kMips64NegD, node);
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  Mips64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f2),
+       g.UseFixed(node->InputAt(1), f4))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  Mips64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, f0), g.UseFixed(node->InputAt(0), f12))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  Mips64OperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    int slot = kCArgSlotCount;
+    for (PushParameter input : (*arguments)) {
+      Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+           g.TempImmediate(slot << kSystemPointerSizeLog2));
+      ++slot;
+    }
+  } else {
+    int push_count = static_cast<int>(call_descriptor->StackParameterCount());
+    if (push_count > 0) {
+      // Calculate the stack space needed for the arguments.
+      int stack_size = 0;
+      for (PushParameter input : (*arguments)) {
+        if (input.node) {
+          stack_size += input.location.GetSizeInPointers();
+        }
+      }
+      Emit(kMips64StackClaim, g.NoOutput(),
+           g.TempImmediate(stack_size << kSystemPointerSizeLog2));
+    }
+    for (size_t n = 0; n < arguments->size(); ++n) {
+      PushParameter input = (*arguments)[n];
+      if (input.node) {
+        Emit(kMips64StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+             g.TempImmediate(static_cast<int>(n << kSystemPointerSizeLog2)));
+      }
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  Mips64OperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      Emit(kMips64Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+void InstructionSelector::VisitUnalignedLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  Mips64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kMips64Ulwc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMips64Uldc1;
+      break;
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsUnsigned() ? kMips64Lbu : kMips64Lb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsUnsigned() ? kMips64Ulhu : kMips64Ulh;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = load_rep.IsUnsigned() ? kMips64Ulwu : kMips64Ulw;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord64:
+      opcode = kMips64Uld;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMips64MsaLd;
+      break;
+    case MachineRepresentation::kBit:                // Fall through.
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMips64Dadd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitUnalignedStore(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  UnalignedStoreRepresentation rep = UnalignedStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+      opcode = kMips64Uswc1;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kMips64Usdc1;
+      break;
+    case MachineRepresentation::kWord8:
+      opcode = kMips64Sb;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kMips64Ush;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kMips64Usw;
+      break;
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+    case MachineRepresentation::kWord64:
+      opcode = kMips64Usd;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kMips64MsaSt;
+      break;
+    case MachineRepresentation::kBit:                // Fall through.
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:         // Fall through.
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  if (g.CanBeImmediate(index, opcode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         g.UseRegister(base), g.UseImmediate(index),
+         g.UseRegisterOrImmediateZero(value));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    Emit(kMips64Dadd | AddressingModeField::encode(kMode_None), addr_reg,
+         g.UseRegister(index), g.UseRegister(base));
+    // Emit desired store opcode, using temp addr_reg.
+    Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+         addr_reg, g.TempImmediate(0), g.UseRegisterOrImmediateZero(value));
+  }
+}
+
+namespace {
+
+// Shared routine for multiple compare operations.
+static void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                         InstructionOperand left, InstructionOperand right,
+                         FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Mips64OperandGenerator g(selector);
+  Float32BinopMatcher m(node);
+  InstructionOperand lhs, rhs;
+
+  lhs = m.left().IsZero() ? g.UseImmediate(m.left().node())
+                          : g.UseRegister(m.left().node());
+  rhs = m.right().IsZero() ? g.UseImmediate(m.right().node())
+                           : g.UseRegister(m.right().node());
+  VisitCompare(selector, kMips64CmpS, lhs, rhs, cont);
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Mips64OperandGenerator g(selector);
+  Float64BinopMatcher m(node);
+  InstructionOperand lhs, rhs;
+
+  lhs = m.left().IsZero() ? g.UseImmediate(m.left().node())
+                          : g.UseRegister(m.left().node());
+  rhs = m.right().IsZero() ? g.UseImmediate(m.right().node())
+                           : g.UseRegister(m.right().node());
+  VisitCompare(selector, kMips64CmpD, lhs, rhs, cont);
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont,
+                      bool commutative) {
+  Mips64OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  // Match immediates on left or right side of comparison.
+  if (g.CanBeImmediate(right, opcode)) {
+    if (opcode == kMips64Tst) {
+      VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
+                   cont);
+    } else {
+      switch (cont->condition()) {
+        case kEqual:
+        case kNotEqual:
+          if (cont->IsSet()) {
+            VisitCompare(selector, opcode, g.UseRegister(left),
+                         g.UseImmediate(right), cont);
+          } else {
+            VisitCompare(selector, opcode, g.UseRegister(left),
+                         g.UseRegister(right), cont);
+          }
+          break;
+        case kSignedLessThan:
+        case kSignedGreaterThanOrEqual:
+        case kUnsignedLessThan:
+        case kUnsignedGreaterThanOrEqual:
+          VisitCompare(selector, opcode, g.UseRegister(left),
+                       g.UseImmediate(right), cont);
+          break;
+        default:
+          VisitCompare(selector, opcode, g.UseRegister(left),
+                       g.UseRegister(right), cont);
+      }
+    }
+  } else if (g.CanBeImmediate(left, opcode)) {
+    if (!commutative) cont->Commute();
+    if (opcode == kMips64Tst) {
+      VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left),
+                   cont);
+    } else {
+      switch (cont->condition()) {
+        case kEqual:
+        case kNotEqual:
+          if (cont->IsSet()) {
+            VisitCompare(selector, opcode, g.UseRegister(right),
+                         g.UseImmediate(left), cont);
+          } else {
+            VisitCompare(selector, opcode, g.UseRegister(right),
+                         g.UseRegister(left), cont);
+          }
+          break;
+        case kSignedLessThan:
+        case kSignedGreaterThanOrEqual:
+        case kUnsignedLessThan:
+        case kUnsignedGreaterThanOrEqual:
+          VisitCompare(selector, opcode, g.UseRegister(right),
+                       g.UseImmediate(left), cont);
+          break;
+        default:
+          VisitCompare(selector, opcode, g.UseRegister(right),
+                       g.UseRegister(left), cont);
+      }
+    }
+  } else {
+    VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
+                 cont);
+  }
+}
+
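+// Returns true if the value produced by |n| is known to be zero-extended
+// (unsigned) in its register, i.e. it is an unsigned load or a Uint32
+// operation. Used below to detect signed/unsigned mismatches in Word32
+// compares.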
+bool IsNodeUnsigned(Node* n) {
+  NodeMatcher m(n);
+
+  if (m.IsLoad() || m.IsUnalignedLoad() || m.IsPoisonedLoad() ||
+      m.IsProtectedLoad() || m.IsWord32AtomicLoad() || m.IsWord64AtomicLoad()) {
+    LoadRepresentation load_rep = LoadRepresentationOf(n->op());
+    return load_rep.IsUnsigned();
+  } else {
+    return m.IsUint32Div() || m.IsUint32LessThan() ||
+           m.IsUint32LessThanOrEqual() || m.IsUint32Mod() ||
+           m.IsUint32MulHigh() || m.IsChangeFloat64ToUint32() ||
+           m.IsTruncateFloat64ToUint32() || m.IsTruncateFloat32ToUint32();
+  }
+}
+
+// Performs a full Word32 compare: both operands are shifted left by 32 bits
+// so that only their lower 32 bits take part in the 64-bit comparison.
+void VisitFullWord32Compare(InstructionSelector* selector, Node* node,
+                            InstructionCode opcode, FlagsContinuation* cont) {
+  Mips64OperandGenerator g(selector);
+  InstructionOperand leftOp = g.TempRegister();
+  InstructionOperand rightOp = g.TempRegister();
+
+  selector->Emit(kMips64Dshl, leftOp, g.UseRegister(node->InputAt(0)),
+                 g.TempImmediate(32));
+  selector->Emit(kMips64Dshl, rightOp, g.UseRegister(node->InputAt(1)),
+                 g.TempImmediate(32));
+
+  VisitCompare(selector, opcode, leftOp, rightOp, cont);
+}
+
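+// Compares the operands directly, assuming both are properly sign-extended.
+// With --debug-code enabled, it additionally performs the full Word32
+// compare and asserts that both variants agree.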
+void VisitOptimizedWord32Compare(InstructionSelector* selector, Node* node,
+                                 InstructionCode opcode,
+                                 FlagsContinuation* cont) {
+  if (FLAG_debug_code) {
+    Mips64OperandGenerator g(selector);
+    InstructionOperand leftOp = g.TempRegister();
+    InstructionOperand rightOp = g.TempRegister();
+    InstructionOperand optimizedResult = g.TempRegister();
+    InstructionOperand fullResult = g.TempRegister();
+    FlagsCondition condition = cont->condition();
+    InstructionCode testOpcode = opcode |
+                                 FlagsConditionField::encode(condition) |
+                                 FlagsModeField::encode(kFlags_set);
+
+    selector->Emit(testOpcode, optimizedResult, g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(1)));
+
+    selector->Emit(kMips64Dshl, leftOp, g.UseRegister(node->InputAt(0)),
+                   g.TempImmediate(32));
+    selector->Emit(kMips64Dshl, rightOp, g.UseRegister(node->InputAt(1)),
+                   g.TempImmediate(32));
+    selector->Emit(testOpcode, fullResult, leftOp, rightOp);
+
+    selector->Emit(
+        kMips64AssertEqual, g.NoOutput(), optimizedResult, fullResult,
+        g.TempImmediate(
+            static_cast<int>(AbortReason::kUnsupportedNonPrimitiveCompare)));
+  }
+
+  VisitWordCompare(selector, node, opcode, cont, false);
+}
+
+void VisitWord32Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  // MIPS64 has no dedicated Word32 compare instructions. Instead, the
+  // selector relies on 32-bit values being correctly sign-extended in their
+  // 64-bit registers and performs a Word64 comparison. This is correct in
+  // most cases, but breaks when a signed operand is compared with an
+  // unsigned one. Simulating a full Word32 compare everywhere would add
+  // unnecessary overhead, since unsigned integers are rare in JavaScript,
+  // so the code below falls back to a full Word32 compare only when it
+  // detects a signed/unsigned mismatch. This heuristic is not complete (it
+  // may miss cases that require the full compare), so it is essentially a
+  // workaround.
+  // When running on the simulator, a call to a host function returning an
+  // int32 is not sign-extended to int64, because the simulator cannot tell
+  // whether the function returns an int32 or an int64; such calls therefore
+  // also need a full Word32 compare.
+#ifndef USE_SIMULATOR
+  if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1))) {
+#else
+  if (IsNodeUnsigned(node->InputAt(0)) != IsNodeUnsigned(node->InputAt(1)) ||
+      node->InputAt(0)->opcode() == IrOpcode::kCall ||
+      node->InputAt(1)->opcode() == IrOpcode::kCall) {
+#endif
+    VisitFullWord32Compare(selector, node, kMips64Cmp, cont);
+  } else {
+    VisitOptimizedWord32Compare(selector, node, kMips64Cmp, cont);
+  }
+}
+
+void VisitWord64Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  VisitWordCompare(selector, node, kMips64Cmp, cont, false);
+}
+
+void EmitWordCompareZero(InstructionSelector* selector, Node* value,
+                         FlagsContinuation* cont) {
+  Mips64OperandGenerator g(selector);
+  selector->EmitWithContinuation(kMips64Cmp, g.UseRegister(value),
+                                 g.TempImmediate(0), cont);
+}
+
+void VisitAtomicLoad(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  if (g.CanBeImmediate(index, opcode)) {
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.DefineAsRegister(node), g.UseRegister(base),
+                   g.UseImmediate(index));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    selector->Emit(kMips64Dadd | AddressingModeField::encode(kMode_None),
+                   addr_reg, g.UseRegister(index), g.UseRegister(base));
+    // Emit desired load opcode, using temp addr_reg.
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.DefineAsRegister(node), addr_reg, g.TempImmediate(0));
+  }
+}
+
+void VisitAtomicStore(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  if (g.CanBeImmediate(index, opcode)) {
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.NoOutput(), g.UseRegister(base), g.UseImmediate(index),
+                   g.UseRegisterOrImmediateZero(value));
+  } else {
+    InstructionOperand addr_reg = g.TempRegister();
+    selector->Emit(kMips64Dadd | AddressingModeField::encode(kMode_None),
+                   addr_reg, g.UseRegister(index), g.UseRegister(base));
+    // Emit desired store opcode, using temp addr_reg.
+    selector->Emit(opcode | AddressingModeField::encode(kMode_MRI),
+                   g.NoOutput(), addr_reg, g.TempImmediate(0),
+                   g.UseRegisterOrImmediateZero(value));
+  }
+}
+
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
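+  // The code generator expands this into an ll/sc retry loop, so the inputs
+  // must not alias the output or the temps; hence the unique registers.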
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temp[3];
+  temp[0] = g.TempRegister();
+  temp[1] = g.TempRegister();
+  temp[2] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, input_count, inputs, 3, temp);
+}
+
+void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node,
+                                ArchOpcode opcode) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(old_value);
+  inputs[input_count++] = g.UseUniqueRegister(new_value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temp[3];
+  temp[0] = g.TempRegister();
+  temp[1] = g.TempRegister();
+  temp[2] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, input_count, inputs, 3, temp);
+}
+
+void VisitAtomicBinop(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  Mips64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  AddressingMode addressing_mode = kMode_MRI;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionOperand temps[4];
+  temps[0] = g.TempRegister();
+  temps[1] = g.TempRegister();
+  temps[2] = g.TempRegister();
+  temps[3] = g.TempRegister();
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, input_count, inputs, 4, temps);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  Mips64OperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry ? 1 : 0);
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+// Shared routine for word comparisons against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (CanCover(user, value)) {
+    if (value->opcode() == IrOpcode::kWord32Equal) {
+      Int32BinopMatcher m(value);
+      if (!m.right().Is(0)) break;
+      user = value;
+      value = m.left().node();
+    } else if (value->opcode() == IrOpcode::kWord64Equal) {
+      Int64BinopMatcher m(value);
+      if (!m.right().Is(0)) break;
+      user = value;
+      value = m.left().node();
+    } else {
+      break;
+    }
+
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kWord64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kInt64LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kInt64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there is no use of
+          // the actual value, or was already defined, which means it is
+          // scheduled *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMips64Dadd, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMips64Dsub, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMips64MulOvf, cont);
+              case IrOpcode::kInt64AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMips64DaddOvf, cont);
+              case IrOpcode::kInt64SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kMips64DsubOvf, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kWord32And:
+      case IrOpcode::kWord64And:
+        return VisitWordCompare(this, value, kMips64Tst, cont, true);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Continuation could not be combined with a compare, emit compare against 0.
+  EmitWordCompareZero(this, value, cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  Mips64OperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
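+  // A jump table is used only when there is at least one case, the value
+  // range is small enough, and its estimated cost (space plus time, with
+  // time weighted by a factor of 3) does not exceed that of a binary search.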
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 10 + 2 * sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 2 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
+    if (sw.case_count() > 0 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kMips64Sub, index_operand, value_operand,
+             g.TempImmediate(sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMips64Dadd, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMips64Dadd, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMips64Dsub, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMips64Dsub, &cont);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMips64MulOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMips64MulOvf, &cont);
+}
+
+void InstructionSelector::VisitInt64AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMips64DaddOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMips64DaddOvf, &cont);
+}
+
+void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kMips64DsubOvf, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kMips64DsubOvf, &cont);
+}
+
+void InstructionSelector::VisitWord64Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int64BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64ExtractLowWord32(Node* node) {
+  VisitRR(this, kMips64Float64ExtractLowWord32, node);
+}
+
+void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) {
+  VisitRR(this, kMips64Float64ExtractHighWord32, node);
+}
+
+void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
+  VisitRR(this, kMips64Float64SilenceNaN, node);
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kMips64Float64InsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kMips64Float64InsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Sync, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode =
+          load_rep.IsSigned() ? kWord32AtomicLoadInt8 : kWord32AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kWord32AtomicLoadInt16
+                                   : kWord32AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicLoadWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicStoreWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  VisitAtomicStore(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kWord8:
+      opcode = kMips64Word64AtomicLoadUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kMips64Word64AtomicLoadUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kMips64Word64AtomicLoadUint32;
+      break;
+    case MachineRepresentation::kWord64:
+      opcode = kMips64Word64AtomicLoadUint64;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicLoad(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kMips64Word64AtomicStoreWord8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kMips64Word64AtomicStoreWord16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kMips64Word64AtomicStoreWord32;
+      break;
+    case MachineRepresentation::kWord64:
+      opcode = kMips64Word64AtomicStoreWord64;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  VisitAtomicStore(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kMips64Word64AtomicExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kMips64Word64AtomicExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kMips64Word64AtomicExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kMips64Word64AtomicExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kMips64Word64AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kMips64Word64AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kMips64Word64AtomicCompareExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kMips64Word64AtomicCompareExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord64AtomicBinaryOperation(
+    Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op,
+    ArchOpcode uint64_op) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Uint32()) {
+    opcode = uint32_op;
+  } else if (type == MachineType::Uint64()) {
+    opcode = uint64_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                                 \
+  void InstructionSelector::VisitWord64Atomic##op(Node* node) {                \
+    VisitWord64AtomicBinaryOperation(                                          \
+        node, kMips64Word64Atomic##op##Uint8, kMips64Word64Atomic##op##Uint16, \
+        kMips64Word64Atomic##op##Uint32, kMips64Word64Atomic##op##Uint64);     \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+#define SIMD_TYPE_LIST(V) \
+  V(F64x2)                \
+  V(F32x4)                \
+  V(I64x2)                \
+  V(I32x4)                \
+  V(I16x8)                \
+  V(I8x16)
+
+#define SIMD_UNOP_LIST(V)                                  \
+  V(F64x2Abs, kMips64F64x2Abs)                             \
+  V(F64x2Neg, kMips64F64x2Neg)                             \
+  V(F64x2Sqrt, kMips64F64x2Sqrt)                           \
+  V(F64x2Ceil, kMips64F64x2Ceil)                           \
+  V(F64x2Floor, kMips64F64x2Floor)                         \
+  V(F64x2Trunc, kMips64F64x2Trunc)                         \
+  V(F64x2NearestInt, kMips64F64x2NearestInt)               \
+  V(I64x2Neg, kMips64I64x2Neg)                             \
+  V(F32x4SConvertI32x4, kMips64F32x4SConvertI32x4)         \
+  V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4)         \
+  V(F32x4Abs, kMips64F32x4Abs)                             \
+  V(F32x4Neg, kMips64F32x4Neg)                             \
+  V(F32x4Sqrt, kMips64F32x4Sqrt)                           \
+  V(F32x4RecipApprox, kMips64F32x4RecipApprox)             \
+  V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox)     \
+  V(F32x4Ceil, kMips64F32x4Ceil)                           \
+  V(F32x4Floor, kMips64F32x4Floor)                         \
+  V(F32x4Trunc, kMips64F32x4Trunc)                         \
+  V(F32x4NearestInt, kMips64F32x4NearestInt)               \
+  V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4)         \
+  V(I32x4UConvertF32x4, kMips64I32x4UConvertF32x4)         \
+  V(I32x4Neg, kMips64I32x4Neg)                             \
+  V(I32x4SConvertI16x8Low, kMips64I32x4SConvertI16x8Low)   \
+  V(I32x4SConvertI16x8High, kMips64I32x4SConvertI16x8High) \
+  V(I32x4UConvertI16x8Low, kMips64I32x4UConvertI16x8Low)   \
+  V(I32x4UConvertI16x8High, kMips64I32x4UConvertI16x8High) \
+  V(I32x4Abs, kMips64I32x4Abs)                             \
+  V(I32x4BitMask, kMips64I32x4BitMask)                     \
+  V(I16x8Neg, kMips64I16x8Neg)                             \
+  V(I16x8SConvertI8x16Low, kMips64I16x8SConvertI8x16Low)   \
+  V(I16x8SConvertI8x16High, kMips64I16x8SConvertI8x16High) \
+  V(I16x8UConvertI8x16Low, kMips64I16x8UConvertI8x16Low)   \
+  V(I16x8UConvertI8x16High, kMips64I16x8UConvertI8x16High) \
+  V(I16x8Abs, kMips64I16x8Abs)                             \
+  V(I16x8BitMask, kMips64I16x8BitMask)                     \
+  V(I8x16Neg, kMips64I8x16Neg)                             \
+  V(I8x16Abs, kMips64I8x16Abs)                             \
+  V(I8x16BitMask, kMips64I8x16BitMask)                     \
+  V(S128Not, kMips64S128Not)                               \
+  V(V32x4AnyTrue, kMips64V32x4AnyTrue)                     \
+  V(V32x4AllTrue, kMips64V32x4AllTrue)                     \
+  V(V16x8AnyTrue, kMips64V16x8AnyTrue)                     \
+  V(V16x8AllTrue, kMips64V16x8AllTrue)                     \
+  V(V8x16AnyTrue, kMips64V8x16AnyTrue)                     \
+  V(V8x16AllTrue, kMips64V8x16AllTrue)
+
+#define SIMD_SHIFT_OP_LIST(V) \
+  V(I64x2Shl)                 \
+  V(I64x2ShrS)                \
+  V(I64x2ShrU)                \
+  V(I32x4Shl)                 \
+  V(I32x4ShrS)                \
+  V(I32x4ShrU)                \
+  V(I16x8Shl)                 \
+  V(I16x8ShrS)                \
+  V(I16x8ShrU)                \
+  V(I8x16Shl)                 \
+  V(I8x16ShrS)                \
+  V(I8x16ShrU)
+
+#define SIMD_BINOP_LIST(V)                               \
+  V(F64x2Add, kMips64F64x2Add)                           \
+  V(F64x2Sub, kMips64F64x2Sub)                           \
+  V(F64x2Mul, kMips64F64x2Mul)                           \
+  V(F64x2Div, kMips64F64x2Div)                           \
+  V(F64x2Min, kMips64F64x2Min)                           \
+  V(F64x2Max, kMips64F64x2Max)                           \
+  V(F64x2Eq, kMips64F64x2Eq)                             \
+  V(F64x2Ne, kMips64F64x2Ne)                             \
+  V(F64x2Lt, kMips64F64x2Lt)                             \
+  V(F64x2Le, kMips64F64x2Le)                             \
+  V(I64x2Add, kMips64I64x2Add)                           \
+  V(I64x2Sub, kMips64I64x2Sub)                           \
+  V(I64x2Mul, kMips64I64x2Mul)                           \
+  V(F32x4Add, kMips64F32x4Add)                           \
+  V(F32x4AddHoriz, kMips64F32x4AddHoriz)                 \
+  V(F32x4Sub, kMips64F32x4Sub)                           \
+  V(F32x4Mul, kMips64F32x4Mul)                           \
+  V(F32x4Div, kMips64F32x4Div)                           \
+  V(F32x4Max, kMips64F32x4Max)                           \
+  V(F32x4Min, kMips64F32x4Min)                           \
+  V(F32x4Eq, kMips64F32x4Eq)                             \
+  V(F32x4Ne, kMips64F32x4Ne)                             \
+  V(F32x4Lt, kMips64F32x4Lt)                             \
+  V(F32x4Le, kMips64F32x4Le)                             \
+  V(I32x4Add, kMips64I32x4Add)                           \
+  V(I32x4AddHoriz, kMips64I32x4AddHoriz)                 \
+  V(I32x4Sub, kMips64I32x4Sub)                           \
+  V(I32x4Mul, kMips64I32x4Mul)                           \
+  V(I32x4MaxS, kMips64I32x4MaxS)                         \
+  V(I32x4MinS, kMips64I32x4MinS)                         \
+  V(I32x4MaxU, kMips64I32x4MaxU)                         \
+  V(I32x4MinU, kMips64I32x4MinU)                         \
+  V(I32x4Eq, kMips64I32x4Eq)                             \
+  V(I32x4Ne, kMips64I32x4Ne)                             \
+  V(I32x4GtS, kMips64I32x4GtS)                           \
+  V(I32x4GeS, kMips64I32x4GeS)                           \
+  V(I32x4GtU, kMips64I32x4GtU)                           \
+  V(I32x4GeU, kMips64I32x4GeU)                           \
+  V(I32x4DotI16x8S, kMips64I32x4DotI16x8S)               \
+  V(I16x8Add, kMips64I16x8Add)                           \
+  V(I16x8AddSatS, kMips64I16x8AddSatS)                   \
+  V(I16x8AddSatU, kMips64I16x8AddSatU)                   \
+  V(I16x8AddHoriz, kMips64I16x8AddHoriz)                 \
+  V(I16x8Sub, kMips64I16x8Sub)                           \
+  V(I16x8SubSatS, kMips64I16x8SubSatS)                   \
+  V(I16x8SubSatU, kMips64I16x8SubSatU)                   \
+  V(I16x8Mul, kMips64I16x8Mul)                           \
+  V(I16x8MaxS, kMips64I16x8MaxS)                         \
+  V(I16x8MinS, kMips64I16x8MinS)                         \
+  V(I16x8MaxU, kMips64I16x8MaxU)                         \
+  V(I16x8MinU, kMips64I16x8MinU)                         \
+  V(I16x8Eq, kMips64I16x8Eq)                             \
+  V(I16x8Ne, kMips64I16x8Ne)                             \
+  V(I16x8GtS, kMips64I16x8GtS)                           \
+  V(I16x8GeS, kMips64I16x8GeS)                           \
+  V(I16x8GtU, kMips64I16x8GtU)                           \
+  V(I16x8GeU, kMips64I16x8GeU)                           \
+  V(I16x8RoundingAverageU, kMips64I16x8RoundingAverageU) \
+  V(I16x8SConvertI32x4, kMips64I16x8SConvertI32x4)       \
+  V(I16x8UConvertI32x4, kMips64I16x8UConvertI32x4)       \
+  V(I8x16Add, kMips64I8x16Add)                           \
+  V(I8x16AddSatS, kMips64I8x16AddSatS)                   \
+  V(I8x16AddSatU, kMips64I8x16AddSatU)                   \
+  V(I8x16Sub, kMips64I8x16Sub)                           \
+  V(I8x16SubSatS, kMips64I8x16SubSatS)                   \
+  V(I8x16SubSatU, kMips64I8x16SubSatU)                   \
+  V(I8x16Mul, kMips64I8x16Mul)                           \
+  V(I8x16MaxS, kMips64I8x16MaxS)                         \
+  V(I8x16MinS, kMips64I8x16MinS)                         \
+  V(I8x16MaxU, kMips64I8x16MaxU)                         \
+  V(I8x16MinU, kMips64I8x16MinU)                         \
+  V(I8x16Eq, kMips64I8x16Eq)                             \
+  V(I8x16Ne, kMips64I8x16Ne)                             \
+  V(I8x16GtS, kMips64I8x16GtS)                           \
+  V(I8x16GeS, kMips64I8x16GeS)                           \
+  V(I8x16GtU, kMips64I8x16GtU)                           \
+  V(I8x16GeU, kMips64I8x16GeU)                           \
+  V(I8x16RoundingAverageU, kMips64I8x16RoundingAverageU) \
+  V(I8x16SConvertI16x8, kMips64I8x16SConvertI16x8)       \
+  V(I8x16UConvertI16x8, kMips64I8x16UConvertI16x8)       \
+  V(S128And, kMips64S128And)                             \
+  V(S128Or, kMips64S128Or)                               \
+  V(S128Xor, kMips64S128Xor)                             \
+  V(S128AndNot, kMips64S128AndNot)
+
+void InstructionSelector::VisitS128Const(Node* node) {
+  Mips64OperandGenerator g(this);
+  static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
+  uint32_t val[kUint32Immediates];
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If all bytes are zeros or ones, avoid emitting code for generic constants.
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
+                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kMips64S128Zero, dst);
+  } else if (all_ones) {
+    Emit(kMips64S128AllOnes, dst);
+  } else {
+    Emit(kMips64S128Const, dst, g.UseImmediate(val[0]), g.UseImmediate(val[1]),
+         g.UseImmediate(val[2]), g.UseImmediate(val[3]));
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64S128Zero, g.DefineAsRegister(node));
+}
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    VisitRR(this, kMips64##Type##Splat, node);               \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    VisitRRI(this, kMips64##Type##ExtractLane##Sign, node);              \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I64x2, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    VisitRRIR(this, kMips64##Type##ReplaceLane, node);             \
+  }
+SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
+#undef SIMD_VISIT_REPLACE_LANE
+
+#define SIMD_VISIT_UNOP(Name, instruction)            \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, instruction, node);                 \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+
+#define SIMD_VISIT_SHIFT_OP(Name)                     \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitSimdShift(this, kMips64##Name, node);        \
+  }
+SIMD_SHIFT_OP_LIST(SIMD_VISIT_SHIFT_OP)
+#undef SIMD_VISIT_SHIFT_OP
+
+#define SIMD_VISIT_BINOP(Name, instruction)           \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRRR(this, instruction, node);                \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  VisitRRRR(this, kMips64S128Select, node);
+}
+
+namespace {
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+};
+
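+// Shuffles, expressed as byte indices into the concatenation of the two
+// inputs, that map directly to a single MSA interleave, pack or reverse
+// instruction.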
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kMips64S32x4InterleaveRight},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kMips64S32x4InterleaveLeft},
+    {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
+     kMips64S32x4PackEven},
+    {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
+     kMips64S32x4PackOdd},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
+     kMips64S32x4InterleaveEven},
+    {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
+     kMips64S32x4InterleaveOdd},
+
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kMips64S16x8InterleaveRight},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kMips64S16x8InterleaveLeft},
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kMips64S16x8PackEven},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kMips64S16x8PackOdd},
+    {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
+     kMips64S16x8InterleaveEven},
+    {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
+     kMips64S16x8InterleaveOdd},
+    {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9},
+     kMips64S16x4Reverse},
+    {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
+     kMips64S16x2Reverse},
+
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kMips64S8x16InterleaveRight},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kMips64S8x16InterleaveLeft},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kMips64S8x16PackEven},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kMips64S8x16PackOdd},
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kMips64S8x16InterleaveEven},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kMips64S8x16InterleaveOdd},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
+     kMips64S8x8Reverse},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
+     kMips64S8x4Reverse},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
+     kMips64S8x2Reverse}};
+
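+// Tries to match |shuffle| against the table above. For swizzles, the bit
+// that selects between the two inputs is ignored. On success the matching
+// opcode is stored in |opcode| and true is returned.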
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         ArchOpcode* opcode) {
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
+  for (size_t i = 0; i < num_entries; ++i) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; ++j) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *opcode = entry.opcode;
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  uint8_t shuffle32x4[4];
+  ArchOpcode opcode;
+  if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
+                          is_swizzle, &opcode)) {
+    VisitRRR(this, opcode, node);
+    return;
+  }
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  uint8_t offset;
+  Mips64OperandGenerator g(this);
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    Emit(kMips64S8x16Concat, g.DefineSameAsFirst(node), g.UseRegister(input1),
+         g.UseRegister(input0), g.UseImmediate(offset));
+    return;
+  }
+  if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    Emit(kMips64S32x4Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
+         g.UseRegister(input1),
+         g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle32x4)));
+    return;
+  }
+  Emit(kMips64I8x16Shuffle, g.DefineAsRegister(node), g.UseRegister(input0),
+       g.UseRegister(input1),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle + 12)));
+}
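+
+// Illustrative note (assumption about SimdShuffle::Pack4Lanes): each call
+// packs four byte-sized lane indices into one 32-bit immediate, lane 0 in the
+// low byte, so the generic kMips64I8x16Shuffle instruction receives the whole
+// 16-byte shuffle pattern as four immediates.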
+
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  Mips64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  // We don't want input 0 or input 1 to be the same as the output, since we
+  // will modify the output before doing the calculation.
+  Emit(kMips64I8x16Swizzle, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Seb, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Seh, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
+  Mips64OperandGenerator g(this);
+  Emit(kMips64Shl, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
+       g.TempImmediate(0));
+}
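+
+// Illustrative note (assumption): on MIPS64 the 32-bit shift instructions
+// sign-extend their 32-bit result into the full 64-bit register, so emitting
+// kMips64Shl with a zero shift amount is a cheap way to implement
+// SignExtendWord32ToInt64.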
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitUniqueRRR(this, kMips64F32x4Pmin, node);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitUniqueRRR(this, kMips64F32x4Pmax, node);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitUniqueRRR(this, kMips64F64x2Pmin, node);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitUniqueRRR(this, kMips64F64x2Pmax, node);
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  MachineOperatorBuilder::Flags flags = MachineOperatorBuilder::kNoFlags;
+  return flags | MachineOperatorBuilder::kWord32Ctz |
+         MachineOperatorBuilder::kWord64Ctz |
+         MachineOperatorBuilder::kWord32Popcnt |
+         MachineOperatorBuilder::kWord64Popcnt |
+         MachineOperatorBuilder::kWord32ShiftIsSafe |
+         MachineOperatorBuilder::kInt32DivIsSafe |
+         MachineOperatorBuilder::kUint32DivIsSafe |
+         MachineOperatorBuilder::kFloat64RoundDown |
+         MachineOperatorBuilder::kFloat32RoundDown |
+         MachineOperatorBuilder::kFloat64RoundUp |
+         MachineOperatorBuilder::kFloat32RoundUp |
+         MachineOperatorBuilder::kFloat64RoundTruncate |
+         MachineOperatorBuilder::kFloat32RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTiesEven |
+         MachineOperatorBuilder::kFloat32RoundTiesEven;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  if (kArchVariant == kMips64r6) {
+    return MachineOperatorBuilder::AlignmentRequirements::
+        FullUnalignedAccessSupport();
+  } else {
+    DCHECK_EQ(kMips64r2, kArchVariant);
+    return MachineOperatorBuilder::AlignmentRequirements::
+        NoUnalignedAccessSupport();
+  }
+}
+
+#undef SIMD_BINOP_LIST
+#undef SIMD_SHIFT_OP_LIST
+#undef SIMD_UNOP_LIST
+#undef SIMD_TYPE_LIST
+#undef TRACE_UNIMPL
+#undef TRACE
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/move-optimizer.cc b/src/compiler/backend/move-optimizer.cc
new file mode 100644
index 0000000..35d771e
--- /dev/null
+++ b/src/compiler/backend/move-optimizer.cc
@@ -0,0 +1,555 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/move-optimizer.h"
+
+#include "src/codegen/register-configuration.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+namespace {
+
+struct MoveKey {
+  InstructionOperand source;
+  InstructionOperand destination;
+};
+
+struct MoveKeyCompare {
+  bool operator()(const MoveKey& a, const MoveKey& b) const {
+    if (a.source.EqualsCanonicalized(b.source)) {
+      return a.destination.CompareCanonicalized(b.destination);
+    }
+    return a.source.CompareCanonicalized(b.source);
+  }
+};
+
+using MoveMap = ZoneMap<MoveKey, unsigned, MoveKeyCompare>;
+
+class OperandSet {
+ public:
+  explicit OperandSet(ZoneVector<InstructionOperand>* buffer)
+      : set_(buffer), fp_reps_(0) {
+    buffer->clear();
+  }
+
+  void InsertOp(const InstructionOperand& op) {
+    set_->push_back(op);
+
+    if (!kSimpleFPAliasing && op.IsFPRegister())
+      fp_reps_ |= RepresentationBit(LocationOperand::cast(op).representation());
+  }
+
+  bool Contains(const InstructionOperand& op) const {
+    for (const InstructionOperand& elem : *set_) {
+      if (elem.EqualsCanonicalized(op)) return true;
+    }
+    return false;
+  }
+
+  bool ContainsOpOrAlias(const InstructionOperand& op) const {
+    if (Contains(op)) return true;
+
+    if (!kSimpleFPAliasing && op.IsFPRegister()) {
+      // Platforms where FP registers have complex aliasing need extra checks.
+      const LocationOperand& loc = LocationOperand::cast(op);
+      MachineRepresentation rep = loc.representation();
+      // If we haven't encountered mixed-rep FP registers, skip the extra
+      // checks.
+      if (!HasMixedFPReps(fp_reps_ | RepresentationBit(rep))) return false;
+
+      // Check register against aliasing registers of other FP representations.
+      MachineRepresentation other_rep1, other_rep2;
+      switch (rep) {
+        case MachineRepresentation::kFloat32:
+          other_rep1 = MachineRepresentation::kFloat64;
+          other_rep2 = MachineRepresentation::kSimd128;
+          break;
+        case MachineRepresentation::kFloat64:
+          other_rep1 = MachineRepresentation::kFloat32;
+          other_rep2 = MachineRepresentation::kSimd128;
+          break;
+        case MachineRepresentation::kSimd128:
+          other_rep1 = MachineRepresentation::kFloat32;
+          other_rep2 = MachineRepresentation::kFloat64;
+          break;
+        default:
+          UNREACHABLE();
+          break;
+      }
+      const RegisterConfiguration* config = RegisterConfiguration::Default();
+      int base = -1;
+      int aliases =
+          config->GetAliases(rep, loc.register_code(), other_rep1, &base);
+      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
+      while (aliases--) {
+        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
+                                      base + aliases))) {
+          return true;
+        }
+      }
+      aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
+      DCHECK(aliases > 0 || (aliases == 0 && base == -1));
+      while (aliases--) {
+        if (Contains(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
+                                      base + aliases))) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+ private:
+  static bool HasMixedFPReps(int reps) {
+    return reps && !base::bits::IsPowerOfTwo(reps);
+  }
+
+  ZoneVector<InstructionOperand>* set_;
+  int fp_reps_;
+};
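+
+// Illustrative example (assumption about the target): on a platform with
+// complex FP aliasing, one kFloat64 register overlaps two kFloat32 registers.
+// ContainsOpOrAlias therefore also asks the RegisterConfiguration for the
+// register codes that alias the operand in the other FP representations, so
+// that a float64 operand is reported as present when one of its float32
+// halves is already in the set.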
+
+int FindFirstNonEmptySlot(const Instruction* instr) {
+  int i = Instruction::FIRST_GAP_POSITION;
+  for (; i <= Instruction::LAST_GAP_POSITION; i++) {
+    ParallelMove* moves = instr->parallel_moves()[i];
+    if (moves == nullptr) continue;
+    for (MoveOperands* move : *moves) {
+      if (!move->IsRedundant()) return i;
+      move->Eliminate();
+    }
+    moves->clear();  // Clear this redundant move.
+  }
+  return i;
+}
+
+}  // namespace
+
+MoveOptimizer::MoveOptimizer(Zone* local_zone, InstructionSequence* code)
+    : local_zone_(local_zone),
+      code_(code),
+      local_vector_(local_zone),
+      operand_buffer1(local_zone),
+      operand_buffer2(local_zone) {}
+
+void MoveOptimizer::Run() {
+  for (Instruction* instruction : code()->instructions()) {
+    CompressGaps(instruction);
+  }
+  for (InstructionBlock* block : code()->instruction_blocks()) {
+    CompressBlock(block);
+  }
+  for (InstructionBlock* block : code()->instruction_blocks()) {
+    if (block->PredecessorCount() <= 1) continue;
+    if (!block->IsDeferred()) {
+      bool has_only_deferred = true;
+      for (RpoNumber& pred_id : block->predecessors()) {
+        if (!code()->InstructionBlockAt(pred_id)->IsDeferred()) {
+          has_only_deferred = false;
+          break;
+        }
+      }
+      // OptimizeMerge would pull common moves down into this block. If the
+      // moves occur in deferred blocks and the closest common successor is
+      // not deferred, pulling them down would lose the benefit of keeping
+      // spills/fills confined to deferred blocks.
+      if (has_only_deferred) continue;
+    }
+    OptimizeMerge(block);
+  }
+  for (Instruction* gap : code()->instructions()) {
+    FinalizeMoves(gap);
+  }
+}
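+
+// Illustrative summary (not from the original source): Run() proceeds in four
+// passes: (1) consolidate each instruction's moves into its first gap,
+// (2) push moves down within each block, (3) sink moves that are common to
+// all predecessors into their common successor, and (4) split repeated loads
+// of the same constant or slot so later copies read from the first register
+// destination instead.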
+
+void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
+  if (instruction->IsCall()) return;
+  ParallelMove* moves = instruction->parallel_moves()[0];
+  if (moves == nullptr) return;
+
+  DCHECK(instruction->parallel_moves()[1] == nullptr ||
+         instruction->parallel_moves()[1]->empty());
+
+  OperandSet outputs(&operand_buffer1);
+  OperandSet inputs(&operand_buffer2);
+
+  // Outputs and temps are treated together as potentially clobbering a
+  // destination operand.
+  for (size_t i = 0; i < instruction->OutputCount(); ++i) {
+    outputs.InsertOp(*instruction->OutputAt(i));
+  }
+  for (size_t i = 0; i < instruction->TempCount(); ++i) {
+    outputs.InsertOp(*instruction->TempAt(i));
+  }
+
+  // Input operands block elisions.
+  for (size_t i = 0; i < instruction->InputCount(); ++i) {
+    inputs.InsertOp(*instruction->InputAt(i));
+  }
+
+  // Elide moves made redundant by the instruction.
+  for (MoveOperands* move : *moves) {
+    if (outputs.ContainsOpOrAlias(move->destination()) &&
+        !inputs.ContainsOpOrAlias(move->destination())) {
+      move->Eliminate();
+    }
+  }
+
+  // The ret instruction makes any assignment before it unnecessary, except
+  // for the assignments to its inputs.
+  if (instruction->IsRet() || instruction->IsTailCall()) {
+    for (MoveOperands* move : *moves) {
+      if (!inputs.ContainsOpOrAlias(move->destination())) {
+        move->Eliminate();
+      }
+    }
+  }
+}
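+
+// Worked example (illustrative only): if the gap before "r1 = add r2, r3"
+// contains [r1 <- r4; r2 <- r5], then r1 is an output and not an input, so
+// "r1 <- r4" is eliminated, while r2 is an input and "r2 <- r5" must stay.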
+
+void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
+  if (from->IsCall()) return;
+
+  ParallelMove* from_moves = from->parallel_moves()[0];
+  if (from_moves == nullptr || from_moves->empty()) return;
+
+  OperandSet dst_cant_be(&operand_buffer1);
+  OperandSet src_cant_be(&operand_buffer2);
+
+  // If an operand is an input to the instruction, we cannot move assignments
+  // where it appears on the LHS.
+  for (size_t i = 0; i < from->InputCount(); ++i) {
+    dst_cant_be.InsertOp(*from->InputAt(i));
+  }
+  // If an operand is an output of the instruction, we cannot move assignments
+  // where it appears on the RHS, because we would lose its value before the
+  // instruction executes.
+  // Same for temp operands.
+  // The output can't appear on the LHS because we performed
+  // RemoveClobberedDestinations for the "from" instruction.
+  for (size_t i = 0; i < from->OutputCount(); ++i) {
+    src_cant_be.InsertOp(*from->OutputAt(i));
+  }
+  for (size_t i = 0; i < from->TempCount(); ++i) {
+    src_cant_be.InsertOp(*from->TempAt(i));
+  }
+  for (MoveOperands* move : *from_moves) {
+    if (move->IsRedundant()) continue;
+    // Assume dest has a value "V". If we have a "dest = y" move, then we can't
+    // move "z = dest", because z would become y rather than "V".
+    // We assume CompressMoves has happened before this, which means we don't
+    // have more than one assignment to dest.
+    src_cant_be.InsertOp(move->destination());
+  }
+
+  ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
+  // Start with all the moves that don't have conflicting source or
+  // destination operands; these are eligible for being moved down.
+  for (MoveOperands* move : *from_moves) {
+    if (move->IsRedundant()) continue;
+    if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
+      MoveKey key = {move->source(), move->destination()};
+      move_candidates.insert(key);
+    }
+  }
+  if (move_candidates.empty()) return;
+
+  // Stabilize the candidate set.
+  bool changed = false;
+  do {
+    changed = false;
+    for (auto iter = move_candidates.begin(); iter != move_candidates.end();) {
+      auto current = iter;
+      ++iter;
+      InstructionOperand src = current->source;
+      if (src_cant_be.ContainsOpOrAlias(src)) {
+        src_cant_be.InsertOp(current->destination);
+        move_candidates.erase(current);
+        changed = true;
+      }
+    }
+  } while (changed);
+
+  ParallelMove to_move(local_zone());
+  for (MoveOperands* move : *from_moves) {
+    if (move->IsRedundant()) continue;
+    MoveKey key = {move->source(), move->destination()};
+    if (move_candidates.find(key) != move_candidates.end()) {
+      to_move.AddMove(move->source(), move->destination(), code_zone());
+      move->Eliminate();
+    }
+  }
+  if (to_move.empty()) return;
+
+  ParallelMove* dest =
+      to->GetOrCreateParallelMove(Instruction::GapPosition::START, code_zone());
+
+  CompressMoves(&to_move, dest);
+  DCHECK(dest->empty());
+  for (MoveOperands* m : to_move) {
+    dest->push_back(m);
+  }
+}
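+
+// Worked example (illustrative only): let "from" be "r3 = neg r1" with the
+// gap moves [r2 <- r5; r4 <- r3]. No migrated move may write the input r1,
+// and "r4 <- r3" must stay behind because it reads the output r3, which only
+// receives its value when "from" executes. "r2 <- r5" conflicts with nothing
+// and is appended to the START gap of "to".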
+
+void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
+  if (right == nullptr) return;
+
+  MoveOpVector& eliminated = local_vector();
+  DCHECK(eliminated.empty());
+
+  if (!left->empty()) {
+    // Modify the right moves in place and collect moves that will be killed by
+    // merging the two gaps.
+    for (MoveOperands* move : *right) {
+      if (move->IsRedundant()) continue;
+      left->PrepareInsertAfter(move, &eliminated);
+    }
+    // Eliminate dead moves.
+    for (MoveOperands* to_eliminate : eliminated) {
+      to_eliminate->Eliminate();
+    }
+    eliminated.clear();
+  }
+  // Add all possibly modified moves from right side.
+  for (MoveOperands* move : *right) {
+    if (move->IsRedundant()) continue;
+    left->push_back(move);
+  }
+  // Nuke right.
+  right->clear();
+  DCHECK(eliminated.empty());
+}
+
+void MoveOptimizer::CompressGaps(Instruction* instruction) {
+  int i = FindFirstNonEmptySlot(instruction);
+  bool has_moves = i <= Instruction::LAST_GAP_POSITION;
+  USE(has_moves);
+
+  if (i == Instruction::LAST_GAP_POSITION) {
+    std::swap(instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
+              instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
+  } else if (i == Instruction::FIRST_GAP_POSITION) {
+    CompressMoves(
+        instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION],
+        instruction->parallel_moves()[Instruction::LAST_GAP_POSITION]);
+  }
+  // We either have no moves, or, after swapping or compressing, we have
+  // all the moves in the first gap position, and none in the second/end gap
+  // position.
+  ParallelMove* first =
+      instruction->parallel_moves()[Instruction::FIRST_GAP_POSITION];
+  ParallelMove* last =
+      instruction->parallel_moves()[Instruction::LAST_GAP_POSITION];
+  USE(first);
+  USE(last);
+
+  DCHECK(!has_moves ||
+         (first != nullptr && (last == nullptr || last->empty())));
+}
+
+void MoveOptimizer::CompressBlock(InstructionBlock* block) {
+  int first_instr_index = block->first_instruction_index();
+  int last_instr_index = block->last_instruction_index();
+
+  // Start by removing gap assignments where the output of the subsequent
+  // instruction appears on LHS, as long as they are not needed by its input.
+  Instruction* prev_instr = code()->instructions()[first_instr_index];
+  RemoveClobberedDestinations(prev_instr);
+
+  for (int index = first_instr_index + 1; index <= last_instr_index; ++index) {
+    Instruction* instr = code()->instructions()[index];
+    // Migrate eligible moves from prev_instr's gap into instr's gap.
+    MigrateMoves(instr, prev_instr);
+    // Remove gap assignments clobbered by instr's output.
+    RemoveClobberedDestinations(instr);
+    prev_instr = instr;
+  }
+}
+
+const Instruction* MoveOptimizer::LastInstruction(
+    const InstructionBlock* block) const {
+  return code()->instructions()[block->last_instruction_index()];
+}
+
+void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
+  DCHECK_LT(1, block->PredecessorCount());
+  // Ensure that the last instructions of all incoming blocks don't contain
+  // anything that would prevent moving gap moves across them.
+  for (RpoNumber& pred_index : block->predecessors()) {
+    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
+
+    // If the predecessor has more than one successor, we shouldn't attempt to
+    // move any of its gap moves down into this block (one of the successors),
+    // because their effect may be needed by the other successors.
+    if (pred->SuccessorCount() > 1) return;
+
+    const Instruction* last_instr =
+        code()->instructions()[pred->last_instruction_index()];
+    if (last_instr->IsCall()) return;
+    if (last_instr->TempCount() != 0) return;
+    if (last_instr->OutputCount() != 0) return;
+    for (size_t i = 0; i < last_instr->InputCount(); ++i) {
+      const InstructionOperand* op = last_instr->InputAt(i);
+      if (!op->IsConstant() && !op->IsImmediate()) return;
+    }
+  }
+  // TODO(dcarney): pass a ZoneStats down for this?
+  MoveMap move_map(local_zone());
+  size_t correct_counts = 0;
+  // Accumulate set of shared moves.
+  for (RpoNumber& pred_index : block->predecessors()) {
+    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
+    const Instruction* instr = LastInstruction(pred);
+    if (instr->parallel_moves()[0] == nullptr ||
+        instr->parallel_moves()[0]->empty()) {
+      return;
+    }
+    for (const MoveOperands* move : *instr->parallel_moves()[0]) {
+      if (move->IsRedundant()) continue;
+      InstructionOperand src = move->source();
+      InstructionOperand dst = move->destination();
+      MoveKey key = {src, dst};
+      auto res = move_map.insert(std::make_pair(key, 1));
+      if (!res.second) {
+        res.first->second++;
+        if (res.first->second == block->PredecessorCount()) {
+          correct_counts++;
+        }
+      }
+    }
+  }
+  if (move_map.empty() || correct_counts == 0) return;
+
+  // Find insertion point.
+  Instruction* instr = code()->instructions()[block->first_instruction_index()];
+
+  if (correct_counts != move_map.size()) {
+    // Moves that are unique to each predecessor won't be pushed to the common
+    // successor.
+    OperandSet conflicting_srcs(&operand_buffer1);
+    for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) {
+      auto current = iter;
+      ++iter;
+      if (current->second != block->PredecessorCount()) {
+        InstructionOperand dest = current->first.destination;
+        // Not all the moves in all the gaps are the same; some may still be
+        // common to every predecessor. We could push those down, but the
+        // destination of a move staying behind can't appear as the source of
+        // a common move, because the move staying behind would clobber that
+        // destination.
+        conflicting_srcs.InsertOp(dest);
+        move_map.erase(current);
+      }
+    }
+
+    bool changed = false;
+    do {
+      // If a common move can't be pushed to the common successor, then its
+      // destination also can't appear as source to any move being pushed.
+      changed = false;
+      for (auto iter = move_map.begin(), end = move_map.end(); iter != end;) {
+        auto current = iter;
+        ++iter;
+        DCHECK_EQ(block->PredecessorCount(), current->second);
+        if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) {
+          conflicting_srcs.InsertOp(current->first.destination);
+          move_map.erase(current);
+          changed = true;
+        }
+      }
+    } while (changed);
+  }
+
+  if (move_map.empty()) return;
+
+  DCHECK_NOT_NULL(instr);
+  bool gap_initialized = true;
+  if (instr->parallel_moves()[0] != nullptr &&
+      !instr->parallel_moves()[0]->empty()) {
+    // Will compress after insertion.
+    gap_initialized = false;
+    std::swap(instr->parallel_moves()[0], instr->parallel_moves()[1]);
+  }
+  ParallelMove* moves = instr->GetOrCreateParallelMove(
+      static_cast<Instruction::GapPosition>(0), code_zone());
+  // Delete relevant entries in predecessors and move everything to block.
+  bool first_iteration = true;
+  for (RpoNumber& pred_index : block->predecessors()) {
+    const InstructionBlock* pred = code()->InstructionBlockAt(pred_index);
+    for (MoveOperands* move : *LastInstruction(pred)->parallel_moves()[0]) {
+      if (move->IsRedundant()) continue;
+      MoveKey key = {move->source(), move->destination()};
+      auto it = move_map.find(key);
+      if (it != move_map.end()) {
+        if (first_iteration) {
+          moves->AddMove(move->source(), move->destination());
+        }
+        move->Eliminate();
+      }
+    }
+    first_iteration = false;
+  }
+  // Compress.
+  if (!gap_initialized) {
+    CompressMoves(instr->parallel_moves()[0], instr->parallel_moves()[1]);
+  }
+  CompressBlock(block);
+}
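+
+// Worked example (illustrative only): block B has predecessors P1 and P2
+// ending in plain jumps. P1's final gap holds [r1 <- r2; r3 <- [sp+8]] and
+// P2's holds [r1 <- r2; r4 <- r5]. Only "r1 <- r2" occurs in every
+// predecessor, so it is eliminated from both gaps and inserted once into the
+// first gap of B; the moves unique to one predecessor stay where they are.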
+
+namespace {
+
+bool IsSlot(const InstructionOperand& op) {
+  return op.IsStackSlot() || op.IsFPStackSlot();
+}
+
+bool LoadCompare(const MoveOperands* a, const MoveOperands* b) {
+  if (!a->source().EqualsCanonicalized(b->source())) {
+    return a->source().CompareCanonicalized(b->source());
+  }
+  if (IsSlot(a->destination()) && !IsSlot(b->destination())) return false;
+  if (!IsSlot(a->destination()) && IsSlot(b->destination())) return true;
+  return a->destination().CompareCanonicalized(b->destination());
+}
+
+}  // namespace
+
+// Split multiple loads of the same constant or stack slot off into the second
+// slot and keep remaining moves in the first slot.
+void MoveOptimizer::FinalizeMoves(Instruction* instr) {
+  MoveOpVector& loads = local_vector();
+  DCHECK(loads.empty());
+
+  ParallelMove* parallel_moves = instr->parallel_moves()[0];
+  if (parallel_moves == nullptr) return;
+  // Find all the loads.
+  for (MoveOperands* move : *parallel_moves) {
+    if (move->IsRedundant()) continue;
+    if (move->source().IsConstant() || IsSlot(move->source())) {
+      loads.push_back(move);
+    }
+  }
+  if (loads.empty()) return;
+  // Group the loads by source, moving the preferred destination to the
+  // beginning of the group.
+  std::sort(loads.begin(), loads.end(), LoadCompare);
+  MoveOperands* group_begin = nullptr;
+  for (MoveOperands* load : loads) {
+    // New group.
+    if (group_begin == nullptr ||
+        !load->source().EqualsCanonicalized(group_begin->source())) {
+      group_begin = load;
+      continue;
+    }
+    // Nothing to be gained from splitting here.
+    if (IsSlot(group_begin->destination())) continue;
+    // Insert new move into slot 1.
+    ParallelMove* slot_1 = instr->GetOrCreateParallelMove(
+        static_cast<Instruction::GapPosition>(1), code_zone());
+    slot_1->AddMove(group_begin->destination(), load->destination());
+    load->Eliminate();
+  }
+  loads.clear();
+}
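+
+// Worked example (illustrative only): if the first gap holds
+// [r1 <- [sp+16]; r2 <- [sp+16]; [sp+24] <- [sp+16]], the three loads share a
+// source. The register destination r1 leads the group, and the other two are
+// rewritten in the second gap as [r2 <- r1] and [[sp+24] <- r1], so the stack
+// slot is read only once.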
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/move-optimizer.h b/src/compiler/backend/move-optimizer.h
new file mode 100644
index 0000000..a63bd52
--- /dev/null
+++ b/src/compiler/backend/move-optimizer.h
@@ -0,0 +1,69 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_MOVE_OPTIMIZER_H_
+#define V8_COMPILER_BACKEND_MOVE_OPTIMIZER_H_
+
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class V8_EXPORT_PRIVATE MoveOptimizer final {
+ public:
+  MoveOptimizer(Zone* local_zone, InstructionSequence* code);
+  MoveOptimizer(const MoveOptimizer&) = delete;
+  MoveOptimizer& operator=(const MoveOptimizer&) = delete;
+
+  void Run();
+
+ private:
+  using MoveOpVector = ZoneVector<MoveOperands*>;
+  using Instructions = ZoneVector<Instruction*>;
+
+  InstructionSequence* code() const { return code_; }
+  Zone* local_zone() const { return local_zone_; }
+  Zone* code_zone() const { return code()->zone(); }
+  MoveOpVector& local_vector() { return local_vector_; }
+
+  // Consolidate moves into the first gap.
+  void CompressGaps(Instruction* instr);
+
+  // Attempt to push moves down to the block's last instruction where possible.
+  void CompressBlock(InstructionBlock* block);
+
+  // Merge the moves in "right" into "left" and clear "right".
+  void CompressMoves(ParallelMove* left, MoveOpVector* right);
+
+  // Push down those moves in the gap of the "from" instruction that change
+  // neither the semantics of "from" nor the semantics of the moves that
+  // remain behind.
+  void MigrateMoves(Instruction* to, Instruction* from);
+
+  void RemoveClobberedDestinations(Instruction* instruction);
+
+  const Instruction* LastInstruction(const InstructionBlock* block) const;
+
+  // Consolidate common moves appearing across all predecessors of a block.
+  void OptimizeMerge(InstructionBlock* block);
+  void FinalizeMoves(Instruction* instr);
+
+  Zone* const local_zone_;
+  InstructionSequence* const code_;
+  MoveOpVector local_vector_;
+
+  // Reusable buffers for storing operand sets. We need at most two sets
+  // at any given time, so we create two buffers.
+  ZoneVector<InstructionOperand> operand_buffer1;
+  ZoneVector<InstructionOperand> operand_buffer2;
+};
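+
+// Usage sketch (assumption, not part of the original header): the optimizer
+// is expected to be run once over a fully built InstructionSequence, e.g.
+//
+//   MoveOptimizer optimizer(temp_zone, sequence);
+//   optimizer.Run();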
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_MOVE_OPTIMIZER_H_
diff --git a/src/compiler/backend/ppc/OWNERS b/src/compiler/backend/ppc/OWNERS
new file mode 100644
index 0000000..02c2cd7
--- /dev/null
+++ b/src/compiler/backend/ppc/OWNERS
@@ -0,0 +1,5 @@
+junyan@redhat.com
+joransiu@ca.ibm.com
+midawson@redhat.com
+mfarazma@redhat.com
+vasili.skurydzin@ibm.com
diff --git a/src/compiler/backend/ppc/code-generator-ppc.cc b/src/compiler/backend/ppc/code-generator-ppc.cc
new file mode 100644
index 0000000..ee1ef6d
--- /dev/null
+++ b/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -0,0 +1,4151 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/callable.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/numbers/double.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+#define kScratchReg r11
+
+// Adds PPC-specific methods to convert InstructionOperands.
+class PPCOperandConverter final : public InstructionOperandConverter {
+ public:
+  PPCOperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  size_t OutputCount() { return instr_->OutputCount(); }
+
+  RCBit OutputRCBit() const {
+    switch (instr_->flags_mode()) {
+      case kFlags_branch:
+      case kFlags_branch_and_poison:
+      case kFlags_deoptimize:
+      case kFlags_deoptimize_and_poison:
+      case kFlags_set:
+      case kFlags_trap:
+        return SetRC;
+      case kFlags_none:
+        return LeaveRC;
+    }
+    UNREACHABLE();
+  }
+
+  bool CompareLogical() const {
+    switch (instr_->flags_condition()) {
+      case kUnsignedLessThan:
+      case kUnsignedGreaterThanOrEqual:
+      case kUnsignedLessThanOrEqual:
+      case kUnsignedGreaterThan:
+        return true;
+      default:
+        return false;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputImmediate(size_t index) {
+    Constant constant = ToConstant(instr_->InputAt(index));
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Operand(constant.ToInt32());
+      case Constant::kFloat32:
+        return Operand::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Operand::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kInt64:
+#if V8_TARGET_ARCH_PPC64
+        return Operand(constant.ToInt64());
+#endif
+      case Constant::kExternalReference:
+        return Operand(constant.ToExternalReference());
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kCompressedHeapObject:
+      case Constant::kHeapObject:
+      case Constant::kRpoNumber:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(AddressingMode* mode, size_t* first_index) {
+    const size_t index = *first_index;
+    AddressingMode addr_mode = AddressingModeField::decode(instr_->opcode());
+    if (mode) *mode = addr_mode;
+    switch (addr_mode) {
+      case kMode_None:
+        break;
+      case kMode_MRI:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_MRR:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(AddressingMode* mode = NULL,
+                           size_t first_index = 0) {
+    return MemoryOperand(mode, &first_index);
+  }
+
+  MemOperand ToMemOperand(InstructionOperand* op) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand SlotToMemOperand(int slot) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+};
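+
+// Illustrative note (assumption): kMode_MRI denotes a base-register plus
+// immediate-displacement operand and kMode_MRR a base-register plus
+// index-register operand; the load/store macros below select the D-form or
+// X-form PPC instruction accordingly.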
+
+static inline bool HasRegisterInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsRegister();
+}
+
+namespace {
+
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register offset,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(offset),
+        offset_immediate_(0),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, int32_t offset,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(no_reg),
+        offset_immediate_(offset),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    if (COMPRESS_POINTERS_BOOL) {
+      __ DecompressTaggedPointer(value_, value_);
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
+    if (offset_ == no_reg) {
+      __ addi(scratch1_, object_, Operand(offset_immediate_));
+    } else {
+      DCHECK_EQ(0, offset_immediate_);
+      __ add(scratch1_, object_, offset_);
+    }
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore lr if the frame was elided.
+      __ mflr(scratch0_);
+      __ Push(scratch0_);
+      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
+    }
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      // We need to save and restore lr if the frame was elided.
+      __ Pop(scratch0_);
+      __ mtlr(scratch0_);
+      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
+    }
+  }
+
+ private:
+  Register const object_;
+  Register const offset_;
+  int32_t const offset_immediate_;  // Valid if offset_ == no_reg.
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode stub_mode_;
+  bool must_save_lr_;
+  UnwindingInfoWriter* const unwinding_info_writer_;
+  Zone* zone_;
+};
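+
+// Illustrative note (assumption): this out-of-line code is the slow path of
+// the GC write barrier. It bails out when the stored value is a Smi or its
+// page is not of interest to the GC, computes the slot address as
+// object + offset, and calls the record-write (or ephemeron-key) stub, saving
+// and restoring lr when no frame was set up.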
+
+Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kSignedLessThan:
+    case kUnsignedLessThan:
+      return lt;
+    case kSignedGreaterThanOrEqual:
+    case kUnsignedGreaterThanOrEqual:
+      return ge;
+    case kSignedLessThanOrEqual:
+    case kUnsignedLessThanOrEqual:
+      return le;
+    case kSignedGreaterThan:
+    case kUnsignedGreaterThan:
+      return gt;
+    case kOverflow:
+      // Overflow checked for add/sub only.
+      switch (op) {
+#if V8_TARGET_ARCH_PPC64
+        case kPPC_Add32:
+        case kPPC_Add64:
+        case kPPC_Sub:
+#endif
+        case kPPC_AddWithOverflow32:
+        case kPPC_SubWithOverflow32:
+          return lt;
+        default:
+          break;
+      }
+      break;
+    case kNotOverflow:
+      switch (op) {
+#if V8_TARGET_ARCH_PPC64
+        case kPPC_Add32:
+        case kPPC_Add64:
+        case kPPC_Sub:
+#endif
+        case kPPC_AddWithOverflow32:
+        case kPPC_SubWithOverflow32:
+          return ge;
+        default:
+          break;
+      }
+      break;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
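+
+// Illustrative note (assumption about the macro-assembler helpers): the
+// Add/SubAndCheckForOverflow sequences used below leave a value in
+// kScratchReg whose sign indicates overflow, and the extsw with SetRC copies
+// that sign into cr0, which is why kOverflow maps to lt and kNotOverflow to
+// ge here.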
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr,
+                                   PPCOperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode()));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->and_(value, value, kSpeculationPoisonRegister);
+  }
+}
+
+}  // namespace
+
+#define ASSEMBLE_FLOAT_UNOP_RC(asm_instr, round)                     \
+  do {                                                               \
+    __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
+                 i.OutputRCBit());                                   \
+    if (round) {                                                     \
+      __ frsp(i.OutputDoubleRegister(), i.OutputDoubleRegister());   \
+    }                                                                \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_BINOP_RC(asm_instr, round)                    \
+  do {                                                               \
+    __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
+                 i.InputDoubleRegister(1), i.OutputRCBit());         \
+    if (round) {                                                     \
+      __ frsp(i.OutputDoubleRegister(), i.OutputDoubleRegister());   \
+    }                                                                \
+  } while (0)
+
+#define ASSEMBLE_BINOP(asm_instr_reg, asm_instr_imm)           \
+  do {                                                         \
+    if (HasRegisterInput(instr, 1)) {                          \
+      __ asm_instr_reg(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputRegister(1));                    \
+    } else {                                                   \
+      __ asm_instr_imm(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputImmediate(1));                   \
+    }                                                          \
+  } while (0)
+
+#define ASSEMBLE_BINOP_RC(asm_instr_reg, asm_instr_imm)        \
+  do {                                                         \
+    if (HasRegisterInput(instr, 1)) {                          \
+      __ asm_instr_reg(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputRegister(1), i.OutputRCBit());   \
+    } else {                                                   \
+      __ asm_instr_imm(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputImmediate(1), i.OutputRCBit());  \
+    }                                                          \
+  } while (0)
+
+#define ASSEMBLE_BINOP_INT_RC(asm_instr_reg, asm_instr_imm)    \
+  do {                                                         \
+    if (HasRegisterInput(instr, 1)) {                          \
+      __ asm_instr_reg(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputRegister(1), i.OutputRCBit());   \
+    } else {                                                   \
+      __ asm_instr_imm(i.OutputRegister(), i.InputRegister(0), \
+                       i.InputInt32(1), i.OutputRCBit());      \
+    }                                                          \
+  } while (0)
+
+#define ASSEMBLE_ADD_WITH_OVERFLOW()                                    \
+  do {                                                                  \
+    if (HasRegisterInput(instr, 1)) {                                   \
+      __ AddAndCheckForOverflow(i.OutputRegister(), i.InputRegister(0), \
+                                i.InputRegister(1), kScratchReg, r0);   \
+    } else {                                                            \
+      __ AddAndCheckForOverflow(i.OutputRegister(), i.InputRegister(0), \
+                                i.InputInt32(1), kScratchReg, r0);      \
+    }                                                                   \
+  } while (0)
+
+#define ASSEMBLE_SUB_WITH_OVERFLOW()                                    \
+  do {                                                                  \
+    if (HasRegisterInput(instr, 1)) {                                   \
+      __ SubAndCheckForOverflow(i.OutputRegister(), i.InputRegister(0), \
+                                i.InputRegister(1), kScratchReg, r0);   \
+    } else {                                                            \
+      __ AddAndCheckForOverflow(i.OutputRegister(), i.InputRegister(0), \
+                                -i.InputInt32(1), kScratchReg, r0);     \
+    }                                                                   \
+  } while (0)
+
+#if V8_TARGET_ARCH_PPC64
+#define ASSEMBLE_ADD_WITH_OVERFLOW32()         \
+  do {                                         \
+    ASSEMBLE_ADD_WITH_OVERFLOW();              \
+    __ extsw(kScratchReg, kScratchReg, SetRC); \
+  } while (0)
+
+#define ASSEMBLE_SUB_WITH_OVERFLOW32()         \
+  do {                                         \
+    ASSEMBLE_SUB_WITH_OVERFLOW();              \
+    __ extsw(kScratchReg, kScratchReg, SetRC); \
+  } while (0)
+#else
+#define ASSEMBLE_ADD_WITH_OVERFLOW32 ASSEMBLE_ADD_WITH_OVERFLOW
+#define ASSEMBLE_SUB_WITH_OVERFLOW32 ASSEMBLE_SUB_WITH_OVERFLOW
+#endif
+
+#define ASSEMBLE_COMPARE(cmp_instr, cmpl_instr)                        \
+  do {                                                                 \
+    const CRegister cr = cr0;                                          \
+    if (HasRegisterInput(instr, 1)) {                                  \
+      if (i.CompareLogical()) {                                        \
+        __ cmpl_instr(i.InputRegister(0), i.InputRegister(1), cr);     \
+      } else {                                                         \
+        __ cmp_instr(i.InputRegister(0), i.InputRegister(1), cr);      \
+      }                                                                \
+    } else {                                                           \
+      if (i.CompareLogical()) {                                        \
+        __ cmpl_instr##i(i.InputRegister(0), i.InputImmediate(1), cr); \
+      } else {                                                         \
+        __ cmp_instr##i(i.InputRegister(0), i.InputImmediate(1), cr);  \
+      }                                                                \
+    }                                                                  \
+    DCHECK_EQ(SetRC, i.OutputRCBit());                                 \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_COMPARE(cmp_instr)                                 \
+  do {                                                                    \
+    const CRegister cr = cr0;                                             \
+    __ cmp_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1), cr); \
+    DCHECK_EQ(SetRC, i.OutputRCBit());                                    \
+  } while (0)
+
+#define ASSEMBLE_MODULO(div_instr, mul_instr)                        \
+  do {                                                               \
+    const Register scratch = kScratchReg;                            \
+    __ div_instr(scratch, i.InputRegister(0), i.InputRegister(1));   \
+    __ mul_instr(scratch, scratch, i.InputRegister(1));              \
+    __ sub(i.OutputRegister(), i.InputRegister(0), scratch, LeaveOE, \
+           i.OutputRCBit());                                         \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MODULO()                                             \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                             \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                       \
+                            i.InputDoubleRegister(1));                      \
+    __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());                                    \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                            \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 1, kScratchReg);                                \
+    __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
+    /* Move the result in the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());                                       \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                           \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                                \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
+                            i.InputDoubleRegister(1));                         \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2);    \
+    /* Move the result in the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());                                       \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MAX()                                            \
+  do {                                                                  \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                 \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                \
+    DoubleRegister result_reg = i.OutputDoubleRegister();               \
+    Label check_zero, return_left, return_right, return_nan, done;      \
+    __ fcmpu(left_reg, right_reg);                                      \
+    __ bunordered(&return_nan);                                         \
+    __ beq(&check_zero);                                                \
+    __ bge(&return_left);                                               \
+    __ b(&return_right);                                                \
+                                                                        \
+    __ bind(&check_zero);                                               \
+    __ fcmpu(left_reg, kDoubleRegZero);                                 \
+    /* left == right != 0. */                                           \
+    __ bne(&return_left);                                               \
+    /* At this point, both left and right are either 0 or -0. */        \
+    __ fadd(result_reg, left_reg, right_reg);                           \
+    __ b(&done);                                                        \
+                                                                        \
+    __ bind(&return_nan);                                               \
+    /* If left or right are NaN, fadd propagates the appropriate one.*/ \
+    __ fadd(result_reg, left_reg, right_reg);                           \
+    __ b(&done);                                                        \
+                                                                        \
+    __ bind(&return_right);                                             \
+    if (right_reg != result_reg) {                                      \
+      __ fmr(result_reg, right_reg);                                    \
+    }                                                                   \
+    __ b(&done);                                                        \
+                                                                        \
+    __ bind(&return_left);                                              \
+    if (left_reg != result_reg) {                                       \
+      __ fmr(result_reg, left_reg);                                     \
+    }                                                                   \
+    __ bind(&done);                                                     \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MIN()                                              \
+  do {                                                                    \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                   \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                  \
+    DoubleRegister result_reg = i.OutputDoubleRegister();                 \
+    Label check_zero, return_left, return_right, return_nan, done;        \
+    __ fcmpu(left_reg, right_reg);                                        \
+    __ bunordered(&return_nan);                                           \
+    __ beq(&check_zero);                                                  \
+    __ ble(&return_left);                                                 \
+    __ b(&return_right);                                                  \
+                                                                          \
+    __ bind(&check_zero);                                                 \
+    __ fcmpu(left_reg, kDoubleRegZero);                                   \
+    /* left == right != 0. */                                             \
+    __ bne(&return_left);                                                 \
+    /* At this point, both left and right are either 0 or -0. */          \
+    /* Min: The algorithm is: -((-L) + (-R)), which in case of L and R */ \
+    /* being different registers is most efficiently expressed */         \
+    /* as -((-L) - R). */                                                 \
+    __ fneg(kScratchDoubleReg, left_reg);                                 \
+    if (kScratchDoubleReg == right_reg) {                                 \
+      __ fadd(result_reg, kScratchDoubleReg, right_reg);                  \
+    } else {                                                              \
+      __ fsub(result_reg, kScratchDoubleReg, right_reg);                  \
+    }                                                                     \
+    __ fneg(result_reg, result_reg);                                      \
+    __ b(&done);                                                          \
+                                                                          \
+    __ bind(&return_nan);                                                 \
+    /* If left or right are NaN, fadd propagates the appropriate one.*/   \
+    __ fadd(result_reg, left_reg, right_reg);                             \
+    __ b(&done);                                                          \
+                                                                          \
+    __ bind(&return_right);                                               \
+    if (right_reg != result_reg) {                                        \
+      __ fmr(result_reg, right_reg);                                      \
+    }                                                                     \
+    __ b(&done);                                                          \
+                                                                          \
+    __ bind(&return_left);                                                \
+    if (left_reg != result_reg) {                                         \
+      __ fmr(result_reg, left_reg);                                       \
+    }                                                                     \
+    __ bind(&done);                                                       \
+  } while (0)
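+
+// Signed-zero check for the sequence above (illustrative only): with
+// L = -0.0 and R = +0.0, -((-L) + (-R)) = -((+0.0) + (-0.0)) = -(+0.0) = -0.0,
+// the IEEE-754 minimum, whereas the plain fadd used for the max case would
+// produce +0.0.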
+
+#define ASSEMBLE_LOAD_FLOAT(asm_instr, asm_instrx)    \
+  do {                                                \
+    DoubleRegister result = i.OutputDoubleRegister(); \
+    AddressingMode mode = kMode_None;                 \
+    MemOperand operand = i.MemoryOperand(&mode);      \
+    bool is_atomic = i.InputInt32(2);                 \
+    if (mode == kMode_MRI) {                          \
+      __ asm_instr(result, operand);                  \
+    } else {                                          \
+      __ asm_instrx(result, operand);                 \
+    }                                                 \
+    if (is_atomic) __ lwsync();                       \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());              \
+  } while (0)
+
+#define ASSEMBLE_LOAD_INTEGER(asm_instr, asm_instrx) \
+  do {                                               \
+    Register result = i.OutputRegister();            \
+    AddressingMode mode = kMode_None;                \
+    MemOperand operand = i.MemoryOperand(&mode);     \
+    bool is_atomic = i.InputInt32(2);                \
+    if (mode == kMode_MRI) {                         \
+      __ asm_instr(result, operand);                 \
+    } else {                                         \
+      __ asm_instrx(result, operand);                \
+    }                                                \
+    if (is_atomic) __ lwsync();                      \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());             \
+  } while (0)
+
+#define ASSEMBLE_STORE_FLOAT(asm_instr, asm_instrx)      \
+  do {                                                   \
+    size_t index = 0;                                    \
+    AddressingMode mode = kMode_None;                    \
+    MemOperand operand = i.MemoryOperand(&mode, &index); \
+    DoubleRegister value = i.InputDoubleRegister(index); \
+    bool is_atomic = i.InputInt32(3);                    \
+    if (is_atomic) __ lwsync();                          \
+    /* removed frsp as instruction-selector checked */   \
+    /* value to be kFloat32 */                           \
+    if (mode == kMode_MRI) {                             \
+      __ asm_instr(value, operand);                      \
+    } else {                                             \
+      __ asm_instrx(value, operand);                     \
+    }                                                    \
+    if (is_atomic) __ sync();                            \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());                 \
+  } while (0)
+
+#define ASSEMBLE_STORE_INTEGER(asm_instr, asm_instrx)    \
+  do {                                                   \
+    size_t index = 0;                                    \
+    AddressingMode mode = kMode_None;                    \
+    MemOperand operand = i.MemoryOperand(&mode, &index); \
+    Register value = i.InputRegister(index);             \
+    bool is_atomic = i.InputInt32(3);                    \
+    if (is_atomic) __ lwsync();                          \
+    if (mode == kMode_MRI) {                             \
+      __ asm_instr(value, operand);                      \
+    } else {                                             \
+      __ asm_instrx(value, operand);                     \
+    }                                                    \
+    if (is_atomic) __ sync();                            \
+    DCHECK_EQ(LeaveRC, i.OutputRCBit());                 \
+  } while (0)
+
+#if V8_TARGET_ARCH_PPC64
+// TODO(mbrandy): fix paths that produce garbage in offset's upper 32-bits.
+#define CleanUInt32(x) __ ClearLeftImm(x, x, Operand(32))
+#else
+#define CleanUInt32(x)
+#endif
+
+#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr)       \
+  do {                                                                  \
+    Label exchange;                                                     \
+    __ lwsync();                                                        \
+    __ bind(&exchange);                                                 \
+    __ load_instr(i.OutputRegister(0),                                  \
+                  MemOperand(i.InputRegister(0), i.InputRegister(1)));  \
+    __ store_instr(i.InputRegister(2),                                  \
+                   MemOperand(i.InputRegister(0), i.InputRegister(1))); \
+    __ bne(&exchange, cr0);                                             \
+    __ sync();                                                          \
+  } while (0)
+
+#define ASSEMBLE_ATOMIC_BINOP(bin_inst, load_inst, store_inst)               \
+  do {                                                                       \
+    MemOperand operand = MemOperand(i.InputRegister(0), i.InputRegister(1)); \
+    Label binop;                                                             \
+    __ lwsync();                                                             \
+    __ bind(&binop);                                                         \
+    __ load_inst(i.OutputRegister(), operand);                               \
+    __ bin_inst(kScratchReg, i.OutputRegister(), i.InputRegister(2));        \
+    __ store_inst(kScratchReg, operand);                                     \
+    __ bne(&binop, cr0);                                                     \
+    __ sync();                                                               \
+  } while (false)
+
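+// Like ASSEMBLE_ATOMIC_BINOP, but sign-extends the narrow loaded value
+// (ext_instr) before applying the binary operation.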
+#define ASSEMBLE_ATOMIC_BINOP_SIGN_EXT(bin_inst, load_inst, store_inst,      \
+                                       ext_instr)                            \
+  do {                                                                       \
+    MemOperand operand = MemOperand(i.InputRegister(0), i.InputRegister(1)); \
+    Label binop;                                                             \
+    __ lwsync();                                                             \
+    __ bind(&binop);                                                         \
+    __ load_inst(i.OutputRegister(), operand);                               \
+    __ ext_instr(i.OutputRegister(), i.OutputRegister());                    \
+    __ bin_inst(kScratchReg, i.OutputRegister(), i.InputRegister(2));        \
+    __ store_inst(kScratchReg, operand);                                     \
+    __ bne(&binop, cr0);                                                     \
+    __ sync();                                                               \
+  } while (false)
+
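+// Compare-and-swap: the expected value (input 2) is extended into r0; if the
+// loaded value differs we exit, otherwise input 3 is written with a
+// store-conditional and the loop retries if the reservation was lost.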
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE(cmp_inst, load_inst, store_inst,    \
+                                         input_ext)                          \
+  do {                                                                       \
+    MemOperand operand = MemOperand(i.InputRegister(0), i.InputRegister(1)); \
+    Label loop;                                                              \
+    Label exit;                                                              \
+    __ input_ext(r0, i.InputRegister(2));                                    \
+    __ lwsync();                                                             \
+    __ bind(&loop);                                                          \
+    __ load_inst(i.OutputRegister(), operand);                               \
+    __ cmp_inst(i.OutputRegister(), r0, cr0);                                \
+    __ bne(&exit, cr0);                                                      \
+    __ store_inst(i.InputRegister(3), operand);                              \
+    __ bne(&loop, cr0);                                                      \
+    __ bind(&exit);                                                          \
+    __ sync();                                                               \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_SIGN_EXT(cmp_inst, load_inst,       \
+                                                  store_inst, ext_instr)     \
+  do {                                                                       \
+    MemOperand operand = MemOperand(i.InputRegister(0), i.InputRegister(1)); \
+    Label loop;                                                              \
+    Label exit;                                                              \
+    __ ext_instr(r0, i.InputRegister(2));                                    \
+    __ lwsync();                                                             \
+    __ bind(&loop);                                                          \
+    __ load_inst(i.OutputRegister(), operand);                               \
+    __ ext_instr(i.OutputRegister(), i.OutputRegister());                    \
+    __ cmp_inst(i.OutputRegister(), r0, cr0);                                \
+    __ bne(&exit, cr0);                                                      \
+    __ store_inst(i.InputRegister(3), operand);                              \
+    __ bne(&loop, cr0);                                                      \
+    __ bind(&exit);                                                          \
+    __ sync();                                                               \
+  } while (false)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ LeaveFrame(StackFrame::MANUAL);
+  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ RestoreFrameStateForTailCall();
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ LoadP(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ cmpi(scratch1,
+          Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ bne(&done);
+
+  // Load the arguments count from the current arguments adaptor frame
+  // (note: it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ LoadP(caller_args_count_reg,
+           MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
+void FlushPendingPushRegisters(TurboAssembler* tasm,
+                               FrameAccessState* frame_access_state,
+                               ZoneVector<Register>* pending_pushes) {
+  switch (pending_pushes->size()) {
+    case 0:
+      break;
+    case 1:
+      tasm->Push((*pending_pushes)[0]);
+      break;
+    case 2:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
+      break;
+    case 3:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
+                 (*pending_pushes)[2]);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  frame_access_state->IncreaseSPDelta(pending_pushes->size());
+  pending_pushes->clear();
+}
+
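+// Grows or shrinks the stack so that the slot count above sp matches
+// new_slot_above_sp. Pending pushes are flushed first so their slot offsets
+// stay valid; shrinking only happens when allow_shrinkage is set.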
+void AdjustStackPointerForTailCall(
+    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
+    ZoneVector<Register>* pending_pushes = nullptr,
+    bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->Add(sp, sp, -stack_slot_delta * kSystemPointerSize, r0);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->Add(sp, sp, -stack_slot_delta * kSystemPointerSize, r0);
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
+
+  if (!pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot)) {
+    PPCOperandConverter g(this, instr);
+    ZoneVector<Register> pending_pushes(zone());
+    for (auto move : pushes) {
+      LocationOperand destination_location(
+          LocationOperand::cast(move->destination()));
+      InstructionOperand source(move->source());
+      AdjustStackPointerForTailCall(
+          tasm(), frame_access_state(),
+          destination_location.index() - pending_pushes.size(),
+          &pending_pushes);
+      // Pushes of non-register data types are not supported.
+      DCHECK(source.IsRegister());
+      LocationOperand source_location(LocationOperand::cast(source));
+      pending_pushes.push_back(source_location.GetRegister());
+      // TODO(arm): We can push more than 3 registers at once. Add support in
+      // the macro-assembler for pushing a list of registers.
+      if (pending_pushes.size() == 3) {
+        FlushPendingPushRegisters(tasm(), frame_access_state(),
+                                  &pending_pushes);
+      }
+      move->Eliminate();
+    }
+    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
+  }
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, nullptr, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  Register scratch = kScratchReg;
+  __ ComputeCodeStartAddress(scratch);
+  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, jump to
+// the CompileLazyDeoptimizedCode builtin. To do this we need to:
+//    1. read from memory the word that contains the mark bit, which can be
+//       found in the flags of the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if the bit is set, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  if (FLAG_debug_code) {
+    // Check that {kJavaScriptCallCodeStartRegister} is correct.
+    __ ComputeCodeStartAddress(ip);
+    __ cmp(ip, kJavaScriptCallCodeStartRegister);
+    __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+  }
+
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ LoadTaggedPointerField(
+      r11, MemOperand(kJavaScriptCallCodeStartRegister, offset));
+  __ LoadWordArith(
+      r11, FieldMemOperand(r11, CodeDataContainer::kKindSpecificFlagsOffset));
+  __ TestBit(r11, Code::kMarkedForDeoptimizationBit);
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, ne, cr0);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  Register scratch = kScratchReg;
+
+  __ ComputeCodeStartAddress(scratch);
+
+  // Calculate a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  __ cmp(kJavaScriptCallCodeStartRegister, scratch);
+  __ li(scratch, Operand::Zero());
+  __ notx(kSpeculationPoisonRegister, scratch);
+  __ isel(eq, kSpeculationPoisonRegister, kSpeculationPoisonRegister, scratch);
+}
+
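+// Mask the incoming JS function, context, and stack pointer with the
+// speculation poison register so that mis-speculated code only sees zeroed
+// values.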
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ and_(sp, sp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  PPCOperandConverter i(this, instr);
+  ArchOpcode opcode = ArchOpcodeField::decode(instr->opcode());
+
+  switch (opcode) {
+    case kArchCallCodeObject: {
+      v8::internal::Assembler::BlockTrampolinePoolScope block_trampoline_pool(
+          tasm());
+      if (HasRegisterInput(instr, 0)) {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ CallCodeObject(reg);
+      } else {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      }
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      // We must not share code targets for calls to builtins for wasm code, as
+      // they might need to be patched individually.
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+#ifdef V8_TARGET_ARCH_PPC64
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+#else
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+#endif
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        __ Call(i.InputRegister(0));
+      }
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (HasRegisterInput(instr, 0)) {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ JumpCodeObject(reg);
+      } else {
+        // We cannot use the constant pool to load the target since
+        // we've already restored the caller's frame.
+        ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      }
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      // We must not share code targets for calls to builtins for wasm code, as
+      // they might need to be patched individually.
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+#ifdef V8_TARGET_ARCH_PPC64
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+#else
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+#endif
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        __ Jump(i.InputRegister(0));
+      }
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      __ Jump(reg);
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      v8::internal::Assembler::BlockTrampolinePoolScope block_trampoline_pool(
+          tasm());
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ LoadTaggedPointerField(
+            kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ cmp(cp, kScratchReg);
+        __ Assert(eq, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == r5, "ABI mismatch");
+      __ LoadTaggedPointerField(r5,
+                                FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(r5);
+      RecordCallPosition(instr);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters, kScratchReg);
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchComment:
+#ifdef V8_TARGET_ARCH_PPC64
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+#else
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+#endif
+      break;
+    case kArchCallCFunction: {
+      int misc_field = MiscField::decode(instr->opcode());
+      int num_parameters = misc_field;
+      bool has_function_descriptor = false;
+      Label start_call;
+      bool isWasmCapiFunction =
+          linkage()->GetIncomingDescriptor()->IsWasmCapiFunction();
+      int offset = (FLAG_enable_embedded_constant_pool ? 20 : 23) * kInstrSize;
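+      // "offset" is the expected code size from the start_call label to the
+      // instruction following the C call; it is used to compute the calling
+      // PC stored in the Wasm exit frame and is CHECKed below against
+      // SizeOfCodeGeneratedSince(&start_call).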
+
+#if ABI_USES_FUNCTION_DESCRIPTORS
+      // AIX/PPC64BE Linux uses a function descriptor
+      int kNumParametersMask = kHasFunctionDescriptorBitMask - 1;
+      num_parameters = kNumParametersMask & misc_field;
+      has_function_descriptor =
+          (misc_field & kHasFunctionDescriptorBitMask) != 0;
+      // AIX may emit 2 extra load instructions under CallCFunctionHelper
+      // because the callee has a function descriptor.
+      if (has_function_descriptor) {
+        offset += 2 * kInstrSize;
+      }
+#endif
+      if (isWasmCapiFunction) {
+        __ mflr(r0);
+        __ bind(&start_call);
+        __ LoadPC(kScratchReg);
+        __ addi(kScratchReg, kScratchReg, Operand(offset));
+        __ StoreP(kScratchReg,
+                  MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+        __ mtlr(r0);
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters, has_function_descriptor);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters, has_function_descriptor);
+      }
+      // TODO(miladfar): In the above block, kScratchReg must be populated
+      // with the strictly-correct PC, which is the return address at this
+      // spot. The offset is set to 36 (9 * kInstrSize) on pLinux and 44 on
+      // AIX, counted from where the label is bound up to this spot. If the
+      // CHECK below fails, replace the offset with the value it suggests.
+      // More info on f5ab7d3.
+      if (isWasmCapiFunction) {
+        CHECK_EQ(offset, __ SizeOfCodeGeneratedSince(&start_call));
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, certain architectures (e.g.
+      // ARM) may have a stricter alignment requirement, causing the old SP to
+      // be saved on the stack. In those cases, we cannot calculate the SP
+      // delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == r4);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ stop();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchNop:
+    case kArchThrowTerminator:
+      // don't emit code for nops.
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ b(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchFramePointer:
+      __ mr(i.OutputRegister(), fp);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ LoadP(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ mr(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison, to account for the size difference between an optimized
+      // frame and the unoptimized frames it contains.
+
+      Register lhs_register = sp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = i.TempRegister(0);
+        if (is_int16(offset)) {
+          __ subi(lhs_register, sp, Operand(offset));
+        } else {
+          __ mov(kScratchReg, Operand(offset));
+          __ sub(lhs_register, sp, kScratchReg);
+        }
+      }
+
+      constexpr size_t kValueIndex = 0;
+      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
+      __ cmpl(lhs_register, i.InputRegister(kValueIndex), cr0);
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ LoadSmiLiteral(i.OutputRegister(),
+                        Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kArchStoreWithWriteBarrier: {
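+      // Emit the store, then branch to the out-of-line RecordWrite stub when
+      // the object's page has kPointersFromHereAreInterestingMask set.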
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      Register value = i.InputRegister(2);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      OutOfLineRecordWrite* ool;
+
+      AddressingMode addressing_mode =
+          AddressingModeField::decode(instr->opcode());
+      if (addressing_mode == kMode_MRI) {
+        int32_t offset = i.InputInt32(1);
+        ool = zone()->New<OutOfLineRecordWrite>(
+            this, object, offset, value, scratch0, scratch1, mode,
+            DetermineStubCallMode(), &unwinding_info_writer_);
+        __ StoreTaggedField(value, MemOperand(object, offset), r0);
+      } else {
+        DCHECK_EQ(kMode_MRR, addressing_mode);
+        Register offset(i.InputRegister(1));
+        ool = zone()->New<OutOfLineRecordWrite>(
+            this, object, offset, value, scratch0, scratch1, mode,
+            DetermineStubCallMode(), &unwinding_info_writer_);
+        __ StoreTaggedFieldX(value, MemOperand(object, offset), r0);
+      }
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask, ne,
+                       ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      __ addi(i.OutputRegister(), offset.from_stack_pointer() ? sp : fp,
+              Operand(offset.offset()));
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      __ and_(i.OutputRegister(), i.InputRegister(0),
+              kSpeculationPoisonRegister);
+      break;
+    case kPPC_Peek: {
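+      // Read a value back from the stack: reverse_slot indexes backwards from
+      // the end of the frame, and the destination may be a general-purpose,
+      // float, double, or Simd128 register.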
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset), r0);
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset), r0);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ mov(ip, Operand(offset));
+          __ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, ip), r0,
+                         kScratchDoubleReg);
+        }
+      } else {
+        __ LoadP(i.OutputRegister(), MemOperand(fp, offset), r0);
+      }
+      break;
+    }
+    case kPPC_Sync: {
+      __ sync();
+      break;
+    }
+    case kPPC_And:
+      if (HasRegisterInput(instr, 1)) {
+        __ and_(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+                i.OutputRCBit());
+      } else {
+        __ andi(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1));
+      }
+      break;
+    case kPPC_AndComplement:
+      __ andc(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+              i.OutputRCBit());
+      break;
+    case kPPC_Or:
+      if (HasRegisterInput(instr, 1)) {
+        __ orx(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               i.OutputRCBit());
+      } else {
+        __ ori(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1));
+        DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      }
+      break;
+    case kPPC_OrComplement:
+      __ orc(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+             i.OutputRCBit());
+      break;
+    case kPPC_Xor:
+      if (HasRegisterInput(instr, 1)) {
+        __ xor_(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+                i.OutputRCBit());
+      } else {
+        __ xori(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1));
+        DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      }
+      break;
+    case kPPC_ShiftLeft32:
+      ASSEMBLE_BINOP_RC(slw, slwi);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_ShiftLeft64:
+      ASSEMBLE_BINOP_RC(sld, sldi);
+      break;
+#endif
+    case kPPC_ShiftRight32:
+      ASSEMBLE_BINOP_RC(srw, srwi);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_ShiftRight64:
+      ASSEMBLE_BINOP_RC(srd, srdi);
+      break;
+#endif
+    case kPPC_ShiftRightAlg32:
+      ASSEMBLE_BINOP_INT_RC(sraw, srawi);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_ShiftRightAlg64:
+      ASSEMBLE_BINOP_INT_RC(srad, sradi);
+      break;
+#endif
+#if !V8_TARGET_ARCH_PPC64
+    case kPPC_AddPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ addc(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2));
+      __ adde(i.OutputRegister(1), i.InputRegister(1), i.InputRegister(3));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_SubPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ subc(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2));
+      __ sube(i.OutputRegister(1), i.InputRegister(1), i.InputRegister(3));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_MulPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
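+      // 64-bit product from 32-bit halves:
+      //   low  = low32(l_low * r_low)
+      //   high = high32(l_low * r_low) + low32(l_low * r_high)
+      //                                + low32(l_high * r_low)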
+      __ mullw(i.TempRegister(0), i.InputRegister(0), i.InputRegister(3));
+      __ mullw(i.TempRegister(1), i.InputRegister(2), i.InputRegister(1));
+      __ add(i.TempRegister(0), i.TempRegister(0), i.TempRegister(1));
+      __ mullw(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2));
+      __ mulhwu(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(2));
+      __ add(i.OutputRegister(1), i.OutputRegister(1), i.TempRegister(0));
+      break;
+    case kPPC_ShiftLeftPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftLeftPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                         i.InputRegister(1), i.InputInt32(2));
+      } else {
+        __ ShiftLeftPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                         i.InputRegister(1), kScratchReg, i.InputRegister(2));
+      }
+      break;
+    }
+    case kPPC_ShiftRightPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftRightPair(i.OutputRegister(0), second_output,
+                          i.InputRegister(0), i.InputRegister(1),
+                          i.InputInt32(2));
+      } else {
+        __ ShiftRightPair(i.OutputRegister(0), second_output,
+                          i.InputRegister(0), i.InputRegister(1), kScratchReg,
+                          i.InputRegister(2));
+      }
+      break;
+    }
+    case kPPC_ShiftRightAlgPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftRightAlgPair(i.OutputRegister(0), second_output,
+                             i.InputRegister(0), i.InputRegister(1),
+                             i.InputInt32(2));
+      } else {
+        __ ShiftRightAlgPair(i.OutputRegister(0), second_output,
+                             i.InputRegister(0), i.InputRegister(1),
+                             kScratchReg, i.InputRegister(2));
+      }
+      break;
+    }
+#endif
+    case kPPC_RotRight32:
+      if (HasRegisterInput(instr, 1)) {
+        __ subfic(kScratchReg, i.InputRegister(1), Operand(32));
+        __ rotlw(i.OutputRegister(), i.InputRegister(0), kScratchReg,
+                 i.OutputRCBit());
+      } else {
+        int sh = i.InputInt32(1);
+        __ rotrwi(i.OutputRegister(), i.InputRegister(0), sh, i.OutputRCBit());
+      }
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_RotRight64:
+      if (HasRegisterInput(instr, 1)) {
+        __ subfic(kScratchReg, i.InputRegister(1), Operand(64));
+        __ rotld(i.OutputRegister(), i.InputRegister(0), kScratchReg,
+                 i.OutputRCBit());
+      } else {
+        int sh = i.InputInt32(1);
+        __ rotrdi(i.OutputRegister(), i.InputRegister(0), sh, i.OutputRCBit());
+      }
+      break;
+#endif
+    case kPPC_Not:
+      __ notx(i.OutputRegister(), i.InputRegister(0), i.OutputRCBit());
+      break;
+    case kPPC_RotLeftAndMask32:
+      __ rlwinm(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1),
+                31 - i.InputInt32(2), 31 - i.InputInt32(3), i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_RotLeftAndClear64:
+      __ rldic(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1),
+               63 - i.InputInt32(2), i.OutputRCBit());
+      break;
+    case kPPC_RotLeftAndClearLeft64:
+      __ rldicl(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1),
+                63 - i.InputInt32(2), i.OutputRCBit());
+      break;
+    case kPPC_RotLeftAndClearRight64:
+      __ rldicr(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1),
+                63 - i.InputInt32(2), i.OutputRCBit());
+      break;
+#endif
+    case kPPC_Add32:
+#if V8_TARGET_ARCH_PPC64
+      if (FlagsModeField::decode(instr->opcode()) != kFlags_none) {
+        ASSEMBLE_ADD_WITH_OVERFLOW();
+      } else {
+#endif
+        if (HasRegisterInput(instr, 1)) {
+          __ add(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+                 LeaveOE, i.OutputRCBit());
+        } else {
+          __ addi(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1));
+          DCHECK_EQ(LeaveRC, i.OutputRCBit());
+        }
+        __ extsw(i.OutputRegister(), i.OutputRegister());
+#if V8_TARGET_ARCH_PPC64
+      }
+#endif
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Add64:
+      if (FlagsModeField::decode(instr->opcode()) != kFlags_none) {
+        ASSEMBLE_ADD_WITH_OVERFLOW();
+      } else {
+        if (HasRegisterInput(instr, 1)) {
+          __ add(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+                 LeaveOE, i.OutputRCBit());
+        } else {
+          __ addi(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1));
+          DCHECK_EQ(LeaveRC, i.OutputRCBit());
+        }
+      }
+      break;
+#endif
+    case kPPC_AddWithOverflow32:
+      ASSEMBLE_ADD_WITH_OVERFLOW32();
+      break;
+    case kPPC_AddDouble:
+      ASSEMBLE_FLOAT_BINOP_RC(fadd, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_Sub:
+#if V8_TARGET_ARCH_PPC64
+      if (FlagsModeField::decode(instr->opcode()) != kFlags_none) {
+        ASSEMBLE_SUB_WITH_OVERFLOW();
+      } else {
+#endif
+        if (HasRegisterInput(instr, 1)) {
+          __ sub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+                 LeaveOE, i.OutputRCBit());
+        } else {
+          if (is_int16(i.InputImmediate(1).immediate())) {
+            __ subi(i.OutputRegister(), i.InputRegister(0),
+                    i.InputImmediate(1));
+            DCHECK_EQ(LeaveRC, i.OutputRCBit());
+          } else {
+            __ mov(kScratchReg, i.InputImmediate(1));
+            __ sub(i.OutputRegister(), i.InputRegister(0), kScratchReg, LeaveOE,
+                   i.OutputRCBit());
+          }
+        }
+#if V8_TARGET_ARCH_PPC64
+      }
+#endif
+      break;
+    case kPPC_SubWithOverflow32:
+      ASSEMBLE_SUB_WITH_OVERFLOW32();
+      break;
+    case kPPC_SubDouble:
+      ASSEMBLE_FLOAT_BINOP_RC(fsub, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_Mul32:
+      __ mullw(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               LeaveOE, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Mul64:
+      __ mulld(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
+               LeaveOE, i.OutputRCBit());
+      break;
+#endif
+
+    case kPPC_Mul32WithHigh32:
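+      // Produce both the low and high 32 bits of the product. If an output
+      // aliases an input, compute the low word into kScratchReg first so the
+      // inputs are still intact for the high-word multiply.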
+      if (i.OutputRegister(0) == i.InputRegister(0) ||
+          i.OutputRegister(0) == i.InputRegister(1) ||
+          i.OutputRegister(1) == i.InputRegister(0) ||
+          i.OutputRegister(1) == i.InputRegister(1)) {
+        __ mullw(kScratchReg, i.InputRegister(0), i.InputRegister(1));  // low
+        __ mulhw(i.OutputRegister(1), i.InputRegister(0),
+                 i.InputRegister(1));  // high
+        __ mr(i.OutputRegister(0), kScratchReg);
+      } else {
+        __ mullw(i.OutputRegister(0), i.InputRegister(0),
+                 i.InputRegister(1));  // low
+        __ mulhw(i.OutputRegister(1), i.InputRegister(0),
+                 i.InputRegister(1));  // high
+      }
+      break;
+    case kPPC_MulHigh32:
+      __ mulhw(r0, i.InputRegister(0), i.InputRegister(1), i.OutputRCBit());
+      // High 32 bits are undefined and need to be cleared.
+      __ clrldi(i.OutputRegister(), r0, Operand(32));
+      break;
+    case kPPC_MulHighU32:
+      __ mulhwu(r0, i.InputRegister(0), i.InputRegister(1), i.OutputRCBit());
+      // High 32 bits are undefined and need to be cleared.
+      __ clrldi(i.OutputRegister(), r0, Operand(32));
+      break;
+    case kPPC_MulDouble:
+      ASSEMBLE_FLOAT_BINOP_RC(fmul, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_Div32:
+      __ divw(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Div64:
+      __ divd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_DivU32:
+      __ divwu(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_DivU64:
+      __ divdu(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_DivDouble:
+      ASSEMBLE_FLOAT_BINOP_RC(fdiv, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_Mod32:
+      if (CpuFeatures::IsSupported(MODULO)) {
+        __ modsw(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        ASSEMBLE_MODULO(divw, mullw);
+      }
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Mod64:
+      if (CpuFeatures::IsSupported(MODULO)) {
+        __ modsd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        ASSEMBLE_MODULO(divd, mulld);
+      }
+      break;
+#endif
+    case kPPC_ModU32:
+      if (CpuFeatures::IsSupported(MODULO)) {
+        __ moduw(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        ASSEMBLE_MODULO(divwu, mullw);
+      }
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_ModU64:
+      if (CpuFeatures::IsSupported(MODULO)) {
+        __ modud(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
+      } else {
+        ASSEMBLE_MODULO(divdu, mulld);
+      }
+      break;
+#endif
+    case kPPC_ModDouble:
+      // TODO(bmeurer): We should really get rid of this special instruction,
+      // and generate a CallAddress instruction instead.
+      ASSEMBLE_FLOAT_MODULO();
+      break;
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kPPC_Neg:
+      __ neg(i.OutputRegister(), i.InputRegister(0), LeaveOE, i.OutputRCBit());
+      break;
+    case kPPC_MaxDouble:
+      ASSEMBLE_FLOAT_MAX();
+      break;
+    case kPPC_MinDouble:
+      ASSEMBLE_FLOAT_MIN();
+      break;
+    case kPPC_AbsDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(fabs, 0);
+      break;
+    case kPPC_SqrtDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(fsqrt, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_FloorDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(frim, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_CeilDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(frip, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_TruncateDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(friz, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_RoundDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(frin, MiscField::decode(instr->opcode()));
+      break;
+    case kPPC_NegDouble:
+      ASSEMBLE_FLOAT_UNOP_RC(fneg, 0);
+      break;
+    case kPPC_Cntlz32:
+      __ cntlzw(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Cntlz64:
+      __ cntlzd(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_Popcnt32:
+      __ popcntw(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Popcnt64:
+      __ popcntd(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_Cmp32:
+      ASSEMBLE_COMPARE(cmpw, cmplw);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Cmp64:
+      ASSEMBLE_COMPARE(cmp, cmpl);
+      break;
+#endif
+    case kPPC_CmpDouble:
+      ASSEMBLE_FLOAT_COMPARE(fcmpu);
+      break;
+    case kPPC_Tst32:
+      if (HasRegisterInput(instr, 1)) {
+        __ and_(r0, i.InputRegister(0), i.InputRegister(1), i.OutputRCBit());
+      } else {
+        __ andi(r0, i.InputRegister(0), i.InputImmediate(1));
+      }
+#if V8_TARGET_ARCH_PPC64
+      __ extsw(r0, r0, i.OutputRCBit());
+#endif
+      DCHECK_EQ(SetRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_Tst64:
+      if (HasRegisterInput(instr, 1)) {
+        __ and_(r0, i.InputRegister(0), i.InputRegister(1), i.OutputRCBit());
+      } else {
+        __ andi(r0, i.InputRegister(0), i.InputImmediate(1));
+      }
+      DCHECK_EQ(SetRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_Float64SilenceNaN: {
+      DoubleRegister value = i.InputDoubleRegister(0);
+      DoubleRegister result = i.OutputDoubleRegister();
+      __ CanonicalizeNaN(result, value);
+      break;
+    }
+    case kPPC_Push:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            __ StoreSingleU(i.InputDoubleRegister(0),
+                            MemOperand(sp, -kSystemPointerSize), r0);
+            frame_access_state()->IncreaseSPDelta(1);
+            break;
+          case MachineRepresentation::kFloat64:
+            __ StoreDoubleU(i.InputDoubleRegister(0),
+                            MemOperand(sp, -kDoubleSize), r0);
+            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+                                                  kSystemPointerSize);
+            break;
+          case MachineRepresentation::kSimd128: {
+            __ addi(sp, sp, Operand(-kSimd128Size));
+            __ StoreSimd128(i.InputDoubleRegister(0), MemOperand(r0, sp), r0,
+                            kScratchDoubleReg);
+            frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                                  kSystemPointerSize);
+            break;
+          }
+          default:
+            UNREACHABLE();
+            break;
+        }
+      } else {
+        __ StorePU(i.InputRegister(0), MemOperand(sp, -kSystemPointerSize), r0);
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_PushFrame: {
+      int num_slots = i.InputInt32(1);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDoubleU(i.InputDoubleRegister(0),
+                          MemOperand(sp, -num_slots * kSystemPointerSize), r0);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+          __ StoreSingleU(i.InputDoubleRegister(0),
+                          MemOperand(sp, -num_slots * kSystemPointerSize), r0);
+        }
+      } else {
+        __ StorePU(i.InputRegister(0),
+                   MemOperand(sp, -num_slots * kSystemPointerSize), r0);
+      }
+      break;
+    }
+    case kPPC_StoreToStackSlot: {
+      int slot = i.InputInt32(1);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDouble(i.InputDoubleRegister(0),
+                         MemOperand(sp, slot * kSystemPointerSize), r0);
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ StoreSingle(i.InputDoubleRegister(0),
+                         MemOperand(sp, slot * kSystemPointerSize), r0);
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ mov(ip, Operand(slot * kSystemPointerSize));
+          __ StoreSimd128(i.InputDoubleRegister(0), MemOperand(ip, sp), r0,
+                          kScratchDoubleReg);
+        }
+      } else {
+        __ StoreP(i.InputRegister(0), MemOperand(sp, slot * kSystemPointerSize),
+                  r0);
+      }
+      break;
+    }
+    case kPPC_ExtendSignWord8:
+      __ extsb(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_ExtendSignWord16:
+      __ extsh(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_ExtendSignWord32:
+      __ extsw(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Uint32ToUint64:
+      // Zero extend
+      __ clrldi(i.OutputRegister(), i.InputRegister(0), Operand(32));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Int64ToInt32:
+      __ extsw(i.OutputRegister(), i.InputRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Int64ToFloat32:
+      __ ConvertInt64ToFloat(i.InputRegister(0), i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Int64ToDouble:
+      __ ConvertInt64ToDouble(i.InputRegister(0), i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Uint64ToFloat32:
+      __ ConvertUnsignedInt64ToFloat(i.InputRegister(0),
+                                     i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Uint64ToDouble:
+      __ ConvertUnsignedInt64ToDouble(i.InputRegister(0),
+                                      i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+#endif
+    case kPPC_Int32ToFloat32:
+      __ ConvertIntToFloat(i.InputRegister(0), i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Int32ToDouble:
+      __ ConvertIntToDouble(i.InputRegister(0), i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Uint32ToFloat32:
+      __ ConvertUnsignedIntToFloat(i.InputRegister(0),
+                                   i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Uint32ToDouble:
+      __ ConvertUnsignedIntToDouble(i.InputRegister(0),
+                                    i.OutputDoubleRegister());
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_Float32ToInt32: {
+      bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_i32) {
+        __ mtfsb0(VXCVI);  // clear FPSCR:VXCVI bit
+      }
+      __ fctiwz(kScratchDoubleReg, i.InputDoubleRegister(0));
+      __ MovDoubleLowToInt(i.OutputRegister(), kScratchDoubleReg);
+      if (set_overflow_to_min_i32) {
+        // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+        // because INT32_MIN allows easier out-of-bounds detection.
+        CRegister cr = cr7;
+        int crbit = v8::internal::Assembler::encode_crbit(
+            cr, static_cast<CRBit>(VXCVI % CRWIDTH));
+        __ mcrfs(cr, VXCVI);  // extract FPSCR field containing VXCVI into cr7
+        __ li(kScratchReg, Operand(1));
+        __ sldi(kScratchReg, kScratchReg, Operand(31));  // generate INT32_MIN.
+        __ isel(i.OutputRegister(0), kScratchReg, i.OutputRegister(0), crbit);
+      }
+      break;
+    }
+    case kPPC_Float32ToUint32: {
+      bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_u32) {
+        __ mtfsb0(VXCVI);  // clear FPSCR:VXCVI bit
+      }
+      __ fctiwuz(kScratchDoubleReg, i.InputDoubleRegister(0));
+      __ MovDoubleLowToInt(i.OutputRegister(), kScratchDoubleReg);
+      if (set_overflow_to_min_u32) {
+        // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+        // because 0 allows easier out-of-bounds detection.
+        CRegister cr = cr7;
+        int crbit = v8::internal::Assembler::encode_crbit(
+            cr, static_cast<CRBit>(VXCVI % CRWIDTH));
+        __ mcrfs(cr, VXCVI);  // extract FPSCR field containing VXCVI into cr7
+        __ li(kScratchReg, Operand::Zero());
+        __ isel(i.OutputRegister(0), kScratchReg, i.OutputRegister(0), crbit);
+      }
+      break;
+    }
+    case kPPC_DoubleToInt32:
+    case kPPC_DoubleToUint32:
+    case kPPC_DoubleToInt64: {
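+      // On PPC64, an invalid conversion (overflow or NaN) sets the FPSCR
+      // VXCVI bit. If a success output is present it ends up 1 on success and
+      // 0 on failure; otherwise the result register is zeroed on failure.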
+#if V8_TARGET_ARCH_PPC64
+      bool check_conversion =
+          (opcode == kPPC_DoubleToInt64 && i.OutputCount() > 1);
+      if (check_conversion) {
+        __ mtfsb0(VXCVI);  // clear FPSCR:VXCVI bit
+      }
+#endif
+      __ ConvertDoubleToInt64(i.InputDoubleRegister(0),
+#if !V8_TARGET_ARCH_PPC64
+                              kScratchReg,
+#endif
+                              i.OutputRegister(0), kScratchDoubleReg);
+#if V8_TARGET_ARCH_PPC64
+      CRegister cr = cr7;
+      int crbit = v8::internal::Assembler::encode_crbit(
+          cr, static_cast<CRBit>(VXCVI % CRWIDTH));
+      __ mcrfs(cr, VXCVI);  // extract FPSCR field containing VXCVI into cr7
+      // Handle conversion failures (such as overflow).
+      if (CpuFeatures::IsSupported(ISELECT)) {
+        if (check_conversion) {
+          __ li(i.OutputRegister(1), Operand(1));
+          __ isel(i.OutputRegister(1), r0, i.OutputRegister(1), crbit);
+        } else {
+          __ isel(i.OutputRegister(0), r0, i.OutputRegister(0), crbit);
+        }
+      } else {
+        if (check_conversion) {
+          __ li(i.OutputRegister(1), Operand::Zero());
+          __ bc(v8::internal::kInstrSize * 2, BT, crbit);
+          __ li(i.OutputRegister(1), Operand(1));
+        } else {
+          __ mr(ip, i.OutputRegister(0));
+          __ li(i.OutputRegister(0), Operand::Zero());
+          __ bc(v8::internal::kInstrSize * 2, BT, crbit);
+          __ mr(i.OutputRegister(0), ip);
+        }
+      }
+#endif
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    }
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_DoubleToUint64: {
+      bool check_conversion = (i.OutputCount() > 1);
+      if (check_conversion) {
+        __ mtfsb0(VXCVI);  // clear FPSCR:VXCVI bit
+      }
+      __ ConvertDoubleToUnsignedInt64(i.InputDoubleRegister(0),
+                                      i.OutputRegister(0), kScratchDoubleReg);
+      if (check_conversion) {
+        // Set 2nd output to zero if conversion fails.
+        CRegister cr = cr7;
+        int crbit = v8::internal::Assembler::encode_crbit(
+            cr, static_cast<CRBit>(VXCVI % CRWIDTH));
+        __ mcrfs(cr, VXCVI);  // extract FPSCR field containing VXCVI into cr7
+        if (CpuFeatures::IsSupported(ISELECT)) {
+          __ li(i.OutputRegister(1), Operand(1));
+          __ isel(i.OutputRegister(1), r0, i.OutputRegister(1), crbit);
+        } else {
+          __ li(i.OutputRegister(1), Operand::Zero());
+          __ bc(v8::internal::kInstrSize * 2, BT, crbit);
+          __ li(i.OutputRegister(1), Operand(1));
+        }
+      }
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    }
+#endif
+    case kPPC_DoubleToFloat32:
+      ASSEMBLE_FLOAT_UNOP_RC(frsp, 0);
+      break;
+    case kPPC_Float32ToDouble:
+      // Nothing to convert: PPC keeps float32 values in double format in the
+      // FPRs, so widening is just a register move.
+      __ Move(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_DoubleExtractLowWord32:
+      __ MovDoubleLowToInt(i.OutputRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_DoubleExtractHighWord32:
+      __ MovDoubleHighToInt(i.OutputRegister(), i.InputDoubleRegister(0));
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_DoubleInsertLowWord32:
+      __ InsertDoubleLow(i.OutputDoubleRegister(), i.InputRegister(1), r0);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_DoubleInsertHighWord32:
+      __ InsertDoubleHigh(i.OutputDoubleRegister(), i.InputRegister(1), r0);
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_DoubleConstruct:
+#if V8_TARGET_ARCH_PPC64
+      __ MovInt64ComponentsToDouble(i.OutputDoubleRegister(),
+                                    i.InputRegister(0), i.InputRegister(1), r0);
+#else
+      __ MovInt64ToDouble(i.OutputDoubleRegister(), i.InputRegister(0),
+                          i.InputRegister(1));
+#endif
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    case kPPC_BitcastFloat32ToInt32:
+      __ MovFloatToInt(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kPPC_BitcastInt32ToFloat32:
+      __ MovIntToFloat(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_BitcastDoubleToInt64:
+      __ MovDoubleToInt64(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kPPC_BitcastInt64ToDouble:
+      __ MovInt64ToDouble(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+#endif
+    case kPPC_LoadWordU8:
+      ASSEMBLE_LOAD_INTEGER(lbz, lbzx);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kPPC_LoadWordS8:
+      ASSEMBLE_LOAD_INTEGER(lbz, lbzx);
+      __ extsb(i.OutputRegister(), i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kPPC_LoadWordU16:
+      ASSEMBLE_LOAD_INTEGER(lhz, lhzx);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kPPC_LoadWordS16:
+      ASSEMBLE_LOAD_INTEGER(lha, lhax);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kPPC_LoadWordU32:
+      ASSEMBLE_LOAD_INTEGER(lwz, lwzx);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kPPC_LoadWordS32:
+      ASSEMBLE_LOAD_INTEGER(lwa, lwax);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_LoadWord64:
+      ASSEMBLE_LOAD_INTEGER(ld, ldx);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+#endif
+    case kPPC_LoadFloat32:
+      ASSEMBLE_LOAD_FLOAT(lfs, lfsx);
+      break;
+    case kPPC_LoadDouble:
+      ASSEMBLE_LOAD_FLOAT(lfd, lfdx);
+      break;
+    case kPPC_LoadSimd128: {
+      Simd128Register result = i.OutputSimd128Register();
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode);
+      bool is_atomic = i.InputInt32(2);
+      // lvx only supports MRR.
+      DCHECK_EQ(mode, kMode_MRR);
+      __ LoadSimd128(result, operand, r0, kScratchDoubleReg);
+      if (is_atomic) __ lwsync();
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    }
+    case kPPC_StoreWord8:
+      ASSEMBLE_STORE_INTEGER(stb, stbx);
+      break;
+    case kPPC_StoreWord16:
+      ASSEMBLE_STORE_INTEGER(sth, sthx);
+      break;
+    case kPPC_StoreWord32:
+      ASSEMBLE_STORE_INTEGER(stw, stwx);
+      break;
+#if V8_TARGET_ARCH_PPC64
+    case kPPC_StoreWord64:
+      ASSEMBLE_STORE_INTEGER(std, stdx);
+      break;
+#endif
+    case kPPC_StoreFloat32:
+      ASSEMBLE_STORE_FLOAT(stfs, stfsx);
+      break;
+    case kPPC_StoreDouble:
+      ASSEMBLE_STORE_FLOAT(stfd, stfdx);
+      break;
+    case kPPC_StoreSimd128: {
+      size_t index = 0;
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode, &index);
+      Simd128Register value = i.InputSimd128Register(index);
+      bool is_atomic = i.InputInt32(3);
+      if (is_atomic) __ lwsync();
+      // stvx only supports MRR.
+      DCHECK_EQ(mode, kMode_MRR);
+      __ StoreSimd128(value, operand, r0, kScratchDoubleReg);
+      if (is_atomic) __ sync();
+      DCHECK_EQ(LeaveRC, i.OutputRCBit());
+      break;
+    }
+    case kWord32AtomicLoadInt8:
+    case kPPC_AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kPPC_AtomicLoadUint16:
+    case kPPC_AtomicLoadWord32:
+    case kPPC_AtomicLoadWord64:
+    case kPPC_AtomicStoreUint8:
+    case kPPC_AtomicStoreUint16:
+    case kPPC_AtomicStoreWord32:
+    case kPPC_AtomicStoreWord64:
+      UNREACHABLE();
+    case kWord32AtomicExchangeInt8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(lbarx, stbcx);
+      __ extsb(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kPPC_AtomicExchangeUint8:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(lbarx, stbcx);
+      break;
+    case kWord32AtomicExchangeInt16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(lharx, sthcx);
+      __ extsh(i.OutputRegister(0), i.OutputRegister(0));
+      break;
+    case kPPC_AtomicExchangeUint16:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(lharx, sthcx);
+      break;
+    case kPPC_AtomicExchangeWord32:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(lwarx, stwcx);
+      break;
+    case kPPC_AtomicExchangeWord64:
+      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldarx, stdcx);
+      break;
+    case kWord32AtomicCompareExchangeInt8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_SIGN_EXT(cmp, lbarx, stbcx, extsb);
+      break;
+    case kPPC_AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE(cmp, lbarx, stbcx, ZeroExtByte);
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_SIGN_EXT(cmp, lharx, sthcx, extsh);
+      break;
+    case kPPC_AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE(cmp, lharx, sthcx, ZeroExtHalfWord);
+      break;
+    case kPPC_AtomicCompareExchangeWord32:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE(cmpw, lwarx, stwcx, ZeroExtWord32);
+      break;
+    case kPPC_AtomicCompareExchangeWord64:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE(cmp, ldarx, stdcx, mr);
+      break;
+
+#define ATOMIC_BINOP_CASE(op, inst)                            \
+  case kPPC_Atomic##op##Int8:                                  \
+    ASSEMBLE_ATOMIC_BINOP_SIGN_EXT(inst, lbarx, stbcx, extsb); \
+    break;                                                     \
+  case kPPC_Atomic##op##Uint8:                                 \
+    ASSEMBLE_ATOMIC_BINOP(inst, lbarx, stbcx);                 \
+    break;                                                     \
+  case kPPC_Atomic##op##Int16:                                 \
+    ASSEMBLE_ATOMIC_BINOP_SIGN_EXT(inst, lharx, sthcx, extsh); \
+    break;                                                     \
+  case kPPC_Atomic##op##Uint16:                                \
+    ASSEMBLE_ATOMIC_BINOP(inst, lharx, sthcx);                 \
+    break;                                                     \
+  case kPPC_Atomic##op##Int32:                                 \
+    ASSEMBLE_ATOMIC_BINOP_SIGN_EXT(inst, lwarx, stwcx, extsw); \
+    break;                                                     \
+  case kPPC_Atomic##op##Uint32:                                \
+    ASSEMBLE_ATOMIC_BINOP(inst, lwarx, stwcx);                 \
+    break;                                                     \
+  case kPPC_Atomic##op##Int64:                                 \
+  case kPPC_Atomic##op##Uint64:                                \
+    ASSEMBLE_ATOMIC_BINOP(inst, ldarx, stdcx);                 \
+    break;
+      ATOMIC_BINOP_CASE(Add, add)
+      ATOMIC_BINOP_CASE(Sub, sub)
+      ATOMIC_BINOP_CASE(And, and_)
+      ATOMIC_BINOP_CASE(Or, orx)
+      ATOMIC_BINOP_CASE(Xor, xor_)
+#undef ATOMIC_BINOP_CASE
+
+    case kPPC_ByteRev32: {
+      Register input = i.InputRegister(0);
+      Register output = i.OutputRegister();
+      Register temp1 = r0;
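+      // Byte-reverse the 32-bit value with rotate-and-insert: rotlwi places
+      // two of the bytes, the rlwimi pair inserts the remaining two, and
+      // extsw sign-extends the reversed word into the 64-bit register.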
+      __ rotlwi(temp1, input, 8);
+      __ rlwimi(temp1, input, 24, 0, 7);
+      __ rlwimi(temp1, input, 24, 16, 23);
+      __ extsw(output, temp1);
+      break;
+    }
+#ifdef V8_TARGET_ARCH_PPC64
+    case kPPC_ByteRev64: {
+      Register input = i.InputRegister(0);
+      Register output = i.OutputRegister();
+      Register temp1 = r0;
+      Register temp2 = kScratchReg;
+      Register temp3 = i.TempRegister(0);
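+      // Byte-reverse each 32-bit half with rotate-and-insert (temp2 holds the
+      // reversed low word, temp3 the reversed high word), then swap the
+      // halves with rldicr/orx to form the byte-reversed 64-bit result.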
+      __ rldicl(temp1, input, 32, 32);
+      __ rotlwi(temp2, input, 8);
+      __ rlwimi(temp2, input, 24, 0, 7);
+      __ rotlwi(temp3, temp1, 8);
+      __ rlwimi(temp2, input, 24, 16, 23);
+      __ rlwimi(temp3, temp1, 24, 0, 7);
+      __ rlwimi(temp3, temp1, 24, 16, 23);
+      __ rldicr(temp2, temp2, 32, 31);
+      __ orx(output, temp2, temp3);
+      break;
+    }
+#endif  // V8_TARGET_ARCH_PPC64
+    case kPPC_F64x2Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
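+      // Splat via the stack: move the double's bit pattern to a GPR, store it
+      // into both halves of a 16-byte aligned stack slot, and reload the slot
+      // as a vector with lvx.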
+      __ MovDoubleToInt64(ip, i.InputDoubleRegister(0));
+      // Need to maintain 16 byte alignment for lvx.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ StoreP(ip, MemOperand(sp, 0));
+      __ StoreP(ip, MemOperand(sp, 8));
+      __ lvx(dst, MemOperand(r0, sp));
+      __ mr(sp, kScratchReg);
+      break;
+    }
+    case kPPC_F32x4Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ MovFloatToInt(kScratchReg, i.InputDoubleRegister(0));
+      __ mtvsrd(dst, kScratchReg);
+      __ vspltw(dst, dst, Operand(1));
+      break;
+    }
+    case kPPC_I64x2Splat: {
+      Register src = i.InputRegister(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      // Need to maintain 16 byte alignment for lvx.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ StoreP(src, MemOperand(sp, 0));
+      __ StoreP(src, MemOperand(sp, 8));
+      __ lvx(dst, MemOperand(r0, sp));
+      __ mr(sp, kScratchReg);
+      break;
+    }
+    case kPPC_I32x4Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(dst, i.InputRegister(0));
+      __ vspltw(dst, dst, Operand(1));
+      break;
+    }
+    case kPPC_I16x8Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(dst, i.InputRegister(0));
+      __ vsplth(dst, dst, Operand(3));
+      break;
+    }
+    case kPPC_I8x16Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(dst, i.InputRegister(0));
+      __ vspltb(dst, dst, Operand(7));
+      break;
+    }
+    case kPPC_F64x2ExtractLane: {
+      constexpr int lane_width_in_bytes = 8;
+      __ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
+                   Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(kScratchReg, kScratchDoubleReg);
+      __ MovInt64ToDouble(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    }
+    case kPPC_F32x4ExtractLane: {
+      constexpr int lane_width_in_bytes = 4;
+      __ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(kScratchReg, kScratchDoubleReg);
+      __ MovIntToFloat(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    }
+    case kPPC_I64x2ExtractLane: {
+      constexpr int lane_width_in_bytes = 8;
+      __ vextractd(kScratchDoubleReg, i.InputSimd128Register(0),
+                   Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4ExtractLane: {
+      constexpr int lane_width_in_bytes = 4;
+      __ vextractuw(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8ExtractLaneU: {
+      constexpr int lane_width_in_bytes = 2;
+      __ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8ExtractLaneS: {
+      constexpr int lane_width_in_bytes = 2;
+      __ vextractuh(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
+      __ mfvsrd(kScratchReg, kScratchDoubleReg);
+      __ extsh(i.OutputRegister(), kScratchReg);
+      break;
+    }
+    case kPPC_I8x16ExtractLaneU: {
+      __ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand(15 - i.InputInt8(1)));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16ExtractLaneS: {
+      __ vextractub(kScratchDoubleReg, i.InputSimd128Register(0),
+                    Operand(15 - i.InputInt8(1)));
+      __ mfvsrd(kScratchReg, kScratchDoubleReg);
+      __ extsb(i.OutputRegister(), kScratchReg);
+      break;
+    }
+    case kPPC_F64x2ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      constexpr int lane_width_in_bytes = 8;
+      Simd128Register dst = i.OutputSimd128Register();
+      __ MovDoubleToInt64(r0, i.InputDoubleRegister(2));
+      __ mtvsrd(kScratchDoubleReg, r0);
+      __ vinsertd(dst, kScratchDoubleReg,
+                  Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_F32x4ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      constexpr int lane_width_in_bytes = 4;
+      Simd128Register dst = i.OutputSimd128Register();
+      __ MovFloatToInt(r0, i.InputDoubleRegister(2));
+      __ mtvsrd(kScratchDoubleReg, r0);
+      __ vinsertw(dst, kScratchDoubleReg,
+                  Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_I64x2ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      constexpr int lane_width_in_bytes = 8;
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(kScratchDoubleReg, i.InputRegister(2));
+      __ vinsertd(dst, kScratchDoubleReg,
+                  Operand((1 - i.InputInt8(1)) * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_I32x4ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      constexpr int lane_width_in_bytes = 4;
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(kScratchDoubleReg, i.InputRegister(2));
+      __ vinsertw(dst, kScratchDoubleReg,
+                  Operand((3 - i.InputInt8(1)) * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_I16x8ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      constexpr int lane_width_in_bytes = 2;
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(kScratchDoubleReg, i.InputRegister(2));
+      __ vinserth(dst, kScratchDoubleReg,
+                  Operand((7 - i.InputInt8(1)) * lane_width_in_bytes));
+      break;
+    }
+    case kPPC_I8x16ReplaceLane: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      Simd128Register dst = i.OutputSimd128Register();
+      __ mtvsrd(kScratchDoubleReg, i.InputRegister(2));
+      __ vinsertb(dst, kScratchDoubleReg, Operand(15 - i.InputInt8(1)));
+      break;
+    }
+    case kPPC_F64x2Add: {
+      __ xvadddp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Sub: {
+      __ xvsubdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Mul: {
+      __ xvmuldp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Add: {
+      __ vaddfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
+      constexpr int shift_bits = 32;
+      // generate first operand
+      __ vpkudum(dst, src1, src0);
+      // generate second operand
+      __ li(ip, Operand(shift_bits));
+      __ mtvsrd(tempFPReg2, ip);
+      __ vspltb(tempFPReg2, tempFPReg2, Operand(7));
+      __ vsro(tempFPReg1, src0, tempFPReg2);
+      __ vsro(tempFPReg2, src1, tempFPReg2);
+      __ vpkudum(kScratchDoubleReg, tempFPReg2, tempFPReg1);
+      // add the operands
+      __ vaddfp(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kPPC_F32x4Sub: {
+      __ vsubfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Mul: {
+      __ xvmulsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2Add: {
+      __ vaddudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2Sub: {
+      __ vsubudm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2Mul: {
+      // Need to maintain 16 byte alignment for stvx and lvx.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-32));
+      __ stvx(i.InputSimd128Register(0), MemOperand(r0, sp));
+      __ li(ip, Operand(16));
+      __ stvx(i.InputSimd128Register(1), MemOperand(ip, sp));
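+      // Multiply the two 64-bit lanes in GPRs: load each lane of both
+      // operands from the stack, mulld them, and store the products back
+      // before reloading the result vector.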
+      for (int i = 0; i < 2; i++) {
+        __ LoadP(r0, MemOperand(sp, kBitsPerByte * i));
+        __ LoadP(ip, MemOperand(sp, (kBitsPerByte * i) + kSimd128Size));
+        __ mulld(r0, r0, ip);
+        __ StoreP(r0, MemOperand(sp, i * kBitsPerByte));
+      }
+      __ lvx(i.OutputSimd128Register(), MemOperand(r0, sp));
+      __ mr(sp, kScratchReg);
+      break;
+    }
+    case kPPC_I32x4Add: {
+      __ vadduwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vsum2sws(dst, src0, kScratchDoubleReg);
+      __ vsum2sws(kScratchDoubleReg, src1, kScratchDoubleReg);
+      __ vpkudum(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kPPC_I32x4Sub: {
+      __ vsubuwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4Mul: {
+      __ vmuluwm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8Add: {
+      __ vadduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vsum4shs(dst, src0, kScratchDoubleReg);
+      __ vsum4shs(kScratchDoubleReg, src1, kScratchDoubleReg);
+      __ vpkuwus(dst, kScratchDoubleReg, dst);
+      break;
+    }
+    case kPPC_I16x8Sub: {
+      __ vsubuhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8Mul: {
+      __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vmladduhm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputSimd128Register(1), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16Add: {
+      __ vaddubm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16Sub: {
+      __ vsububm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16Mul: {
+      __ vmuleub(kScratchDoubleReg, i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      __ vmuloub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      __ vpkuhum(i.OutputSimd128Register(), kScratchDoubleReg,
+                 i.OutputSimd128Register());
+      break;
+    }
+    case kPPC_I64x2MinS: {
+      __ vminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4MinS: {
+      __ vminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2MinU: {
+      __ vminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4MinU: {
+      __ vminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8MinS: {
+      __ vminsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8MinU: {
+      __ vminuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16MinS: {
+      __ vminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16MinU: {
+      __ vminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2MaxS: {
+      __ vmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4MaxS: {
+      __ vmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2MaxU: {
+      __ vmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4MaxU: {
+      __ vmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8MaxS: {
+      __ vmaxsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8MaxU: {
+      __ vmaxuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16MaxS: {
+      __ vmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16MaxU: {
+      __ vmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Eq: {
+      __ xvcmpeqdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Ne: {
+      __ xvcmpeqdp(kScratchDoubleReg, i.InputSimd128Register(0),
+                   i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_F64x2Le: {
+      __ xvcmpgedp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                   i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2Lt: {
+      __ xvcmpgtdp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                   i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Eq: {
+      __ xvcmpeqsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                   i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2Eq: {
+      __ vcmpequd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4Eq: {
+      __ vcmpequw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8Eq: {
+      __ vcmpequh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16Eq: {
+      __ vcmpequb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Ne: {
+      __ xvcmpeqsp(kScratchDoubleReg, i.InputSimd128Register(0),
+                   i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I64x2Ne: {
+      __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4Ne: {
+      __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8Ne: {
+      __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16Ne: {
+      __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vnor(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_F32x4Lt: {
+      __ xvcmpgtsp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                   i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Le: {
+      __ xvcmpgesp(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                   i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I64x2GtS: {
+      __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4GtS: {
+      __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2GeS: {
+      __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4GeS: {
+      __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I64x2GtU: {
+      __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I32x4GtU: {
+      __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I64x2GeU: {
+      __ vcmpequd(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtud(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4GeU: {
+      __ vcmpequw(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8GtS: {
+      __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8GeS: {
+      __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtsh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8GtU: {
+      __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8GeU: {
+      __ vcmpequh(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtuh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16GtS: {
+      __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16GeS: {
+      __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16GtU: {
+      __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16GeU: {
+      __ vcmpequb(kScratchDoubleReg, i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vcmpgtub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1));
+      __ vor(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             kScratchDoubleReg);
+      break;
+    }
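+// Splat the scalar shift amount into every byte lane so that each vector lane
+// is shifted by the same count (the shift instructions take the shift amount
+// from the corresponding lane of the second operand).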
+#define VECTOR_SHIFT(op)                                         \
+  {                                                              \
+    __ mtvsrd(kScratchDoubleReg, i.InputRegister(1));            \
+    __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7)); \
+    __ op(i.OutputSimd128Register(), i.InputSimd128Register(0),  \
+          kScratchDoubleReg);                                    \
+  }
+    case kPPC_I64x2Shl: {
+      VECTOR_SHIFT(vsld)
+      break;
+    }
+    case kPPC_I64x2ShrS: {
+      VECTOR_SHIFT(vsrad)
+      break;
+    }
+    case kPPC_I64x2ShrU: {
+      VECTOR_SHIFT(vsrd)
+      break;
+    }
+    case kPPC_I32x4Shl: {
+      VECTOR_SHIFT(vslw)
+      break;
+    }
+    case kPPC_I32x4ShrS: {
+      VECTOR_SHIFT(vsraw)
+      break;
+    }
+    case kPPC_I32x4ShrU: {
+      VECTOR_SHIFT(vsrw)
+      break;
+    }
+    case kPPC_I16x8Shl: {
+      VECTOR_SHIFT(vslh)
+      break;
+    }
+    case kPPC_I16x8ShrS: {
+      VECTOR_SHIFT(vsrah)
+      break;
+    }
+    case kPPC_I16x8ShrU: {
+      VECTOR_SHIFT(vsrh)
+      break;
+    }
+    case kPPC_I8x16Shl: {
+      VECTOR_SHIFT(vslb)
+      break;
+    }
+    case kPPC_I8x16ShrS: {
+      VECTOR_SHIFT(vsrab)
+      break;
+    }
+    case kPPC_I8x16ShrU: {
+      VECTOR_SHIFT(vsrb)
+      break;
+    }
+#undef VECTOR_SHIFT
+    case kPPC_S128And: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vand(dst, i.InputSimd128Register(0), src);
+      break;
+    }
+    case kPPC_S128Or: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vor(dst, i.InputSimd128Register(0), src);
+      break;
+    }
+    case kPPC_S128Xor: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vxor(dst, i.InputSimd128Register(0), src);
+      break;
+    }
+    case kPPC_S128Zero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vxor(dst, dst, dst);
+      break;
+    }
+    case kPPC_S128Not: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vnor(dst, src, src);
+      break;
+    }
+    case kPPC_S128Select: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register mask = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      __ vsel(dst, src2, src1, mask);
+      break;
+    }
+    case kPPC_F64x2Abs: {
+      __ xvabsdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2Neg: {
+      __ xvnegdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2Sqrt: {
+      __ xvsqrtdp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Abs: {
+      __ xvabssp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Neg: {
+      __ xvnegsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4RecipApprox: {
+      __ xvresp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4RecipSqrtApprox: {
+      __ xvrsqrtesp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Sqrt: {
+      __ xvsqrtsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I64x2Neg: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ li(ip, Operand(1));
+      // Need to maintain 16 byte alignment for lvx.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ StoreP(ip, MemOperand(sp, 0));
+      __ StoreP(ip, MemOperand(sp, 8));
+      __ lvx(kScratchDoubleReg, MemOperand(r0, sp));
+      __ mr(sp, kScratchReg);
+      // Perform negation.
+      __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
+      __ vaddudm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4Neg: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ li(ip, Operand(1));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
+      __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
+      __ vadduwm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
+      break;
+    }
+    case kPPC_I32x4Abs: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      Simd128Register src = i.InputSimd128Register(0);
+      constexpr int shift_bits = 31;
+      __ li(ip, Operand(shift_bits));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
+      __ vsraw(kScratchDoubleReg, src, kScratchDoubleReg);
+      __ vxor(tempFPReg1, src, kScratchDoubleReg);
+      __ vsubuwm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8Neg: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ li(ip, Operand(1));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
+      __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
+      __ vadduhm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
+      break;
+    }
+    case kPPC_I16x8Abs: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      Simd128Register src = i.InputSimd128Register(0);
+      constexpr int shift_bits = 15;
+      __ li(ip, Operand(shift_bits));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
+      __ vsrah(kScratchDoubleReg, src, kScratchDoubleReg);
+      __ vxor(tempFPReg1, src, kScratchDoubleReg);
+      __ vsubuhm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16Neg: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ li(ip, Operand(1));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
+      __ vnor(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(0));
+      __ vaddubm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1);
+      break;
+    }
+    case kPPC_I8x16Abs: {
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      Simd128Register src = i.InputSimd128Register(0);
+      constexpr int shift_bits = 7;
+      __ li(ip, Operand(shift_bits));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltb(kScratchDoubleReg, kScratchDoubleReg, Operand(7));
+      __ vsrab(kScratchDoubleReg, src, kScratchDoubleReg);
+      __ vxor(tempFPReg1, src, kScratchDoubleReg);
+      __ vsububm(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_V64x2AnyTrue:
+    case kPPC_V32x4AnyTrue:
+    case kPPC_V16x8AnyTrue:
+    case kPPC_V8x16AnyTrue: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Register dst = i.OutputRegister();
+      constexpr int bit_number = 24;
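+      // CR bit 24 is CR6's "all lanes true" bit, set by the SetRC form of the
+      // vector compare below.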
+      __ li(r0, Operand(0));
+      __ li(ip, Operand(-1));
+      // Check whether both doublewords are 0; if so, return false.
+      __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vcmpequd(kScratchDoubleReg, src, kScratchDoubleReg, SetRC);
+      __ isel(dst, r0, ip, bit_number);
+      break;
+    }
+#define SIMD_ALL_TRUE(opcode)                                       \
+  Simd128Register src = i.InputSimd128Register(0);                  \
+  Register dst = i.OutputRegister();                                \
+  constexpr int bit_number = 24;                                    \
+  __ li(r0, Operand(0));                                            \
+  __ li(ip, Operand(-1));                                           \
+  /* Check if all lanes are > 0; if not, return false. */            \
+  __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); \
+  __ opcode(kScratchDoubleReg, src, kScratchDoubleReg, SetRC);      \
+  __ isel(dst, ip, r0, bit_number);
+    case kPPC_V64x2AllTrue: {
+      SIMD_ALL_TRUE(vcmpgtud)
+      break;
+    }
+    case kPPC_V32x4AllTrue: {
+      SIMD_ALL_TRUE(vcmpgtuw)
+      break;
+    }
+    case kPPC_V16x8AllTrue: {
+      SIMD_ALL_TRUE(vcmpgtuh)
+      break;
+    }
+    case kPPC_V8x16AllTrue: {
+      SIMD_ALL_TRUE(vcmpgtub)
+      break;
+    }
+#undef SIMD_ALL_TRUE
+    case kPPC_I32x4SConvertF32x4: {
+      Simd128Register src = i.InputSimd128Register(0);
+      // Zero out NaN lanes before converting (NaN compares unequal to
+      // itself).
+      __ vor(kScratchDoubleReg, src, src);
+      __ xvcmpeqsp(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vand(kScratchDoubleReg, src, kScratchDoubleReg);
+      __ xvcvspsxws(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4UConvertF32x4: {
+      __ xvcvspuxws(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4SConvertI32x4: {
+      __ xvcvsxwsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4UConvertI32x4: {
+      __ xvcvuxwsp(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I32x4SConvertI16x8Low: {
+      __ vupklsh(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I32x4SConvertI16x8High: {
+      __ vupkhsh(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I32x4UConvertI16x8Low: {
+      __ vupklsh(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      // Zero extend.
+      __ mov(ip, Operand(0xFFFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
+      __ vand(i.OutputSimd128Register(), kScratchDoubleReg,
+              i.OutputSimd128Register());
+      break;
+    }
+    case kPPC_I32x4UConvertI16x8High: {
+      __ vupkhsh(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      // Zero extend.
+      __ mov(ip, Operand(0xFFFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vspltw(kScratchDoubleReg, kScratchDoubleReg, Operand(1));
+      __ vand(i.OutputSimd128Register(), kScratchDoubleReg,
+              i.OutputSimd128Register());
+      break;
+    }
+
+    case kPPC_I16x8SConvertI8x16Low: {
+      __ vupklsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I16x8SConvertI8x16High: {
+      __ vupkhsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I16x8UConvertI8x16Low: {
+      __ vupklsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      // Zero extend.
+      __ li(ip, Operand(0xFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
+      __ vand(i.OutputSimd128Register(), kScratchDoubleReg,
+              i.OutputSimd128Register());
+      break;
+    }
+    case kPPC_I16x8UConvertI8x16High: {
+      __ vupkhsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      // Zero extend.
+      __ li(ip, Operand(0xFF));
+      __ mtvsrd(kScratchDoubleReg, ip);
+      __ vsplth(kScratchDoubleReg, kScratchDoubleReg, Operand(3));
+      __ vand(i.OutputSimd128Register(), kScratchDoubleReg,
+              i.OutputSimd128Register());
+      break;
+    }
+    case kPPC_I16x8SConvertI32x4: {
+      __ vpkswss(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8UConvertI32x4: {
+      __ vpkswus(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16SConvertI16x8: {
+      __ vpkshss(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16UConvertI16x8: {
+      __ vpkshus(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16Shuffle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      __ mov(r0, Operand(make_uint64(i.InputUint32(3), i.InputUint32(2))));
+      __ mov(ip, Operand(make_uint64(i.InputUint32(5), i.InputUint32(4))));
+      // Need to maintain 16 byte alignment for lvx.
+      __ mr(kScratchReg, sp);
+      __ ClearRightImm(
+          sp, sp,
+          Operand(base::bits::WhichPowerOfTwo(16)));  // equivalent to &= -16
+      __ addi(sp, sp, Operand(-16));
+      __ StoreP(r0, MemOperand(sp, 0));
+      __ StoreP(ip, MemOperand(sp, 8));
+      __ lvx(kScratchDoubleReg, MemOperand(r0, sp));
+      __ mr(sp, kScratchReg);
+      __ vperm(dst, src0, src1, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8AddSatS: {
+      __ vaddshs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8SubSatS: {
+      __ vsubshs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8AddSatU: {
+      __ vadduhs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I16x8SubSatU: {
+      __ vsubuhs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16AddSatS: {
+      __ vaddsbs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16SubSatS: {
+      __ vsubsbs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16AddSatU: {
+      __ vaddubs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16SubSatU: {
+      __ vsububs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16Swizzle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1),
+                      tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)),
+                      tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
+      // Saturate the indices to 5 bits. Input indices greater than 31 should
+      // return 0.
+      __ xxspltib(tempFPReg2, Operand(31));
+      __ vminub(tempFPReg2, src1, tempFPReg2);
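+      // Reverse the 16 bytes of src0 through the stack: ldbrx loads each
+      // doubleword byte-reversed and the stdx pair stores them swapped, so
+      // the reloaded kScratchDoubleReg holds src0 with its byte order
+      // reversed before it is fed to vperm.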
+      __ addi(sp, sp, Operand(-16));
+      __ stxvd(src0, MemOperand(r0, sp));
+      __ ldbrx(r0, MemOperand(r0, sp));
+      __ li(ip, Operand(8));
+      __ ldbrx(ip, MemOperand(ip, sp));
+      __ stdx(ip, MemOperand(r0, sp));
+      __ li(ip, Operand(8));
+      __ stdx(r0, MemOperand(ip, sp));
+      __ lxvd(kScratchDoubleReg, MemOperand(r0, sp));
+      __ addi(sp, sp, Operand(16));
+      __ vxor(tempFPReg1, tempFPReg1, tempFPReg1);
+      __ vperm(dst, kScratchDoubleReg, tempFPReg1, tempFPReg2);
+      break;
+    }
+    case kPPC_F64x2Qfma: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vor(kScratchDoubleReg, src1, src1);
+      __ xvmaddmdp(kScratchDoubleReg, src2, src0);
+      __ vor(dst, kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_F64x2Qfms: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vor(kScratchDoubleReg, src1, src1);
+      __ xvnmsubmdp(kScratchDoubleReg, src2, src0);
+      __ vor(dst, kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_F32x4Qfma: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vor(kScratchDoubleReg, src1, src1);
+      __ xvmaddmsp(kScratchDoubleReg, src2, src0);
+      __ vor(dst, kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_F32x4Qfms: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vor(kScratchDoubleReg, src1, src1);
+      __ xvnmsubmsp(kScratchDoubleReg, src2, src0);
+      __ vor(dst, kScratchDoubleReg, kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8RoundingAverageU: {
+      __ vavguh(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_I8x16RoundingAverageU: {
+      __ vavgub(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_S128AndNot: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vandc(dst, src, i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Div: {
+      __ xvdivdp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
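+// The min/max above may not return a NaN input, so the macro below re-selects
+// any NaN input lane into the result (xvcmpeqdp compares false only for NaN
+// lanes).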
+#define F64X2_MIN_MAX_NAN(result)                                       \
+  Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));   \
+  __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(0),                   \
+               i.InputSimd128Register(0));                              \
+  __ vsel(result, i.InputSimd128Register(0), result, tempFPReg1);       \
+  __ xvcmpeqdp(tempFPReg1, i.InputSimd128Register(1),                   \
+               i.InputSimd128Register(1));                              \
+  __ vsel(i.OutputSimd128Register(), i.InputSimd128Register(1), result, \
+          tempFPReg1);
+    case kPPC_F64x2Min: {
+      __ xvmindp(kScratchDoubleReg, i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      // We need to check if an input is NaN and preserve it.
+      F64X2_MIN_MAX_NAN(kScratchDoubleReg)
+      break;
+    }
+    case kPPC_F64x2Max: {
+      __ xvmaxdp(kScratchDoubleReg, i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      // We need to check if an input is NaN and preserve it.
+      F64X2_MIN_MAX_NAN(kScratchDoubleReg)
+      break;
+    }
+#undef F64X2_MIN_MAX_NAN
+    case kPPC_F32x4Div: {
+      __ xvdivsp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                 i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Min: {
+      __ vminfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F32x4Max: {
+      __ vmaxfp(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                i.InputSimd128Register(1));
+      break;
+    }
+    case kPPC_F64x2Ceil: {
+      __ xvrdpip(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2Floor: {
+      __ xvrdpim(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2Trunc: {
+      __ xvrdpiz(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F64x2NearestInt: {
+      __ xvrdpi(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Ceil: {
+      __ xvrspip(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Floor: {
+      __ xvrspim(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4Trunc: {
+      __ xvrspiz(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_F32x4NearestInt: {
+      __ xvrspi(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kPPC_I32x4BitMask: {
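+      // vbpermq gathers the bits whose indices appear in each byte of the
+      // mask: 0x00, 0x20, 0x40, 0x60 pick the sign bit of each 32-bit lane,
+      // while 0x80 (out of range) yields 0.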
+      __ mov(kScratchReg,
+             Operand(0x8080808000204060));  // Select 0 for the high bits.
+      __ mtvsrd(kScratchDoubleReg, kScratchReg);
+      __ vbpermq(kScratchDoubleReg, i.InputSimd128Register(0),
+                 kScratchDoubleReg);
+      __ vextractub(kScratchDoubleReg, kScratchDoubleReg, Operand(6));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I16x8BitMask: {
+      __ mov(kScratchReg, Operand(0x10203040506070));
+      __ mtvsrd(kScratchDoubleReg, kScratchReg);
+      __ vbpermq(kScratchDoubleReg, i.InputSimd128Register(0),
+                 kScratchDoubleReg);
+      __ vextractub(kScratchDoubleReg, kScratchDoubleReg, Operand(6));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I8x16BitMask: {
+      Register temp = i.ToRegister(instr->TempAt(0));
+      __ mov(temp, Operand(0x8101820283038));
+      __ mov(ip, Operand(0x4048505860687078));
+      __ mtvsrdd(kScratchDoubleReg, temp, ip);
+      __ vbpermq(kScratchDoubleReg, i.InputSimd128Register(0),
+                 kScratchDoubleReg);
+      __ vextractuh(kScratchDoubleReg, kScratchDoubleReg, Operand(6));
+      __ mfvsrd(i.OutputRegister(), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_I32x4DotI16x8S: {
+      __ vxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vmsumshm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), kScratchDoubleReg);
+      break;
+    }
+    case kPPC_StoreCompressTagged: {
+      ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
+      break;
+    }
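+    // Pointer decompression: a compressed tagged value is a 32-bit offset, so
+    // load it with lwz and, for pointers, add the base held in kRootRegister.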
+    case kPPC_LoadDecompressTaggedSigned: {
+      CHECK(instr->HasOutput());
+      ASSEMBLE_LOAD_INTEGER(lwz, lwzx);
+      break;
+    }
+    case kPPC_LoadDecompressTaggedPointer: {
+      CHECK(instr->HasOutput());
+      ASSEMBLE_LOAD_INTEGER(lwz, lwzx);
+      __ add(i.OutputRegister(), i.OutputRegister(), kRootRegister);
+      break;
+    }
+    case kPPC_LoadDecompressAnyTagged: {
+      CHECK(instr->HasOutput());
+      ASSEMBLE_LOAD_INTEGER(lwz, lwzx);
+      __ add(i.OutputRegister(), i.OutputRegister(), kRootRegister);
+      break;
+    }
+    default:
+      UNREACHABLE();
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+// Assembles branches after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  PPCOperandConverter i(this, instr);
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  ArchOpcode op = instr->arch_opcode();
+  FlagsCondition condition = branch->condition;
+  CRegister cr = cr0;
+
+  Condition cond = FlagsConditionToCondition(condition, op);
+  if (op == kPPC_CmpDouble) {
+    // check for unordered if necessary
+    if (cond == le) {
+      __ bunordered(flabel, cr);
+      // Unnecessary for eq/lt since only FU bit will be set.
+    } else if (cond == gt) {
+      __ bunordered(tlabel, cr);
+      // Unnecessary for ne/ge since only FU bit will be set.
+    }
+  }
+  __ b(cond, tlabel, cr);
+  if (!branch->fallthru) __ b(flabel);  // no fallthru to flabel.
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(John) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual ||
+      condition == kOverflow || condition == kNotOverflow) {
+    return;
+  }
+
+  ArchOpcode op = instr->arch_opcode();
+  condition = NegateFlagsCondition(condition);
+  __ li(kScratchReg, Operand::Zero());
+  __ isel(FlagsConditionToCondition(condition, op), kSpeculationPoisonRegister,
+          kScratchReg, kSpeculationPoisonRegister, cr0);
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+    void Generate() final {
+      PPCOperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        // We use the context register as the scratch register, because we do
+        // not have a context here.
+        __ PrepareCallCFunction(0, 0, cp);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          __ stop();
+        }
+      }
+    }
+
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Label end;
+
+  ArchOpcode op = instr->arch_opcode();
+  CRegister cr = cr0;
+  Condition cond = FlagsConditionToCondition(condition, op);
+  if (op == kPPC_CmpDouble) {
+    // Check for unordered if necessary.
+    if (cond == le) {
+      __ bunordered(&end, cr);
+      // Unnecessary for eq/lt since only FU bit will be set.
+    } else if (cond == gt) {
+      __ bunordered(tlabel, cr);
+      // Unnecessary for ne/ge since only FU bit will be set.
+    }
+  }
+  __ b(cond, tlabel, cr);
+  __ bind(&end);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  PPCOperandConverter i(this, instr);
+  Label done;
+  ArchOpcode op = instr->arch_opcode();
+  CRegister cr = cr0;
+  int reg_value = -1;
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
+
+  Condition cond = FlagsConditionToCondition(condition, op);
+  if (op == kPPC_CmpDouble) {
+    // Check for unordered if necessary.
+    if (cond == le) {
+      reg_value = 0;
+      __ li(reg, Operand::Zero());
+      __ bunordered(&done, cr);
+    } else if (cond == gt) {
+      reg_value = 1;
+      __ li(reg, Operand(1));
+      __ bunordered(&done, cr);
+    }
+    // Unnecessary for eq/lt & ne/ge since only FU bit will be set.
+  }
+
+  if (CpuFeatures::IsSupported(ISELECT)) {
+    switch (cond) {
+      case eq:
+      case lt:
+      case gt:
+        if (reg_value != 1) __ li(reg, Operand(1));
+        __ li(kScratchReg, Operand::Zero());
+        __ isel(cond, reg, reg, kScratchReg, cr);
+        break;
+      case ne:
+      case ge:
+      case le:
+        if (reg_value != 1) __ li(reg, Operand(1));
+        // r0 implies logical zero in this form
+        __ isel(NegateCondition(cond), reg, r0, reg, cr);
+        break;
+      default:
+        UNREACHABLE();
+        break;
+    }
+  } else {
+    if (reg_value != 0) __ li(reg, Operand::Zero());
+    __ b(NegateCondition(cond), &done, cr);
+    __ li(reg, Operand(1));
+  }
+  __ bind(&done);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  PPCOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  PPCOperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
+  Label** cases = zone()->NewArray<Label*>(case_count);
+  for (int32_t index = 0; index < case_count; ++index) {
+    cases[index] = GetLabel(i.InputRpo(index + 2));
+  }
+  Label* const table = AddJumpTable(cases, case_count);
+  __ Cmpli(input, Operand(case_count), r0);
+  __ bge(GetLabel(i.InputRpo(1)));
+  __ mov_label_addr(kScratchReg, table);
+  __ ShiftLeftImm(r0, input, Operand(kSystemPointerSizeLog2));
+  __ LoadPX(kScratchReg, MemOperand(kScratchReg, r0));
+  __ Jump(kScratchReg);
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  const RegList double_saves = call_descriptor->CalleeSavedFPRegisters();
+
+  // Save callee-saved Double registers.
+  if (double_saves != 0) {
+    frame->AlignSavedCalleeRegisterSlots();
+    DCHECK_EQ(kNumCalleeSavedDoubles,
+              base::bits::CountPopulation(double_saves));
+    frame->AllocateSavedCalleeRegisterSlots(kNumCalleeSavedDoubles *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+  // Save callee-saved registers.
+  const RegList saves = FLAG_enable_embedded_constant_pool
+                            ? call_descriptor->CalleeSavedRegisters() &
+                                  ~kConstantPoolRegister.bit()
+                            : call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    // The register save area does not include the fp or constant pool pointer.
+    const int num_saves =
+        kNumCalleeSaved - 1 - (FLAG_enable_embedded_constant_pool ? 1 : 0);
+    DCHECK(num_saves == base::bits::CountPopulation(saves));
+    frame->AllocateSavedCalleeRegisterSlots(num_saves);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ StubPrologue(StackFrame::C_WASM_ENTRY);
+        // Reserve stack space for saving the c_entry_fp later.
+        __ addi(sp, sp, Operand(-kSystemPointerSize));
+      } else {
+        __ mflr(r0);
+        if (FLAG_enable_embedded_constant_pool) {
+          __ Push(r0, fp, kConstantPoolRegister);
+          // Adjust FP to point to saved FP.
+          __ subi(fp, sp, Operand(StandardFrameConstants::kConstantPoolOffset));
+        } else {
+          __ Push(r0, fp);
+          __ mr(fp, sp);
+        }
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      StackFrame::Type type = info()->GetOutputStackFrameType();
+      // TODO(mbrandy): Detect cases where ip is the entrypoint (for
+      // efficient initialization of the constant pool pointer register).
+      __ StubPrologue(type);
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ Push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ LoadTaggedPointerField(
+            kJSFunctionRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ LoadTaggedPointerField(
+            kWasmInstanceRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        __ Push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ addi(sp, sp, Operand(-kSystemPointerSize));
+        }
+      }
+    }
+    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  const RegList saves = FLAG_enable_embedded_constant_pool
+                            ? call_descriptor->CalleeSavedRegisters() &
+                                  ~kConstantPoolRegister.bit()
+                            : call_descriptor->CalleeSavedRegisters();
+
+  if (required_slots > 0) {
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
+        Register scratch = ip;
+        __ LoadP(
+            scratch,
+            FieldMemOperand(kWasmInstanceRegister,
+                            WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ LoadP(scratch, MemOperand(scratch), r0);
+        __ Add(scratch, scratch, required_slots * kSystemPointerSize, r0);
+        __ cmpl(sp, scratch);
+        __ bge(&done);
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly, there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ stop();
+      }
+
+      __ bind(&done);
+    }
+
+    // Skip callee-saved and return slots, which are pushed below.
+    required_slots -= base::bits::CountPopulation(saves);
+    required_slots -= frame()->GetReturnSlotCount();
+    required_slots -= (kDoubleSize / kSystemPointerSize) *
+                      base::bits::CountPopulation(saves_fp);
+    __ Add(sp, sp, -required_slots * kSystemPointerSize, r0);
+  }
+
+  // Save callee-saved Double registers.
+  if (saves_fp != 0) {
+    __ MultiPushDoubles(saves_fp);
+    DCHECK_EQ(kNumCalleeSavedDoubles, base::bits::CountPopulation(saves_fp));
+  }
+
+  // Save callee-saved registers.
+  if (saves != 0) {
+    __ MultiPush(saves);
+    // The register save area does not include the fp or constant pool pointer.
+  }
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Create space for returns.
+    __ Add(sp, sp, -returns * kSystemPointerSize, r0);
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  int pop_count = static_cast<int>(call_descriptor->StackParameterCount());
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Free the space reserved for returns.
+    __ Add(sp, sp, returns * kSystemPointerSize, r0);
+  }
+
+  // Restore registers.
+  const RegList saves = FLAG_enable_embedded_constant_pool
+                            ? call_descriptor->CalleeSavedRegisters() &
+                                  ~kConstantPoolRegister.bit()
+                            : call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    __ MultiPop(saves);
+  }
+
+  // Restore double registers.
+  const RegList double_saves = call_descriptor->CalleeSavedFPRegisters();
+  if (double_saves != 0) {
+    __ MultiPopDoubles(double_saves);
+  }
+  PPCOperandConverter g(this, nullptr);
+  unwinding_info_writer_.MarkBlockWillExit();
+
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now unless they have a
+    // variable number of stack slot pops.
+    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ b(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+        AssembleDeconstructFrame();
+      }
+    } else {
+      AssembleDeconstructFrame();
+    }
+  }
+  // The constant pool is unavailable since the frame has been deconstructed.
+  ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
+  if (pop->IsImmediate()) {
+    DCHECK(Constant::kInt32 == g.ToConstant(pop).type() ||
+           Constant::kInt64 == g.ToConstant(pop).type());
+    pop_count += g.ToConstant(pop).ToInt32();
+  } else {
+    __ Drop(g.ToRegister(pop));
+  }
+  __ Drop(pop_count);
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {
+  // __ EmitConstantPool();
+}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  PPCOperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ Move(g.ToRegister(destination), src);
+    } else {
+      __ StoreP(src, g.ToMemOperand(destination), r0);
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsRegister()) {
+      __ LoadP(g.ToRegister(destination), src, r0);
+    } else {
+      Register temp = kScratchReg;
+      __ LoadP(temp, src, r0);
+      __ StoreP(temp, g.ToMemOperand(destination), r0);
+    }
+  } else if (source->IsConstant()) {
+    Constant src = g.ToConstant(source);
+    if (destination->IsRegister() || destination->IsStackSlot()) {
+      Register dst =
+          destination->IsRegister() ? g.ToRegister(destination) : kScratchReg;
+      switch (src.type()) {
+        case Constant::kInt32:
+#if V8_TARGET_ARCH_PPC64
+          if (false) {
+#else
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+#endif
+            __ mov(dst, Operand(src.ToInt32(), src.rmode()));
+          } else {
+            __ mov(dst, Operand(src.ToInt32()));
+          }
+          break;
+        case Constant::kInt64:
+#if V8_TARGET_ARCH_PPC64
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+            __ mov(dst, Operand(src.ToInt64(), src.rmode()));
+          } else {
+#endif
+            __ mov(dst, Operand(src.ToInt64()));
+#if V8_TARGET_ARCH_PPC64
+          }
+#endif
+          break;
+        case Constant::kFloat32:
+          __ mov(dst, Operand::EmbeddedNumber(src.ToFloat32()));
+          break;
+        case Constant::kFloat64:
+          __ mov(dst, Operand::EmbeddedNumber(src.ToFloat64().value()));
+          break;
+        case Constant::kExternalReference:
+          __ Move(dst, src.ToExternalReference());
+          break;
+        case Constant::kDelayedStringConstant:
+          __ mov(dst, Operand::EmbeddedStringConstant(
+                          src.ToDelayedStringConstant()));
+          break;
+        case Constant::kHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ Move(dst, src_object);
+          }
+          break;
+        }
+        case Constant::kCompressedHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            // TODO(v8:7703, jyan@ca.ibm.com): Turn into a
+            // COMPRESSED_EMBEDDED_OBJECT when the constant pool entry size is
+            // tagged size.
+            __ Move(dst, src_object, RelocInfo::FULL_EMBEDDED_OBJECT);
+          }
+          break;
+        }
+        case Constant::kRpoNumber:
+          UNREACHABLE();  // TODO(dcarney): loading RPO constants on PPC.
+          break;
+      }
+      if (destination->IsStackSlot()) {
+        __ StoreP(dst, g.ToMemOperand(destination), r0);
+      }
+    } else {
+      DoubleRegister dst = destination->IsFPRegister()
+                               ? g.ToDoubleRegister(destination)
+                               : kScratchDoubleReg;
+      Double value;
+#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
+      // Casting a single-precision snan to double precision quiets it
+      // (converts it to a qnan) on ia32/x64 hosts, so build the bits by hand.
+      if (src.type() == Constant::kFloat32) {
+        uint32_t val = src.ToFloat32AsInt();
+        if ((val & 0x7F800000) == 0x7F800000) {
+          uint64_t dval = static_cast<uint64_t>(val);
+          dval = ((dval & 0xC0000000) << 32) | ((dval & 0x40000000) << 31) |
+                 ((dval & 0x40000000) << 30) | ((dval & 0x7FFFFFFF) << 29);
+          value = Double(dval);
+        } else {
+          value = Double(static_cast<double>(src.ToFloat32()));
+        }
+      } else {
+        value = Double(src.ToFloat64());
+      }
+#else
+      value = src.type() == Constant::kFloat32
+                  ? Double(static_cast<double>(src.ToFloat32()))
+                  : Double(src.ToFloat64());
+#endif
+      __ LoadDoubleLiteral(dst, value, kScratchReg);
+      if (destination->IsDoubleStackSlot()) {
+        __ StoreDouble(dst, g.ToMemOperand(destination), r0);
+      } else if (destination->IsFloatStackSlot()) {
+        __ StoreSingle(dst, g.ToMemOperand(destination), r0);
+      }
+    }
+  } else if (source->IsFPRegister()) {
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      if (destination->IsSimd128Register()) {
+        __ vor(g.ToSimd128Register(destination), g.ToSimd128Register(source),
+               g.ToSimd128Register(source));
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        MemOperand dst = g.ToMemOperand(destination);
+        __ mov(ip, Operand(dst.offset()));
+        __ StoreSimd128(g.ToSimd128Register(source), MemOperand(dst.ra(), ip),
+                        r0, kScratchDoubleReg);
+      }
+    } else {
+      DoubleRegister src = g.ToDoubleRegister(source);
+      if (destination->IsFPRegister()) {
+        DoubleRegister dst = g.ToDoubleRegister(destination);
+        __ Move(dst, src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        LocationOperand* op = LocationOperand::cast(source);
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDouble(src, g.ToMemOperand(destination), r0);
+        } else {
+          __ StoreSingle(src, g.ToMemOperand(destination), r0);
+        }
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsFPRegister()) {
+      LocationOperand* op = LocationOperand::cast(source);
+      if (op->representation() == MachineRepresentation::kFloat64) {
+        __ LoadDouble(g.ToDoubleRegister(destination), src, r0);
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
+        __ LoadSingle(g.ToDoubleRegister(destination), src, r0);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        MemOperand src = g.ToMemOperand(source);
+        __ mov(ip, Operand(src.offset()));
+        __ LoadSimd128(g.ToSimd128Register(destination),
+                       MemOperand(src.ra(), ip), r0, kScratchDoubleReg);
+      }
+    } else {
+      LocationOperand* op = LocationOperand::cast(source);
+      DoubleRegister temp = kScratchDoubleReg;
+      if (op->representation() == MachineRepresentation::kFloat64) {
+        __ LoadDouble(temp, src, r0);
+        __ StoreDouble(temp, g.ToMemOperand(destination), r0);
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
+        __ LoadSingle(temp, src, r0);
+        __ StoreSingle(temp, g.ToMemOperand(destination), r0);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        // push d0, to be used as scratch
+        __ addi(sp, sp, Operand(-kSimd128Size));
+        __ StoreSimd128(d0, MemOperand(r0, sp), r0, kScratchDoubleReg);
+        MemOperand src = g.ToMemOperand(source);
+        MemOperand dst = g.ToMemOperand(destination);
+        __ mov(ip, Operand(src.offset()));
+        __ LoadSimd128(d0, MemOperand(src.ra(), ip), r0, kScratchDoubleReg);
+        __ mov(ip, Operand(dst.offset()));
+        __ StoreSimd128(d0, MemOperand(dst.ra(), ip), r0, kScratchDoubleReg);
+        // restore d0
+        __ LoadSimd128(d0, MemOperand(r0, sp), ip, kScratchDoubleReg);
+        __ addi(sp, sp, Operand(kSimd128Size));
+      }
+    }
+  } else {
+    UNREACHABLE();
+  }
+}
+
+// Swapping the contents of source and destination.
+// Source and destination can be one of:
+//   Register,
+//   FloatRegister,
+//   DoubleRegister,
+//   StackSlot,
+//   FloatStackSlot,
+//   or DoubleStackSlot
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  PPCOperandConverter g(this, nullptr);
+  if (source->IsRegister()) {
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ SwapP(src, g.ToRegister(destination), kScratchReg);
+    } else {
+      DCHECK(destination->IsStackSlot());
+      __ SwapP(src, g.ToMemOperand(destination), kScratchReg);
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsStackSlot());
+    __ SwapP(g.ToMemOperand(source), g.ToMemOperand(destination), kScratchReg,
+             r0);
+  } else if (source->IsFloatRegister()) {
+    DoubleRegister src = g.ToDoubleRegister(source);
+    if (destination->IsFloatRegister()) {
+      __ SwapFloat32(src, g.ToDoubleRegister(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsFloatStackSlot());
+      __ SwapFloat32(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsDoubleRegister()) {
+    DoubleRegister src = g.ToDoubleRegister(source);
+    if (destination->IsDoubleRegister()) {
+      __ SwapDouble(src, g.ToDoubleRegister(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsDoubleStackSlot());
+      __ SwapDouble(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsFloatStackSlot()) {
+    DCHECK(destination->IsFloatStackSlot());
+    __ SwapFloat32(g.ToMemOperand(source), g.ToMemOperand(destination),
+                   kScratchDoubleReg, d0);
+  } else if (source->IsDoubleStackSlot()) {
+    DCHECK(destination->IsDoubleStackSlot());
+    __ SwapDouble(g.ToMemOperand(source), g.ToMemOperand(destination),
+                  kScratchDoubleReg, d0);
+
+  } else if (source->IsSimd128Register()) {
+    Simd128Register src = g.ToSimd128Register(source);
+    if (destination->IsSimd128Register()) {
+      __ SwapSimd128(src, g.ToSimd128Register(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsSimd128StackSlot());
+      __ SwapSimd128(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsSimd128StackSlot()) {
+    DCHECK(destination->IsSimd128StackSlot());
+    __ SwapSimd128(g.ToMemOperand(source), g.ToMemOperand(destination),
+                   kScratchDoubleReg);
+
+  } else {
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  for (size_t index = 0; index < target_count; ++index) {
+    __ emit_label_addr(targets[index]);
+  }
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ppc/instruction-codes-ppc.h b/src/compiler/backend/ppc/instruction-codes-ppc.h
new file mode 100644
index 0000000..a4cda21
--- /dev/null
+++ b/src/compiler/backend/ppc/instruction-codes-ppc.h
@@ -0,0 +1,402 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_PPC_INSTRUCTION_CODES_PPC_H_
+#define V8_COMPILER_BACKEND_PPC_INSTRUCTION_CODES_PPC_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// PPC-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V)   \
+  V(PPC_Peek)                        \
+  V(PPC_Sync)                        \
+  V(PPC_And)                         \
+  V(PPC_AndComplement)               \
+  V(PPC_Or)                          \
+  V(PPC_OrComplement)                \
+  V(PPC_Xor)                         \
+  V(PPC_ShiftLeft32)                 \
+  V(PPC_ShiftLeft64)                 \
+  V(PPC_ShiftLeftPair)               \
+  V(PPC_ShiftRight32)                \
+  V(PPC_ShiftRight64)                \
+  V(PPC_ShiftRightPair)              \
+  V(PPC_ShiftRightAlg32)             \
+  V(PPC_ShiftRightAlg64)             \
+  V(PPC_ShiftRightAlgPair)           \
+  V(PPC_RotRight32)                  \
+  V(PPC_RotRight64)                  \
+  V(PPC_Not)                         \
+  V(PPC_RotLeftAndMask32)            \
+  V(PPC_RotLeftAndClear64)           \
+  V(PPC_RotLeftAndClearLeft64)       \
+  V(PPC_RotLeftAndClearRight64)      \
+  V(PPC_Add32)                       \
+  V(PPC_Add64)                       \
+  V(PPC_AddWithOverflow32)           \
+  V(PPC_AddPair)                     \
+  V(PPC_AddDouble)                   \
+  V(PPC_Sub)                         \
+  V(PPC_SubWithOverflow32)           \
+  V(PPC_SubPair)                     \
+  V(PPC_SubDouble)                   \
+  V(PPC_Mul32)                       \
+  V(PPC_Mul32WithHigh32)             \
+  V(PPC_Mul64)                       \
+  V(PPC_MulHigh32)                   \
+  V(PPC_MulHighU32)                  \
+  V(PPC_MulPair)                     \
+  V(PPC_MulDouble)                   \
+  V(PPC_Div32)                       \
+  V(PPC_Div64)                       \
+  V(PPC_DivU32)                      \
+  V(PPC_DivU64)                      \
+  V(PPC_DivDouble)                   \
+  V(PPC_Mod32)                       \
+  V(PPC_Mod64)                       \
+  V(PPC_ModU32)                      \
+  V(PPC_ModU64)                      \
+  V(PPC_ModDouble)                   \
+  V(PPC_Neg)                         \
+  V(PPC_NegDouble)                   \
+  V(PPC_SqrtDouble)                  \
+  V(PPC_FloorDouble)                 \
+  V(PPC_CeilDouble)                  \
+  V(PPC_TruncateDouble)              \
+  V(PPC_RoundDouble)                 \
+  V(PPC_MaxDouble)                   \
+  V(PPC_MinDouble)                   \
+  V(PPC_AbsDouble)                   \
+  V(PPC_Cntlz32)                     \
+  V(PPC_Cntlz64)                     \
+  V(PPC_Popcnt32)                    \
+  V(PPC_Popcnt64)                    \
+  V(PPC_Cmp32)                       \
+  V(PPC_Cmp64)                       \
+  V(PPC_CmpDouble)                   \
+  V(PPC_Tst32)                       \
+  V(PPC_Tst64)                       \
+  V(PPC_Push)                        \
+  V(PPC_PushFrame)                   \
+  V(PPC_StoreToStackSlot)            \
+  V(PPC_ExtendSignWord8)             \
+  V(PPC_ExtendSignWord16)            \
+  V(PPC_ExtendSignWord32)            \
+  V(PPC_Uint32ToUint64)              \
+  V(PPC_Int64ToInt32)                \
+  V(PPC_Int64ToFloat32)              \
+  V(PPC_Int64ToDouble)               \
+  V(PPC_Uint64ToFloat32)             \
+  V(PPC_Uint64ToDouble)              \
+  V(PPC_Int32ToFloat32)              \
+  V(PPC_Int32ToDouble)               \
+  V(PPC_Uint32ToFloat32)             \
+  V(PPC_Float32ToInt32)              \
+  V(PPC_Float32ToUint32)             \
+  V(PPC_Uint32ToDouble)              \
+  V(PPC_Float32ToDouble)             \
+  V(PPC_Float64SilenceNaN)           \
+  V(PPC_DoubleToInt32)               \
+  V(PPC_DoubleToUint32)              \
+  V(PPC_DoubleToInt64)               \
+  V(PPC_DoubleToUint64)              \
+  V(PPC_DoubleToFloat32)             \
+  V(PPC_DoubleExtractLowWord32)      \
+  V(PPC_DoubleExtractHighWord32)     \
+  V(PPC_DoubleInsertLowWord32)       \
+  V(PPC_DoubleInsertHighWord32)      \
+  V(PPC_DoubleConstruct)             \
+  V(PPC_BitcastInt32ToFloat32)       \
+  V(PPC_BitcastFloat32ToInt32)       \
+  V(PPC_BitcastInt64ToDouble)        \
+  V(PPC_BitcastDoubleToInt64)        \
+  V(PPC_LoadWordS8)                  \
+  V(PPC_LoadWordU8)                  \
+  V(PPC_LoadWordS16)                 \
+  V(PPC_LoadWordU16)                 \
+  V(PPC_LoadWordS32)                 \
+  V(PPC_LoadWordU32)                 \
+  V(PPC_LoadWord64)                  \
+  V(PPC_LoadFloat32)                 \
+  V(PPC_LoadDouble)                  \
+  V(PPC_LoadSimd128)                 \
+  V(PPC_StoreWord8)                  \
+  V(PPC_StoreWord16)                 \
+  V(PPC_StoreWord32)                 \
+  V(PPC_StoreWord64)                 \
+  V(PPC_StoreFloat32)                \
+  V(PPC_StoreDouble)                 \
+  V(PPC_StoreSimd128)                \
+  V(PPC_ByteRev32)                   \
+  V(PPC_ByteRev64)                   \
+  V(PPC_CompressSigned)              \
+  V(PPC_CompressPointer)             \
+  V(PPC_CompressAny)                 \
+  V(PPC_AtomicStoreUint8)            \
+  V(PPC_AtomicStoreUint16)           \
+  V(PPC_AtomicStoreWord32)           \
+  V(PPC_AtomicStoreWord64)           \
+  V(PPC_AtomicLoadUint8)             \
+  V(PPC_AtomicLoadUint16)            \
+  V(PPC_AtomicLoadWord32)            \
+  V(PPC_AtomicLoadWord64)            \
+  V(PPC_AtomicExchangeUint8)         \
+  V(PPC_AtomicExchangeUint16)        \
+  V(PPC_AtomicExchangeWord32)        \
+  V(PPC_AtomicExchangeWord64)        \
+  V(PPC_AtomicCompareExchangeUint8)  \
+  V(PPC_AtomicCompareExchangeUint16) \
+  V(PPC_AtomicCompareExchangeWord32) \
+  V(PPC_AtomicCompareExchangeWord64) \
+  V(PPC_AtomicAddUint8)              \
+  V(PPC_AtomicAddUint16)             \
+  V(PPC_AtomicAddUint32)             \
+  V(PPC_AtomicAddUint64)             \
+  V(PPC_AtomicAddInt8)               \
+  V(PPC_AtomicAddInt16)              \
+  V(PPC_AtomicAddInt32)              \
+  V(PPC_AtomicAddInt64)              \
+  V(PPC_AtomicSubUint8)              \
+  V(PPC_AtomicSubUint16)             \
+  V(PPC_AtomicSubUint32)             \
+  V(PPC_AtomicSubUint64)             \
+  V(PPC_AtomicSubInt8)               \
+  V(PPC_AtomicSubInt16)              \
+  V(PPC_AtomicSubInt32)              \
+  V(PPC_AtomicSubInt64)              \
+  V(PPC_AtomicAndUint8)              \
+  V(PPC_AtomicAndUint16)             \
+  V(PPC_AtomicAndUint32)             \
+  V(PPC_AtomicAndUint64)             \
+  V(PPC_AtomicAndInt8)               \
+  V(PPC_AtomicAndInt16)              \
+  V(PPC_AtomicAndInt32)              \
+  V(PPC_AtomicAndInt64)              \
+  V(PPC_AtomicOrUint8)               \
+  V(PPC_AtomicOrUint16)              \
+  V(PPC_AtomicOrUint32)              \
+  V(PPC_AtomicOrUint64)              \
+  V(PPC_AtomicOrInt8)                \
+  V(PPC_AtomicOrInt16)               \
+  V(PPC_AtomicOrInt32)               \
+  V(PPC_AtomicOrInt64)               \
+  V(PPC_AtomicXorUint8)              \
+  V(PPC_AtomicXorUint16)             \
+  V(PPC_AtomicXorUint32)             \
+  V(PPC_AtomicXorUint64)             \
+  V(PPC_AtomicXorInt8)               \
+  V(PPC_AtomicXorInt16)              \
+  V(PPC_AtomicXorInt32)              \
+  V(PPC_AtomicXorInt64)              \
+  V(PPC_F64x2Splat)                  \
+  V(PPC_F64x2ExtractLane)            \
+  V(PPC_F64x2ReplaceLane)            \
+  V(PPC_F64x2Add)                    \
+  V(PPC_F64x2Sub)                    \
+  V(PPC_F64x2Mul)                    \
+  V(PPC_F64x2Eq)                     \
+  V(PPC_F64x2Ne)                     \
+  V(PPC_F64x2Le)                     \
+  V(PPC_F64x2Lt)                     \
+  V(PPC_F64x2Abs)                    \
+  V(PPC_F64x2Neg)                    \
+  V(PPC_F64x2Sqrt)                   \
+  V(PPC_F64x2Qfma)                   \
+  V(PPC_F64x2Qfms)                   \
+  V(PPC_F64x2Div)                    \
+  V(PPC_F64x2Min)                    \
+  V(PPC_F64x2Max)                    \
+  V(PPC_F64x2Ceil)                   \
+  V(PPC_F64x2Floor)                  \
+  V(PPC_F64x2Trunc)                  \
+  V(PPC_F64x2NearestInt)             \
+  V(PPC_F32x4Splat)                  \
+  V(PPC_F32x4ExtractLane)            \
+  V(PPC_F32x4ReplaceLane)            \
+  V(PPC_F32x4Add)                    \
+  V(PPC_F32x4AddHoriz)               \
+  V(PPC_F32x4Sub)                    \
+  V(PPC_F32x4Mul)                    \
+  V(PPC_F32x4Eq)                     \
+  V(PPC_F32x4Ne)                     \
+  V(PPC_F32x4Lt)                     \
+  V(PPC_F32x4Le)                     \
+  V(PPC_F32x4Abs)                    \
+  V(PPC_F32x4Neg)                    \
+  V(PPC_F32x4RecipApprox)            \
+  V(PPC_F32x4RecipSqrtApprox)        \
+  V(PPC_F32x4Sqrt)                   \
+  V(PPC_F32x4SConvertI32x4)          \
+  V(PPC_F32x4UConvertI32x4)          \
+  V(PPC_F32x4Div)                    \
+  V(PPC_F32x4Min)                    \
+  V(PPC_F32x4Max)                    \
+  V(PPC_F32x4Ceil)                   \
+  V(PPC_F32x4Floor)                  \
+  V(PPC_F32x4Trunc)                  \
+  V(PPC_F32x4NearestInt)             \
+  V(PPC_I64x2Splat)                  \
+  V(PPC_I64x2ExtractLane)            \
+  V(PPC_I64x2ReplaceLane)            \
+  V(PPC_I64x2Add)                    \
+  V(PPC_I64x2Sub)                    \
+  V(PPC_I64x2Mul)                    \
+  V(PPC_I64x2MinS)                   \
+  V(PPC_I64x2MinU)                   \
+  V(PPC_I64x2MaxS)                   \
+  V(PPC_I64x2MaxU)                   \
+  V(PPC_I64x2Eq)                     \
+  V(PPC_I64x2Ne)                     \
+  V(PPC_I64x2GtS)                    \
+  V(PPC_I64x2GtU)                    \
+  V(PPC_I64x2GeU)                    \
+  V(PPC_I64x2GeS)                    \
+  V(PPC_I64x2Shl)                    \
+  V(PPC_I64x2ShrS)                   \
+  V(PPC_I64x2ShrU)                   \
+  V(PPC_I64x2Neg)                    \
+  V(PPC_I32x4Splat)                  \
+  V(PPC_I32x4ExtractLane)            \
+  V(PPC_I32x4ReplaceLane)            \
+  V(PPC_I32x4Add)                    \
+  V(PPC_I32x4AddHoriz)               \
+  V(PPC_I32x4Sub)                    \
+  V(PPC_I32x4Mul)                    \
+  V(PPC_I32x4MinS)                   \
+  V(PPC_I32x4MinU)                   \
+  V(PPC_I32x4MaxS)                   \
+  V(PPC_I32x4MaxU)                   \
+  V(PPC_I32x4Eq)                     \
+  V(PPC_I32x4Ne)                     \
+  V(PPC_I32x4GtS)                    \
+  V(PPC_I32x4GeS)                    \
+  V(PPC_I32x4GtU)                    \
+  V(PPC_I32x4GeU)                    \
+  V(PPC_I32x4Shl)                    \
+  V(PPC_I32x4ShrS)                   \
+  V(PPC_I32x4ShrU)                   \
+  V(PPC_I32x4Neg)                    \
+  V(PPC_I32x4Abs)                    \
+  V(PPC_I32x4SConvertF32x4)          \
+  V(PPC_I32x4UConvertF32x4)          \
+  V(PPC_I32x4SConvertI16x8Low)       \
+  V(PPC_I32x4SConvertI16x8High)      \
+  V(PPC_I32x4UConvertI16x8Low)       \
+  V(PPC_I32x4UConvertI16x8High)      \
+  V(PPC_I32x4BitMask)                \
+  V(PPC_I32x4DotI16x8S)              \
+  V(PPC_F32x4Qfma)                   \
+  V(PPC_F32x4Qfms)                   \
+  V(PPC_I16x8Splat)                  \
+  V(PPC_I16x8ExtractLaneU)           \
+  V(PPC_I16x8ExtractLaneS)           \
+  V(PPC_I16x8ReplaceLane)            \
+  V(PPC_I16x8Add)                    \
+  V(PPC_I16x8AddHoriz)               \
+  V(PPC_I16x8Sub)                    \
+  V(PPC_I16x8Mul)                    \
+  V(PPC_I16x8MinS)                   \
+  V(PPC_I16x8MinU)                   \
+  V(PPC_I16x8MaxS)                   \
+  V(PPC_I16x8MaxU)                   \
+  V(PPC_I16x8Eq)                     \
+  V(PPC_I16x8Ne)                     \
+  V(PPC_I16x8GtS)                    \
+  V(PPC_I16x8GeS)                    \
+  V(PPC_I16x8GtU)                    \
+  V(PPC_I16x8GeU)                    \
+  V(PPC_I16x8Shl)                    \
+  V(PPC_I16x8ShrS)                   \
+  V(PPC_I16x8ShrU)                   \
+  V(PPC_I16x8Neg)                    \
+  V(PPC_I16x8Abs)                    \
+  V(PPC_I16x8SConvertI32x4)          \
+  V(PPC_I16x8UConvertI32x4)          \
+  V(PPC_I16x8SConvertI8x16Low)       \
+  V(PPC_I16x8SConvertI8x16High)      \
+  V(PPC_I16x8UConvertI8x16Low)       \
+  V(PPC_I16x8UConvertI8x16High)      \
+  V(PPC_I16x8AddSatS)                \
+  V(PPC_I16x8SubSatS)                \
+  V(PPC_I16x8AddSatU)                \
+  V(PPC_I16x8SubSatU)                \
+  V(PPC_I16x8RoundingAverageU)       \
+  V(PPC_I16x8BitMask)                \
+  V(PPC_I8x16Splat)                  \
+  V(PPC_I8x16ExtractLaneU)           \
+  V(PPC_I8x16ExtractLaneS)           \
+  V(PPC_I8x16ReplaceLane)            \
+  V(PPC_I8x16Add)                    \
+  V(PPC_I8x16Sub)                    \
+  V(PPC_I8x16Mul)                    \
+  V(PPC_I8x16MinS)                   \
+  V(PPC_I8x16MinU)                   \
+  V(PPC_I8x16MaxS)                   \
+  V(PPC_I8x16MaxU)                   \
+  V(PPC_I8x16Eq)                     \
+  V(PPC_I8x16Ne)                     \
+  V(PPC_I8x16GtS)                    \
+  V(PPC_I8x16GeS)                    \
+  V(PPC_I8x16GtU)                    \
+  V(PPC_I8x16GeU)                    \
+  V(PPC_I8x16Shl)                    \
+  V(PPC_I8x16ShrS)                   \
+  V(PPC_I8x16ShrU)                   \
+  V(PPC_I8x16Neg)                    \
+  V(PPC_I8x16Abs)                    \
+  V(PPC_I8x16SConvertI16x8)          \
+  V(PPC_I8x16UConvertI16x8)          \
+  V(PPC_I8x16AddSatS)                \
+  V(PPC_I8x16SubSatS)                \
+  V(PPC_I8x16AddSatU)                \
+  V(PPC_I8x16SubSatU)                \
+  V(PPC_I8x16RoundingAverageU)       \
+  V(PPC_I8x16Shuffle)                \
+  V(PPC_I8x16Swizzle)                \
+  V(PPC_I8x16BitMask)                \
+  V(PPC_V64x2AnyTrue)                \
+  V(PPC_V32x4AnyTrue)                \
+  V(PPC_V16x8AnyTrue)                \
+  V(PPC_V8x16AnyTrue)                \
+  V(PPC_V64x2AllTrue)                \
+  V(PPC_V32x4AllTrue)                \
+  V(PPC_V16x8AllTrue)                \
+  V(PPC_V8x16AllTrue)                \
+  V(PPC_S128And)                     \
+  V(PPC_S128Or)                      \
+  V(PPC_S128Xor)                     \
+  V(PPC_S128Zero)                    \
+  V(PPC_S128Not)                     \
+  V(PPC_S128Select)                  \
+  V(PPC_S128AndNot)                  \
+  V(PPC_StoreCompressTagged)         \
+  V(PPC_LoadDecompressTaggedSigned)  \
+  V(PPC_LoadDecompressTaggedPointer) \
+  V(PPC_LoadDecompressAnyTagged)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// R = register
+// O = register or stack slot
+// D = double register
+// I = immediate (handle, external, int32)
+// MRI = [register + immediate]
+// MRR = [register + register]
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MRI) /* [%r0 + K] */               \
+  V(MRR) /* [%r0 + %r1] */
+
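+// As an illustrative sketch of how these modes are consumed (using the
+// AddressingModeField helper from the shared instruction-codes machinery),
+// the instruction selector folds a mode into the opcode and the code
+// generator decodes it after register allocation:
+//
+//   InstructionCode code =
+//       kPPC_LoadWord64 | AddressingModeField::encode(kMode_MRI);
+//   // kMode_MRI selects the [register + K] assembler form.
+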
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_PPC_INSTRUCTION_CODES_PPC_H_
diff --git a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
new file mode 100644
index 0000000..87ea3f3
--- /dev/null
+++ b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
@@ -0,0 +1,406 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kPPC_And:
+    case kPPC_AndComplement:
+    case kPPC_Or:
+    case kPPC_OrComplement:
+    case kPPC_Xor:
+    case kPPC_ShiftLeft32:
+    case kPPC_ShiftLeft64:
+    case kPPC_ShiftLeftPair:
+    case kPPC_ShiftRight32:
+    case kPPC_ShiftRight64:
+    case kPPC_ShiftRightPair:
+    case kPPC_ShiftRightAlg32:
+    case kPPC_ShiftRightAlg64:
+    case kPPC_ShiftRightAlgPair:
+    case kPPC_RotRight32:
+    case kPPC_RotRight64:
+    case kPPC_Not:
+    case kPPC_RotLeftAndMask32:
+    case kPPC_RotLeftAndClear64:
+    case kPPC_RotLeftAndClearLeft64:
+    case kPPC_RotLeftAndClearRight64:
+    case kPPC_Add32:
+    case kPPC_Add64:
+    case kPPC_AddWithOverflow32:
+    case kPPC_AddPair:
+    case kPPC_AddDouble:
+    case kPPC_Sub:
+    case kPPC_SubWithOverflow32:
+    case kPPC_SubPair:
+    case kPPC_SubDouble:
+    case kPPC_Mul32:
+    case kPPC_Mul32WithHigh32:
+    case kPPC_Mul64:
+    case kPPC_MulHigh32:
+    case kPPC_MulHighU32:
+    case kPPC_MulPair:
+    case kPPC_MulDouble:
+    case kPPC_Div32:
+    case kPPC_Div64:
+    case kPPC_DivU32:
+    case kPPC_DivU64:
+    case kPPC_DivDouble:
+    case kPPC_Mod32:
+    case kPPC_Mod64:
+    case kPPC_ModU32:
+    case kPPC_ModU64:
+    case kPPC_ModDouble:
+    case kPPC_Neg:
+    case kPPC_NegDouble:
+    case kPPC_SqrtDouble:
+    case kPPC_FloorDouble:
+    case kPPC_CeilDouble:
+    case kPPC_TruncateDouble:
+    case kPPC_RoundDouble:
+    case kPPC_MaxDouble:
+    case kPPC_MinDouble:
+    case kPPC_AbsDouble:
+    case kPPC_Cntlz32:
+    case kPPC_Cntlz64:
+    case kPPC_Popcnt32:
+    case kPPC_Popcnt64:
+    case kPPC_Cmp32:
+    case kPPC_Cmp64:
+    case kPPC_CmpDouble:
+    case kPPC_Tst32:
+    case kPPC_Tst64:
+    case kPPC_ExtendSignWord8:
+    case kPPC_ExtendSignWord16:
+    case kPPC_ExtendSignWord32:
+    case kPPC_Uint32ToUint64:
+    case kPPC_Int64ToInt32:
+    case kPPC_Int64ToFloat32:
+    case kPPC_Int64ToDouble:
+    case kPPC_Uint64ToFloat32:
+    case kPPC_Uint64ToDouble:
+    case kPPC_Int32ToFloat32:
+    case kPPC_Int32ToDouble:
+    case kPPC_Uint32ToFloat32:
+    case kPPC_Uint32ToDouble:
+    case kPPC_Float32ToInt32:
+    case kPPC_Float32ToUint32:
+    case kPPC_Float32ToDouble:
+    case kPPC_Float64SilenceNaN:
+    case kPPC_DoubleToInt32:
+    case kPPC_DoubleToUint32:
+    case kPPC_DoubleToInt64:
+    case kPPC_DoubleToUint64:
+    case kPPC_DoubleToFloat32:
+    case kPPC_DoubleExtractLowWord32:
+    case kPPC_DoubleExtractHighWord32:
+    case kPPC_DoubleInsertLowWord32:
+    case kPPC_DoubleInsertHighWord32:
+    case kPPC_DoubleConstruct:
+    case kPPC_BitcastInt32ToFloat32:
+    case kPPC_BitcastFloat32ToInt32:
+    case kPPC_BitcastInt64ToDouble:
+    case kPPC_BitcastDoubleToInt64:
+    case kPPC_ByteRev32:
+    case kPPC_ByteRev64:
+    case kPPC_CompressSigned:
+    case kPPC_CompressPointer:
+    case kPPC_CompressAny:
+    case kPPC_F64x2Splat:
+    case kPPC_F64x2ExtractLane:
+    case kPPC_F64x2ReplaceLane:
+    case kPPC_F64x2Add:
+    case kPPC_F64x2Sub:
+    case kPPC_F64x2Mul:
+    case kPPC_F64x2Eq:
+    case kPPC_F64x2Ne:
+    case kPPC_F64x2Le:
+    case kPPC_F64x2Lt:
+    case kPPC_F64x2Abs:
+    case kPPC_F64x2Neg:
+    case kPPC_F64x2Sqrt:
+    case kPPC_F64x2Qfma:
+    case kPPC_F64x2Qfms:
+    case kPPC_F64x2Div:
+    case kPPC_F64x2Min:
+    case kPPC_F64x2Max:
+    case kPPC_F64x2Ceil:
+    case kPPC_F64x2Floor:
+    case kPPC_F64x2Trunc:
+    case kPPC_F64x2NearestInt:
+    case kPPC_F32x4Splat:
+    case kPPC_F32x4ExtractLane:
+    case kPPC_F32x4ReplaceLane:
+    case kPPC_F32x4Add:
+    case kPPC_F32x4AddHoriz:
+    case kPPC_F32x4Sub:
+    case kPPC_F32x4Mul:
+    case kPPC_F32x4Eq:
+    case kPPC_F32x4Ne:
+    case kPPC_F32x4Lt:
+    case kPPC_F32x4Le:
+    case kPPC_F32x4Abs:
+    case kPPC_F32x4Neg:
+    case kPPC_F32x4RecipApprox:
+    case kPPC_F32x4RecipSqrtApprox:
+    case kPPC_F32x4Sqrt:
+    case kPPC_F32x4SConvertI32x4:
+    case kPPC_F32x4UConvertI32x4:
+    case kPPC_F32x4Qfma:
+    case kPPC_F32x4Qfms:
+    case kPPC_F32x4Div:
+    case kPPC_F32x4Min:
+    case kPPC_F32x4Max:
+    case kPPC_F32x4Ceil:
+    case kPPC_F32x4Floor:
+    case kPPC_F32x4Trunc:
+    case kPPC_F32x4NearestInt:
+    case kPPC_I64x2Splat:
+    case kPPC_I64x2ExtractLane:
+    case kPPC_I64x2ReplaceLane:
+    case kPPC_I64x2Add:
+    case kPPC_I64x2Sub:
+    case kPPC_I64x2Mul:
+    case kPPC_I64x2MinS:
+    case kPPC_I64x2MinU:
+    case kPPC_I64x2MaxS:
+    case kPPC_I64x2MaxU:
+    case kPPC_I64x2Eq:
+    case kPPC_I64x2Ne:
+    case kPPC_I64x2GtS:
+    case kPPC_I64x2GtU:
+    case kPPC_I64x2GeU:
+    case kPPC_I64x2GeS:
+    case kPPC_I64x2Shl:
+    case kPPC_I64x2ShrS:
+    case kPPC_I64x2ShrU:
+    case kPPC_I64x2Neg:
+    case kPPC_I32x4Splat:
+    case kPPC_I32x4ExtractLane:
+    case kPPC_I32x4ReplaceLane:
+    case kPPC_I32x4Add:
+    case kPPC_I32x4AddHoriz:
+    case kPPC_I32x4Sub:
+    case kPPC_I32x4Mul:
+    case kPPC_I32x4MinS:
+    case kPPC_I32x4MinU:
+    case kPPC_I32x4MaxS:
+    case kPPC_I32x4MaxU:
+    case kPPC_I32x4Eq:
+    case kPPC_I32x4Ne:
+    case kPPC_I32x4GtS:
+    case kPPC_I32x4GeS:
+    case kPPC_I32x4GtU:
+    case kPPC_I32x4GeU:
+    case kPPC_I32x4Shl:
+    case kPPC_I32x4ShrS:
+    case kPPC_I32x4ShrU:
+    case kPPC_I32x4Neg:
+    case kPPC_I32x4Abs:
+    case kPPC_I32x4SConvertF32x4:
+    case kPPC_I32x4UConvertF32x4:
+    case kPPC_I32x4SConvertI16x8Low:
+    case kPPC_I32x4SConvertI16x8High:
+    case kPPC_I32x4UConvertI16x8Low:
+    case kPPC_I32x4UConvertI16x8High:
+    case kPPC_I32x4BitMask:
+    case kPPC_I32x4DotI16x8S:
+    case kPPC_I16x8Splat:
+    case kPPC_I16x8ExtractLaneU:
+    case kPPC_I16x8ExtractLaneS:
+    case kPPC_I16x8ReplaceLane:
+    case kPPC_I16x8Add:
+    case kPPC_I16x8AddHoriz:
+    case kPPC_I16x8Sub:
+    case kPPC_I16x8Mul:
+    case kPPC_I16x8MinS:
+    case kPPC_I16x8MinU:
+    case kPPC_I16x8MaxS:
+    case kPPC_I16x8MaxU:
+    case kPPC_I16x8Eq:
+    case kPPC_I16x8Ne:
+    case kPPC_I16x8GtS:
+    case kPPC_I16x8GeS:
+    case kPPC_I16x8GtU:
+    case kPPC_I16x8GeU:
+    case kPPC_I16x8Shl:
+    case kPPC_I16x8ShrS:
+    case kPPC_I16x8ShrU:
+    case kPPC_I16x8Neg:
+    case kPPC_I16x8Abs:
+    case kPPC_I16x8SConvertI32x4:
+    case kPPC_I16x8UConvertI32x4:
+    case kPPC_I16x8SConvertI8x16Low:
+    case kPPC_I16x8SConvertI8x16High:
+    case kPPC_I16x8UConvertI8x16Low:
+    case kPPC_I16x8UConvertI8x16High:
+    case kPPC_I16x8AddSatS:
+    case kPPC_I16x8SubSatS:
+    case kPPC_I16x8AddSatU:
+    case kPPC_I16x8SubSatU:
+    case kPPC_I16x8RoundingAverageU:
+    case kPPC_I16x8BitMask:
+    case kPPC_I8x16Splat:
+    case kPPC_I8x16ExtractLaneU:
+    case kPPC_I8x16ExtractLaneS:
+    case kPPC_I8x16ReplaceLane:
+    case kPPC_I8x16Add:
+    case kPPC_I8x16Sub:
+    case kPPC_I8x16Mul:
+    case kPPC_I8x16MinS:
+    case kPPC_I8x16MinU:
+    case kPPC_I8x16MaxS:
+    case kPPC_I8x16MaxU:
+    case kPPC_I8x16Eq:
+    case kPPC_I8x16Ne:
+    case kPPC_I8x16GtS:
+    case kPPC_I8x16GeS:
+    case kPPC_I8x16GtU:
+    case kPPC_I8x16GeU:
+    case kPPC_I8x16Shl:
+    case kPPC_I8x16ShrS:
+    case kPPC_I8x16ShrU:
+    case kPPC_I8x16Neg:
+    case kPPC_I8x16Abs:
+    case kPPC_I8x16SConvertI16x8:
+    case kPPC_I8x16UConvertI16x8:
+    case kPPC_I8x16AddSatS:
+    case kPPC_I8x16SubSatS:
+    case kPPC_I8x16AddSatU:
+    case kPPC_I8x16SubSatU:
+    case kPPC_I8x16RoundingAverageU:
+    case kPPC_I8x16Shuffle:
+    case kPPC_I8x16Swizzle:
+    case kPPC_I8x16BitMask:
+    case kPPC_V64x2AnyTrue:
+    case kPPC_V32x4AnyTrue:
+    case kPPC_V16x8AnyTrue:
+    case kPPC_V8x16AnyTrue:
+    case kPPC_V64x2AllTrue:
+    case kPPC_V32x4AllTrue:
+    case kPPC_V16x8AllTrue:
+    case kPPC_V8x16AllTrue:
+    case kPPC_S128And:
+    case kPPC_S128Or:
+    case kPPC_S128Xor:
+    case kPPC_S128Zero:
+    case kPPC_S128Not:
+    case kPPC_S128Select:
+    case kPPC_S128AndNot:
+      return kNoOpcodeFlags;
+
+    case kPPC_LoadWordS8:
+    case kPPC_LoadWordU8:
+    case kPPC_LoadWordS16:
+    case kPPC_LoadWordU16:
+    case kPPC_LoadWordS32:
+    case kPPC_LoadWordU32:
+    case kPPC_LoadWord64:
+    case kPPC_LoadFloat32:
+    case kPPC_LoadDouble:
+    case kPPC_LoadSimd128:
+    case kPPC_AtomicLoadUint8:
+    case kPPC_AtomicLoadUint16:
+    case kPPC_AtomicLoadWord32:
+    case kPPC_AtomicLoadWord64:
+    case kPPC_Peek:
+    case kPPC_LoadDecompressTaggedSigned:
+    case kPPC_LoadDecompressTaggedPointer:
+    case kPPC_LoadDecompressAnyTagged:
+      return kIsLoadOperation;
+
+    case kPPC_StoreWord8:
+    case kPPC_StoreWord16:
+    case kPPC_StoreWord32:
+    case kPPC_StoreWord64:
+    case kPPC_StoreFloat32:
+    case kPPC_StoreDouble:
+    case kPPC_StoreSimd128:
+    case kPPC_StoreCompressTagged:
+    case kPPC_Push:
+    case kPPC_PushFrame:
+    case kPPC_StoreToStackSlot:
+    case kPPC_Sync:
+      return kHasSideEffect;
+
+    case kPPC_AtomicStoreUint8:
+    case kPPC_AtomicStoreUint16:
+    case kPPC_AtomicStoreWord32:
+    case kPPC_AtomicStoreWord64:
+    case kPPC_AtomicExchangeUint8:
+    case kPPC_AtomicExchangeUint16:
+    case kPPC_AtomicExchangeWord32:
+    case kPPC_AtomicExchangeWord64:
+    case kPPC_AtomicCompareExchangeUint8:
+    case kPPC_AtomicCompareExchangeUint16:
+    case kPPC_AtomicCompareExchangeWord32:
+    case kPPC_AtomicCompareExchangeWord64:
+    case kPPC_AtomicAddUint8:
+    case kPPC_AtomicAddUint16:
+    case kPPC_AtomicAddUint32:
+    case kPPC_AtomicAddUint64:
+    case kPPC_AtomicAddInt8:
+    case kPPC_AtomicAddInt16:
+    case kPPC_AtomicAddInt32:
+    case kPPC_AtomicAddInt64:
+    case kPPC_AtomicSubUint8:
+    case kPPC_AtomicSubUint16:
+    case kPPC_AtomicSubUint32:
+    case kPPC_AtomicSubUint64:
+    case kPPC_AtomicSubInt8:
+    case kPPC_AtomicSubInt16:
+    case kPPC_AtomicSubInt32:
+    case kPPC_AtomicSubInt64:
+    case kPPC_AtomicAndUint8:
+    case kPPC_AtomicAndUint16:
+    case kPPC_AtomicAndUint32:
+    case kPPC_AtomicAndUint64:
+    case kPPC_AtomicAndInt8:
+    case kPPC_AtomicAndInt16:
+    case kPPC_AtomicAndInt32:
+    case kPPC_AtomicAndInt64:
+    case kPPC_AtomicOrUint8:
+    case kPPC_AtomicOrUint16:
+    case kPPC_AtomicOrUint32:
+    case kPPC_AtomicOrUint64:
+    case kPPC_AtomicOrInt8:
+    case kPPC_AtomicOrInt16:
+    case kPPC_AtomicOrInt32:
+    case kPPC_AtomicOrInt64:
+    case kPPC_AtomicXorUint8:
+    case kPPC_AtomicXorUint16:
+    case kPPC_AtomicXorUint32:
+    case kPPC_AtomicXorUint64:
+    case kPPC_AtomicXorInt8:
+    case kPPC_AtomicXorInt16:
+    case kPPC_AtomicXorInt32:
+    case kPPC_AtomicXorInt64:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
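+
+// Loosely speaking, the shared scheduler keys on these flags: instructions
+// flagged kIsLoadOperation may be reordered relative to each other but not
+// across an instruction flagged kHasSideEffect, so e.g. a kPPC_LoadWord64
+// will not be moved above a preceding kPPC_StoreWord64.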
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // TODO(all): Add instruction cost modeling.
+  return 1;
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ppc/instruction-selector-ppc.cc b/src/compiler/backend/ppc/instruction-selector-ppc.cc
new file mode 100644
index 0000000..9c66d6f
--- /dev/null
+++ b/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -0,0 +1,2492 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/base/iterator.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/execution/ppc/frame-constants-ppc.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+enum ImmediateMode {
+  kInt16Imm,
+  kInt16Imm_Unsigned,
+  kInt16Imm_Negate,
+  kInt16Imm_4ByteAligned,
+  kShift32Imm,
+  kShift64Imm,
+  kNoImmediate
+};
+
+// Adds PPC-specific methods for generating operands.
+class PPCOperandGenerator final : public OperandGenerator {
+ public:
+  explicit PPCOperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseOperand(Node* node, ImmediateMode mode) {
+    if (CanBeImmediate(node, mode)) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  bool CanBeImmediate(Node* node, ImmediateMode mode) {
+    int64_t value;
+    if (node->opcode() == IrOpcode::kInt32Constant)
+      value = OpParameter<int32_t>(node->op());
+    else if (node->opcode() == IrOpcode::kInt64Constant)
+      value = OpParameter<int64_t>(node->op());
+    else
+      return false;
+    return CanBeImmediate(value, mode);
+  }
+
+  bool CanBeImmediate(int64_t value, ImmediateMode mode) {
+    switch (mode) {
+      case kInt16Imm:
+        return is_int16(value);
+      case kInt16Imm_Unsigned:
+        return is_uint16(value);
+      case kInt16Imm_Negate:
+        return is_int16(-value);
+      case kInt16Imm_4ByteAligned:
+        return is_int16(value) && !(value & 3);
+      case kShift32Imm:
+        return 0 <= value && value < 32;
+      case kShift64Imm:
+        return 0 <= value && value < 64;
+      case kNoImmediate:
+        return false;
+    }
+    return false;
+  }
+};
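+
+// A few illustrative values for the immediate modes above (chosen for
+// exposition only; nothing in this file depends on them):
+//
+//   PPCOperandGenerator g(selector);
+//   g.CanBeImmediate(int64_t{8}, kInt16Imm_4ByteAligned);   // true
+//   g.CanBeImmediate(int64_t{6}, kInt16Imm_4ByteAligned);   // false: not 4-byte aligned
+//   g.CanBeImmediate(int64_t{40000}, kInt16Imm);            // false: exceeds int16 range
+//   g.CanBeImmediate(int64_t{40000}, kInt16Imm_Unsigned);   // true: fits in uint16
+//   g.CanBeImmediate(int64_t{31}, kShift32Imm);             // true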
+
+namespace {
+
+void VisitRR(InstructionSelector* selector, InstructionCode opcode,
+             Node* node) {
+  PPCOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRRR(InstructionSelector* selector, InstructionCode opcode,
+              Node* node) {
+  PPCOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseRegister(node->InputAt(1)));
+}
+
+void VisitRRO(InstructionSelector* selector, InstructionCode opcode, Node* node,
+              ImmediateMode operand_mode) {
+  PPCOperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)),
+                 g.UseOperand(node->InputAt(1), operand_mode));
+}
+
+#if V8_TARGET_ARCH_PPC64
+void VisitTryTruncateDouble(InstructionSelector* selector,
+                            InstructionCode opcode, Node* node) {
+  PPCOperandGenerator g(selector);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  selector->Emit(opcode, output_count, outputs, 1, inputs);
+}
+#endif
+
+// Shared routine for multiple binary operations.
+template <typename Matcher>
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, ImmediateMode operand_mode,
+                FlagsContinuation* cont) {
+  PPCOperandGenerator g(selector);
+  Matcher m(node);
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+
+  inputs[input_count++] = g.UseRegister(m.left().node());
+  inputs[input_count++] = g.UseOperand(m.right().node(), operand_mode);
+
+  if (cont->IsDeoptimize()) {
+    // If we can deoptimize as a result of the binop, we need to make sure that
+    // the deopt inputs are not overwritten by the binop result. One way
+    // to achieve that is to declare the output register as same-as-first.
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+  } else {
+    outputs[output_count++] = g.DefineAsRegister(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_NE(0u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+// Shared routine for multiple binary operations.
+template <typename Matcher>
+void VisitBinop(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, ImmediateMode operand_mode) {
+  FlagsContinuation cont;
+  VisitBinop<Matcher>(selector, node, opcode, operand_mode, &cont);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), r4));
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  PPCOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* offset = node->InputAt(1);
+  InstructionCode opcode = kArchNop;
+  ImmediateMode mode = kInt16Imm;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kPPC_LoadFloat32;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kPPC_LoadDouble;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsSigned() ? kPPC_LoadWordS8 : kPPC_LoadWordU8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kPPC_LoadWordS16 : kPPC_LoadWordU16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kPPC_LoadWordU32;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+      opcode = kPPC_LoadWordS32;
+      mode = kInt16Imm_4ByteAligned;
+      break;
+#else
+      UNREACHABLE();
+#endif
+#ifdef V8_COMPRESS_POINTERS
+    case MachineRepresentation::kTaggedSigned:
+      opcode = kPPC_LoadDecompressTaggedSigned;
+      break;
+    case MachineRepresentation::kTaggedPointer:
+      opcode = kPPC_LoadDecompressTaggedPointer;
+      break;
+    case MachineRepresentation::kTagged:
+      opcode = kPPC_LoadDecompressAnyTagged;
+      break;
+#else
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+#endif
+    case MachineRepresentation::kWord64:
+      opcode = kPPC_LoadWord64;
+      mode = kInt16Imm_4ByteAligned;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kPPC_LoadSimd128;
+      // Vectors do not support MRI mode, only MRR is available.
+      mode = kNoImmediate;
+      break;
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+
+  if (node->opcode() == IrOpcode::kPoisonedLoad &&
+      poisoning_level_ != PoisoningMitigationLevel::kDontPoison) {
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+
+  bool is_atomic = (node->opcode() == IrOpcode::kWord32AtomicLoad ||
+                    node->opcode() == IrOpcode::kWord64AtomicLoad);
+
+  if (g.CanBeImmediate(offset, mode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseImmediate(offset),
+         g.UseImmediate(is_atomic));
+  } else if (g.CanBeImmediate(base, mode)) {
+    Emit(opcode | AddressingModeField::encode(kMode_MRI),
+         g.DefineAsRegister(node), g.UseRegister(offset), g.UseImmediate(base),
+         g.UseImmediate(is_atomic));
+  } else {
+    Emit(opcode | AddressingModeField::encode(kMode_MRR),
+         g.DefineAsRegister(node), g.UseRegister(base), g.UseRegister(offset),
+         g.UseImmediate(is_atomic));
+  }
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  PPCOperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* offset = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  bool is_atomic = (node->opcode() == IrOpcode::kWord32AtomicStore ||
+                    node->opcode() == IrOpcode::kWord64AtomicStore);
+
+  MachineRepresentation rep;
+  WriteBarrierKind write_barrier_kind = kNoWriteBarrier;
+
+  if (is_atomic) {
+    rep = AtomicStoreRepresentationOf(node->op());
+  } else {
+    StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+    write_barrier_kind = store_rep.write_barrier_kind();
+    rep = store_rep.representation();
+  }
+
+  if (FLAG_enable_unconditional_write_barriers &&
+      CanBeTaggedOrCompressedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedOrCompressedPointer(rep));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    // OutOfLineRecordWrite uses the offset in an 'add' instruction as well as
+    // for the store itself, so we must check compatibility with both.
+    if (g.CanBeImmediate(offset, kInt16Imm)
+#if V8_TARGET_ARCH_PPC64
+        && g.CanBeImmediate(offset, kInt16Imm_4ByteAligned)
+#endif
+            ) {
+      inputs[input_count++] = g.UseImmediate(offset);
+      addressing_mode = kMode_MRI;
+    } else {
+      inputs[input_count++] = g.UseUniqueRegister(offset);
+      addressing_mode = kMode_MRR;
+    }
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    CHECK_EQ(is_atomic, false);
+    Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
+  } else {
+    ArchOpcode opcode;
+    ImmediateMode mode = kInt16Imm;
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kPPC_StoreFloat32;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kPPC_StoreDouble;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kPPC_StoreWord8;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kPPC_StoreWord16;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kPPC_StoreWord32;
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+        opcode = kPPC_StoreCompressTagged;
+        break;
+#else
+        UNREACHABLE();
+        break;
+#endif
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:
+        mode = kInt16Imm_4ByteAligned;
+        opcode = kPPC_StoreCompressTagged;
+        break;
+      case MachineRepresentation::kWord64:
+        opcode = kPPC_StoreWord64;
+        mode = kInt16Imm_4ByteAligned;
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kPPC_StoreSimd128;
+        // Vectors do not support MRI mode, only MRR is available.
+        mode = kNoImmediate;
+        break;
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+
+    if (g.CanBeImmediate(offset, mode)) {
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           g.UseRegister(base), g.UseImmediate(offset), g.UseRegister(value),
+           g.UseImmediate(is_atomic));
+    } else if (g.CanBeImmediate(base, mode)) {
+      Emit(opcode | AddressingModeField::encode(kMode_MRI), g.NoOutput(),
+           g.UseRegister(offset), g.UseImmediate(base), g.UseRegister(value),
+           g.UseImmediate(is_atomic));
+    } else {
+      Emit(opcode | AddressingModeField::encode(kMode_MRR), g.NoOutput(),
+           g.UseRegister(base), g.UseRegister(offset), g.UseRegister(value),
+           g.UseImmediate(is_atomic));
+    }
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+// The architecture supports unaligned access, so VisitLoad is used instead.
+void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
+
+// The architecture supports unaligned access, so VisitStore is used instead.
+void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
+
+template <typename Matcher>
+static void VisitLogical(InstructionSelector* selector, Node* node, Matcher* m,
+                         ArchOpcode opcode, bool left_can_cover,
+                         bool right_can_cover, ImmediateMode imm_mode) {
+  PPCOperandGenerator g(selector);
+
+  // Map instruction to equivalent operation with inverted right input.
+  ArchOpcode inv_opcode = opcode;
+  switch (opcode) {
+    case kPPC_And:
+      inv_opcode = kPPC_AndComplement;
+      break;
+    case kPPC_Or:
+      inv_opcode = kPPC_OrComplement;
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  // Select Logical(y, ~x) for Logical(Xor(x, -1), y).
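+  // For example, Word32And(Word32Xor(x, -1), y) is emitted as a single
+  // kPPC_AndComplement of y and x (presumably PPC's andc).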
+  if ((m->left().IsWord32Xor() || m->left().IsWord64Xor()) && left_can_cover) {
+    Matcher mleft(m->left().node());
+    if (mleft.right().Is(-1)) {
+      selector->Emit(inv_opcode, g.DefineAsRegister(node),
+                     g.UseRegister(m->right().node()),
+                     g.UseRegister(mleft.left().node()));
+      return;
+    }
+  }
+
+  // Select Logical(x, ~y) for Logical(x, Xor(y, -1)).
+  if ((m->right().IsWord32Xor() || m->right().IsWord64Xor()) &&
+      right_can_cover) {
+    Matcher mright(m->right().node());
+    if (mright.right().Is(-1)) {
+      // TODO(all): support shifted operand on right.
+      selector->Emit(inv_opcode, g.DefineAsRegister(node),
+                     g.UseRegister(m->left().node()),
+                     g.UseRegister(mright.left().node()));
+      return;
+    }
+  }
+
+  VisitBinop<Matcher>(selector, node, opcode, imm_mode);
+}
+
+static inline bool IsContiguousMask32(uint32_t value, int* mb, int* me) {
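+  // Example: 0x00FF0000 (bits 16..23 set) is contiguous and yields *mb = 23,
+  // *me = 16; 0x00FF00FF has a gap in its set bits and returns false.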
+  int mask_width = base::bits::CountPopulation(value);
+  int mask_msb = base::bits::CountLeadingZeros32(value);
+  int mask_lsb = base::bits::CountTrailingZeros32(value);
+  if ((mask_width == 0) || (mask_msb + mask_width + mask_lsb != 32))
+    return false;
+  *mb = mask_lsb + mask_width - 1;
+  *me = mask_lsb;
+  return true;
+}
+
+#if V8_TARGET_ARCH_PPC64
+static inline bool IsContiguousMask64(uint64_t value, int* mb, int* me) {
+  int mask_width = base::bits::CountPopulation(value);
+  int mask_msb = base::bits::CountLeadingZeros64(value);
+  int mask_lsb = base::bits::CountTrailingZeros64(value);
+  if ((mask_width == 0) || (mask_msb + mask_width + mask_lsb != 64))
+    return false;
+  *mb = mask_lsb + mask_width - 1;
+  *me = mask_lsb;
+  return true;
+}
+#endif
+
+// TODO(mbrandy): Absorb rotate-right into rlwinm?
+void InstructionSelector::VisitWord32And(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  int mb = 0;
+  int me = 0;
+  if (m.right().HasResolvedValue() &&
+      IsContiguousMask32(m.right().ResolvedValue(), &mb, &me)) {
+    int sh = 0;
+    Node* left = m.left().node();
+    if ((m.left().IsWord32Shr() || m.left().IsWord32Shl()) &&
+        CanCover(node, left)) {
+      // Try to absorb left/right shift into rlwinm
+      Int32BinopMatcher mleft(m.left().node());
+      if (mleft.right().IsInRange(0, 31)) {
+        left = mleft.left().node();
+        sh = mleft.right().ResolvedValue();
+        if (m.left().IsWord32Shr()) {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (mb > 31 - sh) mb = 31 - sh;
+          sh = (32 - sh) & 0x1F;
+        } else {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (me < sh) me = sh;
+        }
+      }
+    }
+    if (mb >= me) {
+      Emit(kPPC_RotLeftAndMask32, g.DefineAsRegister(node), g.UseRegister(left),
+           g.TempImmediate(sh), g.TempImmediate(mb), g.TempImmediate(me));
+      return;
+    }
+  }
+  VisitLogical<Int32BinopMatcher>(
+      this, node, &m, kPPC_And, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kInt16Imm_Unsigned);
+}
+
+#if V8_TARGET_ARCH_PPC64
+// TODO(mbrandy): Absorb rotate-right into rldic?
+void InstructionSelector::VisitWord64And(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  int mb = 0;
+  int me = 0;
+  if (m.right().HasResolvedValue() &&
+      IsContiguousMask64(m.right().ResolvedValue(), &mb, &me)) {
+    int sh = 0;
+    Node* left = m.left().node();
+    if ((m.left().IsWord64Shr() || m.left().IsWord64Shl()) &&
+        CanCover(node, left)) {
+      // Try to absorb left/right shift into rldic
+      Int64BinopMatcher mleft(m.left().node());
+      if (mleft.right().IsInRange(0, 63)) {
+        left = mleft.left().node();
+        sh = mleft.right().ResolvedValue();
+        if (m.left().IsWord64Shr()) {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (mb > 63 - sh) mb = 63 - sh;
+          sh = (64 - sh) & 0x3F;
+        } else {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (me < sh) me = sh;
+        }
+      }
+    }
+    if (mb >= me) {
+      bool match = false;
+      ArchOpcode opcode;
+      int mask;
+      if (me == 0) {
+        match = true;
+        opcode = kPPC_RotLeftAndClearLeft64;
+        mask = mb;
+      } else if (mb == 63) {
+        match = true;
+        opcode = kPPC_RotLeftAndClearRight64;
+        mask = me;
+      } else if (sh && me <= sh && m.left().IsWord64Shl()) {
+        match = true;
+        opcode = kPPC_RotLeftAndClear64;
+        mask = mb;
+      }
+      if (match) {
+        Emit(opcode, g.DefineAsRegister(node), g.UseRegister(left),
+             g.TempImmediate(sh), g.TempImmediate(mask));
+        return;
+      }
+    }
+  }
+  VisitLogical<Int64BinopMatcher>(
+      this, node, &m, kPPC_And, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kInt16Imm_Unsigned);
+}
+#endif
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  Int32BinopMatcher m(node);
+  VisitLogical<Int32BinopMatcher>(
+      this, node, &m, kPPC_Or, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kInt16Imm_Unsigned);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Or(Node* node) {
+  Int64BinopMatcher m(node);
+  VisitLogical<Int64BinopMatcher>(
+      this, node, &m, kPPC_Or, CanCover(node, m.left().node()),
+      CanCover(node, m.right().node()), kInt16Imm_Unsigned);
+}
+#endif
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    Emit(kPPC_Not, g.DefineAsRegister(node), g.UseRegister(m.left().node()));
+  } else {
+    VisitBinop<Int32BinopMatcher>(this, node, kPPC_Xor, kInt16Imm_Unsigned);
+  }
+}
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  PPCOperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Xor(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    Emit(kPPC_Not, g.DefineAsRegister(node), g.UseRegister(m.left().node()));
+  } else {
+    VisitBinop<Int64BinopMatcher>(this, node, kPPC_Xor, kInt16Imm_Unsigned);
+  }
+}
+#endif
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && m.right().IsInRange(0, 31)) {
+    // Try to absorb logical-and into rlwinm
+    Int32BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask32(mleft.right().ResolvedValue() << sh, &mb, &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (me < sh) me = sh;
+      if (mb >= me) {
+        Emit(kPPC_RotLeftAndMask32, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+             g.TempImmediate(mb), g.TempImmediate(me));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kPPC_ShiftLeft32, node, kShift32Imm);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Shl(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  // TODO(mbrandy): eliminate left sign extension if right >= 32
+  if (m.left().IsWord64And() && m.right().IsInRange(0, 63)) {
+    // Try to absorb logical-and into rldic
+    Int64BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask64(mleft.right().ResolvedValue() << sh, &mb, &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (me < sh) me = sh;
+      if (mb >= me) {
+        bool match = false;
+        ArchOpcode opcode;
+        int mask;
+        if (me == 0) {
+          match = true;
+          opcode = kPPC_RotLeftAndClearLeft64;
+          mask = mb;
+        } else if (mb == 63) {
+          match = true;
+          opcode = kPPC_RotLeftAndClearRight64;
+          mask = me;
+        } else if (sh && me <= sh) {
+          match = true;
+          opcode = kPPC_RotLeftAndClear64;
+          mask = mb;
+        }
+        if (match) {
+          Emit(opcode, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+               g.TempImmediate(mask));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kPPC_ShiftLeft64, node, kShift64Imm);
+}
+#endif
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().IsWord32And() && m.right().IsInRange(0, 31)) {
+    // Try to absorb logical-and into rlwinm
+    Int32BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask32((uint32_t)(mleft.right().ResolvedValue()) >> sh, &mb,
+                           &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (mb > 31 - sh) mb = 31 - sh;
+      sh = (32 - sh) & 0x1F;
+      if (mb >= me) {
+        Emit(kPPC_RotLeftAndMask32, g.DefineAsRegister(node),
+             g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+             g.TempImmediate(mb), g.TempImmediate(me));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kPPC_ShiftRight32, node, kShift32Imm);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Shr(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64And() && m.right().IsInRange(0, 63)) {
+    // Try to absorb logical-and into rldic
+    Int64BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask64((uint64_t)(mleft.right().ResolvedValue()) >> sh, &mb,
+                           &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (mb > 63 - sh) mb = 63 - sh;
+      sh = (64 - sh) & 0x3F;
+      if (mb >= me) {
+        bool match = false;
+        ArchOpcode opcode;
+        int mask;
+        if (me == 0) {
+          match = true;
+          opcode = kPPC_RotLeftAndClearLeft64;
+          mask = mb;
+        } else if (mb == 63) {
+          match = true;
+          opcode = kPPC_RotLeftAndClearRight64;
+          mask = me;
+        }
+        if (match) {
+          Emit(opcode, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+               g.TempImmediate(mask));
+          return;
+        }
+      }
+    }
+  }
+  VisitRRO(this, kPPC_ShiftRight64, node, kShift64Imm);
+}
+#endif
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  // Replace with sign extension for (x << K) >> K where K is 16 or 24.
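+  // E.g. (x << 16) >> 16 with an arithmetic shift keeps the low 16 bits of x
+  // and sign-extends them, which is exactly kPPC_ExtendSignWord16 below.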
+  if (CanCover(node, m.left().node()) && m.left().IsWord32Shl()) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().Is(16) && m.right().Is(16)) {
+      Emit(kPPC_ExtendSignWord16, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()));
+      return;
+    } else if (mleft.right().Is(24) && m.right().Is(24)) {
+      Emit(kPPC_ExtendSignWord8, g.DefineAsRegister(node),
+           g.UseRegister(mleft.left().node()));
+      return;
+    }
+  }
+  VisitRRO(this, kPPC_ShiftRightAlg32, node, kShift32Imm);
+}
+
+#if !V8_TARGET_ARCH_PPC64
+void VisitPairBinop(InstructionSelector* selector, InstructionCode opcode,
+                    InstructionCode opcode2, Node* node) {
+  PPCOperandGenerator g(selector);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the output
+    // registers.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    selector->Emit(opcode, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32 bit
+    // instruction.
+    selector->Emit(opcode2, g.DefineSameAsFirst(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(2)));
+  }
+}
+
+void InstructionSelector::VisitInt32PairAdd(Node* node) {
+  VisitPairBinop(this, kPPC_AddPair, kPPC_Add32, node);
+}
+
+void InstructionSelector::VisitInt32PairSub(Node* node) {
+  VisitPairBinop(this, kPPC_SubPair, kPPC_Sub, node);
+}
+
+void InstructionSelector::VisitInt32PairMul(Node* node) {
+  PPCOperandGenerator g(this);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                   g.UseUniqueRegister(node->InputAt(1)),
+                                   g.UseUniqueRegister(node->InputAt(2)),
+                                   g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+
+    Emit(kPPC_MulPair, 2, outputs, 4, inputs, 2, temps);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32 bit
+    // instruction.
+    Emit(kPPC_Mul32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+         g.UseRegister(node->InputAt(2)));
+  }
+}
+
+namespace {
+// Shared routine for multiple shift operations.
+void VisitPairShift(InstructionSelector* selector, InstructionCode opcode,
+                    Node* node) {
+  PPCOperandGenerator g(selector);
+  // We use g.UseUniqueRegister here to guarantee that there is
+  // no register aliasing of input registers with output registers.
+  Int32Matcher m(node->InputAt(2));
+  InstructionOperand shift_operand;
+  if (m.HasResolvedValue()) {
+    shift_operand = g.UseImmediate(m.node());
+  } else {
+    shift_operand = g.UseUniqueRegister(m.node());
+  }
+
+  InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                 g.UseUniqueRegister(node->InputAt(1)),
+                                 shift_operand};
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+
+  InstructionOperand outputs[2];
+  InstructionOperand temps[1];
+  int32_t output_count = 0;
+  int32_t temp_count = 0;
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsRegister(projection1);
+  } else {
+    temps[temp_count++] = g.TempRegister();
+  }
+
+  selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitWord32PairShl(Node* node) {
+  VisitPairShift(this, kPPC_ShiftLeftPair, node);
+}
+
+void InstructionSelector::VisitWord32PairShr(Node* node) {
+  VisitPairShift(this, kPPC_ShiftRightPair, node);
+}
+
+void InstructionSelector::VisitWord32PairSar(Node* node) {
+  VisitPairShift(this, kPPC_ShiftRightAlgPair, node);
+}
+#endif
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Sar(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (CanCover(m.node(), m.left().node()) && m.left().IsLoad() &&
+      m.right().Is(32)) {
+    // Just load and sign-extend the interesting 4 bytes instead. This happens,
+    // for example, when we're loading and untagging SMIs.
+    BaseWithIndexAndDisplacement64Matcher mleft(m.left().node(),
+                                                AddressOption::kAllowAll);
+    if (mleft.matches() && mleft.index() == nullptr) {
+      int64_t offset = 0;
+      Node* displacement = mleft.displacement();
+      if (displacement != nullptr) {
+        Int64Matcher mdisplacement(displacement);
+        DCHECK(mdisplacement.HasResolvedValue());
+        offset = mdisplacement.ResolvedValue();
+      }
+      offset = SmiWordOffset(offset);
+      if (g.CanBeImmediate(offset, kInt16Imm_4ByteAligned)) {
+        Emit(kPPC_LoadWordS32 | AddressingModeField::encode(kMode_MRI),
+             g.DefineAsRegister(node), g.UseRegister(mleft.base()),
+             g.TempImmediate(offset), g.UseImmediate(0));
+        return;
+      }
+    }
+  }
+  VisitRRO(this, kPPC_ShiftRightAlg64, node, kShift64Imm);
+}
+#endif
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Rol(Node* node) { UNREACHABLE(); }
+
+// TODO(mbrandy): Absorb logical-and into rlwinm?
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitRRO(this, kPPC_RotRight32, node, kShift32Imm);
+}
+
+#if V8_TARGET_ARCH_PPC64
+// TODO(mbrandy): Absorb logical-and into rldic?
+void InstructionSelector::VisitWord64Ror(Node* node) {
+  VisitRRO(this, kPPC_RotRight64, node, kShift64Imm);
+}
+#endif
+
+void InstructionSelector::VisitWord32Clz(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_Cntlz32, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Clz(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_Cntlz64, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+#endif
+
+void InstructionSelector::VisitWord32Popcnt(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_Popcnt32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Popcnt(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_Popcnt64, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+#endif
+
+void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
+#endif
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); }
+#endif
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) {
+  PPCOperandGenerator g(this);
+  InstructionOperand temp[] = {g.TempRegister()};
+  Emit(kPPC_ByteRev64, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)), 1, temp);
+}
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_ByteRev32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  // TODO(miladfar): Implement the ppc selector for reversing SIMD bytes.
+  // Check if the input node is a Load and do a Load Reverse at once.
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  VisitBinop<Int32BinopMatcher>(this, node, kPPC_Add32, kInt16Imm);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64Add(Node* node) {
+  VisitBinop<Int64BinopMatcher>(this, node, kPPC_Add64, kInt16Imm);
+}
+#endif
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  PPCOperandGenerator g(this);
+  Int32BinopMatcher m(node);
+  if (m.left().Is(0)) {
+    Emit(kPPC_Neg, g.DefineAsRegister(node), g.UseRegister(m.right().node()));
+  } else {
+    VisitBinop<Int32BinopMatcher>(this, node, kPPC_Sub, kInt16Imm_Negate);
+  }
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64Sub(Node* node) {
+  PPCOperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().Is(0)) {
+    Emit(kPPC_Neg, g.DefineAsRegister(node), g.UseRegister(m.right().node()));
+  } else {
+    VisitBinop<Int64BinopMatcher>(this, node, kPPC_Sub, kInt16Imm_Negate);
+  }
+}
+#endif
+
+namespace {
+
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont);
+void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node,
+                              FlagsContinuation* cont) {
+  PPCOperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  InstructionOperand result_operand = g.DefineAsRegister(node);
+  InstructionOperand high32_operand = g.TempRegister();
+  InstructionOperand temp_operand = g.TempRegister();
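+  // The 64-bit product's high word is compared against the low word's sign
+  // replicated across a word (low >> 31, arithmetic): they match exactly when
+  // the product fits in a signed 32-bit value, so a mismatch signals overflow.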
+  {
+    InstructionOperand outputs[] = {result_operand, high32_operand};
+    InstructionOperand inputs[] = {g.UseRegister(m.left().node()),
+                                   g.UseRegister(m.right().node())};
+    selector->Emit(kPPC_Mul32WithHigh32, 2, outputs, 2, inputs);
+  }
+  {
+    InstructionOperand shift_31 = g.UseImmediate(31);
+    InstructionOperand outputs[] = {temp_operand};
+    InstructionOperand inputs[] = {result_operand, shift_31};
+    selector->Emit(kPPC_ShiftRightAlg32, 1, outputs, 2, inputs);
+  }
+
+  VisitCompare(selector, kPPC_Cmp32, high32_operand, temp_operand, cont);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  VisitRRR(this, kPPC_Mul32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64Mul(Node* node) {
+  VisitRRR(this, kPPC_Mul64, node);
+}
+#endif
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_MulHigh32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_MulHighU32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  VisitRRR(this, kPPC_Div32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64Div(Node* node) {
+  VisitRRR(this, kPPC_Div64, node);
+}
+#endif
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  VisitRRR(this, kPPC_DivU32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitUint64Div(Node* node) {
+  VisitRRR(this, kPPC_DivU64, node);
+}
+#endif
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  VisitRRR(this, kPPC_Mod32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64Mod(Node* node) {
+  VisitRRR(this, kPPC_Mod64, node);
+}
+#endif
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  VisitRRR(this, kPPC_ModU32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitUint64Mod(Node* node) {
+  VisitRRR(this, kPPC_ModU64, node);
+}
+#endif
+
+void InstructionSelector::VisitChangeFloat32ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Float32ToDouble, node);
+}
+
+void InstructionSelector::VisitRoundInt32ToFloat32(Node* node) {
+  VisitRR(this, kPPC_Int32ToFloat32, node);
+}
+
+void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
+  VisitRR(this, kPPC_Uint32ToFloat32, node);
+}
+
+void InstructionSelector::VisitChangeInt32ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Int32ToDouble, node);
+}
+
+void InstructionSelector::VisitChangeUint32ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Uint32ToDouble, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt32(Node* node) {
+  VisitRR(this, kPPC_DoubleToInt32, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint32(Node* node) {
+  VisitRR(this, kPPC_DoubleToUint32, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToUint32(Node* node) {
+  VisitRR(this, kPPC_DoubleToUint32, node);
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt32(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord8, node);
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt32(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord16, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  VisitTryTruncateDouble(this, kPPC_DoubleToInt64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  VisitTryTruncateDouble(this, kPPC_DoubleToInt64, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToInt64(Node* node) {
+  VisitRR(this, kPPC_DoubleToInt64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  VisitTryTruncateDouble(this, kPPC_DoubleToUint64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  VisitTryTruncateDouble(this, kPPC_DoubleToUint64, node);
+}
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  DCHECK(SmiValuesAre31Bits());
+  DCHECK(COMPRESS_POINTERS_BOOL);
+  EmitIdentity(node);
+}
+
+void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord32, node);
+}
+
+void InstructionSelector::VisitSignExtendWord8ToInt64(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord8, node);
+}
+
+void InstructionSelector::VisitSignExtendWord16ToInt64(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord16, node);
+}
+
+void InstructionSelector::VisitSignExtendWord32ToInt64(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_ExtendSignWord32, node);
+}
+
+bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) {
+  UNIMPLEMENTED();
+}
+
+void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_Uint32ToUint64, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToUint64(Node* node) {
+  VisitRR(this, kPPC_DoubleToUint64, node);
+}
+
+void InstructionSelector::VisitChangeFloat64ToInt64(Node* node) {
+  VisitRR(this, kPPC_DoubleToInt64, node);
+}
+#endif
+
+void InstructionSelector::VisitTruncateFloat64ToFloat32(Node* node) {
+  VisitRR(this, kPPC_DoubleToFloat32, node);
+}
+
+void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) {
+  VisitRR(this, kArchTruncateDoubleToI, node);
+}
+
+void InstructionSelector::VisitRoundFloat64ToInt32(Node* node) {
+  VisitRR(this, kPPC_DoubleToInt32, node);
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  PPCOperandGenerator g(this);
+
+  InstructionCode opcode = kPPC_Float32ToInt32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  PPCOperandGenerator g(this);
+
+  InstructionCode opcode = kPPC_Float32ToUint32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
+  // TODO(mbrandy): inspect input to see if nop is appropriate.
+  VisitRR(this, kPPC_Int64ToInt32, node);
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat32(Node* node) {
+  VisitRR(this, kPPC_Int64ToFloat32, node);
+}
+
+void InstructionSelector::VisitRoundInt64ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Int64ToDouble, node);
+}
+
+void InstructionSelector::VisitChangeInt64ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Int64ToDouble, node);
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat32(Node* node) {
+  VisitRR(this, kPPC_Uint64ToFloat32, node);
+}
+
+void InstructionSelector::VisitRoundUint64ToFloat64(Node* node) {
+  VisitRR(this, kPPC_Uint64ToDouble, node);
+}
+#endif
+
+void InstructionSelector::VisitBitcastFloat32ToInt32(Node* node) {
+  VisitRR(this, kPPC_BitcastFloat32ToInt32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitBitcastFloat64ToInt64(Node* node) {
+  VisitRR(this, kPPC_BitcastDoubleToInt64, node);
+}
+#endif
+
+void InstructionSelector::VisitBitcastInt32ToFloat32(Node* node) {
+  VisitRR(this, kPPC_BitcastInt32ToFloat32, node);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitBitcastInt64ToFloat64(Node* node) {
+  VisitRR(this, kPPC_BitcastInt64ToDouble, node);
+}
+#endif
+
+void InstructionSelector::VisitFloat32Add(Node* node) {
+  VisitRRR(this, kPPC_AddDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Add(Node* node) {
+  // TODO(mbrandy): detect multiply-add
+  VisitRRR(this, kPPC_AddDouble, node);
+}
+
+void InstructionSelector::VisitFloat32Sub(Node* node) {
+  VisitRRR(this, kPPC_SubDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Sub(Node* node) {
+  // TODO(mbrandy): detect multiply-subtract
+  VisitRRR(this, kPPC_SubDouble, node);
+}
+
+void InstructionSelector::VisitFloat32Mul(Node* node) {
+  VisitRRR(this, kPPC_MulDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Mul(Node* node) {
+  // TODO(mbrandy): detect negate
+  VisitRRR(this, kPPC_MulDouble, node);
+}
+
+void InstructionSelector::VisitFloat32Div(Node* node) {
+  VisitRRR(this, kPPC_DivDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Div(Node* node) {
+  VisitRRR(this, kPPC_DivDouble, node);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_ModDouble, g.DefineAsFixed(node, d1),
+       g.UseFixed(node->InputAt(0), d1), g.UseFixed(node->InputAt(1), d2))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat32Max(Node* node) {
+  VisitRRR(this, kPPC_MaxDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Max(Node* node) {
+  VisitRRR(this, kPPC_MaxDouble, node);
+}
+
+void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
+  VisitRR(this, kPPC_Float64SilenceNaN, node);
+}
+
+void InstructionSelector::VisitFloat32Min(Node* node) {
+  VisitRRR(this, kPPC_MinDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Min(Node* node) {
+  VisitRRR(this, kPPC_MinDouble, node);
+}
+
+void InstructionSelector::VisitFloat32Abs(Node* node) {
+  VisitRR(this, kPPC_AbsDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Abs(Node* node) {
+  VisitRR(this, kPPC_AbsDouble, node);
+}
+
+void InstructionSelector::VisitFloat32Sqrt(Node* node) {
+  VisitRR(this, kPPC_SqrtDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  PPCOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d1), g.UseFixed(node->InputAt(0), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  PPCOperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d1), g.UseFixed(node->InputAt(0), d1),
+       g.UseFixed(node->InputAt(1), d2))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Sqrt(Node* node) {
+  VisitRR(this, kPPC_SqrtDouble, node);
+}
+
+void InstructionSelector::VisitFloat32RoundDown(Node* node) {
+  VisitRR(this, kPPC_FloorDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64RoundDown(Node* node) {
+  VisitRR(this, kPPC_FloorDouble, node);
+}
+
+void InstructionSelector::VisitFloat32RoundUp(Node* node) {
+  VisitRR(this, kPPC_CeilDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64RoundUp(Node* node) {
+  VisitRR(this, kPPC_CeilDouble, node);
+}
+
+void InstructionSelector::VisitFloat32RoundTruncate(Node* node) {
+  VisitRR(this, kPPC_TruncateDouble | MiscField::encode(1), node);
+}
+
+void InstructionSelector::VisitFloat64RoundTruncate(Node* node) {
+  VisitRR(this, kPPC_TruncateDouble, node);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
+  VisitRR(this, kPPC_RoundDouble, node);
+}
+
+void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat32Neg(Node* node) {
+  VisitRR(this, kPPC_NegDouble, node);
+}
+
+void InstructionSelector::VisitFloat64Neg(Node* node) {
+  VisitRR(this, kPPC_NegDouble, node);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int32BinopMatcher>(this, node, kPPC_AddWithOverflow32,
+                                         kInt16Imm, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int32BinopMatcher>(this, node, kPPC_AddWithOverflow32, kInt16Imm,
+                                &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int32BinopMatcher>(this, node, kPPC_SubWithOverflow32,
+                                         kInt16Imm_Negate, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int32BinopMatcher>(this, node, kPPC_SubWithOverflow32,
+                                kInt16Imm_Negate, &cont);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitInt64AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int64BinopMatcher>(this, node, kPPC_Add64, kInt16Imm,
+                                         &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int64BinopMatcher>(this, node, kPPC_Add64, kInt16Imm, &cont);
+}
+
+void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop<Int64BinopMatcher>(this, node, kPPC_Sub, kInt16Imm_Negate,
+                                         &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop<Int64BinopMatcher>(this, node, kPPC_Sub, kInt16Imm_Negate, &cont);
+}
+#endif
+
+static bool CompareLogical(FlagsContinuation* cont) {
+  switch (cont->condition()) {
+    case kUnsignedLessThan:
+    case kUnsignedGreaterThanOrEqual:
+    case kUnsignedLessThanOrEqual:
+    case kUnsignedGreaterThan:
+      return true;
+    default:
+      return false;
+  }
+  UNREACHABLE();
+}
+
+namespace {
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont,
+                      bool commutative, ImmediateMode immediate_mode) {
+  PPCOperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  // Match immediates on left or right side of comparison.
+  if (g.CanBeImmediate(right, immediate_mode)) {
+    VisitCompare(selector, opcode, g.UseRegister(left), g.UseImmediate(right),
+                 cont);
+  } else if (g.CanBeImmediate(left, immediate_mode)) {
+    if (!commutative) cont->Commute();
+    VisitCompare(selector, opcode, g.UseRegister(right), g.UseImmediate(left),
+                 cont);
+  } else {
+    VisitCompare(selector, opcode, g.UseRegister(left), g.UseRegister(right),
+                 cont);
+  }
+}
+
+void VisitWord32Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  ImmediateMode mode = (CompareLogical(cont) ? kInt16Imm_Unsigned : kInt16Imm);
+  VisitWordCompare(selector, node, kPPC_Cmp32, cont, false, mode);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void VisitWord64Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  ImmediateMode mode = (CompareLogical(cont) ? kInt16Imm_Unsigned : kInt16Imm);
+  VisitWordCompare(selector, node, kPPC_Cmp64, cont, false, mode);
+}
+#endif
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  PPCOperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  VisitCompare(selector, kPPC_CmpDouble, g.UseRegister(left),
+               g.UseRegister(right), cont);
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  PPCOperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  VisitCompare(selector, kPPC_CmpDouble, g.UseRegister(left),
+               g.UseRegister(right), cont);
+}
+
+}  // namespace
+
+// Shared routine for word comparisons against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
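+  // E.g. Branch(Word32Equal(x, 0)) becomes a branch on x with the
+  // continuation negated; nested Word32Equal-with-zero wrappers are peeled
+  // off one by one.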
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+#if V8_TARGET_ARCH_PPC64
+      case IrOpcode::kWord64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kInt64LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kInt64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+#endif
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of the
+          // actual value, or was already defined, which means it is scheduled
+          // *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int32BinopMatcher>(
+                    this, node, kPPC_AddWithOverflow32, kInt16Imm, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int32BinopMatcher>(
+                    this, node, kPPC_SubWithOverflow32, kInt16Imm_Negate, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kNotEqual);
+                return EmitInt32MulWithOverflow(this, node, cont);
+#if V8_TARGET_ARCH_PPC64
+              case IrOpcode::kInt64AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int64BinopMatcher>(this, node, kPPC_Add64,
+                                                     kInt16Imm, cont);
+              case IrOpcode::kInt64SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop<Int64BinopMatcher>(this, node, kPPC_Sub,
+                                                     kInt16Imm_Negate, cont);
+#endif
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Sub:
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kWord32And:
+        // TODO(mbrandy): opportunity for rlwinm?
+        return VisitWordCompare(this, value, kPPC_Tst32, cont, true,
+                                kInt16Imm_Unsigned);
+// TODO(mbrandy): Handle?
+// case IrOpcode::kInt32Add:
+// case IrOpcode::kWord32Or:
+// case IrOpcode::kWord32Xor:
+// case IrOpcode::kWord32Sar:
+// case IrOpcode::kWord32Shl:
+// case IrOpcode::kWord32Shr:
+// case IrOpcode::kWord32Ror:
+#if V8_TARGET_ARCH_PPC64
+      case IrOpcode::kInt64Sub:
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kWord64And:
+        // TODO(mbrandy): opportunity for rldic?
+        return VisitWordCompare(this, value, kPPC_Tst64, cont, true,
+                                kInt16Imm_Unsigned);
+// TODO(mbrandy): Handle?
+// case IrOpcode::kInt64Add:
+// case IrOpcode::kWord64Or:
+// case IrOpcode::kWord64Xor:
+// case IrOpcode::kWord64Sar:
+// case IrOpcode::kWord64Shl:
+// case IrOpcode::kWord64Shr:
+// case IrOpcode::kWord64Ror:
+#endif
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Branch could not be combined with a compare, emit compare against 0.
+  PPCOperandGenerator g(this);
+  VisitCompare(this, kPPC_Cmp32, g.UseRegister(value), g.TempImmediate(0),
+               cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  PPCOperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
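+    // Illustrative: with case_count = 4 and value_range = 4, table cost is
+    // (4 + 4) + 3 * 3 = 17 vs. lookup cost (3 + 8) + 3 * 4 = 23, so the jump
+    // table is preferred (subject to the min_value and range checks below).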
+    if (sw.case_count() > 0 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kPPC_Sub, index_operand, value_operand,
+             g.TempImmediate(sw.min_value()));
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+#if V8_TARGET_ARCH_PPC64
+void InstructionSelector::VisitWord64Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+#endif
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
+    return EmitInt32MulWithOverflow(this, node, &cont);
+  }
+  FlagsContinuation cont;
+  EmitInt32MulWithOverflow(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  PPCOperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    int slot = kStackFrameExtraParamSlot;
+    for (PushParameter input : (*arguments)) {
+      if (input.node == nullptr) continue;
+      Emit(kPPC_StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+           g.TempImmediate(slot));
+      ++slot;
+    }
+  } else {
+    // Push any stack arguments.
+    for (PushParameter input : base::Reversed(*arguments)) {
+      // Skip any alignment holes in pushed nodes.
+      if (input.node == nullptr) continue;
+      Emit(kPPC_Push, g.NoOutput(), g.UseRegister(input.node));
+    }
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+void InstructionSelector::VisitFloat64ExtractLowWord32(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_DoubleExtractLowWord32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitFloat64ExtractHighWord32(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_DoubleExtractHighWord32, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  PPCOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
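+  // If this low-word insert fully covers a high-word insert on its left
+  // input, fuse the pair into a single kPPC_DoubleConstruct that builds the
+  // double from both 32-bit halves.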
+  if (left->opcode() == IrOpcode::kFloat64InsertHighWord32 &&
+      CanCover(node, left)) {
+    left = left->InputAt(1);
+    Emit(kPPC_DoubleConstruct, g.DefineAsRegister(node), g.UseRegister(left),
+         g.UseRegister(right));
+    return;
+  }
+  Emit(kPPC_DoubleInsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  PPCOperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
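+  // Symmetric to the low-word case above: a covered low-word insert is fused
+  // with this high-word insert into a single kPPC_DoubleConstruct.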
+  if (left->opcode() == IrOpcode::kFloat64InsertLowWord32 &&
+      CanCover(node, left)) {
+    left = left->InputAt(1);
+    Emit(kPPC_DoubleConstruct, g.DefineAsRegister(node), g.UseRegister(right),
+         g.UseRegister(left));
+    return;
+  }
+  Emit(kPPC_DoubleInsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.UseRegister(right));
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_Sync, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  VisitStore(node);
+}
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  VisitStore(node);
+}
+
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  PPCOperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
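+  // Keep base, index, value and the result in distinct registers; the code
+  // generator expands this to a load-reserve/store-conditional retry loop,
+  // so the operands must not alias each other.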
+  AddressingMode addressing_mode = kMode_MRR;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.UseUniqueRegister(node);
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kPPC_AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kPPC_AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kPPC_AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kPPC_AtomicExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kPPC_AtomicExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kPPC_AtomicExchangeWord32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kPPC_AtomicExchangeWord64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node,
+                                ArchOpcode opcode) {
+  PPCOperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+
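+  // As with the exchange above, all operands stay in unique registers for
+  // the generated load-reserve/store-conditional compare-and-swap loop.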
+  AddressingMode addressing_mode = kMode_MRR;
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(old_value);
+  inputs[input_count++] = g.UseUniqueRegister(new_value);
+
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  selector->Emit(code, output_count, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kPPC_AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kPPC_AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kPPC_AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Uint8()) {
+    opcode = kPPC_AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kPPC_AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kPPC_AtomicCompareExchangeWord32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kPPC_AtomicCompareExchangeWord64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void VisitAtomicBinaryOperation(InstructionSelector* selector, Node* node,
+                                ArchOpcode int8_op, ArchOpcode uint8_op,
+                                ArchOpcode int16_op, ArchOpcode uint16_op,
+                                ArchOpcode int32_op, ArchOpcode uint32_op,
+                                ArchOpcode int64_op, ArchOpcode uint64_op) {
+  PPCOperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  MachineType type = AtomicOpType(node->op());
+
+  ArchOpcode opcode;
+
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32()) {
+    opcode = int32_op;
+  } else if (type == MachineType::Uint32()) {
+    opcode = uint32_op;
+  } else if (type == MachineType::Int64()) {
+    opcode = int64_op;
+  } else if (type == MachineType::Uint64()) {
+    opcode = uint64_op;
+  } else {
+    UNREACHABLE();
+  }
+
+  AddressingMode addressing_mode = kMode_MRR;
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  InstructionOperand inputs[3];
+
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  selector->Emit(code, output_count, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  // Unused: the PPC atomic binop visitors below go through the file-local
+  // VisitAtomicBinaryOperation helper instead of this entry point.
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitWord64AtomicBinaryOperation(
+    Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op,
+    ArchOpcode uint64_op) {
+  // Unused; see VisitWord32AtomicBinaryOperation above.
+  UNREACHABLE();
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                     \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {    \
+    VisitAtomicBinaryOperation(                                    \
+        this, node, kPPC_Atomic##op##Int8, kPPC_Atomic##op##Uint8, \
+        kPPC_Atomic##op##Int16, kPPC_Atomic##op##Uint16,           \
+        kPPC_Atomic##op##Int32, kPPC_Atomic##op##Uint32,           \
+        kPPC_Atomic##op##Int64, kPPC_Atomic##op##Uint64);          \
+  }                                                                \
+  void InstructionSelector::VisitWord64Atomic##op(Node* node) {    \
+    VisitAtomicBinaryOperation(                                    \
+        this, node, kPPC_Atomic##op##Int8, kPPC_Atomic##op##Uint8, \
+        kPPC_Atomic##op##Int16, kPPC_Atomic##op##Uint16,           \
+        kPPC_Atomic##op##Int32, kPPC_Atomic##op##Uint32,           \
+        kPPC_Atomic##op##Int64, kPPC_Atomic##op##Uint64);          \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+#define SIMD_TYPES(V) \
+  V(F64x2)            \
+  V(F32x4)            \
+  V(I32x4)            \
+  V(I16x8)            \
+  V(I8x16)
+
+#define SIMD_BINOP_LIST(V) \
+  V(F64x2Add)              \
+  V(F64x2Sub)              \
+  V(F64x2Mul)              \
+  V(F64x2Eq)               \
+  V(F64x2Ne)               \
+  V(F64x2Le)               \
+  V(F64x2Lt)               \
+  V(F64x2Div)              \
+  V(F64x2Min)              \
+  V(F64x2Max)              \
+  V(F32x4Add)              \
+  V(F32x4AddHoriz)         \
+  V(F32x4Sub)              \
+  V(F32x4Mul)              \
+  V(F32x4Eq)               \
+  V(F32x4Ne)               \
+  V(F32x4Lt)               \
+  V(F32x4Le)               \
+  V(F32x4Div)              \
+  V(F32x4Min)              \
+  V(F32x4Max)              \
+  V(I64x2Add)              \
+  V(I64x2Sub)              \
+  V(I64x2Mul)              \
+  V(I32x4Add)              \
+  V(I32x4AddHoriz)         \
+  V(I32x4Sub)              \
+  V(I32x4Mul)              \
+  V(I32x4MinS)             \
+  V(I32x4MinU)             \
+  V(I32x4MaxS)             \
+  V(I32x4MaxU)             \
+  V(I32x4Eq)               \
+  V(I32x4Ne)               \
+  V(I32x4GtS)              \
+  V(I32x4GeS)              \
+  V(I32x4GtU)              \
+  V(I32x4GeU)              \
+  V(I32x4DotI16x8S)        \
+  V(I16x8Add)              \
+  V(I16x8AddHoriz)         \
+  V(I16x8Sub)              \
+  V(I16x8Mul)              \
+  V(I16x8MinS)             \
+  V(I16x8MinU)             \
+  V(I16x8MaxS)             \
+  V(I16x8MaxU)             \
+  V(I16x8Eq)               \
+  V(I16x8Ne)               \
+  V(I16x8GtS)              \
+  V(I16x8GeS)              \
+  V(I16x8GtU)              \
+  V(I16x8GeU)              \
+  V(I16x8SConvertI32x4)    \
+  V(I16x8UConvertI32x4)    \
+  V(I16x8AddSatS)          \
+  V(I16x8SubSatS)          \
+  V(I16x8AddSatU)          \
+  V(I16x8SubSatU)          \
+  V(I16x8RoundingAverageU) \
+  V(I8x16Add)              \
+  V(I8x16Sub)              \
+  V(I8x16Mul)              \
+  V(I8x16MinS)             \
+  V(I8x16MinU)             \
+  V(I8x16MaxS)             \
+  V(I8x16MaxU)             \
+  V(I8x16Eq)               \
+  V(I8x16Ne)               \
+  V(I8x16GtS)              \
+  V(I8x16GeS)              \
+  V(I8x16GtU)              \
+  V(I8x16GeU)              \
+  V(I8x16SConvertI16x8)    \
+  V(I8x16UConvertI16x8)    \
+  V(I8x16AddSatS)          \
+  V(I8x16SubSatS)          \
+  V(I8x16AddSatU)          \
+  V(I8x16SubSatU)          \
+  V(I8x16RoundingAverageU) \
+  V(I8x16Swizzle)          \
+  V(S128And)               \
+  V(S128Or)                \
+  V(S128Xor)               \
+  V(S128AndNot)
+
+#define SIMD_UNOP_LIST(V)   \
+  V(F64x2Abs)               \
+  V(F64x2Neg)               \
+  V(F64x2Sqrt)              \
+  V(F64x2Ceil)              \
+  V(F64x2Floor)             \
+  V(F64x2Trunc)             \
+  V(F64x2NearestInt)        \
+  V(F32x4Abs)               \
+  V(F32x4Neg)               \
+  V(F32x4RecipApprox)       \
+  V(F32x4RecipSqrtApprox)   \
+  V(F32x4Sqrt)              \
+  V(F32x4SConvertI32x4)     \
+  V(F32x4UConvertI32x4)     \
+  V(F32x4Ceil)              \
+  V(F32x4Floor)             \
+  V(F32x4Trunc)             \
+  V(F32x4NearestInt)        \
+  V(I64x2Neg)               \
+  V(I32x4Neg)               \
+  V(I32x4Abs)               \
+  V(I32x4SConvertF32x4)     \
+  V(I32x4UConvertF32x4)     \
+  V(I32x4SConvertI16x8Low)  \
+  V(I32x4SConvertI16x8High) \
+  V(I32x4UConvertI16x8Low)  \
+  V(I32x4UConvertI16x8High) \
+  V(I16x8Neg)               \
+  V(I16x8Abs)               \
+  V(I8x16Neg)               \
+  V(I8x16Abs)               \
+  V(I16x8SConvertI8x16Low)  \
+  V(I16x8SConvertI8x16High) \
+  V(I16x8UConvertI8x16Low)  \
+  V(I16x8UConvertI8x16High) \
+  V(S128Not)
+
+#define SIMD_SHIFT_LIST(V) \
+  V(I64x2Shl)              \
+  V(I64x2ShrS)             \
+  V(I64x2ShrU)             \
+  V(I32x4Shl)              \
+  V(I32x4ShrS)             \
+  V(I32x4ShrU)             \
+  V(I16x8Shl)              \
+  V(I16x8ShrS)             \
+  V(I16x8ShrU)             \
+  V(I8x16Shl)              \
+  V(I8x16ShrS)             \
+  V(I8x16ShrU)
+
+#define SIMD_BOOL_LIST(V) \
+  V(V32x4AnyTrue)         \
+  V(V16x8AnyTrue)         \
+  V(V8x16AnyTrue)         \
+  V(V32x4AllTrue)         \
+  V(V16x8AllTrue)         \
+  V(V8x16AllTrue)
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    PPCOperandGenerator g(this);                             \
+    Emit(kPPC_##Type##Splat, g.DefineAsRegister(node),       \
+         g.UseRegister(node->InputAt(0)));                   \
+  }
+SIMD_TYPES(SIMD_VISIT_SPLAT)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    PPCOperandGenerator g(this);                                         \
+    int32_t lane = OpParameter<int32_t>(node->op());                     \
+    Emit(kPPC_##Type##ExtractLane##Sign, g.DefineAsRegister(node),       \
+         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));         \
+  }
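+// An empty Sign argument expands to the plain Visit<Type>ExtractLane visitor
+// used for the floating-point and I32x4 types.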
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    PPCOperandGenerator g(this);                                   \
+    int32_t lane = OpParameter<int32_t>(node->op());               \
+    Emit(kPPC_##Type##ReplaceLane, g.DefineSameAsFirst(node),      \
+         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),    \
+         g.UseRegister(node->InputAt(1)));                         \
+  }
+SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
+#undef SIMD_VISIT_REPLACE_LANE
+
+#define SIMD_VISIT_BINOP(Opcode)                                          \
+  void InstructionSelector::Visit##Opcode(Node* node) {                   \
+    PPCOperandGenerator g(this);                                          \
+    InstructionOperand temps[] = {g.TempSimd128Register(),                \
+                                  g.TempSimd128Register()};               \
+    Emit(kPPC_##Opcode, g.DefineAsRegister(node),                         \
+         g.UseUniqueRegister(node->InputAt(0)),                           \
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+#undef SIMD_BINOP_LIST
+
+#define SIMD_VISIT_UNOP(Opcode)                                     \
+  void InstructionSelector::Visit##Opcode(Node* node) {             \
+    PPCOperandGenerator g(this);                                    \
+    InstructionOperand temps[] = {g.TempSimd128Register()};         \
+    Emit(kPPC_##Opcode, g.DefineAsRegister(node),                   \
+         g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+#undef SIMD_UNOP_LIST
+
+#define SIMD_VISIT_SHIFT(Opcode)                        \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    PPCOperandGenerator g(this);                        \
+    Emit(kPPC_##Opcode, g.DefineAsRegister(node),       \
+         g.UseUniqueRegister(node->InputAt(0)),         \
+         g.UseUniqueRegister(node->InputAt(1)));        \
+  }
+SIMD_SHIFT_LIST(SIMD_VISIT_SHIFT)
+#undef SIMD_VISIT_SHIFT
+#undef SIMD_SHIFT_LIST
+
+#define SIMD_VISIT_BOOL(Opcode)                         \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    PPCOperandGenerator g(this);                        \
+    Emit(kPPC_##Opcode, g.DefineAsRegister(node),       \
+         g.UseUniqueRegister(node->InputAt(0)));        \
+  }
+SIMD_BOOL_LIST(SIMD_VISIT_BOOL)
+#undef SIMD_VISIT_BOOL
+#undef SIMD_BOOL_LIST
+
+#define SIMD_VISIT_BITMASK(Opcode)                                        \
+  void InstructionSelector::Visit##Opcode(Node* node) {                   \
+    PPCOperandGenerator g(this);                                          \
+    InstructionOperand temps[] = {g.TempRegister()};                      \
+    Emit(kPPC_##Opcode, g.DefineAsRegister(node),                         \
+         g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_VISIT_BITMASK(I8x16BitMask)
+SIMD_VISIT_BITMASK(I16x8BitMask)
+SIMD_VISIT_BITMASK(I32x4BitMask)
+#undef SIMD_VISIT_BITMASK
+#undef SIMD_TYPES
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  PPCOperandGenerator g(this);
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+  // Remap the shuffle indices to match IBM lane numbering.
+  int max_index = 15;
+  int total_lane_count = 2 * kSimd128Size;
+  uint8_t shuffle_remapped[kSimd128Size];
+  for (int i = 0; i < kSimd128Size; i++) {
+    uint8_t current_index = shuffle[i];
+    shuffle_remapped[i] = (current_index <= max_index
+                               ? max_index - current_index
+                               : total_lane_count - current_index + max_index);
+  }
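+  // The 16 remapped lane indices are packed into four 32-bit immediates,
+  // four lanes per immediate.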
+  Emit(kPPC_I8x16Shuffle, g.DefineAsRegister(node), g.UseUniqueRegister(input0),
+       g.UseUniqueRegister(input1),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_remapped + 12)));
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_S128Zero, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  PPCOperandGenerator g(this);
+  Emit(kPPC_S128Select, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       g.UseRegister(node->InputAt(2)));
+}
+
+void InstructionSelector::VisitS128Const(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  PPCOperandGenerator g(this);
+
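+  // Results returned through caller frame slots are read back with
+  // kPPC_Peek; reverse_slot counts the slots in pointer-size units.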
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      Emit(kPPC_Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  return MachineOperatorBuilder::kFloat32RoundDown |
+         MachineOperatorBuilder::kFloat64RoundDown |
+         MachineOperatorBuilder::kFloat32RoundUp |
+         MachineOperatorBuilder::kFloat64RoundUp |
+         MachineOperatorBuilder::kFloat32RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTiesAway |
+         MachineOperatorBuilder::kWord32Popcnt |
+         MachineOperatorBuilder::kWord64Popcnt;
+  // We omit kWord32ShiftIsSafe as s[rl]w use 0x3F as a mask rather than 0x1F.
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ppc/unwinding-info-writer-ppc.cc b/src/compiler/backend/ppc/unwinding-info-writer-ppc.cc
new file mode 100644
index 0000000..587482b
--- /dev/null
+++ b/src/compiler/backend/ppc/unwinding-info-writer-ppc.cc
@@ -0,0 +1,105 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/ppc/unwinding-info-writer-ppc.h"
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+void UnwindingInfoWriter::BeginInstructionBlock(int pc_offset,
+                                                const InstructionBlock* block) {
+  if (!enabled()) return;
+
+  block_will_exit_ = false;
+
+  DCHECK_LT(block->rpo_number().ToInt(),
+            static_cast<int>(block_initial_states_.size()));
+  const BlockInitialState* initial_state =
+      block_initial_states_[block->rpo_number().ToInt()];
+  if (!initial_state) return;
+  if (initial_state->saved_lr_ != saved_lr_) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    if (initial_state->saved_lr_) {
+      eh_frame_writer_.RecordRegisterSavedToStack(kLrDwarfCode,
+                                                  kSystemPointerSize);
+      eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+    } else {
+      eh_frame_writer_.RecordRegisterFollowsInitialRule(kLrDwarfCode);
+    }
+    saved_lr_ = initial_state->saved_lr_;
+  }
+}
+
+void UnwindingInfoWriter::EndInstructionBlock(const InstructionBlock* block) {
+  if (!enabled() || block_will_exit_) return;
+
+  for (const RpoNumber& successor : block->successors()) {
+    int successor_index = successor.ToInt();
+    DCHECK_LT(successor_index, static_cast<int>(block_initial_states_.size()));
+    const BlockInitialState* existing_state =
+        block_initial_states_[successor_index];
+
+    // If we already had an entry for this BB, check that the values are the
+    // same as the ones we are trying to insert.
+    if (existing_state) {
+      DCHECK_EQ(existing_state->saved_lr_, saved_lr_);
+    } else {
+      block_initial_states_[successor_index] =
+          zone_->New<BlockInitialState>(saved_lr_);
+    }
+  }
+}
+
+void UnwindingInfoWriter::MarkFrameConstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // Regardless of the type of frame constructed, the relevant part of the
+  // layout is always the one in the diagram:
+  //
+  // |   ....   |         higher addresses
+  // +----------+               ^
+  // |    LR    |               |            |
+  // +----------+               |            |
+  // | saved FP |               |            |
+  // +----------+ <-- FP                     v
+  // |   ....   |                       stack growth
+  //
+  // The LR is pushed on the stack, and we can record this fact at the end of
+  // the construction, since the LR itself is not modified in the process.
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterSavedToStack(kLrDwarfCode,
+                                              kSystemPointerSize);
+  eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+  saved_lr_ = true;
+}
+
+void UnwindingInfoWriter::MarkFrameDeconstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // The lr is restored by the last operation in LeaveFrame().
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(kLrDwarfCode);
+  saved_lr_ = false;
+}
+
+void UnwindingInfoWriter::MarkLinkRegisterOnTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(sp, 0);
+  eh_frame_writer_.RecordRegisterSavedToStack(kLrDwarfCode, 0);
+}
+
+void UnwindingInfoWriter::MarkPopLinkRegisterFromTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(fp, 0);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(kLrDwarfCode);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/ppc/unwinding-info-writer-ppc.h b/src/compiler/backend/ppc/unwinding-info-writer-ppc.h
new file mode 100644
index 0000000..e96a483
--- /dev/null
+++ b/src/compiler/backend/ppc/unwinding-info-writer-ppc.h
@@ -0,0 +1,73 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_PPC_UNWINDING_INFO_WRITER_PPC_H_
+#define V8_COMPILER_BACKEND_PPC_UNWINDING_INFO_WRITER_PPC_H_
+
+#include "src/diagnostics/eh-frame.h"
+#include "src/flags/flags.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone* zone)
+      : zone_(zone),
+        eh_frame_writer_(zone),
+        saved_lr_(false),
+        block_will_exit_(false),
+        block_initial_states_(zone) {
+    if (enabled()) eh_frame_writer_.Initialize();
+  }
+
+  void SetNumberOfInstructionBlocks(int number) {
+    if (enabled()) block_initial_states_.resize(number);
+  }
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock* block);
+  void EndInstructionBlock(const InstructionBlock* block);
+
+  void MarkLinkRegisterOnTopOfStack(int pc_offset);
+  void MarkPopLinkRegisterFromTopOfStack(int pc_offset);
+
+  void MarkFrameConstructed(int at_pc);
+  void MarkFrameDeconstructed(int at_pc);
+
+  void MarkBlockWillExit() { block_will_exit_ = true; }
+
+  void Finish(int code_size) {
+    if (enabled()) eh_frame_writer_.Finish(code_size);
+  }
+
+  EhFrameWriter* eh_frame_writer() {
+    return enabled() ? &eh_frame_writer_ : nullptr;
+  }
+
+ private:
+  bool enabled() const { return FLAG_perf_prof_unwinding_info; }
+
+  class BlockInitialState : public ZoneObject {
+   public:
+    explicit BlockInitialState(bool saved_lr) : saved_lr_(saved_lr) {}
+
+    bool saved_lr_;
+  };
+
+  Zone* zone_;
+  EhFrameWriter eh_frame_writer_;
+  bool saved_lr_;
+  bool block_will_exit_;
+
+  ZoneVector<const BlockInitialState*> block_initial_states_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_PPC_UNWINDING_INFO_WRITER_PPC_H_
diff --git a/src/compiler/backend/register-allocation.h b/src/compiler/backend/register-allocation.h
new file mode 100644
index 0000000..f4a7558
--- /dev/null
+++ b/src/compiler/backend/register-allocation.h
@@ -0,0 +1,93 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_REGISTER_ALLOCATION_H_
+#define V8_COMPILER_BACKEND_REGISTER_ALLOCATION_H_
+
+#include "src/codegen/register-configuration.h"
+#include "src/zone/zone.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+enum class RegisterKind { kGeneral, kDouble };
+
+inline int GetRegisterCount(const RegisterConfiguration* config,
+                            RegisterKind kind) {
+  switch (kind) {
+    case RegisterKind::kGeneral:
+      return config->num_general_registers();
+    case RegisterKind::kDouble:
+      return config->num_double_registers();
+  }
+}
+
+inline int GetAllocatableRegisterCount(const RegisterConfiguration* config,
+                                       RegisterKind kind) {
+  switch (kind) {
+    case RegisterKind::kGeneral:
+      return config->num_allocatable_general_registers();
+    case RegisterKind::kDouble:
+      return config->num_allocatable_double_registers();
+  }
+}
+
+inline const int* GetAllocatableRegisterCodes(
+    const RegisterConfiguration* config, RegisterKind kind) {
+  switch (kind) {
+    case RegisterKind::kGeneral:
+      return config->allocatable_general_codes();
+    case RegisterKind::kDouble:
+      return config->allocatable_double_codes();
+  }
+}
+
+inline int ByteWidthForStackSlot(MachineRepresentation rep) {
+  switch (rep) {
+    case MachineRepresentation::kBit:
+    case MachineRepresentation::kWord8:
+    case MachineRepresentation::kWord16:
+    case MachineRepresentation::kWord32:
+    case MachineRepresentation::kFloat32:
+      return kSystemPointerSize;
+    case MachineRepresentation::kTaggedSigned:
+    case MachineRepresentation::kTaggedPointer:
+    case MachineRepresentation::kTagged:
+    case MachineRepresentation::kCompressedPointer:
+    case MachineRepresentation::kCompressed:
+      // TODO(ishell): kTaggedSize once half size locations are supported.
+      return kSystemPointerSize;
+    case MachineRepresentation::kWord64:
+    case MachineRepresentation::kFloat64:
+      return kDoubleSize;
+    case MachineRepresentation::kSimd128:
+      return kSimd128Size;
+    case MachineRepresentation::kNone:
+      break;
+  }
+  UNREACHABLE();
+}
+
+class RegisterAllocationData : public ZoneObject {
+ public:
+  enum Type {
+    kTopTier,
+    kMidTier,
+  };
+
+  Type type() const { return type_; }
+
+ protected:
+  explicit RegisterAllocationData(Type type) : type_(type) {}
+
+ private:
+  Type type_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_REGISTER_ALLOCATION_H_
diff --git a/src/compiler/backend/register-allocator-verifier.cc b/src/compiler/backend/register-allocator-verifier.cc
new file mode 100644
index 0000000..50e57bc
--- /dev/null
+++ b/src/compiler/backend/register-allocator-verifier.cc
@@ -0,0 +1,621 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/register-allocator-verifier.h"
+
+#include "src/compiler/backend/instruction.h"
+#include "src/utils/bit-vector.h"
+#include "src/utils/ostreams.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+namespace {
+
+size_t OperandCount(const Instruction* instr) {
+  return instr->InputCount() + instr->OutputCount() + instr->TempCount();
+}
+
+void VerifyEmptyGaps(const Instruction* instr) {
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; i++) {
+    Instruction::GapPosition inner_pos =
+        static_cast<Instruction::GapPosition>(i);
+    CHECK_NULL(instr->GetParallelMove(inner_pos));
+  }
+}
+
+void VerifyAllocatedGaps(const Instruction* instr, const char* caller_info) {
+  for (int i = Instruction::FIRST_GAP_POSITION;
+       i <= Instruction::LAST_GAP_POSITION; i++) {
+    Instruction::GapPosition inner_pos =
+        static_cast<Instruction::GapPosition>(i);
+    const ParallelMove* moves = instr->GetParallelMove(inner_pos);
+    if (moves == nullptr) continue;
+    for (const MoveOperands* move : *moves) {
+      if (move->IsRedundant()) continue;
+      CHECK_WITH_MSG(
+          move->source().IsAllocated() || move->source().IsConstant(),
+          caller_info);
+      CHECK_WITH_MSG(move->destination().IsAllocated(), caller_info);
+    }
+  }
+}
+
+}  // namespace
+
+RegisterAllocatorVerifier::RegisterAllocatorVerifier(
+    Zone* zone, const RegisterConfiguration* config,
+    const InstructionSequence* sequence, const Frame* frame)
+    : zone_(zone),
+      config_(config),
+      sequence_(sequence),
+      constraints_(zone),
+      assessments_(zone),
+      outstanding_assessments_(zone),
+      spill_slot_delta_(frame->GetTotalFrameSlotCount() -
+                        frame->GetSpillSlotCount()) {
+  constraints_.reserve(sequence->instructions().size());
+  // TODO(dcarney): model unique constraints.
+  // Construct OperandConstraints for all InstructionOperands, eliminating
+  // kSameAsFirst along the way.
+  for (const Instruction* instr : sequence->instructions()) {
+    // All gaps should be totally unallocated at this point.
+    VerifyEmptyGaps(instr);
+    const size_t operand_count = OperandCount(instr);
+    OperandConstraint* op_constraints =
+        zone->NewArray<OperandConstraint>(operand_count);
+    size_t count = 0;
+    for (size_t i = 0; i < instr->InputCount(); ++i, ++count) {
+      BuildConstraint(instr->InputAt(i), &op_constraints[count]);
+      VerifyInput(op_constraints[count]);
+    }
+    for (size_t i = 0; i < instr->TempCount(); ++i, ++count) {
+      BuildConstraint(instr->TempAt(i), &op_constraints[count]);
+      VerifyTemp(op_constraints[count]);
+    }
+    for (size_t i = 0; i < instr->OutputCount(); ++i, ++count) {
+      BuildConstraint(instr->OutputAt(i), &op_constraints[count]);
+      if (op_constraints[count].type_ == kSameAsFirst) {
+        CHECK_LT(0, instr->InputCount());
+        op_constraints[count].type_ = op_constraints[0].type_;
+        op_constraints[count].value_ = op_constraints[0].value_;
+      }
+      VerifyOutput(op_constraints[count]);
+    }
+    InstructionConstraint instr_constraint = {instr, operand_count,
+                                              op_constraints};
+    constraints()->push_back(instr_constraint);
+  }
+}
+
+void RegisterAllocatorVerifier::VerifyInput(
+    const OperandConstraint& constraint) {
+  CHECK_NE(kSameAsFirst, constraint.type_);
+  if (constraint.type_ != kImmediate) {
+    CHECK_NE(InstructionOperand::kInvalidVirtualRegister,
+             constraint.virtual_register_);
+  }
+}
+
+void RegisterAllocatorVerifier::VerifyTemp(
+    const OperandConstraint& constraint) {
+  CHECK_NE(kSameAsFirst, constraint.type_);
+  CHECK_NE(kImmediate, constraint.type_);
+  CHECK_NE(kConstant, constraint.type_);
+}
+
+void RegisterAllocatorVerifier::VerifyOutput(
+    const OperandConstraint& constraint) {
+  CHECK_NE(kImmediate, constraint.type_);
+  CHECK_NE(InstructionOperand::kInvalidVirtualRegister,
+           constraint.virtual_register_);
+}
+
+void RegisterAllocatorVerifier::VerifyAssignment(const char* caller_info) {
+  caller_info_ = caller_info;
+  CHECK(sequence()->instructions().size() == constraints()->size());
+  auto instr_it = sequence()->begin();
+  for (const auto& instr_constraint : *constraints()) {
+    const Instruction* instr = instr_constraint.instruction_;
+    // All gaps should be totally allocated at this point.
+    VerifyAllocatedGaps(instr, caller_info_);
+    const size_t operand_count = instr_constraint.operand_constaints_size_;
+    const OperandConstraint* op_constraints =
+        instr_constraint.operand_constraints_;
+    CHECK_EQ(instr, *instr_it);
+    CHECK(operand_count == OperandCount(instr));
+    size_t count = 0;
+    for (size_t i = 0; i < instr->InputCount(); ++i, ++count) {
+      CheckConstraint(instr->InputAt(i), &op_constraints[count]);
+    }
+    for (size_t i = 0; i < instr->TempCount(); ++i, ++count) {
+      CheckConstraint(instr->TempAt(i), &op_constraints[count]);
+    }
+    for (size_t i = 0; i < instr->OutputCount(); ++i, ++count) {
+      CheckConstraint(instr->OutputAt(i), &op_constraints[count]);
+    }
+    ++instr_it;
+  }
+}
+
+void RegisterAllocatorVerifier::BuildConstraint(const InstructionOperand* op,
+                                                OperandConstraint* constraint) {
+  constraint->value_ = kMinInt;
+  constraint->virtual_register_ = InstructionOperand::kInvalidVirtualRegister;
+  if (op->IsConstant()) {
+    constraint->type_ = kConstant;
+    constraint->value_ = ConstantOperand::cast(op)->virtual_register();
+    constraint->virtual_register_ = constraint->value_;
+  } else if (op->IsImmediate()) {
+    const ImmediateOperand* imm = ImmediateOperand::cast(op);
+    int value = imm->type() == ImmediateOperand::INLINE ? imm->inline_value()
+                                                        : imm->indexed_value();
+    constraint->type_ = kImmediate;
+    constraint->value_ = value;
+  } else {
+    CHECK(op->IsUnallocated());
+    const UnallocatedOperand* unallocated = UnallocatedOperand::cast(op);
+    int vreg = unallocated->virtual_register();
+    constraint->virtual_register_ = vreg;
+    if (unallocated->basic_policy() == UnallocatedOperand::FIXED_SLOT) {
+      constraint->type_ = kFixedSlot;
+      constraint->value_ = unallocated->fixed_slot_index();
+    } else {
+      switch (unallocated->extended_policy()) {
+        case UnallocatedOperand::REGISTER_OR_SLOT:
+        case UnallocatedOperand::NONE:
+          if (sequence()->IsFP(vreg)) {
+            constraint->type_ = kRegisterOrSlotFP;
+          } else {
+            constraint->type_ = kRegisterOrSlot;
+          }
+          break;
+        case UnallocatedOperand::REGISTER_OR_SLOT_OR_CONSTANT:
+          DCHECK(!sequence()->IsFP(vreg));
+          constraint->type_ = kRegisterOrSlotOrConstant;
+          break;
+        case UnallocatedOperand::FIXED_REGISTER:
+          if (unallocated->HasSecondaryStorage()) {
+            constraint->type_ = kRegisterAndSlot;
+            constraint->spilled_slot_ = unallocated->GetSecondaryStorage();
+          } else {
+            constraint->type_ = kFixedRegister;
+          }
+          constraint->value_ = unallocated->fixed_register_index();
+          break;
+        case UnallocatedOperand::FIXED_FP_REGISTER:
+          constraint->type_ = kFixedFPRegister;
+          constraint->value_ = unallocated->fixed_register_index();
+          break;
+        case UnallocatedOperand::MUST_HAVE_REGISTER:
+          if (sequence()->IsFP(vreg)) {
+            constraint->type_ = kFPRegister;
+          } else {
+            constraint->type_ = kRegister;
+          }
+          break;
+        case UnallocatedOperand::MUST_HAVE_SLOT:
+          constraint->type_ = kSlot;
+          constraint->value_ =
+              ElementSizeLog2Of(sequence()->GetRepresentation(vreg));
+          break;
+        case UnallocatedOperand::SAME_AS_FIRST_INPUT:
+          constraint->type_ = kSameAsFirst;
+          break;
+      }
+    }
+  }
+}
+
+void RegisterAllocatorVerifier::CheckConstraint(
+    const InstructionOperand* op, const OperandConstraint* constraint) {
+  switch (constraint->type_) {
+    case kConstant:
+      CHECK_WITH_MSG(op->IsConstant(), caller_info_);
+      CHECK_EQ(ConstantOperand::cast(op)->virtual_register(),
+               constraint->value_);
+      return;
+    case kImmediate: {
+      CHECK_WITH_MSG(op->IsImmediate(), caller_info_);
+      const ImmediateOperand* imm = ImmediateOperand::cast(op);
+      int value = imm->type() == ImmediateOperand::INLINE
+                      ? imm->inline_value()
+                      : imm->indexed_value();
+      CHECK_EQ(value, constraint->value_);
+      return;
+    }
+    case kRegister:
+      CHECK_WITH_MSG(op->IsRegister(), caller_info_);
+      return;
+    case kFPRegister:
+      CHECK_WITH_MSG(op->IsFPRegister(), caller_info_);
+      return;
+    case kFixedRegister:
+    case kRegisterAndSlot:
+      CHECK_WITH_MSG(op->IsRegister(), caller_info_);
+      CHECK_EQ(LocationOperand::cast(op)->register_code(), constraint->value_);
+      return;
+    case kFixedFPRegister:
+      CHECK_WITH_MSG(op->IsFPRegister(), caller_info_);
+      CHECK_EQ(LocationOperand::cast(op)->register_code(), constraint->value_);
+      return;
+    case kFixedSlot:
+      CHECK_WITH_MSG(op->IsStackSlot() || op->IsFPStackSlot(), caller_info_);
+      CHECK_EQ(LocationOperand::cast(op)->index(), constraint->value_);
+      return;
+    case kSlot:
+      CHECK_WITH_MSG(op->IsStackSlot() || op->IsFPStackSlot(), caller_info_);
+      CHECK_EQ(ElementSizeLog2Of(LocationOperand::cast(op)->representation()),
+               constraint->value_);
+      return;
+    case kRegisterOrSlot:
+      CHECK_WITH_MSG(op->IsRegister() || op->IsStackSlot(), caller_info_);
+      return;
+    case kRegisterOrSlotFP:
+      CHECK_WITH_MSG(op->IsFPRegister() || op->IsFPStackSlot(), caller_info_);
+      return;
+    case kRegisterOrSlotOrConstant:
+      CHECK_WITH_MSG(op->IsRegister() || op->IsStackSlot() || op->IsConstant(),
+                     caller_info_);
+      return;
+    case kSameAsFirst:
+      CHECK_WITH_MSG(false, caller_info_);
+      return;
+  }
+}
+
+void BlockAssessments::PerformMoves(const Instruction* instruction) {
+  const ParallelMove* first =
+      instruction->GetParallelMove(Instruction::GapPosition::START);
+  PerformParallelMoves(first);
+  const ParallelMove* last =
+      instruction->GetParallelMove(Instruction::GapPosition::END);
+  PerformParallelMoves(last);
+}
+
+void BlockAssessments::PerformParallelMoves(const ParallelMove* moves) {
+  if (moves == nullptr) return;
+
+  CHECK(map_for_moves_.empty());
+  for (MoveOperands* move : *moves) {
+    if (move->IsEliminated() || move->IsRedundant()) continue;
+    auto it = map_.find(move->source());
+    // The RHS of a parallel move should already have been assessed.
+    CHECK(it != map_.end());
+    // The LHS of a parallel move should not have been assigned in this
+    // parallel move.
+    CHECK(map_for_moves_.find(move->destination()) == map_for_moves_.end());
+    // The RHS of a parallel move should not be a stale reference.
+    CHECK(!IsStaleReferenceStackSlot(move->source()));
+    // Copy the assessment to the destination.
+    map_for_moves_[move->destination()] = it->second;
+  }
+  for (auto pair : map_for_moves_) {
+    // Re-insert the existing key for the new assignment so that it has the
+    // correct representation (which is ignored by the canonicalizing map
+    // comparator).
+    InstructionOperand op = pair.first;
+    map_.erase(op);
+    map_.insert(pair);
+    // Destination is no longer a stale reference.
+    stale_ref_stack_slots().erase(op);
+  }
+  map_for_moves_.clear();
+}
+
+void BlockAssessments::DropRegisters() {
+  for (auto iterator = map().begin(), end = map().end(); iterator != end;) {
+    auto current = iterator;
+    ++iterator;
+    InstructionOperand op = current->first;
+    if (op.IsAnyRegister()) map().erase(current);
+  }
+}
+
+void BlockAssessments::CheckReferenceMap(const ReferenceMap* reference_map) {
+  // First mark all existing reference stack spill slots as stale.
+  for (auto pair : map()) {
+    InstructionOperand op = pair.first;
+    if (op.IsStackSlot()) {
+      const LocationOperand* loc_op = LocationOperand::cast(&op);
+      // Only mark stack slots that are spill slots as stale; the reference
+      // map doesn't track arguments or fixed stack slots, which are
+      // implicitly tracked by the GC.
+      if (CanBeTaggedOrCompressedPointer(loc_op->representation()) &&
+          loc_op->index() >= spill_slot_delta()) {
+        stale_ref_stack_slots().insert(op);
+      }
+    }
+  }
+
+  // Now remove any stack spill slots in the reference map from the list of
+  // stale slots.
+  for (auto ref_map_operand : reference_map->reference_operands()) {
+    if (ref_map_operand.IsStackSlot()) {
+      auto pair = map().find(ref_map_operand);
+      CHECK(pair != map().end());
+      stale_ref_stack_slots().erase(pair->first);
+    }
+  }
+}
+
+bool BlockAssessments::IsStaleReferenceStackSlot(InstructionOperand op) {
+  if (!op.IsStackSlot()) return false;
+
+  const LocationOperand* loc_op = LocationOperand::cast(&op);
+  return CanBeTaggedOrCompressedPointer(loc_op->representation()) &&
+         stale_ref_stack_slots().find(op) != stale_ref_stack_slots().end();
+}
+
+void BlockAssessments::Print() const {
+  StdoutStream os;
+  for (const auto pair : map()) {
+    const InstructionOperand op = pair.first;
+    const Assessment* assessment = pair.second;
+    // Use operator<< so we can write the assessment on the same
+    // line.
+    os << op << " : ";
+    if (assessment->kind() == AssessmentKind::Final) {
+      os << "v" << FinalAssessment::cast(assessment)->virtual_register();
+    } else {
+      os << "P";
+    }
+    if (stale_ref_stack_slots().find(op) != stale_ref_stack_slots().end()) {
+      os << " (stale reference)";
+    }
+    os << std::endl;
+  }
+  os << std::endl;
+}
+
+BlockAssessments* RegisterAllocatorVerifier::CreateForBlock(
+    const InstructionBlock* block) {
+  RpoNumber current_block_id = block->rpo_number();
+
+  BlockAssessments* ret =
+      zone()->New<BlockAssessments>(zone(), spill_slot_delta());
+  if (block->PredecessorCount() == 0) {
+    // TODO(mtrofin): the following check should hold, however, in certain
+    // unit tests it is invalidated by the last block. Investigate and
+    // normalize the CFG.
+    // CHECK_EQ(0, current_block_id.ToInt());
+    // The phi size test below is because we can, technically, have phi
+    // instructions with one argument. Some tests expose that, too.
+  } else if (block->PredecessorCount() == 1 && block->phis().size() == 0) {
+    const BlockAssessments* prev_block = assessments_[block->predecessors()[0]];
+    ret->CopyFrom(prev_block);
+  } else {
+    for (RpoNumber pred_id : block->predecessors()) {
+      // For every operand coming from any of the predecessors, create an
+      // Unfinalized assessment.
+      auto iterator = assessments_.find(pred_id);
+      if (iterator == assessments_.end()) {
+        // This block is the head of a loop, and this predecessor is the
+        // loopback arc.
+        // Validate this is a loop case, otherwise the CFG is malformed.
+        CHECK(pred_id >= current_block_id);
+        CHECK(block->IsLoopHeader());
+        continue;
+      }
+      const BlockAssessments* pred_assessments = iterator->second;
+      CHECK_NOT_NULL(pred_assessments);
+      for (auto pair : pred_assessments->map()) {
+        InstructionOperand operand = pair.first;
+        if (ret->map().find(operand) == ret->map().end()) {
+          ret->map().insert(std::make_pair(
+              operand, zone()->New<PendingAssessment>(zone(), block, operand)));
+        }
+      }
+
+      // Any reference stack slots that became stale in predecessors will be
+      // stale here.
+      ret->stale_ref_stack_slots().insert(
+          pred_assessments->stale_ref_stack_slots().begin(),
+          pred_assessments->stale_ref_stack_slots().end());
+    }
+  }
+  return ret;
+}
+
+void RegisterAllocatorVerifier::ValidatePendingAssessment(
+    RpoNumber block_id, InstructionOperand op,
+    const BlockAssessments* current_assessments,
+    PendingAssessment* const assessment, int virtual_register) {
+  if (assessment->IsAliasOf(virtual_register)) return;
+
+  // When validating a pending assessment, it is possible some of the
+  // assessments for the original operand (the one where the assessment was
+  // created for first) are also pending. To avoid recursion, we use a work
+  // list. To deal with cycles, we keep a set of seen nodes.
+  Zone local_zone(zone()->allocator(), ZONE_NAME);
+  ZoneQueue<std::pair<const PendingAssessment*, int>> worklist(&local_zone);
+  ZoneSet<RpoNumber> seen(&local_zone);
+  worklist.push(std::make_pair(assessment, virtual_register));
+  seen.insert(block_id);
+
+  while (!worklist.empty()) {
+    auto work = worklist.front();
+    const PendingAssessment* current_assessment = work.first;
+    int current_virtual_register = work.second;
+    InstructionOperand current_operand = current_assessment->operand();
+    worklist.pop();
+
+    const InstructionBlock* origin = current_assessment->origin();
+    CHECK(origin->PredecessorCount() > 1 || origin->phis().size() > 0);
+
+    // Check if the virtual register is a phi first, instead of relying on
+    // the incoming assessments. In particular, this handles the case
+    // v1 = phi v0 v0, which structurally is identical to v0 having been
+    // defined at the top of a diamond, and arriving at the node joining the
+    // diamond's branches.
+    const PhiInstruction* phi = nullptr;
+    for (const PhiInstruction* candidate : origin->phis()) {
+      if (candidate->virtual_register() == current_virtual_register) {
+        phi = candidate;
+        break;
+      }
+    }
+
+    int op_index = 0;
+    for (RpoNumber pred : origin->predecessors()) {
+      int expected =
+          phi != nullptr ? phi->operands()[op_index] : current_virtual_register;
+
+      ++op_index;
+      auto pred_assignment = assessments_.find(pred);
+      if (pred_assignment == assessments_.end()) {
+        CHECK(origin->IsLoopHeader());
+        auto todo_iter = outstanding_assessments_.find(pred);
+        DelayedAssessments* set = nullptr;
+        if (todo_iter == outstanding_assessments_.end()) {
+          set = zone()->New<DelayedAssessments>(zone());
+          outstanding_assessments_.insert(std::make_pair(pred, set));
+        } else {
+          set = todo_iter->second;
+        }
+        set->AddDelayedAssessment(current_operand, expected);
+        continue;
+      }
+
+      const BlockAssessments* pred_assessments = pred_assignment->second;
+      auto found_contribution = pred_assessments->map().find(current_operand);
+      CHECK(found_contribution != pred_assessments->map().end());
+      Assessment* contribution = found_contribution->second;
+
+      switch (contribution->kind()) {
+        case Final:
+          CHECK_EQ(FinalAssessment::cast(contribution)->virtual_register(),
+                   expected);
+          break;
+        case Pending: {
+          // This happens if we have a diamond feeding into another one, with
+          // the inner one never being used other than for carrying the value.
+          const PendingAssessment* next = PendingAssessment::cast(contribution);
+          if (seen.find(pred) == seen.end()) {
+            worklist.push({next, expected});
+            seen.insert(pred);
+          }
+          // Note that we do not want to finalize pending assessments at the
+          // beginning of a block - which is the information we'd have
+          // available here. This is because this operand may be reused to
+          // define duplicate phis.
+          break;
+        }
+      }
+    }
+  }
+  assessment->AddAlias(virtual_register);
+}
+
+void RegisterAllocatorVerifier::ValidateUse(
+    RpoNumber block_id, BlockAssessments* current_assessments,
+    InstructionOperand op, int virtual_register) {
+  auto iterator = current_assessments->map().find(op);
+  // We should have seen this operand before.
+  CHECK(iterator != current_assessments->map().end());
+  Assessment* assessment = iterator->second;
+
+  // The operand shouldn't be a stale reference stack slot.
+  CHECK(!current_assessments->IsStaleReferenceStackSlot(op));
+
+  switch (assessment->kind()) {
+    case Final:
+      CHECK_EQ(FinalAssessment::cast(assessment)->virtual_register(),
+               virtual_register);
+      break;
+    case Pending: {
+      PendingAssessment* pending = PendingAssessment::cast(assessment);
+      ValidatePendingAssessment(block_id, op, current_assessments, pending,
+                                virtual_register);
+      break;
+    }
+  }
+}
+
+void RegisterAllocatorVerifier::VerifyGapMoves() {
+  CHECK(assessments_.empty());
+  CHECK(outstanding_assessments_.empty());
+  const size_t block_count = sequence()->instruction_blocks().size();
+  for (size_t block_index = 0; block_index < block_count; ++block_index) {
+    const InstructionBlock* block =
+        sequence()->instruction_blocks()[block_index];
+    BlockAssessments* block_assessments = CreateForBlock(block);
+
+    for (int instr_index = block->code_start(); instr_index < block->code_end();
+         ++instr_index) {
+      const InstructionConstraint& instr_constraint = constraints_[instr_index];
+      const Instruction* instr = instr_constraint.instruction_;
+      block_assessments->PerformMoves(instr);
+
+      const OperandConstraint* op_constraints =
+          instr_constraint.operand_constraints_;
+      size_t count = 0;
+      for (size_t i = 0; i < instr->InputCount(); ++i, ++count) {
+        if (op_constraints[count].type_ == kImmediate) {
+          continue;
+        }
+        int virtual_register = op_constraints[count].virtual_register_;
+        InstructionOperand op = *instr->InputAt(i);
+        ValidateUse(block->rpo_number(), block_assessments, op,
+                    virtual_register);
+      }
+      for (size_t i = 0; i < instr->TempCount(); ++i, ++count) {
+        block_assessments->Drop(*instr->TempAt(i));
+      }
+      if (instr->IsCall()) {
+        block_assessments->DropRegisters();
+      }
+      if (instr->HasReferenceMap()) {
+        block_assessments->CheckReferenceMap(instr->reference_map());
+      }
+      for (size_t i = 0; i < instr->OutputCount(); ++i, ++count) {
+        int virtual_register = op_constraints[count].virtual_register_;
+        block_assessments->AddDefinition(*instr->OutputAt(i), virtual_register);
+        if (op_constraints[count].type_ == kRegisterAndSlot) {
+          const AllocatedOperand* reg_op =
+              AllocatedOperand::cast(instr->OutputAt(i));
+          MachineRepresentation rep = reg_op->representation();
+          const AllocatedOperand* stack_op = AllocatedOperand::New(
+              zone(), LocationOperand::LocationKind::STACK_SLOT, rep,
+              op_constraints[i].spilled_slot_);
+          block_assessments->AddDefinition(*stack_op, virtual_register);
+        }
+      }
+    }
+    // Now commit the assessments for this block. If there are any delayed
+    // assessments, ValidatePendingAssessment should see this block, too.
+    assessments_[block->rpo_number()] = block_assessments;
+
+    auto todo_iter = outstanding_assessments_.find(block->rpo_number());
+    if (todo_iter == outstanding_assessments_.end()) continue;
+    DelayedAssessments* todo = todo_iter->second;
+    for (auto pair : todo->map()) {
+      InstructionOperand op = pair.first;
+      int vreg = pair.second;
+      auto found_op = block_assessments->map().find(op);
+      CHECK(found_op != block_assessments->map().end());
+      // This block is a jump back to the loop header; ensure that the op
+      // hasn't become a stale reference over the course of the loop's blocks.
+      CHECK(!block_assessments->IsStaleReferenceStackSlot(op));
+      switch (found_op->second->kind()) {
+        case Final:
+          CHECK_EQ(FinalAssessment::cast(found_op->second)->virtual_register(),
+                   vreg);
+          break;
+        case Pending:
+          ValidatePendingAssessment(block->rpo_number(), op, block_assessments,
+                                    PendingAssessment::cast(found_op->second),
+                                    vreg);
+          break;
+      }
+    }
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/register-allocator-verifier.h b/src/compiler/backend/register-allocator-verifier.h
new file mode 100644
index 0000000..11bd492
--- /dev/null
+++ b/src/compiler/backend/register-allocator-verifier.h
@@ -0,0 +1,304 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_VERIFIER_H_
+#define V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_VERIFIER_H_
+
+#include "src/compiler/backend/instruction.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+class InstructionSequence;
+
+// The register allocator validator traverses instructions in the instruction
+// sequence, and verifies the correctness of machine operand substitutions of
+// virtual registers. It collects the virtual register instruction signatures
+// before register allocation. Then, after the register allocation pipeline
+// completes, it compares the operand substitutions against the pre-allocation
+// data.
+// At a high level, validation works as follows: we iterate through each block,
+// and, in a block, through each instruction; then:
+// - when an operand is the output of an instruction, we associate it with the
+// virtual register that the instruction sequence declares as its output. We
+// use the concept of "FinalAssessment" to model this.
+// - when an operand is used in an instruction, we check that the assessment
+// matches the expectation of the instruction
+// - moves simply copy the assessment over to the new operand
+// - blocks with more than one predecessor associate to each operand a "Pending"
+// assessment. The pending assessment remembers the operand and block where it
+// was created. Then, when the value is used (which may be as a different
+// operand, because of moves), we check that the virtual register at the use
+// site matches the definition of this pending operand: either the phi inputs
+// match, or, if it's not a phi, all the predecessors at the point the pending
+// assessment was defined have that operand assigned to the given virtual
+// register. If all checks out, we record in the assessment that the virtual
+// register is aliased by the specific operand.
+// If a block is a loop header - so one or more of its predecessors are it or
+// below - we still treat uses of operands as above, but we record which operand
+// assessments haven't been made yet, and what virtual register they must
+// correspond to, and verify that when we are done with the respective
+// predecessor blocks.
+// This way, the algorithm always makes a final decision about the operands
+// in an instruction, ensuring convergence.
+// Operand assessments are recorded per block, as the result at the exit from
+// the block. When moving to a new block, we copy assessments from its single
+// predecessor, or, if the block has multiple predecessors, the mechanism was
+// described already.
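+//
+// A small illustrative example (not part of the algorithm itself): consider a
+// diamond where B1 defines v0 into some operand R, B2 and B3 are the two
+// branches, and B4 joins them and uses v0 from R. When B4 is reached, R gets a
+// Pending assessment originating at B4. Validating the use of v0 then checks
+// that both B2 and B3 exit with R assessed as v0 (or, for a phi, that the phi
+// inputs match), after which v0 is recorded as an alias of that pending
+// assessment so later uses of the same pair validate cheaply.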
+
+enum AssessmentKind { Final, Pending };
+
+class Assessment : public ZoneObject {
+ public:
+  Assessment(const Assessment&) = delete;
+  Assessment& operator=(const Assessment&) = delete;
+
+  AssessmentKind kind() const { return kind_; }
+
+ protected:
+  explicit Assessment(AssessmentKind kind) : kind_(kind) {}
+  AssessmentKind kind_;
+};
+
+// PendingAssessments are associated with operands coming from the multiple
+// predecessors of a block. We only record the operand and the block, and
+// will determine if the way the operand is defined (from the predecessors)
+// matches a particular use. We allow more than one vreg association with
+// an operand - this handles scenarios where multiple phis are
+// defined with identical operands, and the move optimizer moved down the moves
+// separating the 2 phis in the block defining them.
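+// For example (illustrative), with
+//   v1 = phi(v3, v4)
+//   v2 = phi(v3, v4)
+// both phis may end up reading the same operand at the block entry, so that
+// one operand legitimately aliases both v1 and v2.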
+class PendingAssessment final : public Assessment {
+ public:
+  explicit PendingAssessment(Zone* zone, const InstructionBlock* origin,
+                             InstructionOperand operand)
+      : Assessment(Pending),
+        origin_(origin),
+        operand_(operand),
+        aliases_(zone) {}
+
+  PendingAssessment(const PendingAssessment&) = delete;
+  PendingAssessment& operator=(const PendingAssessment&) = delete;
+
+  static const PendingAssessment* cast(const Assessment* assessment) {
+    CHECK(assessment->kind() == Pending);
+    return static_cast<const PendingAssessment*>(assessment);
+  }
+
+  static PendingAssessment* cast(Assessment* assessment) {
+    CHECK(assessment->kind() == Pending);
+    return static_cast<PendingAssessment*>(assessment);
+  }
+
+  const InstructionBlock* origin() const { return origin_; }
+  InstructionOperand operand() const { return operand_; }
+  bool IsAliasOf(int vreg) const { return aliases_.count(vreg) > 0; }
+  void AddAlias(int vreg) { aliases_.insert(vreg); }
+
+ private:
+  const InstructionBlock* const origin_;
+  InstructionOperand operand_;
+  ZoneSet<int> aliases_;
+};
+
+// FinalAssessments are associated with operands that we know to hold a
+// certain virtual register.
+class FinalAssessment final : public Assessment {
+ public:
+  explicit FinalAssessment(int virtual_register)
+      : Assessment(Final), virtual_register_(virtual_register) {}
+  FinalAssessment(const FinalAssessment&) = delete;
+  FinalAssessment& operator=(const FinalAssessment&) = delete;
+
+  int virtual_register() const { return virtual_register_; }
+  static const FinalAssessment* cast(const Assessment* assessment) {
+    CHECK(assessment->kind() == Final);
+    return static_cast<const FinalAssessment*>(assessment);
+  }
+
+ private:
+  int virtual_register_;
+};
+
+struct OperandAsKeyLess {
+  bool operator()(const InstructionOperand& a,
+                  const InstructionOperand& b) const {
+    return a.CompareCanonicalized(b);
+  }
+};
+
+// Assessments associated with a basic block.
+class BlockAssessments : public ZoneObject {
+ public:
+  using OperandMap = ZoneMap<InstructionOperand, Assessment*, OperandAsKeyLess>;
+  using OperandSet = ZoneSet<InstructionOperand, OperandAsKeyLess>;
+  explicit BlockAssessments(Zone* zone, int spill_slot_delta)
+      : map_(zone),
+        map_for_moves_(zone),
+        stale_ref_stack_slots_(zone),
+        spill_slot_delta_(spill_slot_delta),
+        zone_(zone) {}
+  BlockAssessments(const BlockAssessments&) = delete;
+  BlockAssessments& operator=(const BlockAssessments&) = delete;
+
+  void Drop(InstructionOperand operand) {
+    map_.erase(operand);
+    stale_ref_stack_slots_.erase(operand);
+  }
+  void DropRegisters();
+  void AddDefinition(InstructionOperand operand, int virtual_register) {
+    auto existent = map_.find(operand);
+    if (existent != map_.end()) {
+      // Drop the assignment
+      map_.erase(existent);
+      // Destination operand is no longer a stale reference.
+      stale_ref_stack_slots_.erase(operand);
+    }
+    map_.insert(
+        std::make_pair(operand, zone_->New<FinalAssessment>(virtual_register)));
+  }
+
+  void PerformMoves(const Instruction* instruction);
+  void PerformParallelMoves(const ParallelMove* moves);
+  void CopyFrom(const BlockAssessments* other) {
+    CHECK(map_.empty());
+    CHECK(stale_ref_stack_slots_.empty());
+    CHECK_NOT_NULL(other);
+    map_.insert(other->map_.begin(), other->map_.end());
+    stale_ref_stack_slots_.insert(other->stale_ref_stack_slots_.begin(),
+                                  other->stale_ref_stack_slots_.end());
+  }
+  void CheckReferenceMap(const ReferenceMap* reference_map);
+  bool IsStaleReferenceStackSlot(InstructionOperand op);
+
+  OperandMap& map() { return map_; }
+  const OperandMap& map() const { return map_; }
+
+  OperandSet& stale_ref_stack_slots() { return stale_ref_stack_slots_; }
+  const OperandSet& stale_ref_stack_slots() const {
+    return stale_ref_stack_slots_;
+  }
+
+  int spill_slot_delta() const { return spill_slot_delta_; }
+
+  void Print() const;
+
+ private:
+  OperandMap map_;
+  OperandMap map_for_moves_;
+  OperandSet stale_ref_stack_slots_;
+  int spill_slot_delta_;
+  Zone* zone_;
+};
+
+class RegisterAllocatorVerifier final : public ZoneObject {
+ public:
+  RegisterAllocatorVerifier(Zone* zone, const RegisterConfiguration* config,
+                            const InstructionSequence* sequence,
+                            const Frame* frame);
+  RegisterAllocatorVerifier(const RegisterAllocatorVerifier&) = delete;
+  RegisterAllocatorVerifier& operator=(const RegisterAllocatorVerifier&) =
+      delete;
+
+  void VerifyAssignment(const char* caller_info);
+  void VerifyGapMoves();
+
+ private:
+  enum ConstraintType {
+    kConstant,
+    kImmediate,
+    kRegister,
+    kFixedRegister,
+    kFPRegister,
+    kFixedFPRegister,
+    kSlot,
+    kFixedSlot,
+    kRegisterOrSlot,
+    kRegisterOrSlotFP,
+    kRegisterOrSlotOrConstant,
+    kSameAsFirst,
+    kRegisterAndSlot
+  };
+
+  struct OperandConstraint {
+    ConstraintType type_;
+    // Constant or immediate value, register code, slot index, or slot size
+    // when relevant.
+    int value_;
+    int spilled_slot_;
+    int virtual_register_;
+  };
+
+  struct InstructionConstraint {
+    const Instruction* instruction_;
+    size_t operand_constaints_size_;
+    OperandConstraint* operand_constraints_;
+  };
+
+  using Constraints = ZoneVector<InstructionConstraint>;
+
+  class DelayedAssessments : public ZoneObject {
+   public:
+    explicit DelayedAssessments(Zone* zone) : map_(zone) {}
+
+    const ZoneMap<InstructionOperand, int, OperandAsKeyLess>& map() const {
+      return map_;
+    }
+
+    void AddDelayedAssessment(InstructionOperand op, int vreg) {
+      auto it = map_.find(op);
+      if (it == map_.end()) {
+        map_.insert(std::make_pair(op, vreg));
+      } else {
+        CHECK_EQ(it->second, vreg);
+      }
+    }
+
+   private:
+    ZoneMap<InstructionOperand, int, OperandAsKeyLess> map_;
+  };
+
+  Zone* zone() const { return zone_; }
+  const RegisterConfiguration* config() { return config_; }
+  const InstructionSequence* sequence() const { return sequence_; }
+  Constraints* constraints() { return &constraints_; }
+  int spill_slot_delta() const { return spill_slot_delta_; }
+
+  static void VerifyInput(const OperandConstraint& constraint);
+  static void VerifyTemp(const OperandConstraint& constraint);
+  static void VerifyOutput(const OperandConstraint& constraint);
+
+  void BuildConstraint(const InstructionOperand* op,
+                       OperandConstraint* constraint);
+  void CheckConstraint(const InstructionOperand* op,
+                       const OperandConstraint* constraint);
+  BlockAssessments* CreateForBlock(const InstructionBlock* block);
+
+  // Prove that this operand is an alias of this virtual register in the given
+  // block. Update the assessment if that's the case.
+  void ValidatePendingAssessment(RpoNumber block_id, InstructionOperand op,
+                                 const BlockAssessments* current_assessments,
+                                 PendingAssessment* const assessment,
+                                 int virtual_register);
+  void ValidateUse(RpoNumber block_id, BlockAssessments* current_assessments,
+                   InstructionOperand op, int virtual_register);
+
+  Zone* const zone_;
+  const RegisterConfiguration* config_;
+  const InstructionSequence* const sequence_;
+  Constraints constraints_;
+  ZoneMap<RpoNumber, BlockAssessments*> assessments_;
+  ZoneMap<RpoNumber, DelayedAssessments*> outstanding_assessments_;
+  int spill_slot_delta_;
+  // TODO(chromium:725559): remove after we understand this bug's root cause.
+  const char* caller_info_ = nullptr;
+};
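+
+// Illustrative usage sketch (an assumption about how a register allocation
+// pipeline would drive the verifier; the real call sites live elsewhere in
+// the compiler and may differ in detail):
+//
+//   Zone verifier_zone(allocator, ZONE_NAME);
+//   RegisterAllocatorVerifier* verifier =
+//       verifier_zone.New<RegisterAllocatorVerifier>(
+//           &verifier_zone, RegisterConfiguration::Default(), sequence, frame);
+//   // ... run the register allocation phases over `sequence` ...
+//   verifier->VerifyAssignment("after register allocation");
+//   verifier->VerifyGapMoves();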
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_VERIFIER_H_
diff --git a/src/compiler/backend/register-allocator.cc b/src/compiler/backend/register-allocator.cc
new file mode 100644
index 0000000..c0905b9
--- /dev/null
+++ b/src/compiler/backend/register-allocator.cc
@@ -0,0 +1,5002 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/register-allocator.h"
+
+#include <iomanip>
+
+#include "src/base/iterator.h"
+#include "src/base/small-vector.h"
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/tick-counter.h"
+#include "src/compiler/backend/spill-placer.h"
+#include "src/compiler/linkage.h"
+#include "src/strings/string-stream.h"
+#include "src/utils/vector.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define TRACE_COND(cond, ...)      \
+  do {                             \
+    if (cond) PrintF(__VA_ARGS__); \
+  } while (false)
+
+#define TRACE(...) TRACE_COND(data()->is_trace_alloc(), __VA_ARGS__)
+
+namespace {
+
+static constexpr int kFloat32Bit =
+    RepresentationBit(MachineRepresentation::kFloat32);
+static constexpr int kSimd128Bit =
+    RepresentationBit(MachineRepresentation::kSimd128);
+
+const InstructionBlock* GetContainingLoop(const InstructionSequence* sequence,
+                                          const InstructionBlock* block) {
+  RpoNumber index = block->loop_header();
+  if (!index.IsValid()) return nullptr;
+  return sequence->InstructionBlockAt(index);
+}
+
+const InstructionBlock* GetInstructionBlock(const InstructionSequence* code,
+                                            LifetimePosition pos) {
+  return code->GetInstructionBlock(pos.ToInstructionIndex());
+}
+
+Instruction* GetLastInstruction(InstructionSequence* code,
+                                const InstructionBlock* block) {
+  return code->InstructionAt(block->last_instruction_index());
+}
+
+}  // namespace
+
+void LiveRangeBoundArray::Initialize(Zone* zone, TopLevelLiveRange* range) {
+  size_t max_child_count = range->GetMaxChildCount();
+
+  start_ = zone->NewArray<LiveRangeBound>(max_child_count);
+  length_ = 0;
+  LiveRangeBound* curr = start_;
+  // The primary loop in ResolveControlFlow is not responsible for inserting
+  // connecting moves for spilled ranges.
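+  // Each bound therefore records whether its range is spilled (the second
+  // constructor argument below), so FindConnectableSubranges can skip such
+  // subranges.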
+  for (LiveRange* i = range; i != nullptr; i = i->next(), ++curr, ++length_) {
+    new (curr) LiveRangeBound(i, i->spilled());
+  }
+}
+
+LiveRangeBound* LiveRangeBoundArray::Find(
+    const LifetimePosition position) const {
+  size_t left_index = 0;
+  size_t right_index = length_;
+  while (true) {
+    size_t current_index = left_index + (right_index - left_index) / 2;
+    DCHECK(right_index > current_index);
+    LiveRangeBound* bound = &start_[current_index];
+    if (bound->start_ <= position) {
+      if (position < bound->end_) return bound;
+      DCHECK(left_index < current_index);
+      left_index = current_index;
+    } else {
+      right_index = current_index;
+    }
+  }
+}
+
+LiveRangeBound* LiveRangeBoundArray::FindPred(const InstructionBlock* pred) {
+  LifetimePosition pred_end = LifetimePosition::InstructionFromInstructionIndex(
+      pred->last_instruction_index());
+  return Find(pred_end);
+}
+
+LiveRangeBound* LiveRangeBoundArray::FindSucc(const InstructionBlock* succ) {
+  LifetimePosition succ_start = LifetimePosition::GapFromInstructionIndex(
+      succ->first_instruction_index());
+  return Find(succ_start);
+}
+
+bool LiveRangeBoundArray::FindConnectableSubranges(
+    const InstructionBlock* block, const InstructionBlock* pred,
+    FindResult* result) const {
+  LifetimePosition pred_end = LifetimePosition::InstructionFromInstructionIndex(
+      pred->last_instruction_index());
+  LiveRangeBound* bound = Find(pred_end);
+  result->pred_cover_ = bound->range_;
+  LifetimePosition cur_start = LifetimePosition::GapFromInstructionIndex(
+      block->first_instruction_index());
+
+  if (bound->CanCover(cur_start)) {
+    // Both blocks are covered by the same range, so there is nothing to
+    // connect.
+    return false;
+  }
+  bound = Find(cur_start);
+  if (bound->skip_) {
+    return false;
+  }
+  result->cur_cover_ = bound->range_;
+  DCHECK(result->pred_cover_ != nullptr && result->cur_cover_ != nullptr);
+  return (result->cur_cover_ != result->pred_cover_);
+}
+
+LiveRangeFinder::LiveRangeFinder(const TopTierRegisterAllocationData* data,
+                                 Zone* zone)
+    : data_(data),
+      bounds_length_(static_cast<int>(data_->live_ranges().size())),
+      bounds_(zone->NewArray<LiveRangeBoundArray>(bounds_length_)),
+      zone_(zone) {
+  for (int i = 0; i < bounds_length_; ++i) {
+    new (&bounds_[i]) LiveRangeBoundArray();
+  }
+}
+
+LiveRangeBoundArray* LiveRangeFinder::ArrayFor(int operand_index) {
+  DCHECK(operand_index < bounds_length_);
+  TopLevelLiveRange* range = data_->live_ranges()[operand_index];
+  DCHECK(range != nullptr && !range->IsEmpty());
+  LiveRangeBoundArray* array = &bounds_[operand_index];
+  if (array->ShouldInitialize()) {
+    array->Initialize(zone_, range);
+  }
+  return array;
+}
+
+using DelayedInsertionMapKey = std::pair<ParallelMove*, InstructionOperand>;
+
+struct DelayedInsertionMapCompare {
+  bool operator()(const DelayedInsertionMapKey& a,
+                  const DelayedInsertionMapKey& b) const {
+    if (a.first == b.first) {
+      return a.second.Compare(b.second);
+    }
+    return a.first < b.first;
+  }
+};
+
+using DelayedInsertionMap = ZoneMap<DelayedInsertionMapKey, InstructionOperand,
+                                    DelayedInsertionMapCompare>;
+
+UsePosition::UsePosition(LifetimePosition pos, InstructionOperand* operand,
+                         void* hint, UsePositionHintType hint_type)
+    : operand_(operand), hint_(hint), next_(nullptr), pos_(pos), flags_(0) {
+  DCHECK_IMPLIES(hint == nullptr, hint_type == UsePositionHintType::kNone);
+  bool register_beneficial = true;
+  UsePositionType type = UsePositionType::kRegisterOrSlot;
+  if (operand_ != nullptr && operand_->IsUnallocated()) {
+    const UnallocatedOperand* unalloc = UnallocatedOperand::cast(operand_);
+    if (unalloc->HasRegisterPolicy()) {
+      type = UsePositionType::kRequiresRegister;
+    } else if (unalloc->HasSlotPolicy()) {
+      type = UsePositionType::kRequiresSlot;
+      register_beneficial = false;
+    } else if (unalloc->HasRegisterOrSlotOrConstantPolicy()) {
+      type = UsePositionType::kRegisterOrSlotOrConstant;
+      register_beneficial = false;
+    } else {
+      register_beneficial = !unalloc->HasRegisterOrSlotPolicy();
+    }
+  }
+  flags_ = TypeField::encode(type) | HintTypeField::encode(hint_type) |
+           RegisterBeneficialField::encode(register_beneficial) |
+           AssignedRegisterField::encode(kUnassignedRegister);
+  DCHECK(pos_.IsValid());
+}
+
+bool UsePosition::HasHint() const {
+  int hint_register;
+  return HintRegister(&hint_register);
+}
+
+bool UsePosition::HintRegister(int* register_code) const {
+  if (hint_ == nullptr) return false;
+  switch (HintTypeField::decode(flags_)) {
+    case UsePositionHintType::kNone:
+    case UsePositionHintType::kUnresolved:
+      return false;
+    case UsePositionHintType::kUsePos: {
+      UsePosition* use_pos = reinterpret_cast<UsePosition*>(hint_);
+      int assigned_register = AssignedRegisterField::decode(use_pos->flags_);
+      if (assigned_register == kUnassignedRegister) return false;
+      *register_code = assigned_register;
+      return true;
+    }
+    case UsePositionHintType::kOperand: {
+      InstructionOperand* operand =
+          reinterpret_cast<InstructionOperand*>(hint_);
+      *register_code = LocationOperand::cast(operand)->register_code();
+      return true;
+    }
+    case UsePositionHintType::kPhi: {
+      TopTierRegisterAllocationData::PhiMapValue* phi =
+          reinterpret_cast<TopTierRegisterAllocationData::PhiMapValue*>(hint_);
+      int assigned_register = phi->assigned_register();
+      if (assigned_register == kUnassignedRegister) return false;
+      *register_code = assigned_register;
+      return true;
+    }
+  }
+  UNREACHABLE();
+}
+
+UsePositionHintType UsePosition::HintTypeForOperand(
+    const InstructionOperand& op) {
+  switch (op.kind()) {
+    case InstructionOperand::CONSTANT:
+    case InstructionOperand::IMMEDIATE:
+      return UsePositionHintType::kNone;
+    case InstructionOperand::UNALLOCATED:
+      return UsePositionHintType::kUnresolved;
+    case InstructionOperand::ALLOCATED:
+      if (op.IsRegister() || op.IsFPRegister()) {
+        return UsePositionHintType::kOperand;
+      } else {
+        DCHECK(op.IsStackSlot() || op.IsFPStackSlot());
+        return UsePositionHintType::kNone;
+      }
+    case InstructionOperand::PENDING:
+    case InstructionOperand::INVALID:
+      break;
+  }
+  UNREACHABLE();
+}
+
+void UsePosition::SetHint(UsePosition* use_pos) {
+  DCHECK_NOT_NULL(use_pos);
+  hint_ = use_pos;
+  flags_ = HintTypeField::update(flags_, UsePositionHintType::kUsePos);
+}
+
+void UsePosition::ResolveHint(UsePosition* use_pos) {
+  DCHECK_NOT_NULL(use_pos);
+  if (HintTypeField::decode(flags_) != UsePositionHintType::kUnresolved) return;
+  hint_ = use_pos;
+  flags_ = HintTypeField::update(flags_, UsePositionHintType::kUsePos);
+}
+
+void UsePosition::set_type(UsePositionType type, bool register_beneficial) {
+  DCHECK_IMPLIES(type == UsePositionType::kRequiresSlot, !register_beneficial);
+  DCHECK_EQ(kUnassignedRegister, AssignedRegisterField::decode(flags_));
+  flags_ = TypeField::encode(type) |
+           RegisterBeneficialField::encode(register_beneficial) |
+           HintTypeField::encode(HintTypeField::decode(flags_)) |
+           AssignedRegisterField::encode(kUnassignedRegister);
+}
+
+UseInterval* UseInterval::SplitAt(LifetimePosition pos, Zone* zone) {
+  DCHECK(Contains(pos) && pos != start());
+  UseInterval* after = zone->New<UseInterval>(pos, end_);
+  after->next_ = next_;
+  next_ = nullptr;
+  end_ = pos;
+  return after;
+}
+
+void LifetimePosition::Print() const { StdoutStream{} << *this << std::endl; }
+
+std::ostream& operator<<(std::ostream& os, const LifetimePosition pos) {
+  os << '@' << pos.ToInstructionIndex();
+  if (pos.IsGapPosition()) {
+    os << 'g';
+  } else {
+    os << 'i';
+  }
+  if (pos.IsStart()) {
+    os << 's';
+  } else {
+    os << 'e';
+  }
+  return os;
+}
+
+LiveRange::LiveRange(int relative_id, MachineRepresentation rep,
+                     TopLevelLiveRange* top_level)
+    : relative_id_(relative_id),
+      bits_(0),
+      last_interval_(nullptr),
+      first_interval_(nullptr),
+      first_pos_(nullptr),
+      top_level_(top_level),
+      next_(nullptr),
+      current_interval_(nullptr),
+      last_processed_use_(nullptr),
+      current_hint_position_(nullptr) {
+  DCHECK(AllocatedOperand::IsSupportedRepresentation(rep));
+  bits_ = AssignedRegisterField::encode(kUnassignedRegister) |
+          RepresentationField::encode(rep) |
+          ControlFlowRegisterHint::encode(kUnassignedRegister);
+}
+
+void LiveRange::VerifyPositions() const {
+  // Walk the positions, verifying that each is in an interval.
+  UseInterval* interval = first_interval_;
+  for (UsePosition* pos = first_pos_; pos != nullptr; pos = pos->next()) {
+    CHECK(Start() <= pos->pos());
+    CHECK(pos->pos() <= End());
+    CHECK_NOT_NULL(interval);
+    while (!interval->Contains(pos->pos()) && interval->end() != pos->pos()) {
+      interval = interval->next();
+      CHECK_NOT_NULL(interval);
+    }
+  }
+}
+
+void LiveRange::VerifyIntervals() const {
+  DCHECK(first_interval()->start() == Start());
+  LifetimePosition last_end = first_interval()->end();
+  for (UseInterval* interval = first_interval()->next(); interval != nullptr;
+       interval = interval->next()) {
+    DCHECK(last_end <= interval->start());
+    last_end = interval->end();
+  }
+  DCHECK(last_end == End());
+}
+
+void LiveRange::set_assigned_register(int reg) {
+  DCHECK(!HasRegisterAssigned() && !spilled());
+  bits_ = AssignedRegisterField::update(bits_, reg);
+}
+
+void LiveRange::UnsetAssignedRegister() {
+  DCHECK(HasRegisterAssigned() && !spilled());
+  bits_ = AssignedRegisterField::update(bits_, kUnassignedRegister);
+}
+
+void LiveRange::AttachToNext() {
+  DCHECK_NOT_NULL(next_);
+  DCHECK_NE(TopLevel()->last_child_covers_, next_);
+  last_interval_->set_next(next_->first_interval());
+  next_->first_interval_ = nullptr;
+  last_interval_ = next_->last_interval_;
+  next_->last_interval_ = nullptr;
+  if (first_pos() == nullptr) {
+    first_pos_ = next_->first_pos();
+  } else {
+    UsePosition* ptr = first_pos_;
+    while (ptr->next() != nullptr) {
+      ptr = ptr->next();
+    }
+    ptr->set_next(next_->first_pos());
+  }
+  next_->first_pos_ = nullptr;
+  LiveRange* old_next = next_;
+  next_ = next_->next_;
+  old_next->next_ = nullptr;
+}
+
+void LiveRange::Unspill() {
+  DCHECK(spilled());
+  set_spilled(false);
+  bits_ = AssignedRegisterField::update(bits_, kUnassignedRegister);
+}
+
+void LiveRange::Spill() {
+  DCHECK(!spilled());
+  DCHECK(!TopLevel()->HasNoSpillType());
+  set_spilled(true);
+  bits_ = AssignedRegisterField::update(bits_, kUnassignedRegister);
+}
+
+RegisterKind LiveRange::kind() const {
+  return IsFloatingPoint(representation()) ? RegisterKind::kDouble
+                                           : RegisterKind::kGeneral;
+}
+
+UsePosition* LiveRange::FirstHintPosition(int* register_index) {
+  if (!first_pos_) return nullptr;
+  if (current_hint_position_) {
+    if (current_hint_position_->pos() < first_pos_->pos()) {
+      current_hint_position_ = first_pos_;
+    }
+    if (current_hint_position_->pos() > End()) {
+      current_hint_position_ = nullptr;
+    }
+  }
+  bool needs_revisit = false;
+  UsePosition* pos = current_hint_position_;
+  for (; pos != nullptr; pos = pos->next()) {
+    if (pos->HintRegister(register_index)) {
+      break;
+    }
+    // Phi and use position hints can be assigned during allocation which
+    // would invalidate the cached hint position. Make sure we revisit them.
+    needs_revisit = needs_revisit ||
+                    pos->hint_type() == UsePositionHintType::kPhi ||
+                    pos->hint_type() == UsePositionHintType::kUsePos;
+  }
+  if (!needs_revisit) {
+    current_hint_position_ = pos;
+  }
+#ifdef DEBUG
+  UsePosition* pos_check = first_pos_;
+  for (; pos_check != nullptr; pos_check = pos_check->next()) {
+    if (pos_check->HasHint()) {
+      break;
+    }
+  }
+  CHECK_EQ(pos, pos_check);
+#endif
+  return pos;
+}
+
+UsePosition* LiveRange::NextUsePosition(LifetimePosition start) const {
+  UsePosition* use_pos = last_processed_use_;
+  if (use_pos == nullptr || use_pos->pos() > start) {
+    use_pos = first_pos();
+  }
+  while (use_pos != nullptr && use_pos->pos() < start) {
+    use_pos = use_pos->next();
+  }
+  last_processed_use_ = use_pos;
+  return use_pos;
+}
+
+UsePosition* LiveRange::NextUsePositionRegisterIsBeneficial(
+    LifetimePosition start) const {
+  UsePosition* pos = NextUsePosition(start);
+  while (pos != nullptr && !pos->RegisterIsBeneficial()) {
+    pos = pos->next();
+  }
+  return pos;
+}
+
+LifetimePosition LiveRange::NextLifetimePositionRegisterIsBeneficial(
+    const LifetimePosition& start) const {
+  UsePosition* next_use = NextUsePositionRegisterIsBeneficial(start);
+  if (next_use == nullptr) return End();
+  return next_use->pos();
+}
+
+UsePosition* LiveRange::PreviousUsePositionRegisterIsBeneficial(
+    LifetimePosition start) const {
+  UsePosition* pos = first_pos();
+  UsePosition* prev = nullptr;
+  while (pos != nullptr && pos->pos() < start) {
+    if (pos->RegisterIsBeneficial()) prev = pos;
+    pos = pos->next();
+  }
+  return prev;
+}
+
+UsePosition* LiveRange::NextUsePositionSpillDetrimental(
+    LifetimePosition start) const {
+  UsePosition* pos = NextUsePosition(start);
+  while (pos != nullptr && pos->type() != UsePositionType::kRequiresRegister &&
+         !pos->SpillDetrimental()) {
+    pos = pos->next();
+  }
+  return pos;
+}
+
+UsePosition* LiveRange::NextRegisterPosition(LifetimePosition start) const {
+  UsePosition* pos = NextUsePosition(start);
+  while (pos != nullptr && pos->type() != UsePositionType::kRequiresRegister) {
+    pos = pos->next();
+  }
+  return pos;
+}
+
+UsePosition* LiveRange::NextSlotPosition(LifetimePosition start) const {
+  for (UsePosition* pos = NextUsePosition(start); pos != nullptr;
+       pos = pos->next()) {
+    if (pos->type() != UsePositionType::kRequiresSlot) continue;
+    return pos;
+  }
+  return nullptr;
+}
+
+bool LiveRange::CanBeSpilled(LifetimePosition pos) const {
+  // We cannot spill a live range that has a use requiring a register
+  // at the current or the immediate next position.
+  UsePosition* use_pos = NextRegisterPosition(pos);
+  if (use_pos == nullptr) return true;
+  return use_pos->pos() > pos.NextStart().End();
+}
+
+bool LiveRange::IsTopLevel() const { return top_level_ == this; }
+
+InstructionOperand LiveRange::GetAssignedOperand() const {
+  DCHECK(!IsEmpty());
+  if (HasRegisterAssigned()) {
+    DCHECK(!spilled());
+    return AllocatedOperand(LocationOperand::REGISTER, representation(),
+                            assigned_register());
+  }
+  DCHECK(spilled());
+  DCHECK(!HasRegisterAssigned());
+  if (TopLevel()->HasSpillOperand()) {
+    InstructionOperand* op = TopLevel()->GetSpillOperand();
+    DCHECK(!op->IsUnallocated());
+    return *op;
+  }
+  return TopLevel()->GetSpillRangeOperand();
+}
+
+UseInterval* LiveRange::FirstSearchIntervalForPosition(
+    LifetimePosition position) const {
+  if (current_interval_ == nullptr) return first_interval_;
+  if (current_interval_->start() > position) {
+    current_interval_ = nullptr;
+    return first_interval_;
+  }
+  return current_interval_;
+}
+
+void LiveRange::AdvanceLastProcessedMarker(
+    UseInterval* to_start_of, LifetimePosition but_not_past) const {
+  if (to_start_of == nullptr) return;
+  if (to_start_of->start() > but_not_past) return;
+  LifetimePosition start = current_interval_ == nullptr
+                               ? LifetimePosition::Invalid()
+                               : current_interval_->start();
+  if (to_start_of->start() > start) {
+    current_interval_ = to_start_of;
+  }
+}
+
+LiveRange* LiveRange::SplitAt(LifetimePosition position, Zone* zone) {
+  int new_id = TopLevel()->GetNextChildId();
+  LiveRange* child = zone->New<LiveRange>(new_id, representation(), TopLevel());
+  child->set_bundle(bundle_);
+  // If we split, we do so because we're about to switch registers or move
+  // to/from a slot, so there's no value in connecting hints.
+  DetachAt(position, child, zone, DoNotConnectHints);
+
+  child->top_level_ = TopLevel();
+  child->next_ = next_;
+  next_ = child;
+  return child;
+}
+
+UsePosition* LiveRange::DetachAt(LifetimePosition position, LiveRange* result,
+                                 Zone* zone,
+                                 HintConnectionOption connect_hints) {
+  DCHECK(Start() < position);
+  DCHECK(End() > position);
+  DCHECK(result->IsEmpty());
+  // Find the last interval that ends before the position. If the
+  // position is contained in one of the intervals in the chain, we
+  // split that interval and use the first part.
+  UseInterval* current = FirstSearchIntervalForPosition(position);
+
+  // If the split position coincides with the beginning of a use interval
+  // we need to split use positions in a special way.
+  bool split_at_start = false;
+
+  if (current->start() == position) {
+    // When splitting at start we need to locate the previous use interval.
+    current = first_interval_;
+  }
+
+  UseInterval* after = nullptr;
+  while (current != nullptr) {
+    if (current->Contains(position)) {
+      after = current->SplitAt(position, zone);
+      break;
+    }
+    UseInterval* next = current->next();
+    if (next->start() >= position) {
+      split_at_start = (next->start() == position);
+      after = next;
+      current->set_next(nullptr);
+      break;
+    }
+    current = next;
+  }
+  DCHECK_NOT_NULL(after);
+
+  // Partition original use intervals to the two live ranges.
+  UseInterval* before = current;
+  result->last_interval_ =
+      (last_interval_ == before)
+          ? after            // Only interval in the range after split.
+          : last_interval_;  // Last interval of the original range.
+  result->first_interval_ = after;
+  last_interval_ = before;
+
+  // Find the last use position before the split and the first use
+  // position after it.
+  UsePosition* use_after = first_pos();
+  UsePosition* use_before = nullptr;
+  if (split_at_start) {
+    // The split position coincides with the beginning of a use interval (the
+    // end of a lifetime hole). A use at this position should be attributed to
+    // the split child, because the split child owns the use interval covering
+    // it.
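+    // For example (illustrative): with use intervals [0, 4[ and [10, 20[ and a
+    // use exactly at position 10, splitting at 10 attributes that use to the
+    // split child, whose first interval starts at 10.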
+    while (use_after != nullptr && use_after->pos() < position) {
+      use_before = use_after;
+      use_after = use_after->next();
+    }
+  } else {
+    while (use_after != nullptr && use_after->pos() <= position) {
+      use_before = use_after;
+      use_after = use_after->next();
+    }
+  }
+
+  // Partition original use positions to the two live ranges.
+  if (use_before != nullptr) {
+    use_before->set_next(nullptr);
+  } else {
+    first_pos_ = nullptr;
+  }
+  result->first_pos_ = use_after;
+  result->current_hint_position_ = current_hint_position_;
+
+  // Discard cached iteration state. It might be pointing
+  // to the use that no longer belongs to this live range.
+  last_processed_use_ = nullptr;
+  current_interval_ = nullptr;
+
+  if (connect_hints == ConnectHints && use_before != nullptr &&
+      use_after != nullptr) {
+    use_after->SetHint(use_before);
+    result->current_hint_position_ = use_after;
+  }
+#ifdef DEBUG
+  VerifyChildStructure();
+  result->VerifyChildStructure();
+#endif
+  return use_before;
+}
+
+void LiveRange::UpdateParentForAllChildren(TopLevelLiveRange* new_top_level) {
+  LiveRange* child = this;
+  for (; child != nullptr; child = child->next()) {
+    child->top_level_ = new_top_level;
+  }
+}
+
+void LiveRange::ConvertUsesToOperand(const InstructionOperand& op,
+                                     const InstructionOperand& spill_op) {
+  for (UsePosition* pos = first_pos(); pos != nullptr; pos = pos->next()) {
+    DCHECK(Start() <= pos->pos() && pos->pos() <= End());
+    if (!pos->HasOperand()) continue;
+    switch (pos->type()) {
+      case UsePositionType::kRequiresSlot:
+        DCHECK(spill_op.IsStackSlot() || spill_op.IsFPStackSlot());
+        InstructionOperand::ReplaceWith(pos->operand(), &spill_op);
+        break;
+      case UsePositionType::kRequiresRegister:
+        DCHECK(op.IsRegister() || op.IsFPRegister());
+        V8_FALLTHROUGH;
+      case UsePositionType::kRegisterOrSlot:
+      case UsePositionType::kRegisterOrSlotOrConstant:
+        InstructionOperand::ReplaceWith(pos->operand(), &op);
+        break;
+    }
+  }
+}
+
+// This implements an ordering on live ranges so that they are ordered by their
+// start positions.  This is needed for the correctness of the register
+// allocation algorithm.  If two live ranges start at the same offset then there
+// is a tie breaker based on where the value is first used.  This part of the
+// ordering is merely a heuristic.
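+// For example (illustrative): if two ranges start at the same position, the
+// one with the smaller controlflow_hint() is allocated first; remaining ties
+// fall back to the first use position, and finally to the vreg number so that
+// the order stays total.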
+bool LiveRange::ShouldBeAllocatedBefore(const LiveRange* other) const {
+  LifetimePosition start = Start();
+  LifetimePosition other_start = other->Start();
+  if (start == other_start) {
+    // Prefer the range that has a control flow register hint, to make sure it
+    // gets allocated first. This allows the control-flow-aware allocation to
+    // just put ranges back into the queue without other ranges interfering.
+    if (controlflow_hint() < other->controlflow_hint()) {
+      return true;
+    }
+    // The other has a smaller hint.
+    if (controlflow_hint() > other->controlflow_hint()) {
+      return false;
+    }
+    // Both have the same hint or no hint at all. Use first use position.
+    UsePosition* pos = first_pos();
+    UsePosition* other_pos = other->first_pos();
+    // To make the order total, handle the case where both positions are null.
+    if (pos == other_pos) return TopLevel()->vreg() < other->TopLevel()->vreg();
+    if (pos == nullptr) return false;
+    if (other_pos == nullptr) return true;
+    // To make the order total, handle the case where both positions are equal.
+    if (pos->pos() == other_pos->pos())
+      return TopLevel()->vreg() < other->TopLevel()->vreg();
+    return pos->pos() < other_pos->pos();
+  }
+  return start < other_start;
+}
+
+void LiveRange::SetUseHints(int register_index) {
+  for (UsePosition* pos = first_pos(); pos != nullptr; pos = pos->next()) {
+    if (!pos->HasOperand()) continue;
+    switch (pos->type()) {
+      case UsePositionType::kRequiresSlot:
+        break;
+      case UsePositionType::kRequiresRegister:
+      case UsePositionType::kRegisterOrSlot:
+      case UsePositionType::kRegisterOrSlotOrConstant:
+        pos->set_assigned_register(register_index);
+        break;
+    }
+  }
+}
+
+bool LiveRange::CanCover(LifetimePosition position) const {
+  if (IsEmpty()) return false;
+  return Start() <= position && position < End();
+}
+
+bool LiveRange::Covers(LifetimePosition position) const {
+  if (!CanCover(position)) return false;
+  UseInterval* start_search = FirstSearchIntervalForPosition(position);
+  for (UseInterval* interval = start_search; interval != nullptr;
+       interval = interval->next()) {
+    DCHECK(interval->next() == nullptr ||
+           interval->next()->start() >= interval->start());
+    AdvanceLastProcessedMarker(interval, position);
+    if (interval->Contains(position)) return true;
+    if (interval->start() > position) return false;
+  }
+  return false;
+}
+
+LifetimePosition LiveRange::NextEndAfter(LifetimePosition position) const {
+  UseInterval* start_search = FirstSearchIntervalForPosition(position);
+  while (start_search->end() < position) {
+    start_search = start_search->next();
+  }
+  return start_search->end();
+}
+
+LifetimePosition LiveRange::NextStartAfter(LifetimePosition position) {
+  UseInterval* start_search = FirstSearchIntervalForPosition(position);
+  while (start_search->start() < position) {
+    start_search = start_search->next();
+  }
+  next_start_ = start_search->start();
+  return next_start_;
+}
+
+LifetimePosition LiveRange::FirstIntersection(LiveRange* other) const {
+  UseInterval* b = other->first_interval();
+  if (b == nullptr) return LifetimePosition::Invalid();
+  LifetimePosition advance_last_processed_up_to = b->start();
+  UseInterval* a = FirstSearchIntervalForPosition(b->start());
+  while (a != nullptr && b != nullptr) {
+    if (a->start() > other->End()) break;
+    if (b->start() > End()) break;
+    LifetimePosition cur_intersection = a->Intersect(b);
+    if (cur_intersection.IsValid()) {
+      return cur_intersection;
+    }
+    if (a->start() < b->start()) {
+      a = a->next();
+      if (a == nullptr || a->start() > other->End()) break;
+      AdvanceLastProcessedMarker(a, advance_last_processed_up_to);
+    } else {
+      b = b->next();
+    }
+  }
+  return LifetimePosition::Invalid();
+}
+
+void LiveRange::Print(const RegisterConfiguration* config,
+                      bool with_children) const {
+  StdoutStream os;
+  PrintableLiveRange wrapper;
+  wrapper.register_configuration_ = config;
+  for (const LiveRange* i = this; i != nullptr; i = i->next()) {
+    wrapper.range_ = i;
+    os << wrapper << std::endl;
+    if (!with_children) break;
+  }
+}
+
+void LiveRange::Print(bool with_children) const {
+  Print(RegisterConfiguration::Default(), with_children);
+}
+
+bool LiveRange::RegisterFromBundle(int* hint) const {
+  if (bundle_ == nullptr || bundle_->reg() == kUnassignedRegister) return false;
+  *hint = bundle_->reg();
+  return true;
+}
+
+void LiveRange::UpdateBundleRegister(int reg) const {
+  if (bundle_ == nullptr || bundle_->reg() != kUnassignedRegister) return;
+  bundle_->set_reg(reg);
+}
+
+struct TopLevelLiveRange::SpillMoveInsertionList : ZoneObject {
+  SpillMoveInsertionList(int gap_index, InstructionOperand* operand,
+                         SpillMoveInsertionList* next)
+      : gap_index(gap_index), operand(operand), next(next) {}
+  const int gap_index;
+  InstructionOperand* const operand;
+  SpillMoveInsertionList* next;
+};
+
+TopLevelLiveRange::TopLevelLiveRange(int vreg, MachineRepresentation rep)
+    : LiveRange(0, rep, this),
+      vreg_(vreg),
+      last_child_id_(0),
+      spill_operand_(nullptr),
+      spill_move_insertion_locations_(nullptr),
+      spilled_in_deferred_blocks_(false),
+      has_preassigned_slot_(false),
+      spill_start_index_(kMaxInt),
+      last_pos_(nullptr),
+      last_child_covers_(this) {
+  bits_ |= SpillTypeField::encode(SpillType::kNoSpillType);
+}
+
+void TopLevelLiveRange::RecordSpillLocation(Zone* zone, int gap_index,
+                                            InstructionOperand* operand) {
+  DCHECK(HasNoSpillType());
+  spill_move_insertion_locations_ = zone->New<SpillMoveInsertionList>(
+      gap_index, operand, spill_move_insertion_locations_);
+}
+
+void TopLevelLiveRange::CommitSpillMoves(TopTierRegisterAllocationData* data,
+                                         const InstructionOperand& op) {
+  DCHECK_IMPLIES(op.IsConstant(),
+                 GetSpillMoveInsertionLocations(data) == nullptr);
+
+  if (HasGeneralSpillRange()) {
+    SetLateSpillingSelected(false);
+  }
+
+  InstructionSequence* sequence = data->code();
+  Zone* zone = sequence->zone();
+
+  for (SpillMoveInsertionList* to_spill = GetSpillMoveInsertionLocations(data);
+       to_spill != nullptr; to_spill = to_spill->next) {
+    Instruction* instr = sequence->InstructionAt(to_spill->gap_index);
+    ParallelMove* move =
+        instr->GetOrCreateParallelMove(Instruction::START, zone);
+    move->AddMove(*to_spill->operand, op);
+    instr->block()->mark_needs_frame();
+  }
+}
+
+void TopLevelLiveRange::FilterSpillMoves(TopTierRegisterAllocationData* data,
+                                         const InstructionOperand& op) {
+  DCHECK_IMPLIES(op.IsConstant(),
+                 GetSpillMoveInsertionLocations(data) == nullptr);
+  bool might_be_duplicated = has_slot_use() || spilled();
+  InstructionSequence* sequence = data->code();
+
+  SpillMoveInsertionList* previous = nullptr;
+  for (SpillMoveInsertionList* to_spill = GetSpillMoveInsertionLocations(data);
+       to_spill != nullptr; previous = to_spill, to_spill = to_spill->next) {
+    Instruction* instr = sequence->InstructionAt(to_spill->gap_index);
+    ParallelMove* move = instr->GetParallelMove(Instruction::START);
+    // Skip insertion if it's possible that the move exists already as a
+    // constraint move from a fixed output register to a slot.
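+    // For example (illustrative): if the gap at this index already contains a
+    // move from the same source operand to the same slot, the loop below finds
+    // it and we avoid inserting a duplicate spill move.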
+    bool found = false;
+    if (move != nullptr && (might_be_duplicated || has_preassigned_slot())) {
+      for (MoveOperands* move_op : *move) {
+        if (move_op->IsEliminated()) continue;
+        if (move_op->source().Equals(*to_spill->operand) &&
+            move_op->destination().Equals(op)) {
+          found = true;
+          if (has_preassigned_slot()) move_op->Eliminate();
+          break;
+        }
+      }
+    }
+    if (found || has_preassigned_slot()) {
+      // Remove the item from the list.
+      if (previous == nullptr) {
+        spill_move_insertion_locations_ = to_spill->next;
+      } else {
+        previous->next = to_spill->next;
+      }
+      // Even though this location doesn't need a spill instruction, the
+      // block does require a frame.
+      instr->block()->mark_needs_frame();
+    }
+  }
+}
+
+void TopLevelLiveRange::SetSpillOperand(InstructionOperand* operand) {
+  DCHECK(HasNoSpillType());
+  DCHECK(!operand->IsUnallocated() && !operand->IsImmediate());
+  set_spill_type(SpillType::kSpillOperand);
+  spill_operand_ = operand;
+}
+
+void TopLevelLiveRange::SetSpillRange(SpillRange* spill_range) {
+  DCHECK(!HasSpillOperand());
+  DCHECK(spill_range);
+  spill_range_ = spill_range;
+}
+
+AllocatedOperand TopLevelLiveRange::GetSpillRangeOperand() const {
+  SpillRange* spill_range = GetSpillRange();
+  int index = spill_range->assigned_slot();
+  return AllocatedOperand(LocationOperand::STACK_SLOT, representation(), index);
+}
+
+void TopLevelLiveRange::VerifyChildrenInOrder() const {
+  LifetimePosition last_end = End();
+  for (const LiveRange* child = this->next(); child != nullptr;
+       child = child->next()) {
+    DCHECK(last_end <= child->Start());
+    last_end = child->End();
+  }
+}
+
+LiveRange* TopLevelLiveRange::GetChildCovers(LifetimePosition pos) {
+  LiveRange* child = last_child_covers_;
+  DCHECK_NE(child, nullptr);
+  if (pos < child->Start()) {
+    // Cached value has advanced too far; start from the top.
+    child = this;
+  }
+  LiveRange* previous_child = nullptr;
+  while (child != nullptr && child->End() <= pos) {
+    previous_child = child;
+    child = child->next();
+  }
+
+  // If we've walked past the end, cache the last child instead. This allows
+  // future calls that are also past the end to be fast, since they will know
+  // that there is no need to reset the search to the beginning.
+  last_child_covers_ = child == nullptr ? previous_child : child;
+
+  return !child || !child->Covers(pos) ? nullptr : child;
+}
+
+void TopLevelLiveRange::Verify() const {
+  VerifyChildrenInOrder();
+  for (const LiveRange* child = this; child != nullptr; child = child->next()) {
+    VerifyChildStructure();
+  }
+}
+
+void TopLevelLiveRange::ShortenTo(LifetimePosition start, bool trace_alloc) {
+  TRACE_COND(trace_alloc, "Shorten live range %d to [%d\n", vreg(),
+             start.value());
+  DCHECK_NOT_NULL(first_interval_);
+  DCHECK(first_interval_->start() <= start);
+  DCHECK(start < first_interval_->end());
+  first_interval_->set_start(start);
+}
+
+void TopLevelLiveRange::EnsureInterval(LifetimePosition start,
+                                       LifetimePosition end, Zone* zone,
+                                       bool trace_alloc) {
+  TRACE_COND(trace_alloc, "Ensure live range %d in interval [%d %d[\n", vreg(),
+             start.value(), end.value());
+  LifetimePosition new_end = end;
+  while (first_interval_ != nullptr && first_interval_->start() <= end) {
+    if (first_interval_->end() > end) {
+      new_end = first_interval_->end();
+    }
+    first_interval_ = first_interval_->next();
+  }
+
+  UseInterval* new_interval = zone->New<UseInterval>(start, new_end);
+  new_interval->set_next(first_interval_);
+  first_interval_ = new_interval;
+  if (new_interval->next() == nullptr) {
+    last_interval_ = new_interval;
+  }
+}
+
+void TopLevelLiveRange::AddUseInterval(LifetimePosition start,
+                                       LifetimePosition end, Zone* zone,
+                                       bool trace_alloc) {
+  TRACE_COND(trace_alloc, "Add to live range %d interval [%d %d[\n", vreg(),
+             start.value(), end.value());
+  if (first_interval_ == nullptr) {
+    UseInterval* interval = zone->New<UseInterval>(start, end);
+    first_interval_ = interval;
+    last_interval_ = interval;
+  } else {
+    if (end == first_interval_->start()) {
+      first_interval_->set_start(start);
+    } else if (end < first_interval_->start()) {
+      UseInterval* interval = zone->New<UseInterval>(start, end);
+      interval->set_next(first_interval_);
+      first_interval_ = interval;
+    } else {
+      // The order of instruction processing (see ProcessInstructions)
+      // guarantees that each new use interval either precedes, intersects
+      // with, or touches the last added interval.
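+      // For example (illustrative): if the last added interval was [10, 20[
+      // and we now add [5, 12[, the code below merges them into [5, 20[.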
+      DCHECK(start <= first_interval_->end());
+      first_interval_->set_start(std::min(start, first_interval_->start()));
+      first_interval_->set_end(std::max(end, first_interval_->end()));
+    }
+  }
+}
+
+void TopLevelLiveRange::AddUsePosition(UsePosition* use_pos, bool trace_alloc) {
+  LifetimePosition pos = use_pos->pos();
+  TRACE_COND(trace_alloc, "Add to live range %d use position %d\n", vreg(),
+             pos.value());
+  UsePosition* prev_hint = nullptr;
+  UsePosition* prev = nullptr;
+  UsePosition* current = first_pos_;
+  while (current != nullptr && current->pos() < pos) {
+    prev_hint = current->HasHint() ? current : prev_hint;
+    prev = current;
+    current = current->next();
+  }
+
+  if (prev == nullptr) {
+    use_pos->set_next(first_pos_);
+    first_pos_ = use_pos;
+  } else {
+    use_pos->set_next(prev->next());
+    prev->set_next(use_pos);
+  }
+
+  if (prev_hint == nullptr && use_pos->HasHint()) {
+    current_hint_position_ = use_pos;
+  }
+}
+
+static bool AreUseIntervalsIntersecting(UseInterval* interval1,
+                                        UseInterval* interval2) {
+  while (interval1 != nullptr && interval2 != nullptr) {
+    if (interval1->start() < interval2->start()) {
+      if (interval1->end() > interval2->start()) {
+        return true;
+      }
+      interval1 = interval1->next();
+    } else {
+      if (interval2->end() > interval1->start()) {
+        return true;
+      }
+      interval2 = interval2->next();
+    }
+  }
+  return false;
+}
+
+std::ostream& operator<<(std::ostream& os,
+                         const PrintableLiveRange& printable_range) {
+  const LiveRange* range = printable_range.range_;
+  os << "Range: " << range->TopLevel()->vreg() << ":" << range->relative_id()
+     << " ";
+  if (range->TopLevel()->is_phi()) os << "phi ";
+  if (range->TopLevel()->is_non_loop_phi()) os << "nlphi ";
+
+  os << "{" << std::endl;
+  UseInterval* interval = range->first_interval();
+  UsePosition* use_pos = range->first_pos();
+  while (use_pos != nullptr) {
+    if (use_pos->HasOperand()) {
+      os << *use_pos->operand() << use_pos->pos() << " ";
+    }
+    use_pos = use_pos->next();
+  }
+  os << std::endl;
+
+  while (interval != nullptr) {
+    os << '[' << interval->start() << ", " << interval->end() << ')'
+       << std::endl;
+    interval = interval->next();
+  }
+  os << "}";
+  return os;
+}
+
+namespace {
+void PrintBlockRow(std::ostream& os, const InstructionBlocks& blocks) {
+  os << "     ";
+  for (auto block : blocks) {
+    LifetimePosition start_pos = LifetimePosition::GapFromInstructionIndex(
+        block->first_instruction_index());
+    LifetimePosition end_pos = LifetimePosition::GapFromInstructionIndex(
+                                   block->last_instruction_index())
+                                   .NextFullStart();
+    int length = end_pos.value() - start_pos.value();
+    constexpr int kMaxPrefixLength = 32;
+    char buffer[kMaxPrefixLength];
+    int rpo_number = block->rpo_number().ToInt();
+    const char* deferred_marker = block->IsDeferred() ? "(deferred)" : "";
+    int max_prefix_length = std::min(length, kMaxPrefixLength);
+    int prefix = snprintf(buffer, max_prefix_length, "[-B%d-%s", rpo_number,
+                          deferred_marker);
+    os << buffer;
+    int remaining = length - std::min(prefix, max_prefix_length) - 1;
+    for (int i = 0; i < remaining; ++i) os << '-';
+    os << ']';
+  }
+  os << '\n';
+}
+}  // namespace
+
+void LinearScanAllocator::PrintRangeRow(std::ostream& os,
+                                        const TopLevelLiveRange* toplevel) {
+  int position = 0;
+  os << std::setw(3) << toplevel->vreg() << ": ";
+
+  const char* kind_string;
+  switch (toplevel->spill_type()) {
+    case TopLevelLiveRange::SpillType::kSpillRange:
+      kind_string = "ss";
+      break;
+    case TopLevelLiveRange::SpillType::kDeferredSpillRange:
+      kind_string = "sd";
+      break;
+    case TopLevelLiveRange::SpillType::kSpillOperand:
+      kind_string = "so";
+      break;
+    default:
+      kind_string = "s?";
+  }
+
+  for (const LiveRange* range = toplevel; range != nullptr;
+       range = range->next()) {
+    for (UseInterval* interval = range->first_interval(); interval != nullptr;
+         interval = interval->next()) {
+      LifetimePosition start = interval->start();
+      LifetimePosition end = interval->end();
+      CHECK_GE(start.value(), position);
+      for (; start.value() > position; position++) {
+        os << ' ';
+      }
+      int length = end.value() - start.value();
+      constexpr int kMaxPrefixLength = 32;
+      char buffer[kMaxPrefixLength];
+      int max_prefix_length = std::min(length + 1, kMaxPrefixLength);
+      int prefix;
+      if (range->spilled()) {
+        prefix = snprintf(buffer, max_prefix_length, "|%s", kind_string);
+      } else {
+        prefix = snprintf(buffer, max_prefix_length, "|%s",
+                          RegisterName(range->assigned_register()));
+      }
+      os << buffer;
+      position += std::min(prefix, max_prefix_length - 1);
+      CHECK_GE(end.value(), position);
+      const char line_style = range->spilled() ? '-' : '=';
+      for (; end.value() > position; position++) {
+        os << line_style;
+      }
+    }
+  }
+  os << '\n';
+}
+
+void LinearScanAllocator::PrintRangeOverview(std::ostream& os) {
+  PrintBlockRow(os, code()->instruction_blocks());
+  for (auto const toplevel : data()->fixed_live_ranges()) {
+    if (toplevel == nullptr) continue;
+    PrintRangeRow(os, toplevel);
+  }
+  int rowcount = 0;
+  for (auto toplevel : data()->live_ranges()) {
+    if (!CanProcessRange(toplevel)) continue;
+    if (rowcount++ % 10 == 0) PrintBlockRow(os, code()->instruction_blocks());
+    PrintRangeRow(os, toplevel);
+  }
+}
+
+SpillRange::SpillRange(TopLevelLiveRange* parent, Zone* zone)
+    : live_ranges_(zone),
+      assigned_slot_(kUnassignedSlot),
+      byte_width_(ByteWidthForStackSlot(parent->representation())) {
+  // Spill ranges are created for the top-level range only. This way, when
+  // merging decisions are made, we consider the full extent of the virtual
+  // register and avoid clobbering it.
+  UseInterval* result = nullptr;
+  UseInterval* node = nullptr;
+  // Copy the intervals for all ranges.
+  for (LiveRange* range = parent; range != nullptr; range = range->next()) {
+    UseInterval* src = range->first_interval();
+    while (src != nullptr) {
+      UseInterval* new_node = zone->New<UseInterval>(src->start(), src->end());
+      if (result == nullptr) {
+        result = new_node;
+      } else {
+        node->set_next(new_node);
+      }
+      node = new_node;
+      src = src->next();
+    }
+  }
+  use_interval_ = result;
+  live_ranges().push_back(parent);
+  end_position_ = node->end();
+  parent->SetSpillRange(this);
+}
+
+bool SpillRange::IsIntersectingWith(SpillRange* other) const {
+  if (this->use_interval_ == nullptr || other->use_interval_ == nullptr ||
+      this->End() <= other->use_interval_->start() ||
+      other->End() <= this->use_interval_->start()) {
+    return false;
+  }
+  return AreUseIntervalsIntersecting(use_interval_, other->use_interval_);
+}
+
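+// Tries to merge 'other' into this spill range so both can share one stack
+// slot. Merging is refused if either range already has an assigned slot, the
+// byte widths differ, or the use intervals intersect. On success the
+// intervals and live ranges of 'other' are moved into this range and 'other'
+// is left empty.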
+bool SpillRange::TryMerge(SpillRange* other) {
+  if (HasSlot() || other->HasSlot()) return false;
+  if (byte_width() != other->byte_width() || IsIntersectingWith(other))
+    return false;
+
+  LifetimePosition max = LifetimePosition::MaxPosition();
+  if (End() < other->End() && other->End() != max) {
+    end_position_ = other->End();
+  }
+  other->end_position_ = max;
+
+  MergeDisjointIntervals(other->use_interval_);
+  other->use_interval_ = nullptr;
+
+  for (TopLevelLiveRange* range : other->live_ranges()) {
+    DCHECK(range->GetSpillRange() == other);
+    range->SetSpillRange(this);
+  }
+
+  live_ranges().insert(live_ranges().end(), other->live_ranges().begin(),
+                       other->live_ranges().end());
+  other->live_ranges().clear();
+
+  return true;
+}
+
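+// Splices the sorted interval list 'other' into use_interval_, keeping the
+// combined list sorted by start position. The caller guarantees that the two
+// lists are disjoint (see the DCHECK below), so intervals are only relinked,
+// never fused.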
+void SpillRange::MergeDisjointIntervals(UseInterval* other) {
+  UseInterval* tail = nullptr;
+  UseInterval* current = use_interval_;
+  while (other != nullptr) {
+    // Make sure the 'current' list starts first.
+    if (current == nullptr || current->start() > other->start()) {
+      std::swap(current, other);
+    }
+    // Check disjointness
+    DCHECK(other == nullptr || current->end() <= other->start());
+    // Append the 'current' node to the result accumulator and move forward
+    if (tail == nullptr) {
+      use_interval_ = current;
+    } else {
+      tail->set_next(current);
+    }
+    tail = current;
+    current = current->next();
+  }
+  // The other list is empty, so we are done.
+}
+
+void SpillRange::Print() const {
+  StdoutStream os;
+  os << "{" << std::endl;
+  for (TopLevelLiveRange* range : live_ranges()) {
+    os << range->vreg() << " ";
+  }
+  os << std::endl;
+
+  for (UseInterval* i = interval(); i != nullptr; i = i->next()) {
+    os << '[' << i->start() << ", " << i->end() << ')' << std::endl;
+  }
+  os << "}" << std::endl;
+}
+
+TopTierRegisterAllocationData::PhiMapValue::PhiMapValue(
+    PhiInstruction* phi, const InstructionBlock* block, Zone* zone)
+    : phi_(phi),
+      block_(block),
+      incoming_operands_(zone),
+      assigned_register_(kUnassignedRegister) {
+  incoming_operands_.reserve(phi->operands().size());
+}
+
+void TopTierRegisterAllocationData::PhiMapValue::AddOperand(
+    InstructionOperand* operand) {
+  incoming_operands_.push_back(operand);
+}
+
+void TopTierRegisterAllocationData::PhiMapValue::CommitAssignment(
+    const InstructionOperand& assigned) {
+  for (InstructionOperand* operand : incoming_operands_) {
+    InstructionOperand::ReplaceWith(operand, &assigned);
+  }
+}
+
+TopTierRegisterAllocationData::TopTierRegisterAllocationData(
+    const RegisterConfiguration* config, Zone* zone, Frame* frame,
+    InstructionSequence* code, RegisterAllocationFlags flags,
+    TickCounter* tick_counter, const char* debug_name)
+    : RegisterAllocationData(Type::kTopTier),
+      allocation_zone_(zone),
+      frame_(frame),
+      code_(code),
+      debug_name_(debug_name),
+      config_(config),
+      phi_map_(allocation_zone()),
+      live_in_sets_(code->InstructionBlockCount(), nullptr, allocation_zone()),
+      live_out_sets_(code->InstructionBlockCount(), nullptr, allocation_zone()),
+      live_ranges_(code->VirtualRegisterCount() * 2, nullptr,
+                   allocation_zone()),
+      fixed_live_ranges_(kNumberOfFixedRangesPerRegister *
+                             this->config()->num_general_registers(),
+                         nullptr, allocation_zone()),
+      fixed_float_live_ranges_(allocation_zone()),
+      fixed_double_live_ranges_(kNumberOfFixedRangesPerRegister *
+                                    this->config()->num_double_registers(),
+                                nullptr, allocation_zone()),
+      fixed_simd128_live_ranges_(allocation_zone()),
+      spill_ranges_(code->VirtualRegisterCount(), nullptr, allocation_zone()),
+      delayed_references_(allocation_zone()),
+      assigned_registers_(nullptr),
+      assigned_double_registers_(nullptr),
+      virtual_register_count_(code->VirtualRegisterCount()),
+      preassigned_slot_ranges_(zone),
+      spill_state_(code->InstructionBlockCount(), ZoneVector<LiveRange*>(zone),
+                   zone),
+      flags_(flags),
+      tick_counter_(tick_counter) {
+  if (!kSimpleFPAliasing) {
+    fixed_float_live_ranges_.resize(
+        kNumberOfFixedRangesPerRegister * this->config()->num_float_registers(),
+        nullptr);
+    fixed_simd128_live_ranges_.resize(
+        kNumberOfFixedRangesPerRegister *
+            this->config()->num_simd128_registers(),
+        nullptr);
+  }
+
+  assigned_registers_ = code_zone()->New<BitVector>(
+      this->config()->num_general_registers(), code_zone());
+  assigned_double_registers_ = code_zone()->New<BitVector>(
+      this->config()->num_double_registers(), code_zone());
+  fixed_register_use_ = code_zone()->New<BitVector>(
+      this->config()->num_general_registers(), code_zone());
+  fixed_fp_register_use_ = code_zone()->New<BitVector>(
+      this->config()->num_double_registers(), code_zone());
+
+  this->frame()->SetAllocatedRegisters(assigned_registers_);
+  this->frame()->SetAllocatedDoubleRegisters(assigned_double_registers_);
+}
+
+MoveOperands* TopTierRegisterAllocationData::AddGapMove(
+    int index, Instruction::GapPosition position,
+    const InstructionOperand& from, const InstructionOperand& to) {
+  Instruction* instr = code()->InstructionAt(index);
+  ParallelMove* moves = instr->GetOrCreateParallelMove(position, code_zone());
+  return moves->AddMove(from, to);
+}
+
+MachineRepresentation TopTierRegisterAllocationData::RepresentationFor(
+    int virtual_register) {
+  DCHECK_LT(virtual_register, code()->VirtualRegisterCount());
+  return code()->GetRepresentation(virtual_register);
+}
+
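+// Returns the top-level live range for virtual register 'index', growing the
+// live_ranges() table on demand and lazily creating the range on first use.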
+TopLevelLiveRange* TopTierRegisterAllocationData::GetOrCreateLiveRangeFor(
+    int index) {
+  if (index >= static_cast<int>(live_ranges().size())) {
+    live_ranges().resize(index + 1, nullptr);
+  }
+  TopLevelLiveRange* result = live_ranges()[index];
+  if (result == nullptr) {
+    result = NewLiveRange(index, RepresentationFor(index));
+    live_ranges()[index] = result;
+  }
+  return result;
+}
+
+TopLevelLiveRange* TopTierRegisterAllocationData::NewLiveRange(
+    int index, MachineRepresentation rep) {
+  return allocation_zone()->New<TopLevelLiveRange>(index, rep);
+}
+
+int TopTierRegisterAllocationData::GetNextLiveRangeId() {
+  int vreg = virtual_register_count_++;
+  if (vreg >= static_cast<int>(live_ranges().size())) {
+    live_ranges().resize(vreg + 1, nullptr);
+  }
+  return vreg;
+}
+
+TopLevelLiveRange* TopTierRegisterAllocationData::NextLiveRange(
+    MachineRepresentation rep) {
+  int vreg = GetNextLiveRangeId();
+  TopLevelLiveRange* ret = NewLiveRange(vreg, rep);
+  return ret;
+}
+
+TopTierRegisterAllocationData::PhiMapValue*
+TopTierRegisterAllocationData::InitializePhiMap(const InstructionBlock* block,
+                                                PhiInstruction* phi) {
+  TopTierRegisterAllocationData::PhiMapValue* map_value =
+      allocation_zone()->New<TopTierRegisterAllocationData::PhiMapValue>(
+          phi, block, allocation_zone());
+  auto res =
+      phi_map_.insert(std::make_pair(phi->virtual_register(), map_value));
+  DCHECK(res.second);
+  USE(res);
+  return map_value;
+}
+
+TopTierRegisterAllocationData::PhiMapValue*
+TopTierRegisterAllocationData::GetPhiMapValueFor(int virtual_register) {
+  auto it = phi_map_.find(virtual_register);
+  DCHECK(it != phi_map_.end());
+  return it->second;
+}
+
+TopTierRegisterAllocationData::PhiMapValue*
+TopTierRegisterAllocationData::GetPhiMapValueFor(TopLevelLiveRange* top_range) {
+  return GetPhiMapValueFor(top_range->vreg());
+}
+
+bool TopTierRegisterAllocationData::ExistsUseWithoutDefinition() {
+  bool found = false;
+  BitVector::Iterator iterator(live_in_sets()[0]);
+  while (!iterator.Done()) {
+    found = true;
+    int operand_index = iterator.Current();
+    PrintF("Register allocator error: live v%d reached first block.\n",
+           operand_index);
+    LiveRange* range = GetOrCreateLiveRangeFor(operand_index);
+    PrintF("  (first use is at %d)\n", range->first_pos()->pos().value());
+    if (debug_name() == nullptr) {
+      PrintF("\n");
+    } else {
+      PrintF("  (function: %s)\n", debug_name());
+    }
+    iterator.Advance();
+  }
+  return found;
+}
+
+// If a range is defined in a deferred block, we can expect the entire range
+// to cover only positions in deferred blocks. Otherwise, a block on the hot
+// path would be dominated by a deferred block, meaning it would be
+// unreachable without passing through the deferred block, which is
+// contradictory. In particular, when such a range contributes a result back
+// on the hot path, it does so as one of the inputs of a phi. In that case,
+// the value is transferred via a move in the Gap::END of the last
+// instruction of a deferred block.
+bool TopTierRegisterAllocationData::RangesDefinedInDeferredStayInDeferred() {
+  const size_t live_ranges_size = live_ranges().size();
+  for (const TopLevelLiveRange* range : live_ranges()) {
+    CHECK_EQ(live_ranges_size,
+             live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (range == nullptr || range->IsEmpty() ||
+        !code()
+             ->GetInstructionBlock(range->Start().ToInstructionIndex())
+             ->IsDeferred()) {
+      continue;
+    }
+    for (const UseInterval* i = range->first_interval(); i != nullptr;
+         i = i->next()) {
+      int first = i->FirstGapIndex();
+      int last = i->LastGapIndex();
+      for (int instr = first; instr <= last;) {
+        const InstructionBlock* block = code()->GetInstructionBlock(instr);
+        if (!block->IsDeferred()) return false;
+        instr = block->last_instruction_index() + 1;
+      }
+    }
+  }
+  return true;
+}
+
+SpillRange* TopTierRegisterAllocationData::AssignSpillRangeToLiveRange(
+    TopLevelLiveRange* range, SpillMode spill_mode) {
+  using SpillType = TopLevelLiveRange::SpillType;
+  DCHECK(!range->HasSpillOperand());
+
+  SpillRange* spill_range = range->GetAllocatedSpillRange();
+  if (spill_range == nullptr) {
+    spill_range = allocation_zone()->New<SpillRange>(range, allocation_zone());
+  }
+  if (spill_mode == SpillMode::kSpillDeferred &&
+      (range->spill_type() != SpillType::kSpillRange)) {
+    range->set_spill_type(SpillType::kDeferredSpillRange);
+  } else {
+    range->set_spill_type(SpillType::kSpillRange);
+  }
+
+  spill_ranges()[range->vreg()] = spill_range;
+  return spill_range;
+}
+
+void TopTierRegisterAllocationData::MarkFixedUse(MachineRepresentation rep,
+                                                 int index) {
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+    case MachineRepresentation::kSimd128:
+      if (kSimpleFPAliasing) {
+        fixed_fp_register_use_->Add(index);
+      } else {
+        int alias_base_index = -1;
+        int aliases = config()->GetAliases(
+            rep, index, MachineRepresentation::kFloat64, &alias_base_index);
+        DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+        while (aliases--) {
+          int aliased_reg = alias_base_index + aliases;
+          fixed_fp_register_use_->Add(aliased_reg);
+        }
+      }
+      break;
+    case MachineRepresentation::kFloat64:
+      fixed_fp_register_use_->Add(index);
+      break;
+    default:
+      DCHECK(!IsFloatingPoint(rep));
+      fixed_register_use_->Add(index);
+      break;
+  }
+}
+
+bool TopTierRegisterAllocationData::HasFixedUse(MachineRepresentation rep,
+                                                int index) {
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+    case MachineRepresentation::kSimd128:
+      if (kSimpleFPAliasing) {
+        return fixed_fp_register_use_->Contains(index);
+      } else {
+        int alias_base_index = -1;
+        int aliases = config()->GetAliases(
+            rep, index, MachineRepresentation::kFloat64, &alias_base_index);
+        DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+        bool result = false;
+        while (aliases-- && !result) {
+          int aliased_reg = alias_base_index + aliases;
+          result |= fixed_fp_register_use_->Contains(aliased_reg);
+        }
+        return result;
+      }
+      break;
+    case MachineRepresentation::kFloat64:
+      return fixed_fp_register_use_->Contains(index);
+      break;
+    default:
+      DCHECK(!IsFloatingPoint(rep));
+      return fixed_register_use_->Contains(index);
+      break;
+  }
+}
+
+void TopTierRegisterAllocationData::MarkAllocated(MachineRepresentation rep,
+                                                  int index) {
+  switch (rep) {
+    case MachineRepresentation::kFloat32:
+    case MachineRepresentation::kSimd128:
+      if (kSimpleFPAliasing) {
+        assigned_double_registers_->Add(index);
+      } else {
+        int alias_base_index = -1;
+        int aliases = config()->GetAliases(
+            rep, index, MachineRepresentation::kFloat64, &alias_base_index);
+        DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+        while (aliases--) {
+          int aliased_reg = alias_base_index + aliases;
+          assigned_double_registers_->Add(aliased_reg);
+        }
+      }
+      break;
+    case MachineRepresentation::kFloat64:
+      assigned_double_registers_->Add(index);
+      break;
+    default:
+      DCHECK(!IsFloatingPoint(rep));
+      assigned_registers_->Add(index);
+      break;
+  }
+}
+
+bool TopTierRegisterAllocationData::IsBlockBoundary(
+    LifetimePosition pos) const {
+  return pos.IsFullStart() &&
+         (static_cast<size_t>(pos.ToInstructionIndex()) ==
+              code()->instructions().size() ||
+          code()->GetInstructionBlock(pos.ToInstructionIndex())->code_start() ==
+              pos.ToInstructionIndex());
+}
+
+ConstraintBuilder::ConstraintBuilder(TopTierRegisterAllocationData* data)
+    : data_(data) {}
+
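+// Replaces an operand that carries a fixed policy with the concrete
+// AllocatedOperand it demands (a specific stack slot, general register, or FP
+// register). Fixed register inputs are recorded via MarkFixedUse, and for
+// tagged values the allocated location is added to the instruction's
+// reference map.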
+InstructionOperand* ConstraintBuilder::AllocateFixed(
+    UnallocatedOperand* operand, int pos, bool is_tagged, bool is_input) {
+  TRACE("Allocating fixed reg for op %d\n", operand->virtual_register());
+  DCHECK(operand->HasFixedPolicy());
+  InstructionOperand allocated;
+  MachineRepresentation rep = InstructionSequence::DefaultRepresentation();
+  int virtual_register = operand->virtual_register();
+  if (virtual_register != InstructionOperand::kInvalidVirtualRegister) {
+    rep = data()->RepresentationFor(virtual_register);
+  }
+  if (operand->HasFixedSlotPolicy()) {
+    allocated = AllocatedOperand(AllocatedOperand::STACK_SLOT, rep,
+                                 operand->fixed_slot_index());
+  } else if (operand->HasFixedRegisterPolicy()) {
+    DCHECK(!IsFloatingPoint(rep));
+    DCHECK(data()->config()->IsAllocatableGeneralCode(
+        operand->fixed_register_index()));
+    allocated = AllocatedOperand(AllocatedOperand::REGISTER, rep,
+                                 operand->fixed_register_index());
+  } else if (operand->HasFixedFPRegisterPolicy()) {
+    DCHECK(IsFloatingPoint(rep));
+    DCHECK_NE(InstructionOperand::kInvalidVirtualRegister, virtual_register);
+    allocated = AllocatedOperand(AllocatedOperand::REGISTER, rep,
+                                 operand->fixed_register_index());
+  } else {
+    UNREACHABLE();
+  }
+  if (is_input && allocated.IsAnyRegister()) {
+    data()->MarkFixedUse(rep, operand->fixed_register_index());
+  }
+  InstructionOperand::ReplaceWith(operand, &allocated);
+  if (is_tagged) {
+    TRACE("Fixed reg is tagged at %d\n", pos);
+    Instruction* instr = code()->InstructionAt(pos);
+    if (instr->HasReferenceMap()) {
+      instr->reference_map()->RecordReference(*AllocatedOperand::cast(operand));
+    }
+  }
+  return operand;
+}
+
+void ConstraintBuilder::MeetRegisterConstraints() {
+  for (InstructionBlock* block : code()->instruction_blocks()) {
+    data_->tick_counter()->TickAndMaybeEnterSafepoint();
+    MeetRegisterConstraints(block);
+  }
+}
+
+void ConstraintBuilder::MeetRegisterConstraints(const InstructionBlock* block) {
+  int start = block->first_instruction_index();
+  int end = block->last_instruction_index();
+  DCHECK_NE(-1, start);
+  for (int i = start; i <= end; ++i) {
+    MeetConstraintsBefore(i);
+    if (i != end) MeetConstraintsAfter(i);
+  }
+  // Meet register constraints for the last instruction in the block.
+  MeetRegisterConstraintsForLastInstructionInBlock(block);
+}
+
+void ConstraintBuilder::MeetRegisterConstraintsForLastInstructionInBlock(
+    const InstructionBlock* block) {
+  int end = block->last_instruction_index();
+  Instruction* last_instruction = code()->InstructionAt(end);
+  for (size_t i = 0; i < last_instruction->OutputCount(); i++) {
+    InstructionOperand* output_operand = last_instruction->OutputAt(i);
+    DCHECK(!output_operand->IsConstant());
+    UnallocatedOperand* output = UnallocatedOperand::cast(output_operand);
+    int output_vreg = output->virtual_register();
+    TopLevelLiveRange* range = data()->GetOrCreateLiveRangeFor(output_vreg);
+    bool assigned = false;
+    if (output->HasFixedPolicy()) {
+      AllocateFixed(output, -1, false, false);
+      // This value is produced on the stack; we never need to spill it.
+      if (output->IsStackSlot()) {
+        DCHECK(LocationOperand::cast(output)->index() <
+               data()->frame()->GetSpillSlotCount());
+        range->SetSpillOperand(LocationOperand::cast(output));
+        range->SetSpillStartIndex(end);
+        assigned = true;
+      }
+
+      for (const RpoNumber& succ : block->successors()) {
+        const InstructionBlock* successor = code()->InstructionBlockAt(succ);
+        DCHECK_EQ(1, successor->PredecessorCount());
+        int gap_index = successor->first_instruction_index();
+        // Create an unconstrained operand for the same virtual register
+        // and insert a gap move from the fixed output to the operand.
+        UnallocatedOperand output_copy(UnallocatedOperand::REGISTER_OR_SLOT,
+                                       output_vreg);
+        data()->AddGapMove(gap_index, Instruction::START, *output, output_copy);
+      }
+    }
+
+    if (!assigned) {
+      for (const RpoNumber& succ : block->successors()) {
+        const InstructionBlock* successor = code()->InstructionBlockAt(succ);
+        DCHECK_EQ(1, successor->PredecessorCount());
+        int gap_index = successor->first_instruction_index();
+        range->RecordSpillLocation(allocation_zone(), gap_index, output);
+        range->SetSpillStartIndex(gap_index);
+      }
+    }
+  }
+}
+
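+// Handles constraints that take effect after instruction 'instr_index':
+// fixed temps and constant or fixed outputs. A fixed output is pinned to its
+// required location, and a gap move in the following gap copies it into an
+// unconstrained operand so that later uses are not tied to the fixed
+// location.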
+void ConstraintBuilder::MeetConstraintsAfter(int instr_index) {
+  Instruction* first = code()->InstructionAt(instr_index);
+  // Handle fixed temporaries.
+  for (size_t i = 0; i < first->TempCount(); i++) {
+    UnallocatedOperand* temp = UnallocatedOperand::cast(first->TempAt(i));
+    if (temp->HasFixedPolicy()) AllocateFixed(temp, instr_index, false, false);
+  }
+  // Handle constant/fixed output operands.
+  for (size_t i = 0; i < first->OutputCount(); i++) {
+    InstructionOperand* output = first->OutputAt(i);
+    if (output->IsConstant()) {
+      int output_vreg = ConstantOperand::cast(output)->virtual_register();
+      TopLevelLiveRange* range = data()->GetOrCreateLiveRangeFor(output_vreg);
+      range->SetSpillStartIndex(instr_index + 1);
+      range->SetSpillOperand(output);
+      continue;
+    }
+    UnallocatedOperand* first_output = UnallocatedOperand::cast(output);
+    TopLevelLiveRange* range =
+        data()->GetOrCreateLiveRangeFor(first_output->virtual_register());
+    bool assigned = false;
+    if (first_output->HasFixedPolicy()) {
+      int output_vreg = first_output->virtual_register();
+      UnallocatedOperand output_copy(UnallocatedOperand::REGISTER_OR_SLOT,
+                                     output_vreg);
+      bool is_tagged = code()->IsReference(output_vreg);
+      if (first_output->HasSecondaryStorage()) {
+        range->MarkHasPreassignedSlot();
+        data()->preassigned_slot_ranges().push_back(
+            std::make_pair(range, first_output->GetSecondaryStorage()));
+      }
+      AllocateFixed(first_output, instr_index, is_tagged, false);
+
+      // This value is produced on the stack; we never need to spill it.
+      if (first_output->IsStackSlot()) {
+        DCHECK(LocationOperand::cast(first_output)->index() <
+               data()->frame()->GetTotalFrameSlotCount());
+        range->SetSpillOperand(LocationOperand::cast(first_output));
+        range->SetSpillStartIndex(instr_index + 1);
+        assigned = true;
+      }
+      data()->AddGapMove(instr_index + 1, Instruction::START, *first_output,
+                         output_copy);
+    }
+    // Make sure we add a gap move for spilling (if we have not done
+    // so already).
+    if (!assigned) {
+      range->RecordSpillLocation(allocation_zone(), instr_index + 1,
+                                 first_output);
+      range->SetSpillStartIndex(instr_index + 1);
+    }
+  }
+}
+
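+// Handles constraints that must hold when instruction 'instr_index' executes:
+// fixed inputs and "output same as input" operands. A fixed input is
+// satisfied by a gap move from an unconstrained copy of the virtual register
+// into the fixed location; a same-as-input output reuses input 0, which is
+// renamed to the output's virtual register and fed by a gap move from the
+// original input value.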
+void ConstraintBuilder::MeetConstraintsBefore(int instr_index) {
+  Instruction* second = code()->InstructionAt(instr_index);
+  // Handle fixed input operands of second instruction.
+  for (size_t i = 0; i < second->InputCount(); i++) {
+    InstructionOperand* input = second->InputAt(i);
+    if (input->IsImmediate()) {
+      continue;  // Ignore immediates.
+    }
+    UnallocatedOperand* cur_input = UnallocatedOperand::cast(input);
+    if (cur_input->HasFixedPolicy()) {
+      int input_vreg = cur_input->virtual_register();
+      UnallocatedOperand input_copy(UnallocatedOperand::REGISTER_OR_SLOT,
+                                    input_vreg);
+      bool is_tagged = code()->IsReference(input_vreg);
+      AllocateFixed(cur_input, instr_index, is_tagged, true);
+      data()->AddGapMove(instr_index, Instruction::END, input_copy, *cur_input);
+    }
+  }
+  // Handle "output same as input" for second instruction.
+  for (size_t i = 0; i < second->OutputCount(); i++) {
+    InstructionOperand* output = second->OutputAt(i);
+    if (!output->IsUnallocated()) continue;
+    UnallocatedOperand* second_output = UnallocatedOperand::cast(output);
+    if (!second_output->HasSameAsInputPolicy()) continue;
+    DCHECK_EQ(0, i);  // Only valid for first output.
+    UnallocatedOperand* cur_input =
+        UnallocatedOperand::cast(second->InputAt(0));
+    int output_vreg = second_output->virtual_register();
+    int input_vreg = cur_input->virtual_register();
+    UnallocatedOperand input_copy(UnallocatedOperand::REGISTER_OR_SLOT,
+                                  input_vreg);
+    *cur_input =
+        UnallocatedOperand(*cur_input, second_output->virtual_register());
+    MoveOperands* gap_move = data()->AddGapMove(instr_index, Instruction::END,
+                                                input_copy, *cur_input);
+    DCHECK_NOT_NULL(gap_move);
+    if (code()->IsReference(input_vreg) && !code()->IsReference(output_vreg)) {
+      if (second->HasReferenceMap()) {
+        TopTierRegisterAllocationData::DelayedReference delayed_reference = {
+            second->reference_map(), &gap_move->source()};
+        data()->delayed_references().push_back(delayed_reference);
+      }
+    }
+  }
+}
+
+void ConstraintBuilder::ResolvePhis() {
+  // Process the blocks in reverse order.
+  for (InstructionBlock* block : base::Reversed(code()->instruction_blocks())) {
+    data_->tick_counter()->TickAndMaybeEnterSafepoint();
+    ResolvePhis(block);
+  }
+}
+
+void ConstraintBuilder::ResolvePhis(const InstructionBlock* block) {
+  for (PhiInstruction* phi : block->phis()) {
+    int phi_vreg = phi->virtual_register();
+    TopTierRegisterAllocationData::PhiMapValue* map_value =
+        data()->InitializePhiMap(block, phi);
+    InstructionOperand& output = phi->output();
+    // Map the destination operands, so the commitment phase can find them.
+    for (size_t i = 0; i < phi->operands().size(); ++i) {
+      InstructionBlock* cur_block =
+          code()->InstructionBlockAt(block->predecessors()[i]);
+      UnallocatedOperand input(UnallocatedOperand::REGISTER_OR_SLOT,
+                               phi->operands()[i]);
+      MoveOperands* move = data()->AddGapMove(
+          cur_block->last_instruction_index(), Instruction::END, input, output);
+      map_value->AddOperand(&move->destination());
+      DCHECK(!code()
+                  ->InstructionAt(cur_block->last_instruction_index())
+                  ->HasReferenceMap());
+    }
+    TopLevelLiveRange* live_range = data()->GetOrCreateLiveRangeFor(phi_vreg);
+    int gap_index = block->first_instruction_index();
+    live_range->RecordSpillLocation(allocation_zone(), gap_index, &output);
+    live_range->SetSpillStartIndex(gap_index);
+    // We use the phi-ness of some ranges in later allocation heuristics.
+    live_range->set_is_phi(true);
+    live_range->set_is_non_loop_phi(!block->IsLoopHeader());
+  }
+}
+
+LiveRangeBuilder::LiveRangeBuilder(TopTierRegisterAllocationData* data,
+                                   Zone* local_zone)
+    : data_(data), phi_hints_(local_zone) {}
+
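+// The live-out set of a block is the union of the live-in sets of its
+// forward (non-backedge) successors, plus every phi input that flows along
+// an edge out of this block. Results are cached in data->live_out_sets().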
+BitVector* LiveRangeBuilder::ComputeLiveOut(
+    const InstructionBlock* block, TopTierRegisterAllocationData* data) {
+  size_t block_index = block->rpo_number().ToSize();
+  BitVector* live_out = data->live_out_sets()[block_index];
+  if (live_out == nullptr) {
+    // Compute the live-out set for the given block, excluding backward
+    // successor edges.
+    Zone* zone = data->allocation_zone();
+    const InstructionSequence* code = data->code();
+
+    live_out = zone->New<BitVector>(code->VirtualRegisterCount(), zone);
+
+    // Process all successor blocks.
+    for (const RpoNumber& succ : block->successors()) {
+      // Add values live on entry to the successor.
+      if (succ <= block->rpo_number()) continue;
+      BitVector* live_in = data->live_in_sets()[succ.ToSize()];
+      if (live_in != nullptr) live_out->Union(*live_in);
+
+      // All phi input operands corresponding to this successor edge are live
+      // out from this block.
+      const InstructionBlock* successor = code->InstructionBlockAt(succ);
+      size_t index = successor->PredecessorIndexOf(block->rpo_number());
+      DCHECK(index < successor->PredecessorCount());
+      for (PhiInstruction* phi : successor->phis()) {
+        live_out->Add(phi->operands()[index]);
+      }
+    }
+    data->live_out_sets()[block_index] = live_out;
+  }
+  return live_out;
+}
+
+void LiveRangeBuilder::AddInitialIntervals(const InstructionBlock* block,
+                                           BitVector* live_out) {
+  // Add an interval that includes the entire block to the live range for
+  // each live_out value.
+  LifetimePosition start = LifetimePosition::GapFromInstructionIndex(
+      block->first_instruction_index());
+  LifetimePosition end = LifetimePosition::InstructionFromInstructionIndex(
+                             block->last_instruction_index())
+                             .NextStart();
+  BitVector::Iterator iterator(live_out);
+  while (!iterator.Done()) {
+    int operand_index = iterator.Current();
+    TopLevelLiveRange* range = data()->GetOrCreateLiveRangeFor(operand_index);
+    range->AddUseInterval(start, end, allocation_zone(),
+                          data()->is_trace_alloc());
+    iterator.Advance();
+  }
+}
+
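+// Fixed live ranges are identified by negative vreg ids. Ids closest to zero
+// belong to fixed general registers; kFloat64, kFloat32 and kSimd128 ranges
+// are shifted further down by the preceding register classes (note the
+// deliberate fall-throughs), so each (register index, representation) pair
+// maps to a unique id.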
+int LiveRangeBuilder::FixedFPLiveRangeID(int index, MachineRepresentation rep) {
+  int result = -index - 1;
+  switch (rep) {
+    case MachineRepresentation::kSimd128:
+      result -=
+          kNumberOfFixedRangesPerRegister * config()->num_float_registers();
+      V8_FALLTHROUGH;
+    case MachineRepresentation::kFloat32:
+      result -=
+          kNumberOfFixedRangesPerRegister * config()->num_double_registers();
+      V8_FALLTHROUGH;
+    case MachineRepresentation::kFloat64:
+      result -=
+          kNumberOfFixedRangesPerRegister * config()->num_general_registers();
+      break;
+    default:
+      UNREACHABLE();
+  }
+  return result;
+}
+
+TopLevelLiveRange* LiveRangeBuilder::FixedLiveRangeFor(int index,
+                                                       SpillMode spill_mode) {
+  int offset = spill_mode == SpillMode::kSpillAtDefinition
+                   ? 0
+                   : config()->num_general_registers();
+  DCHECK(index < config()->num_general_registers());
+  TopLevelLiveRange* result = data()->fixed_live_ranges()[offset + index];
+  if (result == nullptr) {
+    MachineRepresentation rep = InstructionSequence::DefaultRepresentation();
+    result = data()->NewLiveRange(FixedLiveRangeID(offset + index), rep);
+    DCHECK(result->IsFixed());
+    result->set_assigned_register(index);
+    data()->MarkAllocated(rep, index);
+    if (spill_mode == SpillMode::kSpillDeferred) {
+      result->set_deferred_fixed();
+    }
+    data()->fixed_live_ranges()[offset + index] = result;
+  }
+  return result;
+}
+
+TopLevelLiveRange* LiveRangeBuilder::FixedFPLiveRangeFor(
+    int index, MachineRepresentation rep, SpillMode spill_mode) {
+  int num_regs = config()->num_double_registers();
+  ZoneVector<TopLevelLiveRange*>* live_ranges =
+      &data()->fixed_double_live_ranges();
+  if (!kSimpleFPAliasing) {
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        num_regs = config()->num_float_registers();
+        live_ranges = &data()->fixed_float_live_ranges();
+        break;
+      case MachineRepresentation::kSimd128:
+        num_regs = config()->num_simd128_registers();
+        live_ranges = &data()->fixed_simd128_live_ranges();
+        break;
+      default:
+        break;
+    }
+  }
+
+  int offset = spill_mode == SpillMode::kSpillAtDefinition ? 0 : num_regs;
+
+  DCHECK(index < num_regs);
+  USE(num_regs);
+  TopLevelLiveRange* result = (*live_ranges)[offset + index];
+  if (result == nullptr) {
+    result = data()->NewLiveRange(FixedFPLiveRangeID(offset + index, rep), rep);
+    DCHECK(result->IsFixed());
+    result->set_assigned_register(index);
+    data()->MarkAllocated(rep, index);
+    if (spill_mode == SpillMode::kSpillDeferred) {
+      result->set_deferred_fixed();
+    }
+    (*live_ranges)[offset + index] = result;
+  }
+  return result;
+}
+
+TopLevelLiveRange* LiveRangeBuilder::LiveRangeFor(InstructionOperand* operand,
+                                                  SpillMode spill_mode) {
+  if (operand->IsUnallocated()) {
+    return data()->GetOrCreateLiveRangeFor(
+        UnallocatedOperand::cast(operand)->virtual_register());
+  } else if (operand->IsConstant()) {
+    return data()->GetOrCreateLiveRangeFor(
+        ConstantOperand::cast(operand)->virtual_register());
+  } else if (operand->IsRegister()) {
+    return FixedLiveRangeFor(
+        LocationOperand::cast(operand)->GetRegister().code(), spill_mode);
+  } else if (operand->IsFPRegister()) {
+    LocationOperand* op = LocationOperand::cast(operand);
+    return FixedFPLiveRangeFor(op->register_code(), op->representation(),
+                               spill_mode);
+  } else {
+    return nullptr;
+  }
+}
+
+UsePosition* LiveRangeBuilder::NewUsePosition(LifetimePosition pos,
+                                              InstructionOperand* operand,
+                                              void* hint,
+                                              UsePositionHintType hint_type) {
+  return allocation_zone()->New<UsePosition>(pos, operand, hint, hint_type);
+}
+
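+// Records a definition of 'operand' at 'position'. If the value has no use
+// later in the block (the range is still empty or starts after the
+// definition), a minimal interval covering just the definition is created;
+// otherwise the existing interval is shortened to start at the definition.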
+UsePosition* LiveRangeBuilder::Define(LifetimePosition position,
+                                      InstructionOperand* operand, void* hint,
+                                      UsePositionHintType hint_type,
+                                      SpillMode spill_mode) {
+  TopLevelLiveRange* range = LiveRangeFor(operand, spill_mode);
+  if (range == nullptr) return nullptr;
+
+  if (range->IsEmpty() || range->Start() > position) {
+    // This can happen if there is a definition without a use.
+    range->AddUseInterval(position, position.NextStart(), allocation_zone(),
+                          data()->is_trace_alloc());
+    range->AddUsePosition(NewUsePosition(position.NextStart()),
+                          data()->is_trace_alloc());
+  } else {
+    range->ShortenTo(position, data()->is_trace_alloc());
+  }
+  if (!operand->IsUnallocated()) return nullptr;
+  UnallocatedOperand* unalloc_operand = UnallocatedOperand::cast(operand);
+  UsePosition* use_pos =
+      NewUsePosition(position, unalloc_operand, hint, hint_type);
+  range->AddUsePosition(use_pos, data()->is_trace_alloc());
+  return use_pos;
+}
+
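+// Records a use of 'operand' at 'position' and conservatively extends the
+// live range back to 'block_start'. Because instructions are processed in
+// reverse order, a Define encountered later (i.e. earlier in the block) will
+// shorten the interval again.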
+UsePosition* LiveRangeBuilder::Use(LifetimePosition block_start,
+                                   LifetimePosition position,
+                                   InstructionOperand* operand, void* hint,
+                                   UsePositionHintType hint_type,
+                                   SpillMode spill_mode) {
+  TopLevelLiveRange* range = LiveRangeFor(operand, spill_mode);
+  if (range == nullptr) return nullptr;
+  UsePosition* use_pos = nullptr;
+  if (operand->IsUnallocated()) {
+    UnallocatedOperand* unalloc_operand = UnallocatedOperand::cast(operand);
+    use_pos = NewUsePosition(position, unalloc_operand, hint, hint_type);
+    range->AddUsePosition(use_pos, data()->is_trace_alloc());
+  }
+  range->AddUseInterval(block_start, position, allocation_zone(),
+                        data()->is_trace_alloc());
+  return use_pos;
+}
+
+void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
+                                           BitVector* live) {
+  int block_start = block->first_instruction_index();
+  LifetimePosition block_start_position =
+      LifetimePosition::GapFromInstructionIndex(block_start);
+  bool fixed_float_live_ranges = false;
+  bool fixed_simd128_live_ranges = false;
+  if (!kSimpleFPAliasing) {
+    int mask = data()->code()->representation_mask();
+    fixed_float_live_ranges = (mask & kFloat32Bit) != 0;
+    fixed_simd128_live_ranges = (mask & kSimd128Bit) != 0;
+  }
+  SpillMode spill_mode = SpillModeForBlock(block);
+
+  for (int index = block->last_instruction_index(); index >= block_start;
+       index--) {
+    LifetimePosition curr_position =
+        LifetimePosition::InstructionFromInstructionIndex(index);
+    Instruction* instr = code()->InstructionAt(index);
+    DCHECK_NOT_NULL(instr);
+    DCHECK(curr_position.IsInstructionPosition());
+    // Process output, inputs, and temps of this instruction.
+    for (size_t i = 0; i < instr->OutputCount(); i++) {
+      InstructionOperand* output = instr->OutputAt(i);
+      if (output->IsUnallocated()) {
+        // Unsupported.
+        DCHECK(!UnallocatedOperand::cast(output)->HasSlotPolicy());
+        int out_vreg = UnallocatedOperand::cast(output)->virtual_register();
+        live->Remove(out_vreg);
+      } else if (output->IsConstant()) {
+        int out_vreg = ConstantOperand::cast(output)->virtual_register();
+        live->Remove(out_vreg);
+      }
+      if (block->IsHandler() && index == block_start && output->IsAllocated() &&
+          output->IsRegister() &&
+          AllocatedOperand::cast(output)->GetRegister() ==
+              v8::internal::kReturnRegister0) {
+        // The register defined here is blocked from gap start - it is the
+        // exception value.
+        // TODO(mtrofin): should we explore an explicit opcode for
+        // the first instruction in the handler?
+        Define(LifetimePosition::GapFromInstructionIndex(index), output,
+               spill_mode);
+      } else {
+        Define(curr_position, output, spill_mode);
+      }
+    }
+
+    if (instr->ClobbersRegisters()) {
+      for (int i = 0; i < config()->num_allocatable_general_registers(); ++i) {
+        // Create a UseInterval at this instruction for all fixed registers
+        // (including the instruction outputs). Adding another UseInterval here
+        // is OK because AddUseInterval will just merge it with the existing
+        // one at the end of the range.
+        int code = config()->GetAllocatableGeneralCode(i);
+        TopLevelLiveRange* range = FixedLiveRangeFor(code, spill_mode);
+        range->AddUseInterval(curr_position, curr_position.End(),
+                              allocation_zone(), data()->is_trace_alloc());
+      }
+    }
+
+    if (instr->ClobbersDoubleRegisters()) {
+      for (int i = 0; i < config()->num_allocatable_double_registers(); ++i) {
+        // Add a UseInterval for all DoubleRegisters. See comment above for
+        // general registers.
+        int code = config()->GetAllocatableDoubleCode(i);
+        TopLevelLiveRange* range = FixedFPLiveRangeFor(
+            code, MachineRepresentation::kFloat64, spill_mode);
+        range->AddUseInterval(curr_position, curr_position.End(),
+                              allocation_zone(), data()->is_trace_alloc());
+      }
+      // Clobber fixed float registers on archs with non-simple aliasing.
+      if (!kSimpleFPAliasing) {
+        if (fixed_float_live_ranges) {
+          for (int i = 0; i < config()->num_allocatable_float_registers();
+               ++i) {
+            // Add a UseInterval for all FloatRegisters. See comment above for
+            // general registers.
+            int code = config()->GetAllocatableFloatCode(i);
+            TopLevelLiveRange* range = FixedFPLiveRangeFor(
+                code, MachineRepresentation::kFloat32, spill_mode);
+            range->AddUseInterval(curr_position, curr_position.End(),
+                                  allocation_zone(), data()->is_trace_alloc());
+          }
+        }
+        if (fixed_simd128_live_ranges) {
+          for (int i = 0; i < config()->num_allocatable_simd128_registers();
+               ++i) {
+            int code = config()->GetAllocatableSimd128Code(i);
+            TopLevelLiveRange* range = FixedFPLiveRangeFor(
+                code, MachineRepresentation::kSimd128, spill_mode);
+            range->AddUseInterval(curr_position, curr_position.End(),
+                                  allocation_zone(), data()->is_trace_alloc());
+          }
+        }
+      }
+    }
+
+    for (size_t i = 0; i < instr->InputCount(); i++) {
+      InstructionOperand* input = instr->InputAt(i);
+      if (input->IsImmediate()) {
+        continue;  // Ignore immediates.
+      }
+      LifetimePosition use_pos;
+      if (input->IsUnallocated() &&
+          UnallocatedOperand::cast(input)->IsUsedAtStart()) {
+        use_pos = curr_position;
+      } else {
+        use_pos = curr_position.End();
+      }
+
+      if (input->IsUnallocated()) {
+        UnallocatedOperand* unalloc = UnallocatedOperand::cast(input);
+        int vreg = unalloc->virtual_register();
+        live->Add(vreg);
+        if (unalloc->HasSlotPolicy()) {
+          data()->GetOrCreateLiveRangeFor(vreg)->register_slot_use(
+              block->IsDeferred()
+                  ? TopLevelLiveRange::SlotUseKind::kDeferredSlotUse
+                  : TopLevelLiveRange::SlotUseKind::kGeneralSlotUse);
+        }
+      }
+      Use(block_start_position, use_pos, input, spill_mode);
+    }
+
+    for (size_t i = 0; i < instr->TempCount(); i++) {
+      InstructionOperand* temp = instr->TempAt(i);
+      // Unsupported.
+      DCHECK_IMPLIES(temp->IsUnallocated(),
+                     !UnallocatedOperand::cast(temp)->HasSlotPolicy());
+      if (instr->ClobbersTemps()) {
+        if (temp->IsRegister()) continue;
+        if (temp->IsUnallocated()) {
+          UnallocatedOperand* temp_unalloc = UnallocatedOperand::cast(temp);
+          if (temp_unalloc->HasFixedPolicy()) {
+            continue;
+          }
+        }
+      }
+      Use(block_start_position, curr_position.End(), temp, spill_mode);
+      Define(curr_position, temp, spill_mode);
+    }
+
+    // Process the moves of the instruction's gaps, making their sources live.
+    const Instruction::GapPosition kPositions[] = {Instruction::END,
+                                                   Instruction::START};
+    curr_position = curr_position.PrevStart();
+    DCHECK(curr_position.IsGapPosition());
+    for (const Instruction::GapPosition& position : kPositions) {
+      ParallelMove* move = instr->GetParallelMove(position);
+      if (move == nullptr) continue;
+      if (position == Instruction::END) {
+        curr_position = curr_position.End();
+      } else {
+        curr_position = curr_position.Start();
+      }
+      for (MoveOperands* cur : *move) {
+        InstructionOperand& from = cur->source();
+        InstructionOperand& to = cur->destination();
+        void* hint = &to;
+        UsePositionHintType hint_type = UsePosition::HintTypeForOperand(to);
+        UsePosition* to_use = nullptr;
+        int phi_vreg = -1;
+        if (to.IsUnallocated()) {
+          int to_vreg = UnallocatedOperand::cast(to).virtual_register();
+          TopLevelLiveRange* to_range =
+              data()->GetOrCreateLiveRangeFor(to_vreg);
+          if (to_range->is_phi()) {
+            phi_vreg = to_vreg;
+            if (to_range->is_non_loop_phi()) {
+              hint = to_range->current_hint_position();
+              hint_type = hint == nullptr ? UsePositionHintType::kNone
+                                          : UsePositionHintType::kUsePos;
+            } else {
+              hint_type = UsePositionHintType::kPhi;
+              hint = data()->GetPhiMapValueFor(to_vreg);
+            }
+          } else {
+            if (live->Contains(to_vreg)) {
+              to_use =
+                  Define(curr_position, &to, &from,
+                         UsePosition::HintTypeForOperand(from), spill_mode);
+              live->Remove(to_vreg);
+            } else {
+              cur->Eliminate();
+              continue;
+            }
+          }
+        } else {
+          Define(curr_position, &to, spill_mode);
+        }
+        UsePosition* from_use = Use(block_start_position, curr_position, &from,
+                                    hint, hint_type, spill_mode);
+        // Mark range live.
+        if (from.IsUnallocated()) {
+          live->Add(UnallocatedOperand::cast(from).virtual_register());
+        }
+        // When the value is moved to a register to meet input constraints,
+        // we should treat this use like a register use in the backward
+        // spilling heuristics, even though the use itself is not register
+        // beneficial at the AllocateBlockedReg stage.
+        if (to.IsAnyRegister() ||
+            (to.IsUnallocated() &&
+             UnallocatedOperand::cast(&to)->HasRegisterPolicy())) {
+          from_use->set_spill_detrimental();
+        }
+        // Resolve use position hints just created.
+        if (to_use != nullptr && from_use != nullptr) {
+          to_use->ResolveHint(from_use);
+          from_use->ResolveHint(to_use);
+        }
+        DCHECK_IMPLIES(to_use != nullptr, to_use->IsResolved());
+        DCHECK_IMPLIES(from_use != nullptr, from_use->IsResolved());
+        // Potentially resolve phi hint.
+        if (phi_vreg != -1) ResolvePhiHint(&from, from_use);
+      }
+    }
+  }
+}
+
+void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
+                                   BitVector* live) {
+  for (PhiInstruction* phi : block->phis()) {
+    // The live range interval already ends at the first instruction of the
+    // block.
+    int phi_vreg = phi->virtual_register();
+    live->Remove(phi_vreg);
+    // Select a hint from a predecessor block that precedes this block in the
+    // rpo order. In order of priority:
+    // - Avoid hints from deferred blocks.
+    // - Prefer hints from allocated (or explicit) operands.
+    // - Prefer hints from empty blocks (containing just parallel moves and a
+    //   jump). In these cases, if we can elide the moves, the jump threader
+    //   is likely to be able to elide the jump.
+    // The enforcement of hinting in rpo order is required because hint
+    // resolution that happens later in the compiler pipeline visits
+    // instructions in reverse rpo order, relying on the fact that phis are
+    // encountered before their hints.
+    InstructionOperand* hint = nullptr;
+    int hint_preference = 0;
+
+    // The cost of hinting increases with the number of predecessors. At the
+    // same time, the typical benefit decreases, since this hinting only
+    // optimises the execution path through one predecessor. A limit of 2 is
+    // sufficient to hit the common if/else pattern.
+    int predecessor_limit = 2;
+
+    for (RpoNumber predecessor : block->predecessors()) {
+      const InstructionBlock* predecessor_block =
+          code()->InstructionBlockAt(predecessor);
+      DCHECK_EQ(predecessor_block->rpo_number(), predecessor);
+
+      // Only take hints from earlier rpo numbers.
+      if (predecessor >= block->rpo_number()) continue;
+
+      // Look up the predecessor instruction.
+      const Instruction* predecessor_instr =
+          GetLastInstruction(code(), predecessor_block);
+      InstructionOperand* predecessor_hint = nullptr;
+      // Phis are assigned in the END position of the last instruction in each
+      // predecessor block.
+      for (MoveOperands* move :
+           *predecessor_instr->GetParallelMove(Instruction::END)) {
+        InstructionOperand& to = move->destination();
+        if (to.IsUnallocated() &&
+            UnallocatedOperand::cast(to).virtual_register() == phi_vreg) {
+          predecessor_hint = &move->source();
+          break;
+        }
+      }
+      DCHECK_NOT_NULL(predecessor_hint);
+
+      // For each predecessor, generate a score according to the priorities
+      // described above, and pick the best one. Flags in higher-order bits have
+      // a higher priority than those in lower-order bits.
+      int predecessor_hint_preference = 0;
+      const int kNotDeferredBlockPreference = (1 << 2);
+      const int kMoveIsAllocatedPreference = (1 << 1);
+      const int kBlockIsEmptyPreference = (1 << 0);
+
+      // - Avoid hints from deferred blocks.
+      if (!predecessor_block->IsDeferred()) {
+        predecessor_hint_preference |= kNotDeferredBlockPreference;
+      }
+
+      // - Prefer hints from allocated operands.
+      //
+      // Already-allocated operands are typically assigned using the parallel
+      // moves on the last instruction. For example:
+      //
+      //      gap (v101 = [x0|R|w32]) (v100 = v101)
+      //      ArchJmp
+      //    ...
+      //    phi: v100 = v101 v102
+      //
+      // We have already found the END move, so look for a matching START move
+      // from an allocated operand.
+      //
+      // Note that we cannot simply look up data()->live_ranges()[vreg] here
+      // because the live ranges are still being built when this function is
+      // called.
+      // TODO(v8): Find a way to separate hinting from live range analysis in
+      // BuildLiveRanges so that we can use the O(1) live-range look-up.
+      auto moves = predecessor_instr->GetParallelMove(Instruction::START);
+      if (moves != nullptr) {
+        for (MoveOperands* move : *moves) {
+          InstructionOperand& to = move->destination();
+          if (predecessor_hint->Equals(to)) {
+            if (move->source().IsAllocated()) {
+              predecessor_hint_preference |= kMoveIsAllocatedPreference;
+            }
+            break;
+          }
+        }
+      }
+
+      // - Prefer hints from empty blocks.
+      if (predecessor_block->last_instruction_index() ==
+          predecessor_block->first_instruction_index()) {
+        predecessor_hint_preference |= kBlockIsEmptyPreference;
+      }
+
+      if ((hint == nullptr) ||
+          (predecessor_hint_preference > hint_preference)) {
+        // Take the hint from this predecessor.
+        hint = predecessor_hint;
+        hint_preference = predecessor_hint_preference;
+      }
+
+      if (--predecessor_limit <= 0) break;
+    }
+    DCHECK_NOT_NULL(hint);
+
+    LifetimePosition block_start = LifetimePosition::GapFromInstructionIndex(
+        block->first_instruction_index());
+    UsePosition* use_pos = Define(block_start, &phi->output(), hint,
+                                  UsePosition::HintTypeForOperand(*hint),
+                                  SpillModeForBlock(block));
+    MapPhiHint(hint, use_pos);
+  }
+}
+
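+// Every value live on entry to a loop header is kept live across the whole
+// loop: its interval is extended to the end of the loop, and the value is
+// added to the live-in set of every block inside the loop body.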
+void LiveRangeBuilder::ProcessLoopHeader(const InstructionBlock* block,
+                                         BitVector* live) {
+  DCHECK(block->IsLoopHeader());
+  // Add a live range stretching from the first loop instruction to the last
+  // for each value live on entry to the header.
+  BitVector::Iterator iterator(live);
+  LifetimePosition start = LifetimePosition::GapFromInstructionIndex(
+      block->first_instruction_index());
+  LifetimePosition end = LifetimePosition::GapFromInstructionIndex(
+                             code()->LastLoopInstructionIndex(block))
+                             .NextFullStart();
+  while (!iterator.Done()) {
+    int operand_index = iterator.Current();
+    TopLevelLiveRange* range = data()->GetOrCreateLiveRangeFor(operand_index);
+    range->EnsureInterval(start, end, allocation_zone(),
+                          data()->is_trace_alloc());
+    iterator.Advance();
+  }
+  // Insert all values into the live in sets of all blocks in the loop.
+  for (int i = block->rpo_number().ToInt() + 1; i < block->loop_end().ToInt();
+       ++i) {
+    live_in_sets()[i]->Union(*live);
+  }
+}
+
+void LiveRangeBuilder::BuildLiveRanges() {
+  // Process the blocks in reverse order.
+  for (int block_id = code()->InstructionBlockCount() - 1; block_id >= 0;
+       --block_id) {
+    data_->tick_counter()->TickAndMaybeEnterSafepoint();
+    InstructionBlock* block =
+        code()->InstructionBlockAt(RpoNumber::FromInt(block_id));
+    BitVector* live = ComputeLiveOut(block, data());
+    // Initially consider all live_out values live for the entire block. We
+    // will shorten these intervals if necessary.
+    AddInitialIntervals(block, live);
+    // Process the instructions in reverse order, generating and killing
+    // live values.
+    ProcessInstructions(block, live);
+    // All phi output operands are killed by this block.
+    ProcessPhis(block, live);
+    // Now 'live' is the live_in set for this block, except that it does not
+    // include values that are live out on backward successor edges.
+    if (block->IsLoopHeader()) ProcessLoopHeader(block, live);
+    live_in_sets()[block_id] = live;
+  }
+  // Postprocess the ranges.
+  const size_t live_ranges_size = data()->live_ranges().size();
+  for (TopLevelLiveRange* range : data()->live_ranges()) {
+    data_->tick_counter()->TickAndMaybeEnterSafepoint();
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (range == nullptr) continue;
+    // Give slots to all ranges with a non-fixed slot use.
+    if (range->has_slot_use() && range->HasNoSpillType()) {
+      SpillMode spill_mode =
+          range->slot_use_kind() ==
+                  TopLevelLiveRange::SlotUseKind::kDeferredSlotUse
+              ? SpillMode::kSpillDeferred
+              : SpillMode::kSpillAtDefinition;
+      data()->AssignSpillRangeToLiveRange(range, spill_mode);
+    }
+    // TODO(bmeurer): This is a horrible hack to make sure that for constant
+    // live ranges, every use requires the constant to be in a register.
+    // Without this hack, all uses with "any" policy would get the constant
+    // operand assigned.
+    if (range->HasSpillOperand() && range->GetSpillOperand()->IsConstant()) {
+      for (UsePosition* pos = range->first_pos(); pos != nullptr;
+           pos = pos->next()) {
+        if (pos->type() == UsePositionType::kRequiresSlot ||
+            pos->type() == UsePositionType::kRegisterOrSlotOrConstant) {
+          continue;
+        }
+        UsePositionType new_type = UsePositionType::kRegisterOrSlot;
+        // Can't mark phis as needing a register.
+        if (!pos->pos().IsGapPosition()) {
+          new_type = UsePositionType::kRequiresRegister;
+        }
+        pos->set_type(new_type, true);
+      }
+    }
+    range->ResetCurrentHintPosition();
+  }
+  for (auto preassigned : data()->preassigned_slot_ranges()) {
+    TopLevelLiveRange* range = preassigned.first;
+    int slot_id = preassigned.second;
+    SpillRange* spill = range->HasSpillRange()
+                            ? range->GetSpillRange()
+                            : data()->AssignSpillRangeToLiveRange(
+                                  range, SpillMode::kSpillAtDefinition);
+    spill->set_assigned_slot(slot_id);
+  }
+#ifdef DEBUG
+  Verify();
+#endif
+}
+
+void LiveRangeBuilder::MapPhiHint(InstructionOperand* operand,
+                                  UsePosition* use_pos) {
+  DCHECK(!use_pos->IsResolved());
+  auto res = phi_hints_.insert(std::make_pair(operand, use_pos));
+  DCHECK(res.second);
+  USE(res);
+}
+
+void LiveRangeBuilder::ResolvePhiHint(InstructionOperand* operand,
+                                      UsePosition* use_pos) {
+  auto it = phi_hints_.find(operand);
+  if (it == phi_hints_.end()) return;
+  DCHECK(!it->second->IsResolved());
+  it->second->ResolveHint(use_pos);
+}
+
+void LiveRangeBuilder::Verify() const {
+  for (auto& hint : phi_hints_) {
+    CHECK(hint.second->IsResolved());
+  }
+  for (const TopLevelLiveRange* current : data()->live_ranges()) {
+    if (current != nullptr && !current->IsEmpty()) {
+      // New LiveRanges should not be split.
+      CHECK_NULL(current->next());
+      // General integrity check.
+      current->Verify();
+      const UseInterval* first = current->first_interval();
+      if (first->next() == nullptr) continue;
+
+      // Consecutive intervals should not end and start in the same block,
+      // otherwise the intervals should have been joined, because the
+      // variable is live throughout that block.
+      CHECK(NextIntervalStartsInDifferentBlocks(first));
+
+      for (const UseInterval* i = first->next(); i != nullptr; i = i->next()) {
+        // Except for the first interval, the other intervals must start at
+        // a block boundary, otherwise data wouldn't flow to them.
+        CHECK(IntervalStartsAtBlockBoundary(i));
+        // The last instruction of the predecessors of the block the interval
+        // starts must be covered by the range.
+        CHECK(IntervalPredecessorsCoveredByRange(i, current));
+        if (i->next() != nullptr) {
+          // Check the consecutive intervals property, except for the last
+          // interval, where it doesn't apply.
+          CHECK(NextIntervalStartsInDifferentBlocks(i));
+        }
+      }
+    }
+  }
+}
+
+bool LiveRangeBuilder::IntervalStartsAtBlockBoundary(
+    const UseInterval* interval) const {
+  LifetimePosition start = interval->start();
+  if (!start.IsFullStart()) return false;
+  int instruction_index = start.ToInstructionIndex();
+  const InstructionBlock* block =
+      data()->code()->GetInstructionBlock(instruction_index);
+  return block->first_instruction_index() == instruction_index;
+}
+
+bool LiveRangeBuilder::IntervalPredecessorsCoveredByRange(
+    const UseInterval* interval, const TopLevelLiveRange* range) const {
+  LifetimePosition start = interval->start();
+  int instruction_index = start.ToInstructionIndex();
+  const InstructionBlock* block =
+      data()->code()->GetInstructionBlock(instruction_index);
+  for (RpoNumber pred_index : block->predecessors()) {
+    const InstructionBlock* predecessor =
+        data()->code()->InstructionBlockAt(pred_index);
+    LifetimePosition last_pos = LifetimePosition::GapFromInstructionIndex(
+        predecessor->last_instruction_index());
+    last_pos = last_pos.NextStart().End();
+    if (!range->Covers(last_pos)) return false;
+  }
+  return true;
+}
+
+bool LiveRangeBuilder::NextIntervalStartsInDifferentBlocks(
+    const UseInterval* interval) const {
+  DCHECK_NOT_NULL(interval->next());
+  LifetimePosition end = interval->end();
+  LifetimePosition next_start = interval->next()->start();
+  // Since end is not covered, but the previous position is, move back a
+  // position.
+  end = end.IsStart() ? end.PrevStart().End() : end.Start();
+  int last_covered_index = end.ToInstructionIndex();
+  const InstructionBlock* block =
+      data()->code()->GetInstructionBlock(last_covered_index);
+  const InstructionBlock* next_block =
+      data()->code()->GetInstructionBlock(next_start.ToInstructionIndex());
+  return block->rpo_number() < next_block->rpo_number();
+}
+
+void BundleBuilder::BuildBundles() {
+  TRACE("Build bundles\n");
+  // Process the blocks in reverse order.
+  for (int block_id = code()->InstructionBlockCount() - 1; block_id >= 0;
+       --block_id) {
+    InstructionBlock* block =
+        code()->InstructionBlockAt(RpoNumber::FromInt(block_id));
+    TRACE("Block B%d\n", block_id);
+    for (auto phi : block->phis()) {
+      LiveRange* out_range =
+          data()->GetOrCreateLiveRangeFor(phi->virtual_register());
+      LiveRangeBundle* out = out_range->get_bundle();
+      if (out == nullptr) {
+        out = data()->allocation_zone()->New<LiveRangeBundle>(
+            data()->allocation_zone(), next_bundle_id_++);
+        out->TryAddRange(out_range);
+      }
+      TRACE("Processing phi for v%d with %d:%d\n", phi->virtual_register(),
+            out_range->TopLevel()->vreg(), out_range->relative_id());
+      bool phi_interferes_with_backedge_input = false;
+      for (auto input : phi->operands()) {
+        LiveRange* input_range = data()->GetOrCreateLiveRangeFor(input);
+        TRACE("Input value v%d with range %d:%d\n", input,
+              input_range->TopLevel()->vreg(), input_range->relative_id());
+        LiveRangeBundle* input_bundle = input_range->get_bundle();
+        if (input_bundle != nullptr) {
+          TRACE("Merge\n");
+          if (out->TryMerge(input_bundle, data()->is_trace_alloc())) {
+            TRACE("Merged %d and %d to %d\n", phi->virtual_register(), input,
+                  out->id());
+          } else if (input_range->Start() > out_range->Start()) {
+            // We are only interested in values defined after the phi, because
+            // those are values that will go over a back-edge.
+            phi_interferes_with_backedge_input = true;
+          }
+        } else {
+          TRACE("Add\n");
+          if (out->TryAddRange(input_range)) {
+            TRACE("Added %d and %d to %d\n", phi->virtual_register(), input,
+                  out->id());
+          } else if (input_range->Start() > out_range->Start()) {
+            // We are only interested in values defined after the phi, because
+            // those are values that will go over a back-edge.
+            phi_interferes_with_backedge_input = true;
+          }
+        }
+      }
+      // Spilling the phi at the loop header is not beneficial if there is
+      // a back-edge with an input for the phi that interferes with the phi's
+      // value, because in case that input gets spilled it might introduce
+      // a stack-to-stack move at the back-edge.
+      if (phi_interferes_with_backedge_input)
+        out_range->TopLevel()->set_spilling_at_loop_header_not_beneficial();
+    }
+    TRACE("Done block B%d\n", block_id);
+  }
+}
+
+bool LiveRangeBundle::TryAddRange(LiveRange* range) {
+  DCHECK_NULL(range->get_bundle());
+  // We may only add a new live range if its use intervals do not
+  // overlap with existing intervals in the bundle.
+  if (UsesOverlap(range->first_interval())) return false;
+  ranges_.insert(range);
+  range->set_bundle(this);
+  InsertUses(range->first_interval());
+  return true;
+}
+bool LiveRangeBundle::TryMerge(LiveRangeBundle* other, bool trace_alloc) {
+  if (other == this) return true;
+
+  auto iter1 = uses_.begin();
+  auto iter2 = other->uses_.begin();
+
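+  // Both use sets are sorted by start, so one lockstep walk suffices:
+  // whenever one interval ends at or before the other starts, advance it;
+  // if neither holds, the pair overlaps and merging is impossible.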
+  while (iter1 != uses_.end() && iter2 != other->uses_.end()) {
+    if (iter1->start >= iter2->end) {
+      ++iter2;
+    } else if (iter2->start >= iter1->end) {
+      ++iter1;
+    } else {
+      TRACE_COND(trace_alloc, "No merge %d:%d %d:%d\n", iter1->start,
+                 iter1->end, iter2->start, iter2->end);
+      return false;
+    }
+  }
+  // Uses are disjoint, merging is possible.
+  for (auto it = other->ranges_.begin(); it != other->ranges_.end(); ++it) {
+    (*it)->set_bundle(this);
+    InsertUses((*it)->first_interval());
+  }
+  ranges_.insert(other->ranges_.begin(), other->ranges_.end());
+  other->ranges_.clear();
+
+  return true;
+}
+
+void LiveRangeBundle::MergeSpillRanges() {
+  SpillRange* target = nullptr;
+  for (auto range : ranges_) {
+    if (range->TopLevel()->HasSpillRange()) {
+      SpillRange* current = range->TopLevel()->GetSpillRange();
+      if (target == nullptr) {
+        target = current;
+      } else if (target != current) {
+        target->TryMerge(current);
+      }
+    }
+  }
+}
+
+RegisterAllocator::RegisterAllocator(TopTierRegisterAllocationData* data,
+                                     RegisterKind kind)
+    : data_(data),
+      mode_(kind),
+      num_registers_(GetRegisterCount(data->config(), kind)),
+      num_allocatable_registers_(
+          GetAllocatableRegisterCount(data->config(), kind)),
+      allocatable_register_codes_(
+          GetAllocatableRegisterCodes(data->config(), kind)),
+      check_fp_aliasing_(false) {
+  if (!kSimpleFPAliasing && kind == RegisterKind::kDouble) {
+    check_fp_aliasing_ = (data->code()->representation_mask() &
+                          (kFloat32Bit | kSimd128Bit)) != 0;
+  }
+}
+
+LifetimePosition RegisterAllocator::GetSplitPositionForInstruction(
+    const LiveRange* range, int instruction_index) {
+  LifetimePosition ret = LifetimePosition::Invalid();
+
+  ret = LifetimePosition::GapFromInstructionIndex(instruction_index);
+  if (range->Start() >= ret || ret >= range->End()) {
+    return LifetimePosition::Invalid();
+  }
+  return ret;
+}
+
+void RegisterAllocator::SplitAndSpillRangesDefinedByMemoryOperand() {
+  size_t initial_range_count = data()->live_ranges().size();
+  for (size_t i = 0; i < initial_range_count; ++i) {
+    CHECK_EQ(initial_range_count,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    TopLevelLiveRange* range = data()->live_ranges()[i];
+    if (!CanProcessRange(range)) continue;
+    // Only assume defined by memory operand if we are guaranteed to spill it or
+    // it has a spill operand.
+    if (range->HasNoSpillType() ||
+        (range->HasSpillRange() && !range->has_non_deferred_slot_use())) {
+      continue;
+    }
+    LifetimePosition start = range->Start();
+    TRACE("Live range %d:%d is defined by a spill operand.\n",
+          range->TopLevel()->vreg(), range->relative_id());
+    LifetimePosition next_pos = start;
+    if (next_pos.IsGapPosition()) {
+      next_pos = next_pos.NextStart();
+    }
+
+    UsePosition* pos = range->NextUsePositionRegisterIsBeneficial(next_pos);
+    // If the range already has a spill operand and it doesn't need a
+    // register immediately, split it and spill the first part of the range.
+    if (pos == nullptr) {
+      Spill(range, SpillMode::kSpillAtDefinition);
+    } else if (pos->pos() > range->Start().NextStart()) {
+      // Do not spill live range eagerly if use position that can benefit from
+      // the register is too close to the start of live range.
+      LifetimePosition split_pos = GetSplitPositionForInstruction(
+          range, pos->pos().ToInstructionIndex());
+      // There is no place to split, so we can't split and spill.
+      if (!split_pos.IsValid()) continue;
+
+      split_pos =
+          FindOptimalSplitPos(range->Start().NextFullStart(), split_pos);
+
+      SplitRangeAt(range, split_pos);
+      Spill(range, SpillMode::kSpillAtDefinition);
+    }
+  }
+}
+
+LiveRange* RegisterAllocator::SplitRangeAt(LiveRange* range,
+                                           LifetimePosition pos) {
+  DCHECK(!range->TopLevel()->IsFixed());
+  TRACE("Splitting live range %d:%d at %d\n", range->TopLevel()->vreg(),
+        range->relative_id(), pos.value());
+
+  if (pos <= range->Start()) return range;
+
+  // We can't properly connect live ranges if splitting occurred at the end
+  // of a block.
+  DCHECK(pos.IsStart() || pos.IsGapPosition() ||
+         (GetInstructionBlock(code(), pos)->last_instruction_index() !=
+          pos.ToInstructionIndex()));
+
+  LiveRange* result = range->SplitAt(pos, allocation_zone());
+  return result;
+}
+
+LiveRange* RegisterAllocator::SplitBetween(LiveRange* range,
+                                           LifetimePosition start,
+                                           LifetimePosition end) {
+  DCHECK(!range->TopLevel()->IsFixed());
+  TRACE("Splitting live range %d:%d in position between [%d, %d]\n",
+        range->TopLevel()->vreg(), range->relative_id(), start.value(),
+        end.value());
+
+  LifetimePosition split_pos = FindOptimalSplitPos(start, end);
+  DCHECK(split_pos >= start);
+  return SplitRangeAt(range, split_pos);
+}
+
+LifetimePosition RegisterAllocator::FindOptimalSplitPos(LifetimePosition start,
+                                                        LifetimePosition end) {
+  int start_instr = start.ToInstructionIndex();
+  int end_instr = end.ToInstructionIndex();
+  DCHECK_LE(start_instr, end_instr);
+
+  // We have no choice: start and end fall in the same instruction.
+  if (start_instr == end_instr) return end;
+
+  const InstructionBlock* start_block = GetInstructionBlock(code(), start);
+  const InstructionBlock* end_block = GetInstructionBlock(code(), end);
+
+  if (end_block == start_block) {
+    // The interval is split in the same basic block. Split at the latest
+    // possible position.
+    return end;
+  }
+
+  const InstructionBlock* block = end_block;
+  // Find header of outermost loop.
+  do {
+    const InstructionBlock* loop = GetContainingLoop(code(), block);
+    if (loop == nullptr ||
+        loop->rpo_number().ToInt() <= start_block->rpo_number().ToInt()) {
+      // No more loops or loop starts before the lifetime start.
+      break;
+    }
+    block = loop;
+  } while (true);
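+  // If the walk found such a header (e.g. {end} lies in a loop nest whose
+  // headers all start after {start}), the split below is hoisted to that
+  // outermost header's first gap position instead of staying deep inside
+  // the loops.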
+
+  // We did not find any suitable outer loop. Split at the latest possible
+  // position unless end_block is a loop header itself.
+  if (block == end_block && !end_block->IsLoopHeader()) return end;
+
+  return LifetimePosition::GapFromInstructionIndex(
+      block->first_instruction_index());
+}
+
+LifetimePosition RegisterAllocator::FindOptimalSpillingPos(
+    LiveRange* range, LifetimePosition pos, SpillMode spill_mode,
+    LiveRange** begin_spill_out) {
+  *begin_spill_out = range;
+  // TODO(herhut): Be more clever here as long as we do not move pos out of
+  // deferred code.
+  if (spill_mode == SpillMode::kSpillDeferred) return pos;
+  const InstructionBlock* block = GetInstructionBlock(code(), pos.Start());
+  const InstructionBlock* loop_header =
+      block->IsLoopHeader() ? block : GetContainingLoop(code(), block);
+  if (loop_header == nullptr) return pos;
+
+  while (loop_header != nullptr) {
+    // We are going to spill the live range inside the loop.
+    // If possible, try to move the spilling position backwards to the loop
+    // header. This will reduce the number of memory moves on the back edge.
+    LifetimePosition loop_start = LifetimePosition::GapFromInstructionIndex(
+        loop_header->first_instruction_index());
+    // Stop if we moved to a loop header before the value is defined, or to
+    // its definition position when spilling there is not beneficial.
+    if (range->TopLevel()->Start() > loop_start ||
+        (range->TopLevel()->Start() == loop_start &&
+         range->TopLevel()->SpillAtLoopHeaderNotBeneficial()))
+      return pos;
+
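+    // Only hoist if no child range that is live between the loop header and
+    // {pos} has a use for which spilling would be detrimental.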
+    LiveRange* live_at_header = range->TopLevel()->GetChildCovers(loop_start);
+
+    if (live_at_header != nullptr && !live_at_header->spilled()) {
+      for (LiveRange* check_use = live_at_header;
+           check_use != nullptr && check_use->Start() < pos;
+           check_use = check_use->next()) {
+        // If we find a use for which spilling is detrimental, don't spill
+        // at the loop header.
+        UsePosition* next_use =
+            check_use->NextUsePositionSpillDetrimental(loop_start);
+        // UsePosition at the end of a UseInterval may
+        // have the same value as the start of the next range.
+        if (next_use != nullptr && next_use->pos() <= pos) {
+          return pos;
+        }
+      }
+      // No register beneficial use inside the loop before the pos.
+      *begin_spill_out = live_at_header;
+      pos = loop_start;
+    }
+
+    // Try hoisting out to an outer loop.
+    loop_header = GetContainingLoop(code(), loop_header);
+  }
+  return pos;
+}
+
+void RegisterAllocator::Spill(LiveRange* range, SpillMode spill_mode) {
+  DCHECK(!range->spilled());
+  DCHECK(spill_mode == SpillMode::kSpillAtDefinition ||
+         GetInstructionBlock(code(), range->Start())->IsDeferred());
+  TopLevelLiveRange* first = range->TopLevel();
+  TRACE("Spilling live range %d:%d mode %d\n", first->vreg(),
+        range->relative_id(), spill_mode);
+
+  TRACE("Starting spill type is %d\n", static_cast<int>(first->spill_type()));
+  if (first->HasNoSpillType()) {
+    TRACE("New spill range needed");
+    data()->AssignSpillRangeToLiveRange(first, spill_mode);
+  }
+  // Upgrade the spill mode, in case this range was so far only spilled in
+  // deferred code.
+  if ((spill_mode == SpillMode::kSpillAtDefinition) &&
+      (first->spill_type() ==
+       TopLevelLiveRange::SpillType::kDeferredSpillRange)) {
+    TRACE("Upgrading\n");
+    first->set_spill_type(TopLevelLiveRange::SpillType::kSpillRange);
+  }
+  TRACE("Final spill type is %d\n", static_cast<int>(first->spill_type()));
+  range->Spill();
+}
+
+const char* RegisterAllocator::RegisterName(int register_code) const {
+  if (register_code == kUnassignedRegister) return "unassigned";
+  return mode() == RegisterKind::kGeneral
+             ? i::RegisterName(Register::from_code(register_code))
+             : i::RegisterName(DoubleRegister::from_code(register_code));
+}
+
+LinearScanAllocator::LinearScanAllocator(TopTierRegisterAllocationData* data,
+                                         RegisterKind kind, Zone* local_zone)
+    : RegisterAllocator(data, kind),
+      unhandled_live_ranges_(local_zone),
+      active_live_ranges_(local_zone),
+      inactive_live_ranges_(num_registers(), InactiveLiveRangeQueue(local_zone),
+                            local_zone),
+      next_active_ranges_change_(LifetimePosition::Invalid()),
+      next_inactive_ranges_change_(LifetimePosition::Invalid()) {
+  active_live_ranges().reserve(8);
+}
+
+void LinearScanAllocator::MaybeSpillPreviousRanges(LiveRange* begin_range,
+                                                   LifetimePosition begin_pos,
+                                                   LiveRange* end_range) {
+  // Spill begin_range after begin_pos, then spill every live range of this
+  // virtual register until but excluding end_range.
+  DCHECK(begin_range->Covers(begin_pos));
+  DCHECK_EQ(begin_range->TopLevel(), end_range->TopLevel());
+
+  if (begin_range != end_range) {
+    DCHECK_LE(begin_range->End(), end_range->Start());
+    if (!begin_range->spilled()) {
+      SpillAfter(begin_range, begin_pos, SpillMode::kSpillAtDefinition);
+    }
+    for (LiveRange* range = begin_range->next(); range != end_range;
+         range = range->next()) {
+      if (!range->spilled()) {
+        range->Spill();
+      }
+    }
+  }
+}
+
+void LinearScanAllocator::MaybeUndoPreviousSplit(LiveRange* range) {
+  if (range->next() != nullptr && range->next()->ShouldRecombine()) {
+    LiveRange* to_remove = range->next();
+    TRACE("Recombining %d:%d with %d\n", range->TopLevel()->vreg(),
+          range->relative_id(), to_remove->relative_id());
+
+    // Remove the range from unhandled, as attaching it will change its
+    // state and hence ordering in the unhandled set.
+    auto removed_cnt = unhandled_live_ranges().erase(to_remove);
+    DCHECK_EQ(removed_cnt, 1);
+    USE(removed_cnt);
+
+    range->AttachToNext();
+  } else if (range->next() != nullptr) {
+    TRACE("No recombine for %d:%d to %d\n", range->TopLevel()->vreg(),
+          range->relative_id(), range->next()->relative_id());
+  }
+}
+
+void LinearScanAllocator::SpillNotLiveRanges(RangeWithRegisterSet* to_be_live,
+                                             LifetimePosition position,
+                                             SpillMode spill_mode) {
+  for (auto it = active_live_ranges().begin();
+       it != active_live_ranges().end();) {
+    LiveRange* active_range = *it;
+    TopLevelLiveRange* toplevel = (*it)->TopLevel();
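+    // The register component of the lookup key below is just a placeholder;
+    // the expected register is read from the entry that is found.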
+    auto found = to_be_live->find({toplevel, kUnassignedRegister});
+    if (found == to_be_live->end()) {
+      // It is not contained in {to_be_live}, so spill it.
+      // Fixed registers are exempt from this. They might have been
+      // added from inactive at the block boundary but we know that
+      // they cannot conflict as they are built before register
+      // allocation starts. It would be algorithmically fine to split
+      // them and reschedule but the code does not allow us to do this.
+      if (toplevel->IsFixed()) {
+        TRACE("Keeping reactivated fixed range for %s\n",
+              RegisterName(toplevel->assigned_register()));
+        ++it;
+      } else {
+        // When spilling a previously spilled/reloaded range, we add back the
+        // tail that we might have split off when we reloaded/spilled it
+        // previously. Otherwise we might keep generating small split-offs.
+        MaybeUndoPreviousSplit(active_range);
+        TRACE("Putting back %d:%d\n", toplevel->vreg(),
+              active_range->relative_id());
+        LiveRange* split = SplitRangeAt(active_range, position);
+        DCHECK_NE(split, active_range);
+
+        // Make sure we revisit this range once it has a use that requires
+        // a register.
+        UsePosition* next_use = split->NextRegisterPosition(position);
+        if (next_use != nullptr) {
+          // Move to the start of the gap before use so that we have a space
+          // to perform the potential reload. Otherwise, do not spill but add
+          // to unhandled for reallocation.
+          LifetimePosition revisit_at = next_use->pos().FullStart();
+          TRACE("Next use at %d\n", revisit_at.value());
+          if (!data()->IsBlockBoundary(revisit_at)) {
+            // Leave some space so we have enough gap room.
+            revisit_at = revisit_at.PrevStart().FullStart();
+          }
+          // If this range became live right at the block boundary that we are
+          // currently processing, we do not need to split it. Instead move it
+          // to unhandled right away.
+          if (position < revisit_at) {
+            LiveRange* third_part = SplitRangeAt(split, revisit_at);
+            DCHECK_NE(split, third_part);
+            Spill(split, spill_mode);
+            TRACE("Marking %d:%d to recombine\n", toplevel->vreg(),
+                  third_part->relative_id());
+            third_part->SetRecombine();
+            AddToUnhandled(third_part);
+          } else {
+            AddToUnhandled(split);
+          }
+        } else {
+          Spill(split, spill_mode);
+        }
+        it = ActiveToHandled(it);
+      }
+    } else {
+      // This range is contained in {to_be_live}, so we can keep it.
+      int expected_register = (*found).expected_register;
+      to_be_live->erase(found);
+      if (expected_register == active_range->assigned_register()) {
+        // Was live and in the correct register; simply pass through.
+        TRACE("Keeping %d:%d in %s\n", toplevel->vreg(),
+              active_range->relative_id(),
+              RegisterName(active_range->assigned_register()));
+        ++it;
+      } else {
+        // Was live but in the wrong register. Split and schedule for
+        // allocation.
+        TRACE("Scheduling %d:%d\n", toplevel->vreg(),
+              active_range->relative_id());
+        LiveRange* split = SplitRangeAt(active_range, position);
+        split->set_controlflow_hint(expected_register);
+        AddToUnhandled(split);
+        it = ActiveToHandled(it);
+      }
+    }
+  }
+}
+
+LiveRange* LinearScanAllocator::AssignRegisterOnReload(LiveRange* range,
+                                                       int reg) {
+  // We know the register is currently free but it might be in
+  // use by a currently inactive range. So we might not be able
+  // to reload for the full distance. In such a case, split here.
+  // TODO(herhut):
+  // It might be better if we could use the normal unhandled queue and
+  // give reloading registers precedence. That way we would compute the
+  // intersection for the entire future.
+  LifetimePosition new_end = range->End();
+  for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
+    if ((kSimpleFPAliasing || !check_fp_aliasing()) && cur_reg != reg) {
+      continue;
+    }
+    for (const LiveRange* cur_inactive : inactive_live_ranges(cur_reg)) {
+      if (!kSimpleFPAliasing && check_fp_aliasing() &&
+          !data()->config()->AreAliases(cur_inactive->representation(), cur_reg,
+                                        range->representation(), reg)) {
+        continue;
+      }
+      if (new_end <= cur_inactive->NextStart()) {
+        // Inactive ranges are sorted by their next start, so the remaining
+        // ranges cannot contribute to new_end.
+        break;
+      }
+      auto next_intersection = cur_inactive->FirstIntersection(range);
+      if (!next_intersection.IsValid()) continue;
+      new_end = std::min(new_end, next_intersection);
+    }
+  }
+  if (new_end != range->End()) {
+    TRACE("Found new end for %d:%d at %d\n", range->TopLevel()->vreg(),
+          range->relative_id(), new_end.value());
+    LiveRange* tail = SplitRangeAt(range, new_end);
+    AddToUnhandled(tail);
+  }
+  SetLiveRangeAssignedRegister(range, reg);
+  return range;
+}
+
+void LinearScanAllocator::ReloadLiveRanges(
+    RangeWithRegisterSet const& to_be_live, LifetimePosition position) {
+  // Assumption: All ranges in {to_be_live} are currently spilled and there are
+  // no conflicting registers in the active ranges.
+  // The former is ensured by SpillNotLiveRanges, the latter is by construction
+  // of the to_be_live set.
+  for (RangeWithRegister range_with_register : to_be_live) {
+    TopLevelLiveRange* range = range_with_register.range;
+    int reg = range_with_register.expected_register;
+    LiveRange* to_resurrect = range->GetChildCovers(position);
+    if (to_resurrect == nullptr) {
+      // While the range was live until the end of the predecessor block, it is
+      // not live in this block. Either there is a lifetime gap or the range
+      // died.
+      TRACE("No candidate for %d at %d\n", range->vreg(), position.value());
+    } else {
+      // We might be resurrecting a range that we previously spilled until its
+      // next use. In such cases, we have to unsplit it before processing;
+      // otherwise we might get register changes from one range to the other
+      // in the middle of a block.
+      // If there is a gap between this range and the next, we can just keep
+      // it as a register change won't hurt.
+      MaybeUndoPreviousSplit(to_resurrect);
+      if (to_resurrect->Start() == position) {
+        // This range already starts at this block. It might have been spilled,
+        // so we have to unspill it. Otherwise, it is already in the unhandled
+        // queue waiting for processing.
+        DCHECK(!to_resurrect->HasRegisterAssigned());
+        TRACE("Reload %d:%d starting at %d itself\n", range->vreg(),
+              to_resurrect->relative_id(), position.value());
+        if (to_resurrect->spilled()) {
+          to_resurrect->Unspill();
+          to_resurrect->set_controlflow_hint(reg);
+          AddToUnhandled(to_resurrect);
+        } else {
+          // Assign the preassigned register if we know it. Otherwise, there is
+          // nothing to do, as the range is already in the unhandled queue.
+          if (reg != kUnassignedRegister) {
+            auto erased_cnt = unhandled_live_ranges().erase(to_resurrect);
+            DCHECK_EQ(erased_cnt, 1);
+            USE(erased_cnt);
+            // We know that there is no conflict with active ranges, so just
+            // assign the register to the range.
+            to_resurrect = AssignRegisterOnReload(to_resurrect, reg);
+            AddToActive(to_resurrect);
+          }
+        }
+      } else {
+        // This range was spilled before. We have to split it and schedule the
+        // second part for allocation (or assign the register if we know).
+        DCHECK(to_resurrect->spilled());
+        LiveRange* split = SplitRangeAt(to_resurrect, position);
+        TRACE("Reload %d:%d starting at %d as %d\n", range->vreg(),
+              to_resurrect->relative_id(), split->Start().value(),
+              split->relative_id());
+        DCHECK_NE(split, to_resurrect);
+        if (reg != kUnassignedRegister) {
+          // We know that there is no conflict with active ranges, so just
+          // assign the register to the range.
+          split = AssignRegisterOnReload(split, reg);
+          AddToActive(split);
+        } else {
+          // Let normal register assignment find a suitable register.
+          split->set_controlflow_hint(reg);
+          AddToUnhandled(split);
+        }
+      }
+    }
+  }
+}
+
+RpoNumber LinearScanAllocator::ChooseOneOfTwoPredecessorStates(
+    InstructionBlock* current_block, LifetimePosition boundary) {
+  using SmallRangeVector =
+      base::SmallVector<TopLevelLiveRange*,
+                        RegisterConfiguration::kMaxRegisters>;
+  // Pick the state that would generate the least spill/reloads.
+  // Compute vectors of ranges with imminent use for both sides.
+  // As GetChildCovers is cached, it is cheaper to repeatedly
+  // call it rather than compute a shared set first.
+  auto& left = data()->GetSpillState(current_block->predecessors()[0]);
+  auto& right = data()->GetSpillState(current_block->predecessors()[1]);
+  SmallRangeVector left_used;
+  for (const auto item : left) {
+    LiveRange* at_next_block = item->TopLevel()->GetChildCovers(boundary);
+    if (at_next_block != nullptr &&
+        at_next_block->NextUsePositionRegisterIsBeneficial(boundary) !=
+            nullptr) {
+      left_used.emplace_back(item->TopLevel());
+    }
+  }
+  SmallRangeVector right_used;
+  for (const auto item : right) {
+    LiveRange* at_next_block = item->TopLevel()->GetChildCovers(boundary);
+    if (at_next_block != nullptr &&
+        at_next_block->NextUsePositionRegisterIsBeneficial(boundary) !=
+            nullptr) {
+      right_used.emplace_back(item->TopLevel());
+    }
+  }
+  if (left_used.empty() && right_used.empty()) {
+    // There are no beneficial register uses. Look at any use at
+    // all. We do not account for all uses, like flowing into a phi.
+    // So we just look at ranges still being live.
+    TRACE("Looking at only uses\n");
+    for (const auto item : left) {
+      LiveRange* at_next_block = item->TopLevel()->GetChildCovers(boundary);
+      if (at_next_block != nullptr &&
+          at_next_block->NextUsePosition(boundary) != nullptr) {
+        left_used.emplace_back(item->TopLevel());
+      }
+    }
+    for (const auto item : right) {
+      LiveRange* at_next_block = item->TopLevel()->GetChildCovers(boundary);
+      if (at_next_block != nullptr &&
+          at_next_block->NextUsePosition(boundary) != nullptr) {
+        right_used.emplace_back(item->TopLevel());
+      }
+    }
+  }
+  // Now left_used and right_used contain those ranges that matter.
+  // Pick the side with more of them.
+  TRACE("Vote went %zu vs %zu\n", left_used.size(), right_used.size());
+  return left_used.size() > right_used.size()
+             ? current_block->predecessors()[0]
+             : current_block->predecessors()[1];
+}
+
+bool LinearScanAllocator::CheckConflict(MachineRepresentation rep, int reg,
+                                        RangeWithRegisterSet* to_be_live) {
+  for (RangeWithRegister range_with_reg : *to_be_live) {
+    if (data()->config()->AreAliases(range_with_reg.range->representation(),
+                                     range_with_reg.expected_register, rep,
+                                     reg)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void LinearScanAllocator::ComputeStateFromManyPredecessors(
+    InstructionBlock* current_block, RangeWithRegisterSet* to_be_live) {
+  struct Vote {
+    size_t count;
+    int used_registers[RegisterConfiguration::kMaxRegisters];
+  };
+  struct TopLevelLiveRangeComparator {
+    bool operator()(const TopLevelLiveRange* lhs,
+                    const TopLevelLiveRange* rhs) const {
+      return lhs->vreg() < rhs->vreg();
+    }
+  };
+  ZoneMap<TopLevelLiveRange*, Vote, TopLevelLiveRangeComparator> counts(
+      data()->allocation_zone());
+  int deferred_blocks = 0;
+  for (RpoNumber pred : current_block->predecessors()) {
+    if (!ConsiderBlockForControlFlow(current_block, pred)) {
+      // Back edges of a loop count as deferred here too.
+      deferred_blocks++;
+      continue;
+    }
+    const auto& pred_state = data()->GetSpillState(pred);
+    for (LiveRange* range : pred_state) {
+      // A backwards-working spill heuristic might have stolen the register
+      // after we stored this state, so the range might have lost its
+      // register. Ignore those.
+      if (!range->HasRegisterAssigned()) continue;
+      TopLevelLiveRange* toplevel = range->TopLevel();
+      auto previous = counts.find(toplevel);
+      if (previous == counts.end()) {
+        auto result = counts.emplace(std::make_pair(toplevel, Vote{1, {0}}));
+        CHECK(result.second);
+        result.first->second.used_registers[range->assigned_register()]++;
+      } else {
+        previous->second.count++;
+        previous->second.used_registers[range->assigned_register()]++;
+      }
+    }
+  }
+
+  // Choose the live ranges from the majority.
+  const size_t majority =
+      (current_block->PredecessorCount() + 2 - deferred_blocks) / 2;
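+  // This is a strict majority of the predecessors actually considered above,
+  // e.g. with 5 predecessors of which 1 was skipped as deferred, majority =
+  // (5 + 2 - 1) / 2 = 3 out of the remaining 4.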
+  bool taken_registers[RegisterConfiguration::kMaxRegisters] = {false};
+  auto assign_to_live = [this, counts, majority](
+                            std::function<bool(TopLevelLiveRange*)> filter,
+                            RangeWithRegisterSet* to_be_live,
+                            bool* taken_registers) {
+    bool check_aliasing = !kSimpleFPAliasing && check_fp_aliasing();
+    for (const auto& val : counts) {
+      if (!filter(val.first)) continue;
+      if (val.second.count >= majority) {
+        int register_max = 0;
+        int reg = kUnassignedRegister;
+        bool conflict = false;
+        int num_regs = num_registers();
+        int num_codes = num_allocatable_registers();
+        const int* codes = allocatable_register_codes();
+        MachineRepresentation rep = val.first->representation();
+        if (check_aliasing && (rep == MachineRepresentation::kFloat32 ||
+                               rep == MachineRepresentation::kSimd128))
+          GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
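+        // Pick the register this range occupied in the most predecessors; on
+        // a tie, switch away from a pick that conflicts with registers
+        // already claimed. If the final pick still conflicts, the range is
+        // added without a register.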
+        for (int idx = 0; idx < num_regs; idx++) {
+          int uses = val.second.used_registers[idx];
+          if (uses == 0) continue;
+          if (uses > register_max || (conflict && uses == register_max)) {
+            reg = idx;
+            register_max = uses;
+            conflict = check_aliasing ? CheckConflict(rep, reg, to_be_live)
+                                      : taken_registers[reg];
+          }
+        }
+        if (conflict) {
+          reg = kUnassignedRegister;
+        } else if (!check_aliasing) {
+          taken_registers[reg] = true;
+        }
+        to_be_live->emplace(val.first, reg);
+        TRACE("Reset %d as live due vote %zu in %s\n",
+              val.first->TopLevel()->vreg(), val.second.count,
+              RegisterName(reg));
+      }
+    }
+  };
+  // First round, process fixed registers, as these have precedence.
+  // There is only one fixed range per register, so we cannot have
+  // conflicts.
+  assign_to_live([](TopLevelLiveRange* r) { return r->IsFixed(); }, to_be_live,
+                 taken_registers);
+  // Second round, process the rest.
+  assign_to_live([](TopLevelLiveRange* r) { return !r->IsFixed(); }, to_be_live,
+                 taken_registers);
+}
+
+bool LinearScanAllocator::ConsiderBlockForControlFlow(
+    InstructionBlock* current_block, RpoNumber predecessor) {
+  // We ignore predecessors on back edges when looking for control flow
+  // effects, as those lie in the future of allocation and we have no data yet.
+  // Also, deferred blocks are ignored on deferred-to-non-deferred boundaries,
+  // as we do not want them to influence allocation of non-deferred code.
+  return (predecessor < current_block->rpo_number()) &&
+         (current_block->IsDeferred() ||
+          !code()->InstructionBlockAt(predecessor)->IsDeferred());
+}
+
+void LinearScanAllocator::UpdateDeferredFixedRanges(SpillMode spill_mode,
+                                                    InstructionBlock* block) {
+  if (spill_mode == SpillMode::kSpillDeferred) {
+    LifetimePosition max = LifetimePosition::InstructionFromInstructionIndex(
+        LastDeferredInstructionIndex(block));
+    // Adds range back to inactive, resolving resulting conflicts.
+    auto add_to_inactive = [this, max](LiveRange* range) {
+      AddToInactive(range);
+      // Splits other if it conflicts with range. Other is placed in unhandled
+      // for later reallocation.
+      auto split_conflicting = [this, max](LiveRange* range, LiveRange* other,
+                                           std::function<void(LiveRange*)>
+                                               update_caches) {
+        if (other->TopLevel()->IsFixed()) return;
+        int reg = range->assigned_register();
+        if (kSimpleFPAliasing || !check_fp_aliasing()) {
+          if (other->assigned_register() != reg) {
+            return;
+          }
+        } else {
+          if (!data()->config()->AreAliases(range->representation(), reg,
+                                            other->representation(),
+                                            other->assigned_register())) {
+            return;
+          }
+        }
+        // The inactive range might conflict, so check whether we need to
+        // split and spill. We can look for the first intersection, as there
+        // cannot be any intersections in the past, as those would have been a
+        // conflict then.
+        LifetimePosition next_start = range->FirstIntersection(other);
+        if (!next_start.IsValid() || (next_start > max)) {
+          // There is no conflict or the conflict is outside of the current
+          // stretch of deferred code. In either case we can ignore the
+          // inactive range.
+          return;
+        }
+        // They overlap. So we need to split the conflicting range and
+        // reschedule it for allocation.
+        TRACE("Resolving conflict of %d with deferred fixed for register %s\n",
+              other->TopLevel()->vreg(),
+              RegisterName(other->assigned_register()));
+        LiveRange* split_off =
+            other->SplitAt(next_start, data()->allocation_zone());
+        // Try to get the same register after the deferred block.
+        split_off->set_controlflow_hint(other->assigned_register());
+        DCHECK_NE(split_off, other);
+        AddToUnhandled(split_off);
+        update_caches(other);
+      };
+      // Now check for conflicts in active and inactive ranges. We might have
+      // conflicts in inactive, as we only do this check on deferred/
+      // non-deferred changes, not on every block boundary, while inactive
+      // live ranges might become live on any block boundary.
+      for (auto active : active_live_ranges()) {
+        split_conflicting(range, active, [this](LiveRange* updated) {
+          next_active_ranges_change_ =
+              std::min(updated->End(), next_active_ranges_change_);
+        });
+      }
+      for (int reg = 0; reg < num_registers(); ++reg) {
+        if ((kSimpleFPAliasing || !check_fp_aliasing()) &&
+            reg != range->assigned_register()) {
+          continue;
+        }
+        for (auto inactive : inactive_live_ranges(reg)) {
+          split_conflicting(range, inactive, [this](LiveRange* updated) {
+            next_inactive_ranges_change_ =
+                std::min(updated->End(), next_inactive_ranges_change_);
+          });
+        }
+      }
+    };
+    if (mode() == RegisterKind::kGeneral) {
+      for (TopLevelLiveRange* current : data()->fixed_live_ranges()) {
+        if (current != nullptr) {
+          if (current->IsDeferredFixed()) {
+            add_to_inactive(current);
+          }
+        }
+      }
+    } else {
+      for (TopLevelLiveRange* current : data()->fixed_double_live_ranges()) {
+        if (current != nullptr) {
+          if (current->IsDeferredFixed()) {
+            add_to_inactive(current);
+          }
+        }
+      }
+      if (!kSimpleFPAliasing && check_fp_aliasing()) {
+        for (TopLevelLiveRange* current : data()->fixed_float_live_ranges()) {
+          if (current != nullptr) {
+            if (current->IsDeferredFixed()) {
+              add_to_inactive(current);
+            }
+          }
+        }
+        for (TopLevelLiveRange* current : data()->fixed_simd128_live_ranges()) {
+          if (current != nullptr) {
+            if (current->IsDeferredFixed()) {
+              add_to_inactive(current);
+            }
+          }
+        }
+      }
+    }
+  } else {
+    // Remove all ranges.
+    for (int reg = 0; reg < num_registers(); ++reg) {
+      for (auto it = inactive_live_ranges(reg).begin();
+           it != inactive_live_ranges(reg).end();) {
+        if ((*it)->TopLevel()->IsDeferredFixed()) {
+          it = inactive_live_ranges(reg).erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+  }
+}
+
+bool LinearScanAllocator::BlockIsDeferredOrImmediatePredecessorIsNotDeferred(
+    const InstructionBlock* block) {
+  if (block->IsDeferred()) return true;
+  if (block->PredecessorCount() == 0) return true;
+  bool pred_is_deferred = false;
+  for (auto pred : block->predecessors()) {
+    if (pred.IsNext(block->rpo_number())) {
+      pred_is_deferred = code()->InstructionBlockAt(pred)->IsDeferred();
+      break;
+    }
+  }
+  return !pred_is_deferred;
+}
+
+bool LinearScanAllocator::HasNonDeferredPredecessor(InstructionBlock* block) {
+  for (auto pred : block->predecessors()) {
+    InstructionBlock* pred_block = code()->InstructionBlockAt(pred);
+    if (!pred_block->IsDeferred()) return true;
+  }
+  return false;
+}
+
+void LinearScanAllocator::AllocateRegisters() {
+  DCHECK(unhandled_live_ranges().empty());
+  DCHECK(active_live_ranges().empty());
+  for (int reg = 0; reg < num_registers(); ++reg) {
+    DCHECK(inactive_live_ranges(reg).empty());
+  }
+
+  SplitAndSpillRangesDefinedByMemoryOperand();
+  data()->ResetSpillState();
+
+  if (data()->is_trace_alloc()) {
+    PrintRangeOverview(std::cout);
+  }
+
+  const size_t live_ranges_size = data()->live_ranges().size();
+  for (TopLevelLiveRange* range : data()->live_ranges()) {
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (!CanProcessRange(range)) continue;
+    for (LiveRange* to_add = range; to_add != nullptr;
+         to_add = to_add->next()) {
+      if (!to_add->spilled()) {
+        AddToUnhandled(to_add);
+      }
+    }
+  }
+
+  if (mode() == RegisterKind::kGeneral) {
+    for (TopLevelLiveRange* current : data()->fixed_live_ranges()) {
+      if (current != nullptr) {
+        if (current->IsDeferredFixed()) continue;
+        AddToInactive(current);
+      }
+    }
+  } else {
+    for (TopLevelLiveRange* current : data()->fixed_double_live_ranges()) {
+      if (current != nullptr) {
+        if (current->IsDeferredFixed()) continue;
+        AddToInactive(current);
+      }
+    }
+    if (!kSimpleFPAliasing && check_fp_aliasing()) {
+      for (TopLevelLiveRange* current : data()->fixed_float_live_ranges()) {
+        if (current != nullptr) {
+          if (current->IsDeferredFixed()) continue;
+          AddToInactive(current);
+        }
+      }
+      for (TopLevelLiveRange* current : data()->fixed_simd128_live_ranges()) {
+        if (current != nullptr) {
+          if (current->IsDeferredFixed()) continue;
+          AddToInactive(current);
+        }
+      }
+    }
+  }
+
+  RpoNumber last_block = RpoNumber::FromInt(0);
+  RpoNumber max_blocks =
+      RpoNumber::FromInt(code()->InstructionBlockCount() - 1);
+  LifetimePosition next_block_boundary =
+      LifetimePosition::InstructionFromInstructionIndex(
+          data()
+              ->code()
+              ->InstructionBlockAt(last_block)
+              ->last_instruction_index())
+          .NextFullStart();
+  SpillMode spill_mode = SpillMode::kSpillAtDefinition;
+
+  // Process all ranges. We also need to ensure that we have seen all block
+  // boundaries. Linear scan might have assigned and spilled ranges before
+  // reaching the last block and hence we would ignore control flow effects for
+  // those. Not only does this produce a potentially bad assignment, it also
+  // breaks with the invariant that we undo spills that happen in deferred code
+  // when crossing a deferred/non-deferred boundary.
+  while (!unhandled_live_ranges().empty() || last_block < max_blocks) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    LiveRange* current = unhandled_live_ranges().empty()
+                             ? nullptr
+                             : *unhandled_live_ranges().begin();
+    LifetimePosition position =
+        current ? current->Start() : next_block_boundary;
+#ifdef DEBUG
+    allocation_finger_ = position;
+#endif
+    // Check whether we just moved across a block boundary. This will trigger
+    // for the first range that is past the current boundary.
+    if (position >= next_block_boundary) {
+      TRACE("Processing boundary at %d leaving %d\n",
+            next_block_boundary.value(), last_block.ToInt());
+
+      // Forward state to before block boundary
+      LifetimePosition end_of_block = next_block_boundary.PrevStart().End();
+      ForwardStateTo(end_of_block);
+
+      // Remember this state.
+      InstructionBlock* current_block = data()->code()->GetInstructionBlock(
+          next_block_boundary.ToInstructionIndex());
+
+      // Store current spill state (as the state at end of block). For
+      // simplicity, we store the active ranges, i.e., the live ranges that
+      // are not spilled.
+      data()->RememberSpillState(last_block, active_live_ranges());
+
+      // Only reset the state if this was not a direct fallthrough. Otherwise
+      // control flow resolution will get confused (it does not expect changes
+      // across fallthrough edges).
+      bool fallthrough =
+          (current_block->PredecessorCount() == 1) &&
+          current_block->predecessors()[0].IsNext(current_block->rpo_number());
+
+      // When crossing a deferred/non-deferred boundary, we have to load or
+      // remove the deferred fixed ranges from inactive.
+      if ((spill_mode == SpillMode::kSpillDeferred) !=
+          current_block->IsDeferred()) {
+        // Update spill mode.
+        spill_mode = current_block->IsDeferred()
+                         ? SpillMode::kSpillDeferred
+                         : SpillMode::kSpillAtDefinition;
+
+        ForwardStateTo(next_block_boundary);
+
+#ifdef DEBUG
+        // Allow allocation at current position.
+        allocation_finger_ = next_block_boundary;
+#endif
+        UpdateDeferredFixedRanges(spill_mode, current_block);
+      }
+
+      // Allocation relies on the fact that each non-deferred block has at
+      // least one non-deferred predecessor. Check this invariant here.
+      DCHECK_IMPLIES(!current_block->IsDeferred(),
+                     HasNonDeferredPredecessor(current_block));
+
+      if (!fallthrough) {
+#ifdef DEBUG
+        // Allow allocation at current position.
+        allocation_finger_ = next_block_boundary;
+#endif
+
+        // We are currently at next_block_boundary - 1. Move the state to the
+        // actual block boundary position. In particular, we have to
+        // reactivate inactive ranges so that they get rescheduled for
+        // allocation if they were not live at the predecessors.
+        ForwardStateTo(next_block_boundary);
+
+        RangeWithRegisterSet to_be_live(data()->allocation_zone());
+
+        // If we end up deciding to use the state of the immediate
+        // predecessor, it is better not to perform a change. It would lead to
+        // the same outcome anyway.
+        // This may never happen on boundaries between deferred and
+        // non-deferred code, as we rely on explicit respill to ensure we
+        // spill at definition.
+        bool no_change_required = false;
+
+        auto pick_state_from = [this, current_block](
+                                   RpoNumber pred,
+                                   RangeWithRegisterSet* to_be_live) -> bool {
+          TRACE("Using information from B%d\n", pred.ToInt());
+          // If this is a fall-through that is not across a deferred
+          // boundary, there is nothing to do.
+          bool is_noop = pred.IsNext(current_block->rpo_number());
+          if (!is_noop) {
+            auto& spill_state = data()->GetSpillState(pred);
+            TRACE("Not a fallthrough. Adding %zu elements...\n",
+                  spill_state.size());
+            LifetimePosition pred_end =
+                LifetimePosition::GapFromInstructionIndex(
+                    this->code()->InstructionBlockAt(pred)->code_end());
+            for (const auto range : spill_state) {
+              // Filter out ranges that were split or had their register
+              // stolen by backwards working spill heuristics. These have
+              // been spilled after the fact, so ignore them.
+              if (range->End() < pred_end || !range->HasRegisterAssigned())
+                continue;
+              to_be_live->emplace(range);
+            }
+          }
+          return is_noop;
+        };
+
+        // Multiple cases here:
+        // 1) We have a single predecessor => this is a control flow split, so
+        //     just restore the predecessor state.
+        // 2) We have two predecessors => this is a conditional, so break ties
+        //     based on forward uses, trying to benefit the same branch if in
+        //     doubt (make one path fast).
+        // 3) We have many predecessors => this is a switch. Compute union
+        //     based on majority, break ties by looking forward.
+        if (current_block->PredecessorCount() == 1) {
+          TRACE("Single predecessor for B%d\n",
+                current_block->rpo_number().ToInt());
+          no_change_required =
+              pick_state_from(current_block->predecessors()[0], &to_be_live);
+        } else if (current_block->PredecessorCount() == 2) {
+          TRACE("Two predecessors for B%d\n",
+                current_block->rpo_number().ToInt());
+          // If one of the branches does not contribute any information,
+          // e.g. because it is deferred or a back edge, we can take a shortcut
+          // here right away.
+          RpoNumber chosen_predecessor = RpoNumber::Invalid();
+          if (!ConsiderBlockForControlFlow(current_block,
+                                           current_block->predecessors()[0])) {
+            chosen_predecessor = current_block->predecessors()[1];
+          } else if (!ConsiderBlockForControlFlow(
+                         current_block, current_block->predecessors()[1])) {
+            chosen_predecessor = current_block->predecessors()[0];
+          } else {
+            chosen_predecessor = ChooseOneOfTwoPredecessorStates(
+                current_block, next_block_boundary);
+          }
+          no_change_required = pick_state_from(chosen_predecessor, &to_be_live);
+
+        } else {
+          // Merge at the end of, e.g., a switch.
+          ComputeStateFromManyPredecessors(current_block, &to_be_live);
+        }
+
+        if (!no_change_required) {
+          SpillNotLiveRanges(&to_be_live, next_block_boundary, spill_mode);
+          ReloadLiveRanges(to_be_live, next_block_boundary);
+        }
+      }
+      // Update block information
+      last_block = current_block->rpo_number();
+      next_block_boundary = LifetimePosition::InstructionFromInstructionIndex(
+                                current_block->last_instruction_index())
+                                .NextFullStart();
+
+      // We might have created new unhandled live ranges, so cycle around the
+      // loop to make sure we pick the topmost range in unhandled for
+      // processing.
+      continue;
+    }
+
+    DCHECK_NOT_NULL(current);
+
+    TRACE("Processing interval %d:%d start=%d\n", current->TopLevel()->vreg(),
+          current->relative_id(), position.value());
+
+    // Now we can erase current, as we are sure to process it.
+    unhandled_live_ranges().erase(unhandled_live_ranges().begin());
+
+    if (current->IsTopLevel() && TryReuseSpillForPhi(current->TopLevel()))
+      continue;
+
+    ForwardStateTo(position);
+
+    DCHECK(!current->HasRegisterAssigned() && !current->spilled());
+
+    ProcessCurrentRange(current, spill_mode);
+  }
+
+  if (data()->is_trace_alloc()) {
+    PrintRangeOverview(std::cout);
+  }
+}
+
+void LinearScanAllocator::SetLiveRangeAssignedRegister(LiveRange* range,
+                                                       int reg) {
+  data()->MarkAllocated(range->representation(), reg);
+  range->set_assigned_register(reg);
+  range->SetUseHints(reg);
+  range->UpdateBundleRegister(reg);
+  if (range->IsTopLevel() && range->TopLevel()->is_phi()) {
+    data()->GetPhiMapValueFor(range->TopLevel())->set_assigned_register(reg);
+  }
+}
+
+void LinearScanAllocator::AddToActive(LiveRange* range) {
+  TRACE("Add live range %d:%d in %s to active\n", range->TopLevel()->vreg(),
+        range->relative_id(), RegisterName(range->assigned_register()));
+  active_live_ranges().push_back(range);
+  next_active_ranges_change_ =
+      std::min(next_active_ranges_change_, range->NextEndAfter(range->Start()));
+}
+
+void LinearScanAllocator::AddToInactive(LiveRange* range) {
+  TRACE("Add live range %d:%d to inactive\n", range->TopLevel()->vreg(),
+        range->relative_id());
+  next_inactive_ranges_change_ = std::min(
+      next_inactive_ranges_change_, range->NextStartAfter(range->Start()));
+  DCHECK(range->HasRegisterAssigned());
+  inactive_live_ranges(range->assigned_register()).insert(range);
+}
+
+void LinearScanAllocator::AddToUnhandled(LiveRange* range) {
+  if (range == nullptr || range->IsEmpty()) return;
+  DCHECK(!range->HasRegisterAssigned() && !range->spilled());
+  DCHECK(allocation_finger_ <= range->Start());
+
+  TRACE("Add live range %d:%d to unhandled\n", range->TopLevel()->vreg(),
+        range->relative_id());
+  unhandled_live_ranges().insert(range);
+}
+
+ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToHandled(
+    const ZoneVector<LiveRange*>::iterator it) {
+  TRACE("Moving live range %d:%d from active to handled\n",
+        (*it)->TopLevel()->vreg(), (*it)->relative_id());
+  return active_live_ranges().erase(it);
+}
+
+ZoneVector<LiveRange*>::iterator LinearScanAllocator::ActiveToInactive(
+    const ZoneVector<LiveRange*>::iterator it, LifetimePosition position) {
+  LiveRange* range = *it;
+  TRACE("Moving live range %d:%d from active to inactive\n",
+        (range)->TopLevel()->vreg(), range->relative_id());
+  LifetimePosition next_active = range->NextStartAfter(position);
+  next_inactive_ranges_change_ =
+      std::min(next_inactive_ranges_change_, next_active);
+  DCHECK(range->HasRegisterAssigned());
+  inactive_live_ranges(range->assigned_register()).insert(range);
+  return active_live_ranges().erase(it);
+}
+
+LinearScanAllocator::InactiveLiveRangeQueue::iterator
+LinearScanAllocator::InactiveToHandled(InactiveLiveRangeQueue::iterator it) {
+  LiveRange* range = *it;
+  TRACE("Moving live range %d:%d from inactive to handled\n",
+        range->TopLevel()->vreg(), range->relative_id());
+  int reg = range->assigned_register();
+  return inactive_live_ranges(reg).erase(it);
+}
+
+LinearScanAllocator::InactiveLiveRangeQueue::iterator
+LinearScanAllocator::InactiveToActive(InactiveLiveRangeQueue::iterator it,
+                                      LifetimePosition position) {
+  LiveRange* range = *it;
+  active_live_ranges().push_back(range);
+  TRACE("Moving live range %d:%d from inactive to active\n",
+        range->TopLevel()->vreg(), range->relative_id());
+  next_active_ranges_change_ =
+      std::min(next_active_ranges_change_, range->NextEndAfter(position));
+  int reg = range->assigned_register();
+  return inactive_live_ranges(reg).erase(it);
+}
+
+void LinearScanAllocator::ForwardStateTo(LifetimePosition position) {
+  if (position >= next_active_ranges_change_) {
+    next_active_ranges_change_ = LifetimePosition::MaxPosition();
+    for (auto it = active_live_ranges().begin();
+         it != active_live_ranges().end();) {
+      LiveRange* cur_active = *it;
+      if (cur_active->End() <= position) {
+        it = ActiveToHandled(it);
+      } else if (!cur_active->Covers(position)) {
+        it = ActiveToInactive(it, position);
+      } else {
+        next_active_ranges_change_ = std::min(
+            next_active_ranges_change_, cur_active->NextEndAfter(position));
+        ++it;
+      }
+    }
+  }
+
+  if (position >= next_inactive_ranges_change_) {
+    next_inactive_ranges_change_ = LifetimePosition::MaxPosition();
+    for (int reg = 0; reg < num_registers(); ++reg) {
+      ZoneVector<LiveRange*> reorder(data()->allocation_zone());
+      for (auto it = inactive_live_ranges(reg).begin();
+           it != inactive_live_ranges(reg).end();) {
+        LiveRange* cur_inactive = *it;
+        if (cur_inactive->End() <= position) {
+          it = InactiveToHandled(it);
+        } else if (cur_inactive->Covers(position)) {
+          it = InactiveToActive(it, position);
+        } else {
+          next_inactive_ranges_change_ =
+              std::min(next_inactive_ranges_change_,
+                       cur_inactive->NextStartAfter(position));
+          it = inactive_live_ranges(reg).erase(it);
+          reorder.push_back(cur_inactive);
+        }
+      }
+      for (LiveRange* range : reorder) {
+        inactive_live_ranges(reg).insert(range);
+      }
+    }
+  }
+}
+
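+// Returns the index of the last instruction in the run of consecutive
+// deferred blocks that starts at `start`.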
+int LinearScanAllocator::LastDeferredInstructionIndex(InstructionBlock* start) {
+  DCHECK(start->IsDeferred());
+  RpoNumber last_block =
+      RpoNumber::FromInt(code()->InstructionBlockCount() - 1);
+  while ((start->rpo_number() < last_block)) {
+    InstructionBlock* next =
+        code()->InstructionBlockAt(start->rpo_number().Next());
+    if (!next->IsDeferred()) break;
+    start = next;
+  }
+  return start->last_instruction_index();
+}
+
+void LinearScanAllocator::GetFPRegisterSet(MachineRepresentation rep,
+                                           int* num_regs, int* num_codes,
+                                           const int** codes) const {
+  DCHECK(!kSimpleFPAliasing);
+  if (rep == MachineRepresentation::kFloat32) {
+    *num_regs = data()->config()->num_float_registers();
+    *num_codes = data()->config()->num_allocatable_float_registers();
+    *codes = data()->config()->allocatable_float_codes();
+  } else if (rep == MachineRepresentation::kSimd128) {
+    *num_regs = data()->config()->num_simd128_registers();
+    *num_codes = data()->config()->num_allocatable_simd128_registers();
+    *codes = data()->config()->allocatable_simd128_codes();
+  } else {
+    UNREACHABLE();
+  }
+}
+
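+// For every allocatable register, computes in `positions` the lifetime
+// position up to which that register remains free with respect to the
+// currently active and inactive live ranges.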
+void LinearScanAllocator::FindFreeRegistersForRange(
+    LiveRange* range, Vector<LifetimePosition> positions) {
+  int num_regs = num_registers();
+  int num_codes = num_allocatable_registers();
+  const int* codes = allocatable_register_codes();
+  MachineRepresentation rep = range->representation();
+  if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
+                             rep == MachineRepresentation::kSimd128))
+    GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
+  DCHECK_GE(positions.length(), num_regs);
+
+  for (int i = 0; i < num_regs; ++i) {
+    positions[i] = LifetimePosition::MaxPosition();
+  }
+
+  for (LiveRange* cur_active : active_live_ranges()) {
+    int cur_reg = cur_active->assigned_register();
+    if (kSimpleFPAliasing || !check_fp_aliasing()) {
+      positions[cur_reg] = LifetimePosition::GapFromInstructionIndex(0);
+      TRACE("Register %s is free until pos %d (1) due to %d\n",
+            RegisterName(cur_reg),
+            LifetimePosition::GapFromInstructionIndex(0).value(),
+            cur_active->TopLevel()->vreg());
+    } else {
+      int alias_base_index = -1;
+      int aliases = data()->config()->GetAliases(
+          cur_active->representation(), cur_reg, rep, &alias_base_index);
+      DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+      while (aliases--) {
+        int aliased_reg = alias_base_index + aliases;
+        positions[aliased_reg] = LifetimePosition::GapFromInstructionIndex(0);
+      }
+    }
+  }
+
+  for (int cur_reg = 0; cur_reg < num_regs; ++cur_reg) {
+    for (LiveRange* cur_inactive : inactive_live_ranges(cur_reg)) {
+      DCHECK_GT(cur_inactive->End(), range->Start());
+      CHECK_EQ(cur_inactive->assigned_register(), cur_reg);
+      // No need to carry out intersections when this register won't be
+      // interesting to this range anyway.
+      // TODO(mtrofin): extend to aliased ranges, too.
+      if ((kSimpleFPAliasing || !check_fp_aliasing()) &&
+          positions[cur_reg] <= cur_inactive->NextStart()) {
+        break;
+      }
+      LifetimePosition next_intersection =
+          cur_inactive->FirstIntersection(range);
+      if (!next_intersection.IsValid()) continue;
+      if (kSimpleFPAliasing || !check_fp_aliasing()) {
+        positions[cur_reg] = std::min(positions[cur_reg], next_intersection);
+        TRACE("Register %s is free until pos %d (2)\n", RegisterName(cur_reg),
+              positions[cur_reg].value());
+      } else {
+        int alias_base_index = -1;
+        int aliases = data()->config()->GetAliases(
+            cur_inactive->representation(), cur_reg, rep, &alias_base_index);
+        DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+        while (aliases--) {
+          int aliased_reg = alias_base_index + aliases;
+          positions[aliased_reg] =
+              std::min(positions[aliased_reg], next_intersection);
+        }
+      }
+    }
+  }
+}
+
+// High-level register allocation summary:
+//
+// We attempt to first allocate the preferred (hint) register. If that is not
+// possible, we find a register that's free, and allocate that. If that's not
+// possible, we search for a register to steal from a range that was allocated.
+// The goal is to optimize for throughput by avoiding register-to-memory moves,
+// which are expensive.
+void LinearScanAllocator::ProcessCurrentRange(LiveRange* current,
+                                              SpillMode spill_mode) {
+  EmbeddedVector<LifetimePosition, RegisterConfiguration::kMaxRegisters>
+      free_until_pos;
+  FindFreeRegistersForRange(current, free_until_pos);
+  if (!TryAllocatePreferredReg(current, free_until_pos)) {
+    if (!TryAllocateFreeReg(current, free_until_pos)) {
+      AllocateBlockedReg(current, spill_mode);
+    }
+  }
+  if (current->HasRegisterAssigned()) {
+    AddToActive(current);
+  }
+}
+
+bool LinearScanAllocator::TryAllocatePreferredReg(
+    LiveRange* current, const Vector<LifetimePosition>& free_until_pos) {
+  int hint_register;
+  if (current->RegisterFromControlFlow(&hint_register) ||
+      current->FirstHintPosition(&hint_register) != nullptr ||
+      current->RegisterFromBundle(&hint_register)) {
+    TRACE(
+        "Found reg hint %s (free until [%d) for live range %d:%d (end %d[).\n",
+        RegisterName(hint_register), free_until_pos[hint_register].value(),
+        current->TopLevel()->vreg(), current->relative_id(),
+        current->End().value());
+
+    // The desired register is free until the end of the current live range.
+    if (free_until_pos[hint_register] >= current->End()) {
+      TRACE("Assigning preferred reg %s to live range %d:%d\n",
+            RegisterName(hint_register), current->TopLevel()->vreg(),
+            current->relative_id());
+      SetLiveRangeAssignedRegister(current, hint_register);
+      return true;
+    }
+  }
+  return false;
+}
+
+int LinearScanAllocator::PickRegisterThatIsAvailableLongest(
+    LiveRange* current, int hint_reg,
+    const Vector<LifetimePosition>& free_until_pos) {
+  int num_regs = 0;  // used only for the call to GetFPRegisterSet.
+  int num_codes = num_allocatable_registers();
+  const int* codes = allocatable_register_codes();
+  MachineRepresentation rep = current->representation();
+  if (!kSimpleFPAliasing && (rep == MachineRepresentation::kFloat32 ||
+                             rep == MachineRepresentation::kSimd128)) {
+    GetFPRegisterSet(rep, &num_regs, &num_codes, &codes);
+  }
+
+  DCHECK_GE(free_until_pos.length(), num_codes);
+
+  // Find the register which stays free for the longest time. Check for
+  // the hinted register first, as we might want to use that one. Only
+  // count full instructions for free ranges, as an instruction's internal
+  // positions do not help but might shadow a hinted register. This is
+  // typically the case for function calls, where all registers are
+  // clobbered after the call except for the argument registers, which are
+  // set before the call. Hence, the argument registers always get ignored,
+  // as their available time is shorter.
+  int reg = (hint_reg == kUnassignedRegister) ? codes[0] : hint_reg;
+  int current_free = free_until_pos[reg].ToInstructionIndex();
+  for (int i = 0; i < num_codes; ++i) {
+    int code = codes[i];
+    // Prefer registers that have no fixed uses to avoid blocking later hints.
+    // We use the first register that has no fixed uses to ensure we use
+    // byte addressable registers in ia32 first.
+    int candidate_free = free_until_pos[code].ToInstructionIndex();
+    TRACE("Register %s in free until %d\n", RegisterName(code), candidate_free);
+    if ((candidate_free > current_free) ||
+        (candidate_free == current_free && reg != hint_reg &&
+         (data()->HasFixedUse(current->representation(), reg) &&
+          !data()->HasFixedUse(current->representation(), code)))) {
+      reg = code;
+      current_free = candidate_free;
+    }
+  }
+
+  return reg;
+}
+
+bool LinearScanAllocator::TryAllocateFreeReg(
+    LiveRange* current, const Vector<LifetimePosition>& free_until_pos) {
+  // Compute register hint, if such exists.
+  int hint_reg = kUnassignedRegister;
+  current->RegisterFromControlFlow(&hint_reg) ||
+      current->FirstHintPosition(&hint_reg) != nullptr ||
+      current->RegisterFromBundle(&hint_reg);
+
+  int reg =
+      PickRegisterThatIsAvailableLongest(current, hint_reg, free_until_pos);
+
+  LifetimePosition pos = free_until_pos[reg];
+
+  if (pos <= current->Start()) {
+    // All registers are blocked.
+    return false;
+  }
+
+  if (pos < current->End()) {
+    // Register reg is available at the range start but becomes blocked before
+    // the range end. Split current at position where it becomes blocked.
+    LiveRange* tail = SplitRangeAt(current, pos);
+    AddToUnhandled(tail);
+
+    // Try to allocate preferred register once more.
+    if (TryAllocatePreferredReg(current, free_until_pos)) return true;
+  }
+
+  // Register reg is available at the range start and is free until the range
+  // end.
+  DCHECK(pos >= current->End());
+  TRACE("Assigning free reg %s to live range %d:%d\n", RegisterName(reg),
+        current->TopLevel()->vreg(), current->relative_id());
+  SetLiveRangeAssignedRegister(current, reg);
+
+  return true;
+}
+
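+// Called when no register is free for the whole of `current`: either spill
+// `current` (possibly only up to its next use that requires a register), or
+// pick the register that stays usable the longest and evict the
+// intersecting ranges currently assigned to it.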
+void LinearScanAllocator::AllocateBlockedReg(LiveRange* current,
+                                             SpillMode spill_mode) {
+  UsePosition* register_use = current->NextRegisterPosition(current->Start());
+  if (register_use == nullptr) {
+    // There is no use in the current live range that requires a register.
+    // We can just spill it.
+    LiveRange* begin_spill = nullptr;
+    LifetimePosition spill_pos = FindOptimalSpillingPos(
+        current, current->Start(), spill_mode, &begin_spill);
+    MaybeSpillPreviousRanges(begin_spill, spill_pos, current);
+    Spill(current, spill_mode);
+    return;
+  }
+
+  MachineRepresentation rep = current->representation();
+
+  // use_pos keeps track of positions a register/alias is used at.
+  // block_pos keeps track of positions where a register/alias is blocked
+  // from.
+  EmbeddedVector<LifetimePosition, RegisterConfiguration::kMaxRegisters>
+      use_pos(LifetimePosition::MaxPosition());
+  EmbeddedVector<LifetimePosition, RegisterConfiguration::kMaxRegisters>
+      block_pos(LifetimePosition::MaxPosition());
+
+  for (LiveRange* range : active_live_ranges()) {
+    int cur_reg = range->assigned_register();
+    bool is_fixed_or_cant_spill =
+        range->TopLevel()->IsFixed() || !range->CanBeSpilled(current->Start());
+    if (kSimpleFPAliasing || !check_fp_aliasing()) {
+      if (is_fixed_or_cant_spill) {
+        block_pos[cur_reg] = use_pos[cur_reg] =
+            LifetimePosition::GapFromInstructionIndex(0);
+      } else {
+        DCHECK_NE(LifetimePosition::GapFromInstructionIndex(0),
+                  block_pos[cur_reg]);
+        use_pos[cur_reg] =
+            range->NextLifetimePositionRegisterIsBeneficial(current->Start());
+      }
+    } else {
+      int alias_base_index = -1;
+      int aliases = data()->config()->GetAliases(
+          range->representation(), cur_reg, rep, &alias_base_index);
+      DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+      while (aliases--) {
+        int aliased_reg = alias_base_index + aliases;
+        if (is_fixed_or_cant_spill) {
+          block_pos[aliased_reg] = use_pos[aliased_reg] =
+              LifetimePosition::GapFromInstructionIndex(0);
+        } else {
+          use_pos[aliased_reg] =
+              std::min(block_pos[aliased_reg],
+                       range->NextLifetimePositionRegisterIsBeneficial(
+                           current->Start()));
+        }
+      }
+    }
+  }
+
+  for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
+    for (LiveRange* range : inactive_live_ranges(cur_reg)) {
+      DCHECK(range->End() > current->Start());
+      DCHECK_EQ(range->assigned_register(), cur_reg);
+      bool is_fixed = range->TopLevel()->IsFixed();
+
+      // Don't perform costly intersections if they are guaranteed to not update
+      // block_pos or use_pos.
+      // TODO(mtrofin): extend to aliased ranges, too.
+      if ((kSimpleFPAliasing || !check_fp_aliasing())) {
+        DCHECK_LE(use_pos[cur_reg], block_pos[cur_reg]);
+        if (block_pos[cur_reg] <= range->NextStart()) break;
+        if (!is_fixed && use_pos[cur_reg] <= range->NextStart()) continue;
+      }
+
+      LifetimePosition next_intersection = range->FirstIntersection(current);
+      if (!next_intersection.IsValid()) continue;
+
+      if (kSimpleFPAliasing || !check_fp_aliasing()) {
+        if (is_fixed) {
+          block_pos[cur_reg] = std::min(block_pos[cur_reg], next_intersection);
+          use_pos[cur_reg] = std::min(block_pos[cur_reg], use_pos[cur_reg]);
+        } else {
+          use_pos[cur_reg] = std::min(use_pos[cur_reg], next_intersection);
+        }
+      } else {
+        int alias_base_index = -1;
+        int aliases = data()->config()->GetAliases(
+            range->representation(), cur_reg, rep, &alias_base_index);
+        DCHECK(aliases > 0 || (aliases == 0 && alias_base_index == -1));
+        while (aliases--) {
+          int aliased_reg = alias_base_index + aliases;
+          if (is_fixed) {
+            block_pos[aliased_reg] =
+                std::min(block_pos[aliased_reg], next_intersection);
+            use_pos[aliased_reg] =
+                std::min(block_pos[aliased_reg], use_pos[aliased_reg]);
+          } else {
+            use_pos[aliased_reg] =
+                std::min(use_pos[aliased_reg], next_intersection);
+          }
+        }
+      }
+    }
+  }
+
+  // Compute register hint if it exists.
+  int hint_reg = kUnassignedRegister;
+  current->RegisterFromControlFlow(&hint_reg) ||
+      register_use->HintRegister(&hint_reg) ||
+      current->RegisterFromBundle(&hint_reg);
+  int reg = PickRegisterThatIsAvailableLongest(current, hint_reg, use_pos);
+
+  if (use_pos[reg] < register_use->pos()) {
+    // If there is a gap position before the next register use, we can
+    // spill until there. The gap position will then fit the fill move.
+    if (LifetimePosition::ExistsGapPositionBetween(current->Start(),
+                                                   register_use->pos())) {
+      SpillBetween(current, current->Start(), register_use->pos(), spill_mode);
+      return;
+    }
+  }
+
+  // When in deferred spilling mode, avoid stealing registers beyond the
+  // current deferred region. This is required because we might otherwise
+  // spill an inactive range whose start lies outside deferred code, and such
+  // a range would not be reloaded.
+  LifetimePosition new_end = current->End();
+  if (spill_mode == SpillMode::kSpillDeferred) {
+    InstructionBlock* deferred_block =
+        code()->GetInstructionBlock(current->Start().ToInstructionIndex());
+    new_end =
+        std::min(new_end, LifetimePosition::GapFromInstructionIndex(
+                              LastDeferredInstructionIndex(deferred_block)));
+  }
+
+  // We couldn't spill until the next register use. Split before the register
+  // is blocked, if applicable.
+  if (block_pos[reg] < new_end) {
+    // Register becomes blocked before the current range end. Split before that
+    // position.
+    new_end = block_pos[reg].Start();
+  }
+
+  // If there is no register available at all, we can only spill this range.
+  // Happens for instance on entry to deferred code where registers might
+  // become blocked yet we aim to reload ranges.
+  if (new_end == current->Start()) {
+    SpillBetween(current, new_end, register_use->pos(), spill_mode);
+    return;
+  }
+
+  // Split at the new end if we found one.
+  if (new_end != current->End()) {
+    LiveRange* tail = SplitBetween(current, current->Start(), new_end);
+    AddToUnhandled(tail);
+  }
+
+  // Register reg is not blocked for the whole range.
+  DCHECK(block_pos[reg] >= current->End());
+  TRACE("Assigning blocked reg %s to live range %d:%d\n", RegisterName(reg),
+        current->TopLevel()->vreg(), current->relative_id());
+  SetLiveRangeAssignedRegister(current, reg);
+
+  // This register was not free. Thus we need to find and spill
+  // parts of active and inactive live regions that use the same register
+  // at the same lifetime positions as current.
+  SplitAndSpillIntersecting(current, spill_mode);
+}
+
+void LinearScanAllocator::SplitAndSpillIntersecting(LiveRange* current,
+                                                    SpillMode spill_mode) {
+  DCHECK(current->HasRegisterAssigned());
+  int reg = current->assigned_register();
+  LifetimePosition split_pos = current->Start();
+  for (auto it = active_live_ranges().begin();
+       it != active_live_ranges().end();) {
+    LiveRange* range = *it;
+    if (kSimpleFPAliasing || !check_fp_aliasing()) {
+      if (range->assigned_register() != reg) {
+        ++it;
+        continue;
+      }
+    } else {
+      if (!data()->config()->AreAliases(current->representation(), reg,
+                                        range->representation(),
+                                        range->assigned_register())) {
+        ++it;
+        continue;
+      }
+    }
+
+    UsePosition* next_pos = range->NextRegisterPosition(current->Start());
+    LiveRange* begin_spill = nullptr;
+    LifetimePosition spill_pos =
+        FindOptimalSpillingPos(range, split_pos, spill_mode, &begin_spill);
+    MaybeSpillPreviousRanges(begin_spill, spill_pos, range);
+    if (next_pos == nullptr) {
+      SpillAfter(range, spill_pos, spill_mode);
+    } else {
+      // When spilling between spill_pos and next_pos, ensure that the range
+      // remains spilled at least until the start of the current live range.
+      // This guarantees that we will not introduce new unhandled ranges that
+      // start before the current range, as this violates allocation
+      // invariants and will lead to an inconsistent state of active and
+      // inactive live-ranges: ranges are allocated in order of their start
+      // positions, and ranges are retired from active/inactive when the
+      // start of the current live-range is larger than their end.
+      DCHECK(LifetimePosition::ExistsGapPositionBetween(current->Start(),
+                                                        next_pos->pos()));
+      SpillBetweenUntil(range, spill_pos, current->Start(), next_pos->pos(),
+                        spill_mode);
+    }
+    it = ActiveToHandled(it);
+  }
+
+  for (int cur_reg = 0; cur_reg < num_registers(); ++cur_reg) {
+    if (kSimpleFPAliasing || !check_fp_aliasing()) {
+      if (cur_reg != reg) continue;
+    }
+    for (auto it = inactive_live_ranges(cur_reg).begin();
+         it != inactive_live_ranges(cur_reg).end();) {
+      LiveRange* range = *it;
+      if (!kSimpleFPAliasing && check_fp_aliasing() &&
+          !data()->config()->AreAliases(current->representation(), reg,
+                                        range->representation(), cur_reg)) {
+        ++it;
+        continue;
+      }
+      DCHECK(range->End() > current->Start());
+      if (range->TopLevel()->IsFixed()) {
+        ++it;
+        continue;
+      }
+
+      LifetimePosition next_intersection = range->FirstIntersection(current);
+      if (next_intersection.IsValid()) {
+        UsePosition* next_pos = range->NextRegisterPosition(current->Start());
+        if (next_pos == nullptr) {
+          SpillAfter(range, split_pos, spill_mode);
+        } else {
+          next_intersection = std::min(next_intersection, next_pos->pos());
+          SpillBetween(range, split_pos, next_intersection, spill_mode);
+        }
+        it = InactiveToHandled(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+}
+
+bool LinearScanAllocator::TryReuseSpillForPhi(TopLevelLiveRange* range) {
+  if (!range->is_phi()) return false;
+
+  DCHECK(!range->HasSpillOperand());
+  // Check how many operands belong to the same bundle as the output.
+  LiveRangeBundle* out_bundle = range->get_bundle();
+  TopTierRegisterAllocationData::PhiMapValue* phi_map_value =
+      data()->GetPhiMapValueFor(range);
+  const PhiInstruction* phi = phi_map_value->phi();
+  const InstructionBlock* block = phi_map_value->block();
+  // Count the number of spilled operands.
+  size_t spilled_count = 0;
+  for (size_t i = 0; i < phi->operands().size(); i++) {
+    int op = phi->operands()[i];
+    LiveRange* op_range = data()->GetOrCreateLiveRangeFor(op);
+    if (!op_range->TopLevel()->HasSpillRange()) continue;
+    const InstructionBlock* pred =
+        code()->InstructionBlockAt(block->predecessors()[i]);
+    LifetimePosition pred_end =
+        LifetimePosition::InstructionFromInstructionIndex(
+            pred->last_instruction_index());
+    while (op_range != nullptr && !op_range->CanCover(pred_end)) {
+      op_range = op_range->next();
+    }
+    if (op_range != nullptr && op_range->spilled() &&
+        op_range->get_bundle() == out_bundle) {
+      spilled_count++;
+    }
+  }
+
+  // Only continue if more than half of the operands are spilled to the same
+  // slot (because they are part of the same bundle).
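+  // For example, with three phi operands at least two must be spilled to
+  // that slot, since 2 * 2 > 3.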
+  if (spilled_count * 2 <= phi->operands().size()) {
+    return false;
+  }
+
+  // If the range does not need a register soon, spill it to the merged
+  // spill range.
+  LifetimePosition next_pos = range->Start();
+  if (next_pos.IsGapPosition()) next_pos = next_pos.NextStart();
+  UsePosition* pos = range->NextUsePositionRegisterIsBeneficial(next_pos);
+  if (pos == nullptr) {
+    Spill(range, SpillMode::kSpillAtDefinition);
+    return true;
+  } else if (pos->pos() > range->Start().NextStart()) {
+    SpillBetween(range, range->Start(), pos->pos(),
+                 SpillMode::kSpillAtDefinition);
+    return true;
+  }
+  return false;
+}
+
+void LinearScanAllocator::SpillAfter(LiveRange* range, LifetimePosition pos,
+                                     SpillMode spill_mode) {
+  LiveRange* second_part = SplitRangeAt(range, pos);
+  Spill(second_part, spill_mode);
+}
+
+void LinearScanAllocator::SpillBetween(LiveRange* range, LifetimePosition start,
+                                       LifetimePosition end,
+                                       SpillMode spill_mode) {
+  SpillBetweenUntil(range, start, start, end, spill_mode);
+}
+
+void LinearScanAllocator::SpillBetweenUntil(LiveRange* range,
+                                            LifetimePosition start,
+                                            LifetimePosition until,
+                                            LifetimePosition end,
+                                            SpillMode spill_mode) {
+  CHECK(start < end);
+  LiveRange* second_part = SplitRangeAt(range, start);
+
+  if (second_part->Start() < end) {
+    // The split result intersects with [start, end[.
+    // Split it at a position in ]start+1, end[, spill the middle part,
+    // and add the rest to unhandled.
+
+    // Make sure that the third part always starts after the start of the
+    // second part, as that likely is the current position of the register
+    // allocator and we cannot add ranges to unhandled that start before
+    // the current position.
+    LifetimePosition split_start = std::max(second_part->Start().End(), until);
+
+    // If end is an actual use (which it typically is) we have to split
+    // so that there is a gap before so that we have space for moving the
+    // value into its position.
+    // However, if we have no choice, split right where asked.
+    LifetimePosition third_part_end =
+        std::max(split_start, end.PrevStart().End());
+    // Instead of splitting right after or even before the block boundary,
+    // split on the boundary to avoid extra moves.
+    if (data()->IsBlockBoundary(end.Start())) {
+      third_part_end = std::max(split_start, end.Start());
+    }
+
+    LiveRange* third_part =
+        SplitBetween(second_part, split_start, third_part_end);
+    if (GetInstructionBlock(data()->code(), second_part->Start())
+            ->IsDeferred()) {
+      // Try to use the same register as before.
+      TRACE("Setting control flow hint for %d:%d to %s\n",
+            third_part->TopLevel()->vreg(), third_part->relative_id(),
+            RegisterName(range->controlflow_hint()));
+      third_part->set_controlflow_hint(range->controlflow_hint());
+    }
+
+    AddToUnhandled(third_part);
+    // This can happen, even if we checked for start < end above, as we fiddle
+    // with the end location. However, we are guaranteed to be after or at
+    // until, so this is fine.
+    if (third_part != second_part) {
+      Spill(second_part, spill_mode);
+    }
+  } else {
+    // The split result does not intersect with [start, end[.
+    // Nothing to spill. Just put it to unhandled as whole.
+    AddToUnhandled(second_part);
+  }
+}
+
+OperandAssigner::OperandAssigner(TopTierRegisterAllocationData* data)
+    : data_(data) {}
+
+void OperandAssigner::DecideSpillingMode() {
+  for (auto range : data()->live_ranges()) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    int max_blocks = data()->code()->InstructionBlockCount();
+    if (range != nullptr && range->IsSpilledOnlyInDeferredBlocks(data())) {
+      // If the range is spilled only in deferred blocks and starts in
+      // a non-deferred block, we transition its representation here so
+      // that the LiveRangeConnector processes it correctly. If,
+      // however, the range starts in a deferred block, we upgrade it to
+      // spill at definition, as that definition is in a deferred block
+      // anyway. While this is an optimization, the code in LiveRangeConnector
+      // relies on it!
+      if (GetInstructionBlock(data()->code(), range->Start())->IsDeferred()) {
+        TRACE("Live range %d is spilled and alive in deferred code only\n",
+              range->vreg());
+        range->TransitionRangeToSpillAtDefinition();
+      } else {
+        TRACE("Live range %d is spilled deferred code only but alive outside\n",
+              range->vreg());
+        range->TransitionRangeToDeferredSpill(data()->allocation_zone(),
+                                              max_blocks);
+      }
+    }
+  }
+}
+
+void OperandAssigner::AssignSpillSlots() {
+  for (auto range : data()->live_ranges()) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    if (range != nullptr && range->get_bundle() != nullptr) {
+      range->get_bundle()->MergeSpillRanges();
+    }
+  }
+  ZoneVector<SpillRange*>& spill_ranges = data()->spill_ranges();
+  // Merge disjoint spill ranges
+  for (size_t i = 0; i < spill_ranges.size(); ++i) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    SpillRange* range = spill_ranges[i];
+    if (range == nullptr) continue;
+    if (range->IsEmpty()) continue;
+    for (size_t j = i + 1; j < spill_ranges.size(); ++j) {
+      SpillRange* other = spill_ranges[j];
+      if (other != nullptr && !other->IsEmpty()) {
+        range->TryMerge(other);
+      }
+    }
+  }
+  // Allocate slots for the merged spill ranges.
+  for (SpillRange* range : spill_ranges) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    if (range == nullptr || range->IsEmpty()) continue;
+    // Allocate a new operand referring to the spill slot.
+    if (!range->HasSlot()) {
+      int index = data()->frame()->AllocateSpillSlot(range->byte_width());
+      range->set_assigned_slot(index);
+    }
+  }
+}
+
+void OperandAssigner::CommitAssignment() {
+  const size_t live_ranges_size = data()->live_ranges().size();
+  for (TopLevelLiveRange* top_range : data()->live_ranges()) {
+    data()->tick_counter()->TickAndMaybeEnterSafepoint();
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (top_range == nullptr || top_range->IsEmpty()) continue;
+    InstructionOperand spill_operand;
+    if (top_range->HasSpillOperand()) {
+      spill_operand = *top_range->TopLevel()->GetSpillOperand();
+    } else if (top_range->TopLevel()->HasSpillRange()) {
+      spill_operand = top_range->TopLevel()->GetSpillRangeOperand();
+    }
+    if (top_range->is_phi()) {
+      data()->GetPhiMapValueFor(top_range)->CommitAssignment(
+          top_range->GetAssignedOperand());
+    }
+    for (LiveRange* range = top_range; range != nullptr;
+         range = range->next()) {
+      InstructionOperand assigned = range->GetAssignedOperand();
+      DCHECK(!assigned.IsUnallocated());
+      range->ConvertUsesToOperand(assigned, spill_operand);
+    }
+
+    if (!spill_operand.IsInvalid()) {
+      // If this top level range has a child spilled in a deferred block, we use
+      // the range and control flow connection mechanism instead of spilling at
+      // definition. Refer to the ConnectLiveRanges and ResolveControlFlow
+      // phases. Normally, when we spill at definition, we do not insert a
+      // connecting move when a successor child range is spilled - because the
+      // spilled range picks up its value from the slot which was assigned at
+      // definition. For ranges that are determined to spill only in deferred
+      // blocks, we let ConnectLiveRanges and ResolveControlFlow find the blocks
+      // where a spill operand is expected, and then finalize by inserting the
+      // spills in the deferred blocks' dominators.
+      if (!top_range->IsSpilledOnlyInDeferredBlocks(data()) &&
+          !top_range->HasGeneralSpillRange()) {
+        // Spill at definition if the range isn't spilled in a way that will be
+        // handled later.
+        top_range->FilterSpillMoves(data(), spill_operand);
+        top_range->CommitSpillMoves(data(), spill_operand);
+      }
+    }
+  }
+}
+
+ReferenceMapPopulator::ReferenceMapPopulator(
+    TopTierRegisterAllocationData* data)
+    : data_(data) {}
+
+bool ReferenceMapPopulator::SafePointsAreInOrder() const {
+  int safe_point = 0;
+  for (ReferenceMap* map : *data()->code()->reference_maps()) {
+    if (safe_point > map->instruction_position()) return false;
+    safe_point = map->instruction_position();
+  }
+  return true;
+}
+
+void ReferenceMapPopulator::PopulateReferenceMaps() {
+  DCHECK(SafePointsAreInOrder());
+  // Map all delayed references.
+  for (TopTierRegisterAllocationData::DelayedReference& delayed_reference :
+       data()->delayed_references()) {
+    delayed_reference.map->RecordReference(
+        AllocatedOperand::cast(*delayed_reference.operand));
+  }
+  // Iterate over all safe point positions and record a pointer
+  // for all spilled live ranges at this point.
+  int last_range_start = 0;
+  const ReferenceMapDeque* reference_maps = data()->code()->reference_maps();
+  ReferenceMapDeque::const_iterator first_it = reference_maps->begin();
+  const size_t live_ranges_size = data()->live_ranges().size();
+  for (TopLevelLiveRange* range : data()->live_ranges()) {
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (range == nullptr) continue;
+    // Skip non-reference values.
+    if (!data()->code()->IsReference(range->vreg())) continue;
+    // Skip empty live ranges.
+    if (range->IsEmpty()) continue;
+    if (range->has_preassigned_slot()) continue;
+
+    // Find the extent of the range and its children.
+    int start = range->Start().ToInstructionIndex();
+    int end = 0;
+    for (LiveRange* cur = range; cur != nullptr; cur = cur->next()) {
+      LifetimePosition this_end = cur->End();
+      if (this_end.ToInstructionIndex() > end)
+        end = this_end.ToInstructionIndex();
+      DCHECK(cur->Start().ToInstructionIndex() >= start);
+    }
+
+    // Most of the ranges are in order, but not all.  Keep an eye on when they
+    // step backwards and reset the first_it so we don't miss any safe points.
+    if (start < last_range_start) first_it = reference_maps->begin();
+    last_range_start = start;
+
+    // Step across all the safe points that are before the start of this range,
+    // recording how far we step so the next range can continue from there.
+    for (; first_it != reference_maps->end(); ++first_it) {
+      ReferenceMap* map = *first_it;
+      if (map->instruction_position() >= start) break;
+    }
+
+    InstructionOperand spill_operand;
+    if (((range->HasSpillOperand() &&
+          !range->GetSpillOperand()->IsConstant()) ||
+         range->HasSpillRange())) {
+      if (range->HasSpillOperand()) {
+        spill_operand = *range->GetSpillOperand();
+      } else {
+        spill_operand = range->GetSpillRangeOperand();
+      }
+      DCHECK(spill_operand.IsStackSlot());
+      DCHECK(CanBeTaggedOrCompressedPointer(
+          AllocatedOperand::cast(spill_operand).representation()));
+    }
+
+    LiveRange* cur = range;
+    // Step through the safe points to see whether they are in the range.
+    for (auto it = first_it; it != reference_maps->end(); ++it) {
+      ReferenceMap* map = *it;
+      int safe_point = map->instruction_position();
+
+      // The safe points are sorted so we can stop searching here.
+      if (safe_point - 1 > end) break;
+
+      // Advance to the next active range that covers the current
+      // safe point position.
+      LifetimePosition safe_point_pos =
+          LifetimePosition::InstructionFromInstructionIndex(safe_point);
+
+      // Search for the child range (cur) that covers safe_point_pos. If we
+      // don't find it before the children pass safe_point_pos, keep cur at
+      // the last child, because the next safe_point_pos may be covered by cur.
+      // This may happen if cur has more than one interval, and the current
+      // safe_point_pos is in between intervals.
+      // For that reason, cur may be at most the last child.
+      DCHECK_NOT_NULL(cur);
+      DCHECK(safe_point_pos >= cur->Start() || range == cur);
+      bool found = false;
+      while (!found) {
+        if (cur->Covers(safe_point_pos)) {
+          found = true;
+        } else {
+          LiveRange* next = cur->next();
+          if (next == nullptr || next->Start() > safe_point_pos) {
+            break;
+          }
+          cur = next;
+        }
+      }
+
+      if (!found) {
+        continue;
+      }
+
+      // Check if the live range is spilled and the safe point is after
+      // the spill position.
+      int spill_index = range->IsSpilledOnlyInDeferredBlocks(data()) ||
+                                range->LateSpillingSelected()
+                            ? cur->Start().ToInstructionIndex()
+                            : range->spill_start_index();
+
+      if (!spill_operand.IsInvalid() && safe_point >= spill_index) {
+        TRACE("Pointer for range %d (spilled at %d) at safe point %d\n",
+              range->vreg(), spill_index, safe_point);
+        map->RecordReference(AllocatedOperand::cast(spill_operand));
+      }
+
+      if (!cur->spilled()) {
+        TRACE(
+            "Pointer in register for range %d:%d (start at %d) "
+            "at safe point %d\n",
+            range->vreg(), cur->relative_id(), cur->Start().value(),
+            safe_point);
+        InstructionOperand operand = cur->GetAssignedOperand();
+        DCHECK(!operand.IsStackSlot());
+        DCHECK(CanBeTaggedOrCompressedPointer(
+            AllocatedOperand::cast(operand).representation()));
+        map->RecordReference(AllocatedOperand::cast(operand));
+      }
+    }
+  }
+}
+
+LiveRangeConnector::LiveRangeConnector(TopTierRegisterAllocationData* data)
+    : data_(data) {}
+
+bool LiveRangeConnector::CanEagerlyResolveControlFlow(
+    const InstructionBlock* block) const {
+  if (block->PredecessorCount() != 1) return false;
+  return block->predecessors()[0].IsNext(block->rpo_number());
+}
+
+void LiveRangeConnector::ResolveControlFlow(Zone* local_zone) {
+  // Lazily linearize live ranges in memory for fast lookup.
+  LiveRangeFinder finder(data(), local_zone);
+  ZoneVector<BitVector*>& live_in_sets = data()->live_in_sets();
+  for (const InstructionBlock* block : code()->instruction_blocks()) {
+    if (CanEagerlyResolveControlFlow(block)) continue;
+    BitVector* live = live_in_sets[block->rpo_number().ToInt()];
+    BitVector::Iterator iterator(live);
+    while (!iterator.Done()) {
+      data()->tick_counter()->TickAndMaybeEnterSafepoint();
+      int vreg = iterator.Current();
+      LiveRangeBoundArray* array = finder.ArrayFor(vreg);
+      for (const RpoNumber& pred : block->predecessors()) {
+        FindResult result;
+        const InstructionBlock* pred_block = code()->InstructionBlockAt(pred);
+        if (!array->FindConnectableSubranges(block, pred_block, &result)) {
+          continue;
+        }
+        InstructionOperand pred_op = result.pred_cover_->GetAssignedOperand();
+        InstructionOperand cur_op = result.cur_cover_->GetAssignedOperand();
+        if (pred_op.Equals(cur_op)) continue;
+        if (!pred_op.IsAnyRegister() && cur_op.IsAnyRegister()) {
+          // We're doing a reload.
+          // We don't need to, if:
+          // 1) there's no register use in this block, and
+          // 2) the range ends before the block does, and
+          // 3) we don't have a successor, or the successor is spilled.
+          LifetimePosition block_start =
+              LifetimePosition::GapFromInstructionIndex(block->code_start());
+          LifetimePosition block_end =
+              LifetimePosition::GapFromInstructionIndex(block->code_end());
+          const LiveRange* current = result.cur_cover_;
+          // Note that this is not the successor if we have control flow!
+          // However, in the following condition, we only refer to it if it
+          // begins in the current block, in which case we can safely declare it
+          // to be the successor.
+          const LiveRange* successor = current->next();
+          if (current->End() < block_end &&
+              (successor == nullptr || successor->spilled())) {
+            // verify point 1: no register use. We can go to the end of the
+            // range, since it's all within the block.
+
+            bool uses_reg = false;
+            for (const UsePosition* use = current->NextUsePosition(block_start);
+                 use != nullptr; use = use->next()) {
+              if (use->operand()->IsAnyRegister()) {
+                uses_reg = true;
+                break;
+              }
+            }
+            if (!uses_reg) continue;
+          }
+          if (current->TopLevel()->IsSpilledOnlyInDeferredBlocks(data()) &&
+              pred_block->IsDeferred()) {
+            // The spill location should be defined in pred_block, so add
+            // pred_block to the list of blocks requiring a spill operand.
+            TRACE("Adding B%d to list of spill blocks for %d\n",
+                  pred_block->rpo_number().ToInt(),
+                  current->TopLevel()->vreg());
+            current->TopLevel()
+                ->GetListOfBlocksRequiringSpillOperands(data())
+                ->Add(pred_block->rpo_number().ToInt());
+          }
+        }
+        int move_loc = ResolveControlFlow(block, cur_op, pred_block, pred_op);
+        USE(move_loc);
+        DCHECK_IMPLIES(
+            result.cur_cover_->TopLevel()->IsSpilledOnlyInDeferredBlocks(
+                data()) &&
+                !(pred_op.IsAnyRegister() && cur_op.IsAnyRegister()),
+            code()->GetInstructionBlock(move_loc)->IsDeferred());
+      }
+      iterator.Advance();
+    }
+  }
+
+  // At this stage, we collected blocks needing a spill operand due to reloads
+  // from ConnectRanges and from ResolveControlFlow. Time to commit the spills
+  // for deferred blocks. This is a convenient time to commit spills for general
+  // spill ranges also, because they need to use the LiveRangeFinder.
+  const size_t live_ranges_size = data()->live_ranges().size();
+  SpillPlacer spill_placer(&finder, data(), local_zone);
+  for (TopLevelLiveRange* top : data()->live_ranges()) {
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (top == nullptr || top->IsEmpty()) continue;
+    if (top->IsSpilledOnlyInDeferredBlocks(data())) {
+      CommitSpillsInDeferredBlocks(top, finder.ArrayFor(top->vreg()),
+                                   local_zone);
+    } else if (top->HasGeneralSpillRange()) {
+      spill_placer.Add(top);
+    }
+  }
+}
+
+int LiveRangeConnector::ResolveControlFlow(const InstructionBlock* block,
+                                           const InstructionOperand& cur_op,
+                                           const InstructionBlock* pred,
+                                           const InstructionOperand& pred_op) {
+  DCHECK(!pred_op.Equals(cur_op));
+  int gap_index;
+  Instruction::GapPosition position;
+  if (block->PredecessorCount() == 1) {
+    gap_index = block->first_instruction_index();
+    position = Instruction::START;
+  } else {
+    DCHECK_EQ(1, pred->SuccessorCount());
+    DCHECK(!code()
+                ->InstructionAt(pred->last_instruction_index())
+                ->HasReferenceMap());
+    gap_index = pred->last_instruction_index();
+    position = Instruction::END;
+  }
+  data()->AddGapMove(gap_index, position, pred_op, cur_op);
+  return gap_index;
+}
+
+void LiveRangeConnector::ConnectRanges(Zone* local_zone) {
+  DelayedInsertionMap delayed_insertion_map(local_zone);
+  const size_t live_ranges_size = data()->live_ranges().size();
+  for (TopLevelLiveRange* top_range : data()->live_ranges()) {
+    CHECK_EQ(live_ranges_size,
+             data()->live_ranges().size());  // TODO(neis): crbug.com/831822
+    if (top_range == nullptr) continue;
+    bool connect_spilled = top_range->IsSpilledOnlyInDeferredBlocks(data());
+    LiveRange* first_range = top_range;
+    for (LiveRange *second_range = first_range->next(); second_range != nullptr;
+         first_range = second_range, second_range = second_range->next()) {
+      LifetimePosition pos = second_range->Start();
+      // Add gap move if the two live ranges touch and there is no block
+      // boundary.
+      if (second_range->spilled()) continue;
+      if (first_range->End() != pos) continue;
+      if (data()->IsBlockBoundary(pos) &&
+          !CanEagerlyResolveControlFlow(GetInstructionBlock(code(), pos))) {
+        continue;
+      }
+      InstructionOperand prev_operand = first_range->GetAssignedOperand();
+      InstructionOperand cur_operand = second_range->GetAssignedOperand();
+      if (prev_operand.Equals(cur_operand)) continue;
+      bool delay_insertion = false;
+      Instruction::GapPosition gap_pos;
+      int gap_index = pos.ToInstructionIndex();
+      if (connect_spilled && !prev_operand.IsAnyRegister() &&
+          cur_operand.IsAnyRegister()) {
+        const InstructionBlock* block = code()->GetInstructionBlock(gap_index);
+        DCHECK(block->IsDeferred());
+        // We are performing a reload in this block, so the spill operand
+        // must be defined here.
+        top_range->GetListOfBlocksRequiringSpillOperands(data())->Add(
+            block->rpo_number().ToInt());
+      }
+
+      if (pos.IsGapPosition()) {
+        gap_pos = pos.IsStart() ? Instruction::START : Instruction::END;
+      } else {
+        if (pos.IsStart()) {
+          delay_insertion = true;
+        } else {
+          gap_index++;
+        }
+        gap_pos = delay_insertion ? Instruction::END : Instruction::START;
+      }
+      // Reloads or spills for ranges that are spilled only in deferred
+      // blocks must happen only in deferred blocks.
+      DCHECK_IMPLIES(connect_spilled && !(prev_operand.IsAnyRegister() &&
+                                          cur_operand.IsAnyRegister()),
+                     code()->GetInstructionBlock(gap_index)->IsDeferred());
+
+      ParallelMove* move =
+          code()->InstructionAt(gap_index)->GetOrCreateParallelMove(
+              gap_pos, code_zone());
+      if (!delay_insertion) {
+        move->AddMove(prev_operand, cur_operand);
+      } else {
+        delayed_insertion_map.insert(
+            std::make_pair(std::make_pair(move, prev_operand), cur_operand));
+      }
+    }
+  }
+  if (delayed_insertion_map.empty()) return;
+  // Insert all the moves which should occur after the stored move.
+  ZoneVector<MoveOperands*> to_insert(local_zone);
+  ZoneVector<MoveOperands*> to_eliminate(local_zone);
+  to_insert.reserve(4);
+  to_eliminate.reserve(4);
+  ParallelMove* moves = delayed_insertion_map.begin()->first.first;
+  for (auto it = delayed_insertion_map.begin();; ++it) {
+    bool done = it == delayed_insertion_map.end();
+    if (done || it->first.first != moves) {
+      // Commit the MoveOperands for current ParallelMove.
+      for (MoveOperands* move : to_eliminate) {
+        move->Eliminate();
+      }
+      for (MoveOperands* move : to_insert) {
+        moves->push_back(move);
+      }
+      if (done) break;
+      // Reset state.
+      to_eliminate.clear();
+      to_insert.clear();
+      moves = it->first.first;
+    }
+    // Gather all MoveOperands for a single ParallelMove.
+    MoveOperands* move =
+        code_zone()->New<MoveOperands>(it->first.second, it->second);
+    moves->PrepareInsertAfter(move, &to_eliminate);
+    to_insert.push_back(move);
+  }
+}
+
+void LiveRangeConnector::CommitSpillsInDeferredBlocks(
+    TopLevelLiveRange* range, LiveRangeBoundArray* array, Zone* temp_zone) {
+  DCHECK(range->IsSpilledOnlyInDeferredBlocks(data()));
+  DCHECK(!range->spilled());
+
+  InstructionSequence* code = data()->code();
+  InstructionOperand spill_operand = range->GetSpillRangeOperand();
+
+  TRACE("Live Range %d will be spilled only in deferred blocks.\n",
+        range->vreg());
+  // If we have ranges that aren't spilled but require the operand on the stack,
+  // make sure we insert the spill.
+  for (const LiveRange* child = range; child != nullptr;
+       child = child->next()) {
+    for (const UsePosition* pos = child->first_pos(); pos != nullptr;
+         pos = pos->next()) {
+      if (pos->type() != UsePositionType::kRequiresSlot && !child->spilled())
+        continue;
+      range->AddBlockRequiringSpillOperand(
+          code->GetInstructionBlock(pos->pos().ToInstructionIndex())
+              ->rpo_number(),
+          data());
+    }
+  }
+
+  ZoneQueue<int> worklist(temp_zone);
+
+  for (BitVector::Iterator iterator(
+           range->GetListOfBlocksRequiringSpillOperands(data()));
+       !iterator.Done(); iterator.Advance()) {
+    worklist.push(iterator.Current());
+  }
+
+  ZoneSet<std::pair<RpoNumber, int>> done_moves(temp_zone);
+  // Seek the deferred blocks that dominate locations requiring spill operands,
+  // and spill there. We only need to spill at the start of such blocks.
+  BitVector done_blocks(
+      range->GetListOfBlocksRequiringSpillOperands(data())->length(),
+      temp_zone);
+  while (!worklist.empty()) {
+    int block_id = worklist.front();
+    worklist.pop();
+    if (done_blocks.Contains(block_id)) continue;
+    done_blocks.Add(block_id);
+    InstructionBlock* spill_block =
+        code->InstructionBlockAt(RpoNumber::FromInt(block_id));
+
+    for (const RpoNumber& pred : spill_block->predecessors()) {
+      const InstructionBlock* pred_block = code->InstructionBlockAt(pred);
+
+      if (pred_block->IsDeferred()) {
+        worklist.push(pred_block->rpo_number().ToInt());
+      } else {
+        LifetimePosition pred_end =
+            LifetimePosition::InstructionFromInstructionIndex(
+                pred_block->last_instruction_index());
+
+        LiveRangeBound* bound = array->Find(pred_end);
+
+        InstructionOperand pred_op = bound->range_->GetAssignedOperand();
+
+        RpoNumber spill_block_number = spill_block->rpo_number();
+        if (done_moves.find(std::make_pair(
+                spill_block_number, range->vreg())) == done_moves.end()) {
+          TRACE("Spilling deferred spill for range %d at B%d\n", range->vreg(),
+                spill_block_number.ToInt());
+          data()->AddGapMove(spill_block->first_instruction_index(),
+                             Instruction::GapPosition::START, pred_op,
+                             spill_operand);
+          done_moves.insert(std::make_pair(spill_block_number, range->vreg()));
+          spill_block->mark_needs_frame();
+        }
+      }
+    }
+  }
+}
+
+#undef TRACE
+#undef TRACE_COND
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/register-allocator.h b/src/compiler/backend/register-allocator.h
new file mode 100644
index 0000000..858fac8
--- /dev/null
+++ b/src/compiler/backend/register-allocator.h
@@ -0,0 +1,1603 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_H_
+#define V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_H_
+
+#include "src/base/bits.h"
+#include "src/base/compiler-specific.h"
+#include "src/codegen/register-configuration.h"
+#include "src/common/globals.h"
+#include "src/compiler/backend/instruction.h"
+#include "src/compiler/backend/register-allocation.h"
+#include "src/flags/flags.h"
+#include "src/utils/ostreams.h"
+#include "src/zone/zone-containers.h"
+
+namespace v8 {
+namespace internal {
+
+class TickCounter;
+
+namespace compiler {
+
+static const int32_t kUnassignedRegister = RegisterConfiguration::kMaxRegisters;
+
+// This class represents a single point of an InstructionOperand's lifetime. For
+// each instruction there are four lifetime positions:
+//
+//   [[START, END], [START, END]]
+//
+// Where the first half position corresponds to
+//
+//  [GapPosition::START, GapPosition::END]
+//
+// and the second half position corresponds to
+//
+//  [Lifetime::USED_AT_START, Lifetime::USED_AT_END]
+//
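+// As a worked example (using kStep = 4 and kHalfStep = 2 defined below),
+// instruction index 3 encodes as: gap START = 12, gap END = 13,
+// instruction START = 14, and instruction END = 15.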
+class LifetimePosition final {
+ public:
+  // Return the lifetime position that corresponds to the beginning of
+  // the gap with the given index.
+  static LifetimePosition GapFromInstructionIndex(int index) {
+    return LifetimePosition(index * kStep);
+  }
+  // Return the lifetime position that corresponds to the beginning of
+  // the instruction with the given index.
+  static LifetimePosition InstructionFromInstructionIndex(int index) {
+    return LifetimePosition(index * kStep + kHalfStep);
+  }
+
+  static bool ExistsGapPositionBetween(LifetimePosition pos1,
+                                       LifetimePosition pos2) {
+    if (pos1 > pos2) std::swap(pos1, pos2);
+    LifetimePosition next(pos1.value_ + 1);
+    if (next.IsGapPosition()) return next < pos2;
+    return next.NextFullStart() < pos2;
+  }
+
+  // Returns a numeric representation of this lifetime position.
+  int value() const { return value_; }
+
+  // Returns the index of the instruction to which this lifetime position
+  // corresponds.
+  int ToInstructionIndex() const {
+    DCHECK(IsValid());
+    return value_ / kStep;
+  }
+
+  // Returns true if this lifetime position corresponds to a START value
+  bool IsStart() const { return (value_ & (kHalfStep - 1)) == 0; }
+  // Returns true if this lifetime position corresponds to an END value
+  bool IsEnd() const { return (value_ & (kHalfStep - 1)) == 1; }
+  // Returns true if this lifetime position corresponds to a gap START value
+  bool IsFullStart() const { return (value_ & (kStep - 1)) == 0; }
+
+  bool IsGapPosition() const { return (value_ & 0x2) == 0; }
+  bool IsInstructionPosition() const { return !IsGapPosition(); }
+
+  // Returns the lifetime position for the current START.
+  LifetimePosition Start() const {
+    DCHECK(IsValid());
+    return LifetimePosition(value_ & ~(kHalfStep - 1));
+  }
+
+  // Returns the lifetime position for the current gap START.
+  LifetimePosition FullStart() const {
+    DCHECK(IsValid());
+    return LifetimePosition(value_ & ~(kStep - 1));
+  }
+
+  // Returns the lifetime position for the current END.
+  LifetimePosition End() const {
+    DCHECK(IsValid());
+    return LifetimePosition(Start().value_ + kHalfStep / 2);
+  }
+
+  // Returns the lifetime position for the beginning of the next START.
+  LifetimePosition NextStart() const {
+    DCHECK(IsValid());
+    return LifetimePosition(Start().value_ + kHalfStep);
+  }
+
+  // Returns the lifetime position for the beginning of the next gap START.
+  LifetimePosition NextFullStart() const {
+    DCHECK(IsValid());
+    return LifetimePosition(FullStart().value_ + kStep);
+  }
+
+  // Returns the lifetime position for the beginning of the previous START.
+  LifetimePosition PrevStart() const {
+    DCHECK(IsValid());
+    DCHECK_LE(kHalfStep, value_);
+    return LifetimePosition(Start().value_ - kHalfStep);
+  }
+
+  // Constructs the lifetime position which does not correspond to any
+  // instruction.
+  LifetimePosition() : value_(-1) {}
+
+  // Returns true if this lifetime position corresponds to some
+  // instruction.
+  bool IsValid() const { return value_ != -1; }
+
+  bool operator<(const LifetimePosition& that) const {
+    return this->value_ < that.value_;
+  }
+
+  bool operator<=(const LifetimePosition& that) const {
+    return this->value_ <= that.value_;
+  }
+
+  bool operator==(const LifetimePosition& that) const {
+    return this->value_ == that.value_;
+  }
+
+  bool operator!=(const LifetimePosition& that) const {
+    return this->value_ != that.value_;
+  }
+
+  bool operator>(const LifetimePosition& that) const {
+    return this->value_ > that.value_;
+  }
+
+  bool operator>=(const LifetimePosition& that) const {
+    return this->value_ >= that.value_;
+  }
+
+  void Print() const;
+
+  static inline LifetimePosition Invalid() { return LifetimePosition(); }
+
+  static inline LifetimePosition MaxPosition() {
+    // We have to use this kind of getter instead of a static member due to
+    // a crash bug in GDB.
+    return LifetimePosition(kMaxInt);
+  }
+
+  static inline LifetimePosition FromInt(int value) {
+    return LifetimePosition(value);
+  }
+
+ private:
+  static const int kHalfStep = 2;
+  static const int kStep = 2 * kHalfStep;
+
+  static_assert(base::bits::IsPowerOfTwo(kHalfStep),
+                "Code relies on kStep and kHalfStep being a power of two");
+
+  explicit LifetimePosition(int value) : value_(value) {}
+
+  int value_;
+};
+
+std::ostream& operator<<(std::ostream& os, const LifetimePosition pos);
+
+enum class RegisterAllocationFlag : unsigned { kTraceAllocation = 1 << 0 };
+
+using RegisterAllocationFlags = base::Flags<RegisterAllocationFlag>;
+
+class SpillRange;
+class LiveRange;
+class TopLevelLiveRange;
+
+class TopTierRegisterAllocationData final : public RegisterAllocationData {
+ public:
+  TopTierRegisterAllocationData(const TopTierRegisterAllocationData&) = delete;
+  TopTierRegisterAllocationData& operator=(
+      const TopTierRegisterAllocationData&) = delete;
+
+  static const TopTierRegisterAllocationData* cast(
+      const RegisterAllocationData* data) {
+    DCHECK_EQ(data->type(), Type::kTopTier);
+    return static_cast<const TopTierRegisterAllocationData*>(data);
+  }
+
+  static TopTierRegisterAllocationData* cast(RegisterAllocationData* data) {
+    DCHECK_EQ(data->type(), Type::kTopTier);
+    return static_cast<TopTierRegisterAllocationData*>(data);
+  }
+
+  static const TopTierRegisterAllocationData& cast(
+      const RegisterAllocationData& data) {
+    DCHECK_EQ(data.type(), Type::kTopTier);
+    return static_cast<const TopTierRegisterAllocationData&>(data);
+  }
+
+  // Encodes whether a spill happens in deferred code (kSpillDeferred) or
+  // regular code (kSpillAtDefinition).
+  enum SpillMode { kSpillAtDefinition, kSpillDeferred };
+
+  bool is_trace_alloc() {
+    return flags_ & RegisterAllocationFlag::kTraceAllocation;
+  }
+
+  static constexpr int kNumberOfFixedRangesPerRegister = 2;
+
+  class PhiMapValue : public ZoneObject {
+   public:
+    PhiMapValue(PhiInstruction* phi, const InstructionBlock* block, Zone* zone);
+
+    const PhiInstruction* phi() const { return phi_; }
+    const InstructionBlock* block() const { return block_; }
+
+    // For hinting.
+    int assigned_register() const { return assigned_register_; }
+    void set_assigned_register(int register_code) {
+      DCHECK_EQ(assigned_register_, kUnassignedRegister);
+      assigned_register_ = register_code;
+    }
+    void UnsetAssignedRegister() { assigned_register_ = kUnassignedRegister; }
+
+    void AddOperand(InstructionOperand* operand);
+    void CommitAssignment(const InstructionOperand& operand);
+
+   private:
+    PhiInstruction* const phi_;
+    const InstructionBlock* const block_;
+    ZoneVector<InstructionOperand*> incoming_operands_;
+    int assigned_register_;
+  };
+  using PhiMap = ZoneMap<int, PhiMapValue*>;
+
+  struct DelayedReference {
+    ReferenceMap* map;
+    InstructionOperand* operand;
+  };
+  using DelayedReferences = ZoneVector<DelayedReference>;
+  using RangesWithPreassignedSlots =
+      ZoneVector<std::pair<TopLevelLiveRange*, int>>;
+
+  TopTierRegisterAllocationData(const RegisterConfiguration* config,
+                                Zone* allocation_zone, Frame* frame,
+                                InstructionSequence* code,
+                                RegisterAllocationFlags flags,
+                                TickCounter* tick_counter,
+                                const char* debug_name = nullptr);
+
+  const ZoneVector<TopLevelLiveRange*>& live_ranges() const {
+    return live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& live_ranges() { return live_ranges_; }
+  const ZoneVector<TopLevelLiveRange*>& fixed_live_ranges() const {
+    return fixed_live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& fixed_live_ranges() {
+    return fixed_live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() {
+    return fixed_float_live_ranges_;
+  }
+  const ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() const {
+    return fixed_float_live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() {
+    return fixed_double_live_ranges_;
+  }
+  const ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() const {
+    return fixed_double_live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() {
+    return fixed_simd128_live_ranges_;
+  }
+  const ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() const {
+    return fixed_simd128_live_ranges_;
+  }
+  ZoneVector<BitVector*>& live_in_sets() { return live_in_sets_; }
+  ZoneVector<BitVector*>& live_out_sets() { return live_out_sets_; }
+  ZoneVector<SpillRange*>& spill_ranges() { return spill_ranges_; }
+  DelayedReferences& delayed_references() { return delayed_references_; }
+  InstructionSequence* code() const { return code_; }
+  // This zone is for data structures only needed during register allocation
+  // phases.
+  Zone* allocation_zone() const { return allocation_zone_; }
+  // This zone is for InstructionOperands and moves that live beyond register
+  // allocation.
+  Zone* code_zone() const { return code()->zone(); }
+  Frame* frame() const { return frame_; }
+  const char* debug_name() const { return debug_name_; }
+  const RegisterConfiguration* config() const { return config_; }
+
+  MachineRepresentation RepresentationFor(int virtual_register);
+
+  TopLevelLiveRange* GetOrCreateLiveRangeFor(int index);
+  // Creates a new live range.
+  TopLevelLiveRange* NewLiveRange(int index, MachineRepresentation rep);
+  TopLevelLiveRange* NextLiveRange(MachineRepresentation rep);
+
+  SpillRange* AssignSpillRangeToLiveRange(TopLevelLiveRange* range,
+                                          SpillMode spill_mode);
+  SpillRange* CreateSpillRangeForLiveRange(TopLevelLiveRange* range);
+
+  MoveOperands* AddGapMove(int index, Instruction::GapPosition position,
+                           const InstructionOperand& from,
+                           const InstructionOperand& to);
+
+  bool ExistsUseWithoutDefinition();
+  bool RangesDefinedInDeferredStayInDeferred();
+
+  void MarkFixedUse(MachineRepresentation rep, int index);
+  bool HasFixedUse(MachineRepresentation rep, int index);
+
+  void MarkAllocated(MachineRepresentation rep, int index);
+
+  PhiMapValue* InitializePhiMap(const InstructionBlock* block,
+                                PhiInstruction* phi);
+  PhiMapValue* GetPhiMapValueFor(TopLevelLiveRange* top_range);
+  PhiMapValue* GetPhiMapValueFor(int virtual_register);
+  bool IsBlockBoundary(LifetimePosition pos) const;
+
+  RangesWithPreassignedSlots& preassigned_slot_ranges() {
+    return preassigned_slot_ranges_;
+  }
+
+  void RememberSpillState(RpoNumber block,
+                          const ZoneVector<LiveRange*>& state) {
+    spill_state_[block.ToSize()] = state;
+  }
+
+  ZoneVector<LiveRange*>& GetSpillState(RpoNumber block) {
+    auto& result = spill_state_[block.ToSize()];
+    return result;
+  }
+
+  void ResetSpillState() {
+    for (auto& state : spill_state_) {
+      state.clear();
+    }
+  }
+
+  TickCounter* tick_counter() { return tick_counter_; }
+
+ private:
+  int GetNextLiveRangeId();
+
+  Zone* const allocation_zone_;
+  Frame* const frame_;
+  InstructionSequence* const code_;
+  const char* const debug_name_;
+  const RegisterConfiguration* const config_;
+  PhiMap phi_map_;
+  ZoneVector<BitVector*> live_in_sets_;
+  ZoneVector<BitVector*> live_out_sets_;
+  ZoneVector<TopLevelLiveRange*> live_ranges_;
+  ZoneVector<TopLevelLiveRange*> fixed_live_ranges_;
+  ZoneVector<TopLevelLiveRange*> fixed_float_live_ranges_;
+  ZoneVector<TopLevelLiveRange*> fixed_double_live_ranges_;
+  ZoneVector<TopLevelLiveRange*> fixed_simd128_live_ranges_;
+  ZoneVector<SpillRange*> spill_ranges_;
+  DelayedReferences delayed_references_;
+  BitVector* assigned_registers_;
+  BitVector* assigned_double_registers_;
+  BitVector* fixed_register_use_;
+  BitVector* fixed_fp_register_use_;
+  int virtual_register_count_;
+  RangesWithPreassignedSlots preassigned_slot_ranges_;
+  ZoneVector<ZoneVector<LiveRange*>> spill_state_;
+  RegisterAllocationFlags flags_;
+  TickCounter* const tick_counter_;
+};
+
+// Representation of the non-empty interval [start,end[.
+class UseInterval final : public ZoneObject {
+ public:
+  UseInterval(LifetimePosition start, LifetimePosition end)
+      : start_(start), end_(end), next_(nullptr) {
+    DCHECK(start < end);
+  }
+  UseInterval(const UseInterval&) = delete;
+  UseInterval& operator=(const UseInterval&) = delete;
+
+  LifetimePosition start() const { return start_; }
+  void set_start(LifetimePosition start) { start_ = start; }
+  LifetimePosition end() const { return end_; }
+  void set_end(LifetimePosition end) { end_ = end; }
+  UseInterval* next() const { return next_; }
+  void set_next(UseInterval* next) { next_ = next; }
+
+  // Split this interval at the given position without affecting the
+  // live range that owns it. The interval must contain the position.
+  UseInterval* SplitAt(LifetimePosition pos, Zone* zone);
+
+  // If this interval intersects with the other interval, returns the smallest
+  // position that belongs to both of them.
+  LifetimePosition Intersect(const UseInterval* other) const {
+    if (other->start() < start_) return other->Intersect(this);
+    if (other->start() < end_) return other->start();
+    return LifetimePosition::Invalid();
+  }
+
+  bool Contains(LifetimePosition point) const {
+    return start_ <= point && point < end_;
+  }
+
+  // Returns the index of the first gap covered by this interval.
+  int FirstGapIndex() const {
+    int ret = start_.ToInstructionIndex();
+    if (start_.IsInstructionPosition()) {
+      ++ret;
+    }
+    return ret;
+  }
+
+  // Returns the index of the last gap covered by this interval.
+  int LastGapIndex() const {
+    int ret = end_.ToInstructionIndex();
+    if (end_.IsGapPosition() && end_.IsStart()) {
+      --ret;
+    }
+    return ret;
+  }
+
+ private:
+  LifetimePosition start_;
+  LifetimePosition end_;
+  UseInterval* next_;
+};
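+
+// Illustrative only (not part of the upstream file): the interval is
+// half-open, so with the hypothetical positions below
+//
+//   UseInterval a(LifetimePosition::FromInt(6), LifetimePosition::FromInt(14));
+//   UseInterval b(LifetimePosition::FromInt(10), LifetimePosition::FromInt(20));
+//   // a.Contains(LifetimePosition::FromInt(13)) is true
+//   // a.Contains(LifetimePosition::FromInt(14)) is false (end is exclusive)
+//   // a.Intersect(&b) and b.Intersect(&a) both return
+//   // LifetimePosition::FromInt(10), the first common position.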
+
+enum class UsePositionType : uint8_t {
+  kRegisterOrSlot,
+  kRegisterOrSlotOrConstant,
+  kRequiresRegister,
+  kRequiresSlot
+};
+
+enum class UsePositionHintType : uint8_t {
+  kNone,
+  kOperand,
+  kUsePos,
+  kPhi,
+  kUnresolved
+};
+
+// Representation of a use position.
+class V8_EXPORT_PRIVATE UsePosition final
+    : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  UsePosition(LifetimePosition pos, InstructionOperand* operand, void* hint,
+              UsePositionHintType hint_type);
+  UsePosition(const UsePosition&) = delete;
+  UsePosition& operator=(const UsePosition&) = delete;
+
+  InstructionOperand* operand() const { return operand_; }
+  bool HasOperand() const { return operand_ != nullptr; }
+
+  bool RegisterIsBeneficial() const {
+    return RegisterBeneficialField::decode(flags_);
+  }
+  bool SpillDetrimental() const {
+    return SpillDetrimentalField::decode(flags_);
+  }
+
+  UsePositionType type() const { return TypeField::decode(flags_); }
+  void set_type(UsePositionType type, bool register_beneficial);
+
+  LifetimePosition pos() const { return pos_; }
+
+  UsePosition* next() const { return next_; }
+  void set_next(UsePosition* next) { next_ = next; }
+
+  // For hinting only.
+  void set_assigned_register(int register_code) {
+    flags_ = AssignedRegisterField::update(flags_, register_code);
+  }
+  void set_spill_detrimental() {
+    flags_ = SpillDetrimentalField::update(flags_, true);
+  }
+
+  UsePositionHintType hint_type() const {
+    return HintTypeField::decode(flags_);
+  }
+  bool HasHint() const;
+  bool HintRegister(int* register_code) const;
+  void SetHint(UsePosition* use_pos);
+  void ResolveHint(UsePosition* use_pos);
+  bool IsResolved() const {
+    return hint_type() != UsePositionHintType::kUnresolved;
+  }
+  static UsePositionHintType HintTypeForOperand(const InstructionOperand& op);
+
+ private:
+  using TypeField = base::BitField<UsePositionType, 0, 2>;
+  using HintTypeField = base::BitField<UsePositionHintType, 2, 3>;
+  using RegisterBeneficialField = base::BitField<bool, 5, 1>;
+  using AssignedRegisterField = base::BitField<int32_t, 6, 6>;
+  using SpillDetrimentalField = base::BitField<int32_t, 12, 1>;
+
+  InstructionOperand* const operand_;
+  void* hint_;
+  UsePosition* next_;
+  LifetimePosition const pos_;
+  uint32_t flags_;
+};
+
+class SpillRange;
+class TopTierRegisterAllocationData;
+class TopLevelLiveRange;
+class LiveRangeBundle;
+
+// Representation of SSA values' live ranges as a collection of (continuous)
+// intervals over the instruction ordering.
+class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
+ public:
+  LiveRange(const LiveRange&) = delete;
+  LiveRange& operator=(const LiveRange&) = delete;
+
+  UseInterval* first_interval() const { return first_interval_; }
+  UsePosition* first_pos() const { return first_pos_; }
+  TopLevelLiveRange* TopLevel() { return top_level_; }
+  const TopLevelLiveRange* TopLevel() const { return top_level_; }
+
+  bool IsTopLevel() const;
+
+  LiveRange* next() const { return next_; }
+
+  int relative_id() const { return relative_id_; }
+
+  bool IsEmpty() const { return first_interval() == nullptr; }
+
+  InstructionOperand GetAssignedOperand() const;
+
+  MachineRepresentation representation() const {
+    return RepresentationField::decode(bits_);
+  }
+
+  int assigned_register() const { return AssignedRegisterField::decode(bits_); }
+  bool HasRegisterAssigned() const {
+    return assigned_register() != kUnassignedRegister;
+  }
+  void set_assigned_register(int reg);
+  void UnsetAssignedRegister();
+
+  bool ShouldRecombine() const { return RecombineField::decode(bits_); }
+
+  void SetRecombine() { bits_ = RecombineField::update(bits_, true); }
+  void set_controlflow_hint(int reg) {
+    bits_ = ControlFlowRegisterHint::update(bits_, reg);
+  }
+  int controlflow_hint() const {
+    return ControlFlowRegisterHint::decode(bits_);
+  }
+  bool RegisterFromControlFlow(int* reg) {
+    int hint = controlflow_hint();
+    if (hint != kUnassignedRegister) {
+      *reg = hint;
+      return true;
+    }
+    return false;
+  }
+  bool spilled() const { return SpilledField::decode(bits_); }
+  void AttachToNext();
+  void Unspill();
+  void Spill();
+
+  RegisterKind kind() const;
+
+  // Returns the use position in this live range that follows both start
+  // and the last processed use position.
+  UsePosition* NextUsePosition(LifetimePosition start) const;
+
+  // Returns the use position for which a register is required in this live
+  // range and which follows both start and the last processed use position.
+  UsePosition* NextRegisterPosition(LifetimePosition start) const;
+
+  // Returns the first use position requiring a stack slot, or nullptr.
+  UsePosition* NextSlotPosition(LifetimePosition start) const;
+
+  // Returns the use position for which a register is beneficial in this live
+  // range and which follows both start and the last processed use position.
+  UsePosition* NextUsePositionRegisterIsBeneficial(
+      LifetimePosition start) const;
+
+  // Returns the lifetime position for which a register is beneficial in this
+  // live range and which follows both start and the last processed use
+  // position.
+  LifetimePosition NextLifetimePositionRegisterIsBeneficial(
+      const LifetimePosition& start) const;
+
+  // Returns the use position for which a register is beneficial in this live
+  // range and which precedes start.
+  UsePosition* PreviousUsePositionRegisterIsBeneficial(
+      LifetimePosition start) const;
+
+  // Returns the use position for which spilling is detrimental in this live
+  // range and which follows both start and the last processed use position.
+  UsePosition* NextUsePositionSpillDetrimental(LifetimePosition start) const;
+
+  // Can this live range be spilled at this position.
+  bool CanBeSpilled(LifetimePosition pos) const;
+
+  // Splitting primitive used by splitting members.
+  // Performs the split, but does not link the resulting ranges.
+  // The given position must follow the start of the range.
+  // All uses following the given position will be moved from this
+  // live range to the result live range.
+  // The current range will terminate at position, while result will start from
+  // position.
+  enum HintConnectionOption : bool {
+    DoNotConnectHints = false,
+    ConnectHints = true
+  };
+  UsePosition* DetachAt(LifetimePosition position, LiveRange* result,
+                        Zone* zone, HintConnectionOption connect_hints);
+
+  // Detaches at position, and then links the resulting ranges. Returns the
+  // child, which starts at position.
+  LiveRange* SplitAt(LifetimePosition position, Zone* zone);
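+  // Illustrative only (not upstream documentation): if a range covers the
+  // half-open span [10,30[ then, under the contract above,
+  //
+  //   LiveRange* child = range->SplitAt(LifetimePosition::FromInt(20), zone);
+  //   // leaves range covering [10,20[ with range->next() == child, and child
+  //   // covering [20,30[ together with the uses that follow the split point.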
+
+  // Returns nullptr when no register is hinted, otherwise sets register_index.
+  // Uses {current_hint_position_} as a cache, and tries to update it.
+  UsePosition* FirstHintPosition(int* register_index);
+  UsePosition* FirstHintPosition() {
+    int register_index;
+    return FirstHintPosition(&register_index);
+  }
+
+  UsePosition* current_hint_position() const {
+    return current_hint_position_;
+  }
+
+  LifetimePosition Start() const {
+    DCHECK(!IsEmpty());
+    return first_interval()->start();
+  }
+
+  LifetimePosition End() const {
+    DCHECK(!IsEmpty());
+    return last_interval_->end();
+  }
+
+  bool ShouldBeAllocatedBefore(const LiveRange* other) const;
+  bool CanCover(LifetimePosition position) const;
+  bool Covers(LifetimePosition position) const;
+  LifetimePosition NextStartAfter(LifetimePosition position);
+  LifetimePosition NextEndAfter(LifetimePosition position) const;
+  LifetimePosition FirstIntersection(LiveRange* other) const;
+  LifetimePosition NextStart() const { return next_start_; }
+
+  void VerifyChildStructure() const {
+    VerifyIntervals();
+    VerifyPositions();
+  }
+
+  void ConvertUsesToOperand(const InstructionOperand& op,
+                            const InstructionOperand& spill_op);
+  void SetUseHints(int register_index);
+  void UnsetUseHints() { SetUseHints(kUnassignedRegister); }
+  void ResetCurrentHintPosition() { current_hint_position_ = first_pos_; }
+
+  void Print(const RegisterConfiguration* config, bool with_children) const;
+  void Print(bool with_children) const;
+
+  void set_bundle(LiveRangeBundle* bundle) { bundle_ = bundle; }
+  LiveRangeBundle* get_bundle() const { return bundle_; }
+  bool RegisterFromBundle(int* hint) const;
+  void UpdateBundleRegister(int reg) const;
+
+ private:
+  friend class TopLevelLiveRange;
+  friend Zone;
+
+  explicit LiveRange(int relative_id, MachineRepresentation rep,
+                     TopLevelLiveRange* top_level);
+
+  void UpdateParentForAllChildren(TopLevelLiveRange* new_top_level);
+
+  void set_spilled(bool value) { bits_ = SpilledField::update(bits_, value); }
+
+  UseInterval* FirstSearchIntervalForPosition(LifetimePosition position) const;
+  void AdvanceLastProcessedMarker(UseInterval* to_start_of,
+                                  LifetimePosition but_not_past) const;
+
+  void VerifyPositions() const;
+  void VerifyIntervals() const;
+
+  using SpilledField = base::BitField<bool, 0, 1>;
+  // Bits [1,7[ are used by TopLevelLiveRange.
+  using AssignedRegisterField = base::BitField<int32_t, 7, 6>;
+  using RepresentationField = base::BitField<MachineRepresentation, 13, 8>;
+  using RecombineField = base::BitField<bool, 21, 1>;
+  using ControlFlowRegisterHint = base::BitField<uint8_t, 22, 6>;
+  // Bits 28-31 are used by TopLevelLiveRange.
+
+  // Unique among children of the same virtual register.
+  int relative_id_;
+  uint32_t bits_;
+  UseInterval* last_interval_;
+  UseInterval* first_interval_;
+  UsePosition* first_pos_;
+  TopLevelLiveRange* top_level_;
+  LiveRange* next_;
+  // This is used as a cache; it doesn't affect correctness.
+  mutable UseInterval* current_interval_;
+  // This is used as a cache; it doesn't affect correctness.
+  mutable UsePosition* last_processed_use_;
+  // This is used as a cache in BuildLiveRanges and during register allocation.
+  UsePosition* current_hint_position_;
+  LiveRangeBundle* bundle_ = nullptr;
+  // Next interval start, relative to the current linear scan position.
+  LifetimePosition next_start_;
+};
+
+struct LiveRangeOrdering {
+  bool operator()(const LiveRange* left, const LiveRange* right) const {
+    return left->Start() < right->Start();
+  }
+};
+class LiveRangeBundle : public ZoneObject {
+ public:
+  void MergeSpillRanges();
+
+  int id() { return id_; }
+
+  int reg() { return reg_; }
+
+  void set_reg(int reg) {
+    DCHECK_EQ(reg_, kUnassignedRegister);
+    reg_ = reg;
+  }
+
+ private:
+  friend class BundleBuilder;
+  friend Zone;
+
+  // Representation of the non-empty interval [start,end[.
+  class Range {
+   public:
+    Range(int s, int e) : start(s), end(e) {}
+    Range(LifetimePosition s, LifetimePosition e)
+        : start(s.value()), end(e.value()) {}
+    int start;
+    int end;
+  };
+
+  struct RangeOrdering {
+    bool operator()(const Range left, const Range right) const {
+      return left.start < right.start;
+    }
+  };
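+  // Illustrative only (not upstream documentation): with uses_ holding the
+  // half-open ranges {[0,4[, [8,12[}, an interval chain [4,8[ -> [12,16[ does
+  // not overlap (each comparison below advances one of the two cursors),
+  // whereas a single interval [10,14[ overlaps [8,12[ and the function
+  // returns true.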
+  bool UsesOverlap(UseInterval* interval) {
+    auto use = uses_.begin();
+    while (interval != nullptr && use != uses_.end()) {
+      if (use->end <= interval->start().value()) {
+        ++use;
+      } else if (interval->end().value() <= use->start) {
+        interval = interval->next();
+      } else {
+        return true;
+      }
+    }
+    return false;
+  }
+  void InsertUses(UseInterval* interval) {
+    while (interval != nullptr) {
+      auto done = uses_.insert({interval->start(), interval->end()});
+      USE(done);
+      DCHECK_EQ(done.second, 1);
+      interval = interval->next();
+    }
+  }
+  explicit LiveRangeBundle(Zone* zone, int id)
+      : ranges_(zone), uses_(zone), id_(id) {}
+
+  bool TryAddRange(LiveRange* range);
+  bool TryMerge(LiveRangeBundle* other, bool trace_alloc);
+
+  ZoneSet<LiveRange*, LiveRangeOrdering> ranges_;
+  ZoneSet<Range, RangeOrdering> uses_;
+  int id_;
+  int reg_ = kUnassignedRegister;
+};
+
+class V8_EXPORT_PRIVATE TopLevelLiveRange final : public LiveRange {
+ public:
+  explicit TopLevelLiveRange(int vreg, MachineRepresentation rep);
+  TopLevelLiveRange(const TopLevelLiveRange&) = delete;
+  TopLevelLiveRange& operator=(const TopLevelLiveRange&) = delete;
+
+  int spill_start_index() const { return spill_start_index_; }
+
+  bool IsFixed() const { return vreg_ < 0; }
+
+  bool IsDeferredFixed() const { return DeferredFixedField::decode(bits_); }
+  void set_deferred_fixed() { bits_ = DeferredFixedField::update(bits_, true); }
+  bool is_phi() const { return IsPhiField::decode(bits_); }
+  void set_is_phi(bool value) { bits_ = IsPhiField::update(bits_, value); }
+
+  bool is_non_loop_phi() const { return IsNonLoopPhiField::decode(bits_); }
+  bool is_loop_phi() const { return is_phi() && !is_non_loop_phi(); }
+  void set_is_non_loop_phi(bool value) {
+    bits_ = IsNonLoopPhiField::update(bits_, value);
+  }
+  bool SpillAtLoopHeaderNotBeneficial() const {
+    return SpillAtLoopHeaderNotBeneficialField::decode(bits_);
+  }
+  void set_spilling_at_loop_header_not_beneficial() {
+    bits_ = SpillAtLoopHeaderNotBeneficialField::update(bits_, true);
+  }
+
+  enum SlotUseKind { kNoSlotUse, kDeferredSlotUse, kGeneralSlotUse };
+
+  bool has_slot_use() const {
+    return slot_use_kind() > SlotUseKind::kNoSlotUse;
+  }
+
+  bool has_non_deferred_slot_use() const {
+    return slot_use_kind() == SlotUseKind::kGeneralSlotUse;
+  }
+
+  void reset_slot_use() {
+    bits_ = HasSlotUseField::update(bits_, SlotUseKind::kNoSlotUse);
+  }
+  void register_slot_use(SlotUseKind value) {
+    bits_ = HasSlotUseField::update(bits_, std::max(slot_use_kind(), value));
+  }
+  SlotUseKind slot_use_kind() const { return HasSlotUseField::decode(bits_); }
+
+  // Add a new interval or a new use position to this live range.
+  void EnsureInterval(LifetimePosition start, LifetimePosition end, Zone* zone,
+                      bool trace_alloc);
+  void AddUseInterval(LifetimePosition start, LifetimePosition end, Zone* zone,
+                      bool trace_alloc);
+  void AddUsePosition(UsePosition* pos, bool trace_alloc);
+
+  // Shorten the most recently added interval by setting a new start.
+  void ShortenTo(LifetimePosition start, bool trace_alloc);
+
+  // Spill range management.
+  void SetSpillRange(SpillRange* spill_range);
+
+  // Encodes whether a range is also available from a memory location:
+  //   kNoSpillType: not available in a memory location.
+  //   kSpillOperand: computed in a memory location at range start.
+  //   kSpillRange: copied (spilled) to memory location at the definition,
+  //                or at the beginning of some later blocks if
+  //                LateSpillingSelected() is true.
+  //   kDeferredSpillRange: copied (spilled) to memory location at entry
+  //                        to deferred blocks that have a use from memory.
+  //
+  // Ranges either start out at kSpillOperand, which is also their final
+  // state, or kNoSpillType. When spilled only in deferred code, a range
+  // ends up with kDeferredSpillRange, while when spilled in regular code,
+  // a range will be tagged as kSpillRange.
+  enum class SpillType {
+    kNoSpillType,
+    kSpillOperand,
+    kSpillRange,
+    kDeferredSpillRange
+  };
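+  // Editor's sketch of the SpillType transitions described above (not
+  // upstream text); note that GetSpillRange() and HasSpillRange() rely on the
+  // enumerator order kNoSpillType < kSpillOperand < kSpillRange <
+  // kDeferredSpillRange:
+  //
+  //   kNoSpillType --spilled in regular code--------------> kSpillRange
+  //   kNoSpillType --spilled only in deferred code--------> kDeferredSpillRange
+  //   kDeferredSpillRange --TransitionRangeToSpillAtDefinition()--> kSpillRange
+  //   kSpillOperand is set once (SetSpillOperand) and is also a final state.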
+  void set_spill_type(SpillType value) {
+    bits_ = SpillTypeField::update(bits_, value);
+  }
+  SpillType spill_type() const { return SpillTypeField::decode(bits_); }
+  InstructionOperand* GetSpillOperand() const {
+    DCHECK_EQ(SpillType::kSpillOperand, spill_type());
+    return spill_operand_;
+  }
+
+  SpillRange* GetAllocatedSpillRange() const {
+    DCHECK_NE(SpillType::kSpillOperand, spill_type());
+    return spill_range_;
+  }
+
+  SpillRange* GetSpillRange() const {
+    DCHECK_GE(spill_type(), SpillType::kSpillRange);
+    return spill_range_;
+  }
+  bool HasNoSpillType() const {
+    return spill_type() == SpillType::kNoSpillType;
+  }
+  bool HasSpillOperand() const {
+    return spill_type() == SpillType::kSpillOperand;
+  }
+  bool HasSpillRange() const { return spill_type() >= SpillType::kSpillRange; }
+  bool HasGeneralSpillRange() const {
+    return spill_type() == SpillType::kSpillRange;
+  }
+  AllocatedOperand GetSpillRangeOperand() const;
+
+  void RecordSpillLocation(Zone* zone, int gap_index,
+                           InstructionOperand* operand);
+  void SetSpillOperand(InstructionOperand* operand);
+  void SetSpillStartIndex(int start) {
+    spill_start_index_ = std::min(start, spill_start_index_);
+  }
+
+  // Omits any moves from spill_move_insertion_locations_ that can be skipped.
+  void FilterSpillMoves(TopTierRegisterAllocationData* data,
+                        const InstructionOperand& operand);
+
+  // Writes all moves from spill_move_insertion_locations_ to the schedule.
+  void CommitSpillMoves(TopTierRegisterAllocationData* data,
+                        const InstructionOperand& operand);
+
+  // If all the children of this range are spilled in deferred blocks, and if
+  // for any non-spilled child with a use position requiring a slot, that range
+  // is contained in a deferred block, mark the range as
+  // IsSpilledOnlyInDeferredBlocks, so that we avoid spilling at definition,
+  // and instead let the LiveRangeConnector perform the spills within the
+  // deferred blocks. If so, we insert spills here for non-spilled ranges
+  // with slot use positions.
+  void TreatAsSpilledInDeferredBlock(Zone* zone, int total_block_count) {
+    spill_start_index_ = -1;
+    spilled_in_deferred_blocks_ = true;
+    spill_move_insertion_locations_ = nullptr;
+    list_of_blocks_requiring_spill_operands_ =
+        zone->New<BitVector>(total_block_count, zone);
+  }
+
+  // Updates internal data structures to reflect that this range is not
+  // spilled at definition but instead spilled in some blocks only.
+  void TransitionRangeToDeferredSpill(Zone* zone, int total_block_count) {
+    spill_start_index_ = -1;
+    spill_move_insertion_locations_ = nullptr;
+    list_of_blocks_requiring_spill_operands_ =
+        zone->New<BitVector>(total_block_count, zone);
+  }
+
+  // Promotes this range to spill at definition if it was marked for spilling
+  // in deferred blocks before.
+  void TransitionRangeToSpillAtDefinition() {
+    DCHECK_NOT_NULL(spill_move_insertion_locations_);
+    if (spill_type() == SpillType::kDeferredSpillRange) {
+      set_spill_type(SpillType::kSpillRange);
+    }
+  }
+
+  bool MayRequireSpillRange() const {
+    return !HasSpillOperand() && spill_range_ == nullptr;
+  }
+  void UpdateSpillRangePostMerge(TopLevelLiveRange* merged);
+  int vreg() const { return vreg_; }
+
+  void Verify() const;
+  void VerifyChildrenInOrder() const;
+
+  // Returns the LiveRange covering the given position, or nullptr if no such
+  // range exists. Uses a linear search through child ranges. The range at the
+  // previously requested position is cached, so this function will be very fast
+  // if you call it with a non-decreasing sequence of positions.
+  LiveRange* GetChildCovers(LifetimePosition pos);
+
+  int GetNextChildId() { return ++last_child_id_; }
+
+  int GetMaxChildCount() const { return last_child_id_ + 1; }
+
+  bool IsSpilledOnlyInDeferredBlocks(
+      const TopTierRegisterAllocationData* data) const {
+    return spill_type() == SpillType::kDeferredSpillRange;
+  }
+
+  struct SpillMoveInsertionList;
+
+  SpillMoveInsertionList* GetSpillMoveInsertionLocations(
+      const TopTierRegisterAllocationData* data) const {
+    DCHECK(!IsSpilledOnlyInDeferredBlocks(data));
+    return spill_move_insertion_locations_;
+  }
+
+  void MarkHasPreassignedSlot() { has_preassigned_slot_ = true; }
+  bool has_preassigned_slot() const { return has_preassigned_slot_; }
+
+  // Late spilling refers to spilling at places after the definition. These
+  // spills are guaranteed to cover at least all of the sub-ranges where the
+  // register allocator chose to evict the value from a register.
+  void SetLateSpillingSelected(bool late_spilling_selected) {
+    DCHECK(spill_type() == SpillType::kSpillRange);
+    SpillRangeMode new_mode = late_spilling_selected
+                                  ? SpillRangeMode::kSpillLater
+                                  : SpillRangeMode::kSpillAtDefinition;
+    // A single TopLevelLiveRange should never be used in both modes.
+    DCHECK(SpillRangeModeField::decode(bits_) == SpillRangeMode::kNotSet ||
+           SpillRangeModeField::decode(bits_) == new_mode);
+    bits_ = SpillRangeModeField::update(bits_, new_mode);
+  }
+  bool LateSpillingSelected() const {
+    // Nobody should be reading this value until it's been decided.
+    DCHECK_IMPLIES(HasGeneralSpillRange(), SpillRangeModeField::decode(bits_) !=
+                                               SpillRangeMode::kNotSet);
+    return SpillRangeModeField::decode(bits_) == SpillRangeMode::kSpillLater;
+  }
+
+  void AddBlockRequiringSpillOperand(
+      RpoNumber block_id, const TopTierRegisterAllocationData* data) {
+    DCHECK(IsSpilledOnlyInDeferredBlocks(data));
+    GetListOfBlocksRequiringSpillOperands(data)->Add(block_id.ToInt());
+  }
+
+  BitVector* GetListOfBlocksRequiringSpillOperands(
+      const TopTierRegisterAllocationData* data) const {
+    DCHECK(IsSpilledOnlyInDeferredBlocks(data));
+    return list_of_blocks_requiring_spill_operands_;
+  }
+
+ private:
+  friend class LiveRange;
+
+  // If the spill type is kSpillRange, then this value indicates whether we've
+  // chosen to spill at the definition or at later points.
+  enum class SpillRangeMode : uint8_t {
+    kNotSet,
+    kSpillAtDefinition,
+    kSpillLater,
+  };
+
+  using HasSlotUseField = base::BitField<SlotUseKind, 1, 2>;
+  using IsPhiField = base::BitField<bool, 3, 1>;
+  using IsNonLoopPhiField = base::BitField<bool, 4, 1>;
+  using SpillTypeField = base::BitField<SpillType, 5, 2>;
+  using DeferredFixedField = base::BitField<bool, 28, 1>;
+  using SpillAtLoopHeaderNotBeneficialField = base::BitField<bool, 29, 1>;
+  using SpillRangeModeField = base::BitField<SpillRangeMode, 30, 2>;
+
+  int vreg_;
+  int last_child_id_;
+  union {
+    // Correct value determined by spill_type()
+    InstructionOperand* spill_operand_;
+    SpillRange* spill_range_;
+  };
+
+  union {
+    SpillMoveInsertionList* spill_move_insertion_locations_;
+    BitVector* list_of_blocks_requiring_spill_operands_;
+  };
+
+  // TODO(mtrofin): generalize spilling after definition, currently specialized
+  // just for spill in a single deferred block.
+  bool spilled_in_deferred_blocks_;
+  bool has_preassigned_slot_;
+
+  int spill_start_index_;
+  UsePosition* last_pos_;
+  LiveRange* last_child_covers_;
+};
+
+struct PrintableLiveRange {
+  const RegisterConfiguration* register_configuration_;
+  const LiveRange* range_;
+};
+
+std::ostream& operator<<(std::ostream& os,
+                         const PrintableLiveRange& printable_range);
+
+class SpillRange final : public ZoneObject {
+ public:
+  static const int kUnassignedSlot = -1;
+  SpillRange(TopLevelLiveRange* range, Zone* zone);
+  SpillRange(const SpillRange&) = delete;
+  SpillRange& operator=(const SpillRange&) = delete;
+
+  UseInterval* interval() const { return use_interval_; }
+
+  bool IsEmpty() const { return live_ranges_.empty(); }
+  bool TryMerge(SpillRange* other);
+  bool HasSlot() const { return assigned_slot_ != kUnassignedSlot; }
+
+  void set_assigned_slot(int index) {
+    DCHECK_EQ(kUnassignedSlot, assigned_slot_);
+    assigned_slot_ = index;
+  }
+  int assigned_slot() {
+    DCHECK_NE(kUnassignedSlot, assigned_slot_);
+    return assigned_slot_;
+  }
+  const ZoneVector<TopLevelLiveRange*>& live_ranges() const {
+    return live_ranges_;
+  }
+  ZoneVector<TopLevelLiveRange*>& live_ranges() { return live_ranges_; }
+  // Spill slots can be 4, 8, or 16 bytes wide.
+  int byte_width() const { return byte_width_; }
+  void Print() const;
+
+ private:
+  LifetimePosition End() const { return end_position_; }
+  bool IsIntersectingWith(SpillRange* other) const;
+  // Merge intervals, making sure the use intervals are sorted.
+  void MergeDisjointIntervals(UseInterval* other);
+
+  ZoneVector<TopLevelLiveRange*> live_ranges_;
+  UseInterval* use_interval_;
+  LifetimePosition end_position_;
+  int assigned_slot_;
+  int byte_width_;
+};
+
+class LiveRangeBound {
+ public:
+  explicit LiveRangeBound(LiveRange* range, bool skip)
+      : range_(range), start_(range->Start()), end_(range->End()), skip_(skip) {
+    DCHECK(!range->IsEmpty());
+  }
+  LiveRangeBound(const LiveRangeBound&) = delete;
+  LiveRangeBound& operator=(const LiveRangeBound&) = delete;
+
+  bool CanCover(LifetimePosition position) {
+    return start_ <= position && position < end_;
+  }
+
+  LiveRange* const range_;
+  const LifetimePosition start_;
+  const LifetimePosition end_;
+  const bool skip_;
+};
+
+struct FindResult {
+  LiveRange* cur_cover_;
+  LiveRange* pred_cover_;
+};
+
+class LiveRangeBoundArray {
+ public:
+  LiveRangeBoundArray() : length_(0), start_(nullptr) {}
+  LiveRangeBoundArray(const LiveRangeBoundArray&) = delete;
+  LiveRangeBoundArray& operator=(const LiveRangeBoundArray&) = delete;
+
+  bool ShouldInitialize() { return start_ == nullptr; }
+  void Initialize(Zone* zone, TopLevelLiveRange* range);
+  LiveRangeBound* Find(const LifetimePosition position) const;
+  LiveRangeBound* FindPred(const InstructionBlock* pred);
+  LiveRangeBound* FindSucc(const InstructionBlock* succ);
+  bool FindConnectableSubranges(const InstructionBlock* block,
+                                const InstructionBlock* pred,
+                                FindResult* result) const;
+
+ private:
+  size_t length_;
+  LiveRangeBound* start_;
+};
+
+class LiveRangeFinder {
+ public:
+  explicit LiveRangeFinder(const TopTierRegisterAllocationData* data,
+                           Zone* zone);
+  LiveRangeFinder(const LiveRangeFinder&) = delete;
+  LiveRangeFinder& operator=(const LiveRangeFinder&) = delete;
+
+  LiveRangeBoundArray* ArrayFor(int operand_index);
+
+ private:
+  const TopTierRegisterAllocationData* const data_;
+  const int bounds_length_;
+  LiveRangeBoundArray* const bounds_;
+  Zone* const zone_;
+};
+
+class ConstraintBuilder final : public ZoneObject {
+ public:
+  explicit ConstraintBuilder(TopTierRegisterAllocationData* data);
+  ConstraintBuilder(const ConstraintBuilder&) = delete;
+  ConstraintBuilder& operator=(const ConstraintBuilder&) = delete;
+
+  // Phase 1 : insert moves to account for fixed register operands.
+  void MeetRegisterConstraints();
+
+  // Phase 2: deconstruct SSA by inserting moves in successors and the headers
+  // of blocks containing phis.
+  void ResolvePhis();
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Zone* allocation_zone() const { return data()->allocation_zone(); }
+
+  InstructionOperand* AllocateFixed(UnallocatedOperand* operand, int pos,
+                                    bool is_tagged, bool is_input);
+  void MeetRegisterConstraints(const InstructionBlock* block);
+  void MeetConstraintsBefore(int index);
+  void MeetConstraintsAfter(int index);
+  void MeetRegisterConstraintsForLastInstructionInBlock(
+      const InstructionBlock* block);
+  void ResolvePhis(const InstructionBlock* block);
+
+  TopTierRegisterAllocationData* const data_;
+};
+
+class LiveRangeBuilder final : public ZoneObject {
+ public:
+  explicit LiveRangeBuilder(TopTierRegisterAllocationData* data,
+                            Zone* local_zone);
+  LiveRangeBuilder(const LiveRangeBuilder&) = delete;
+  LiveRangeBuilder& operator=(const LiveRangeBuilder&) = delete;
+
+  // Phase 3: compute liveness of all virtual registers.
+  void BuildLiveRanges();
+  static BitVector* ComputeLiveOut(const InstructionBlock* block,
+                                   TopTierRegisterAllocationData* data);
+
+ private:
+  using SpillMode = TopTierRegisterAllocationData::SpillMode;
+  static constexpr int kNumberOfFixedRangesPerRegister =
+      TopTierRegisterAllocationData::kNumberOfFixedRangesPerRegister;
+
+  TopTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Zone* allocation_zone() const { return data()->allocation_zone(); }
+  Zone* code_zone() const { return code()->zone(); }
+  const RegisterConfiguration* config() const { return data()->config(); }
+  ZoneVector<BitVector*>& live_in_sets() const {
+    return data()->live_in_sets();
+  }
+
+  // Verification.
+  void Verify() const;
+  bool IntervalStartsAtBlockBoundary(const UseInterval* interval) const;
+  bool IntervalPredecessorsCoveredByRange(const UseInterval* interval,
+                                          const TopLevelLiveRange* range) const;
+  bool NextIntervalStartsInDifferentBlocks(const UseInterval* interval) const;
+
+  // Liveness analysis support.
+  void AddInitialIntervals(const InstructionBlock* block, BitVector* live_out);
+  void ProcessInstructions(const InstructionBlock* block, BitVector* live);
+  void ProcessPhis(const InstructionBlock* block, BitVector* live);
+  void ProcessLoopHeader(const InstructionBlock* block, BitVector* live);
+
+  static int FixedLiveRangeID(int index) { return -index - 1; }
+  int FixedFPLiveRangeID(int index, MachineRepresentation rep);
+  TopLevelLiveRange* FixedLiveRangeFor(int index, SpillMode spill_mode);
+  TopLevelLiveRange* FixedFPLiveRangeFor(int index, MachineRepresentation rep,
+                                         SpillMode spill_mode);
+
+  void MapPhiHint(InstructionOperand* operand, UsePosition* use_pos);
+  void ResolvePhiHint(InstructionOperand* operand, UsePosition* use_pos);
+
+  UsePosition* NewUsePosition(LifetimePosition pos, InstructionOperand* operand,
+                              void* hint, UsePositionHintType hint_type);
+  UsePosition* NewUsePosition(LifetimePosition pos) {
+    return NewUsePosition(pos, nullptr, nullptr, UsePositionHintType::kNone);
+  }
+  TopLevelLiveRange* LiveRangeFor(InstructionOperand* operand,
+                                  SpillMode spill_mode);
+  // Helper methods for building intervals.
+  UsePosition* Define(LifetimePosition position, InstructionOperand* operand,
+                      void* hint, UsePositionHintType hint_type,
+                      SpillMode spill_mode);
+  void Define(LifetimePosition position, InstructionOperand* operand,
+              SpillMode spill_mode) {
+    Define(position, operand, nullptr, UsePositionHintType::kNone, spill_mode);
+  }
+  UsePosition* Use(LifetimePosition block_start, LifetimePosition position,
+                   InstructionOperand* operand, void* hint,
+                   UsePositionHintType hint_type, SpillMode spill_mode);
+  void Use(LifetimePosition block_start, LifetimePosition position,
+           InstructionOperand* operand, SpillMode spill_mode) {
+    Use(block_start, position, operand, nullptr, UsePositionHintType::kNone,
+        spill_mode);
+  }
+  SpillMode SpillModeForBlock(const InstructionBlock* block) const {
+    return block->IsDeferred() ? SpillMode::kSpillDeferred
+                               : SpillMode::kSpillAtDefinition;
+  }
+  TopTierRegisterAllocationData* const data_;
+  ZoneMap<InstructionOperand*, UsePosition*> phi_hints_;
+};
+
+class BundleBuilder final : public ZoneObject {
+ public:
+  explicit BundleBuilder(TopTierRegisterAllocationData* data) : data_(data) {}
+
+  void BuildBundles();
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data_->code(); }
+  TopTierRegisterAllocationData* data_;
+  int next_bundle_id_ = 0;
+};
+
+class RegisterAllocator : public ZoneObject {
+ public:
+  RegisterAllocator(TopTierRegisterAllocationData* data, RegisterKind kind);
+  RegisterAllocator(const RegisterAllocator&) = delete;
+  RegisterAllocator& operator=(const RegisterAllocator&) = delete;
+
+ protected:
+  using SpillMode = TopTierRegisterAllocationData::SpillMode;
+  TopTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  RegisterKind mode() const { return mode_; }
+  int num_registers() const { return num_registers_; }
+  int num_allocatable_registers() const { return num_allocatable_registers_; }
+  const int* allocatable_register_codes() const {
+    return allocatable_register_codes_;
+  }
+  // Returns true iff we must check float register aliasing.
+  bool check_fp_aliasing() const { return check_fp_aliasing_; }
+
+  // TODO(mtrofin): explain why splitting in gap START is always OK.
+  LifetimePosition GetSplitPositionForInstruction(const LiveRange* range,
+                                                  int instruction_index);
+
+  Zone* allocation_zone() const { return data()->allocation_zone(); }
+
+  // Find the optimal split for ranges defined by a memory operand, e.g.
+  // constants or function parameters passed on the stack.
+  void SplitAndSpillRangesDefinedByMemoryOperand();
+
+  // Split the given range at the given position.
+  // If the range starts at or after the given position, then the
+  // original range is returned.
+  // Otherwise returns the live range that starts at pos and contains
+  // all uses from the original range that follow pos. Uses at pos will
+  // still be owned by the original range after splitting.
+  LiveRange* SplitRangeAt(LiveRange* range, LifetimePosition pos);
+
+  bool CanProcessRange(LiveRange* range) const {
+    return range != nullptr && !range->IsEmpty() && range->kind() == mode();
+  }
+
+  // Split the given range at a position from the interval [start, end].
+  LiveRange* SplitBetween(LiveRange* range, LifetimePosition start,
+                          LifetimePosition end);
+
+  // Find a lifetime position in the interval [start, end] which
+  // is optimal for splitting: it is either the header of the outermost
+  // loop covered by this interval or the latest possible position.
+  LifetimePosition FindOptimalSplitPos(LifetimePosition start,
+                                       LifetimePosition end);
+
+  void Spill(LiveRange* range, SpillMode spill_mode);
+
+  // If we are trying to spill a range inside a loop, try to
+  // hoist the spill position out to the point just before the loop.
+  LifetimePosition FindOptimalSpillingPos(LiveRange* range,
+                                          LifetimePosition pos,
+                                          SpillMode spill_mode,
+                                          LiveRange** begin_spill_out);
+
+  const ZoneVector<TopLevelLiveRange*>& GetFixedRegisters() const;
+  const char* RegisterName(int allocation_index) const;
+
+ private:
+  TopTierRegisterAllocationData* const data_;
+  const RegisterKind mode_;
+  const int num_registers_;
+  int num_allocatable_registers_;
+  const int* allocatable_register_codes_;
+  bool check_fp_aliasing_;
+
+ private:
+  bool no_combining_;
+};
+
+class LinearScanAllocator final : public RegisterAllocator {
+ public:
+  LinearScanAllocator(TopTierRegisterAllocationData* data, RegisterKind kind,
+                      Zone* local_zone);
+  LinearScanAllocator(const LinearScanAllocator&) = delete;
+  LinearScanAllocator& operator=(const LinearScanAllocator&) = delete;
+
+  // Phase 4: compute register assignments.
+  void AllocateRegisters();
+
+ private:
+  struct RangeWithRegister {
+    TopLevelLiveRange* range;
+    int expected_register;
+    struct Hash {
+      size_t operator()(const RangeWithRegister item) const {
+        return item.range->vreg();
+      }
+    };
+    struct Equals {
+      bool operator()(const RangeWithRegister one,
+                      const RangeWithRegister two) const {
+        return one.range == two.range;
+      }
+    };
+
+    explicit RangeWithRegister(LiveRange* a_range)
+        : range(a_range->TopLevel()),
+          expected_register(a_range->assigned_register()) {}
+    RangeWithRegister(TopLevelLiveRange* toplevel, int reg)
+        : range(toplevel), expected_register(reg) {}
+  };
+
+  using RangeWithRegisterSet =
+      ZoneUnorderedSet<RangeWithRegister, RangeWithRegister::Hash,
+                       RangeWithRegister::Equals>;
+
+  void MaybeSpillPreviousRanges(LiveRange* begin_range,
+                                LifetimePosition begin_pos,
+                                LiveRange* end_range);
+  void MaybeUndoPreviousSplit(LiveRange* range);
+  void SpillNotLiveRanges(RangeWithRegisterSet* to_be_live,
+                          LifetimePosition position, SpillMode spill_mode);
+  LiveRange* AssignRegisterOnReload(LiveRange* range, int reg);
+  void ReloadLiveRanges(RangeWithRegisterSet const& to_be_live,
+                        LifetimePosition position);
+
+  void UpdateDeferredFixedRanges(SpillMode spill_mode, InstructionBlock* block);
+  bool BlockIsDeferredOrImmediatePredecessorIsNotDeferred(
+      const InstructionBlock* block);
+  bool HasNonDeferredPredecessor(InstructionBlock* block);
+
+  struct UnhandledLiveRangeOrdering {
+    bool operator()(const LiveRange* a, const LiveRange* b) const {
+      return a->ShouldBeAllocatedBefore(b);
+    }
+  };
+
+  struct InactiveLiveRangeOrdering {
+    bool operator()(const LiveRange* a, const LiveRange* b) const {
+      return a->NextStart() < b->NextStart();
+    }
+  };
+
+  using UnhandledLiveRangeQueue =
+      ZoneMultiset<LiveRange*, UnhandledLiveRangeOrdering>;
+  using InactiveLiveRangeQueue =
+      ZoneMultiset<LiveRange*, InactiveLiveRangeOrdering>;
+  UnhandledLiveRangeQueue& unhandled_live_ranges() {
+    return unhandled_live_ranges_;
+  }
+  ZoneVector<LiveRange*>& active_live_ranges() { return active_live_ranges_; }
+  InactiveLiveRangeQueue& inactive_live_ranges(int reg) {
+    return inactive_live_ranges_[reg];
+  }
+
+  void SetLiveRangeAssignedRegister(LiveRange* range, int reg);
+
+  // Helper methods for updating the live range lists.
+  void AddToActive(LiveRange* range);
+  void AddToInactive(LiveRange* range);
+  void AddToUnhandled(LiveRange* range);
+  ZoneVector<LiveRange*>::iterator ActiveToHandled(
+      ZoneVector<LiveRange*>::iterator it);
+  ZoneVector<LiveRange*>::iterator ActiveToInactive(
+      ZoneVector<LiveRange*>::iterator it, LifetimePosition position);
+  InactiveLiveRangeQueue::iterator InactiveToHandled(
+      InactiveLiveRangeQueue::iterator it);
+  InactiveLiveRangeQueue::iterator InactiveToActive(
+      InactiveLiveRangeQueue::iterator it, LifetimePosition position);
+
+  void ForwardStateTo(LifetimePosition position);
+
+  int LastDeferredInstructionIndex(InstructionBlock* start);
+
+  // Helper methods for choosing state after control flow events.
+
+  bool ConsiderBlockForControlFlow(InstructionBlock* current_block,
+                                   RpoNumber predecessor);
+  RpoNumber ChooseOneOfTwoPredecessorStates(InstructionBlock* current_block,
+                                            LifetimePosition boundary);
+  bool CheckConflict(MachineRepresentation rep, int reg,
+                     RangeWithRegisterSet* to_be_live);
+  void ComputeStateFromManyPredecessors(InstructionBlock* current_block,
+                                        RangeWithRegisterSet* to_be_live);
+
+  // Helper methods for allocating registers.
+  bool TryReuseSpillForPhi(TopLevelLiveRange* range);
+  int PickRegisterThatIsAvailableLongest(
+      LiveRange* current, int hint_reg,
+      const Vector<LifetimePosition>& free_until_pos);
+  bool TryAllocateFreeReg(LiveRange* range,
+                          const Vector<LifetimePosition>& free_until_pos);
+  bool TryAllocatePreferredReg(LiveRange* range,
+                               const Vector<LifetimePosition>& free_until_pos);
+  void GetFPRegisterSet(MachineRepresentation rep, int* num_regs,
+                        int* num_codes, const int** codes) const;
+  void FindFreeRegistersForRange(LiveRange* range,
+                                 Vector<LifetimePosition> free_until_pos);
+  void ProcessCurrentRange(LiveRange* current, SpillMode spill_mode);
+  void AllocateBlockedReg(LiveRange* range, SpillMode spill_mode);
+
+  // Spill the given live range after position pos.
+  void SpillAfter(LiveRange* range, LifetimePosition pos, SpillMode spill_mode);
+
+  // Spill the given live range after position [start] and up to position [end].
+  void SpillBetween(LiveRange* range, LifetimePosition start,
+                    LifetimePosition end, SpillMode spill_mode);
+
+  // Spill the given live range after position [start] and up to position [end].
+  // Range is guaranteed to be spilled at least until position [until].
+  void SpillBetweenUntil(LiveRange* range, LifetimePosition start,
+                         LifetimePosition until, LifetimePosition end,
+                         SpillMode spill_mode);
+  void SplitAndSpillIntersecting(LiveRange* range, SpillMode spill_mode);
+
+  void PrintRangeRow(std::ostream& os, const TopLevelLiveRange* toplevel);
+
+  void PrintRangeOverview(std::ostream& os);
+
+  UnhandledLiveRangeQueue unhandled_live_ranges_;
+  ZoneVector<LiveRange*> active_live_ranges_;
+  ZoneVector<InactiveLiveRangeQueue> inactive_live_ranges_;
+
+  // Approximate position at which the set of ranges will change next.
+  // Used to avoid scanning for updates when none are present.
+  LifetimePosition next_active_ranges_change_;
+  LifetimePosition next_inactive_ranges_change_;
+
+#ifdef DEBUG
+  LifetimePosition allocation_finger_;
+#endif
+};
+
+class OperandAssigner final : public ZoneObject {
+ public:
+  explicit OperandAssigner(TopTierRegisterAllocationData* data);
+  OperandAssigner(const OperandAssigner&) = delete;
+  OperandAssigner& operator=(const OperandAssigner&) = delete;
+
+  // Phase 5: final decision on spilling mode.
+  void DecideSpillingMode();
+
+  // Phase 6: assign spill slots.
+  void AssignSpillSlots();
+
+  // Phase 7: commit assignment.
+  void CommitAssignment();
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+
+  TopTierRegisterAllocationData* const data_;
+};
+
+class ReferenceMapPopulator final : public ZoneObject {
+ public:
+  explicit ReferenceMapPopulator(TopTierRegisterAllocationData* data);
+  ReferenceMapPopulator(const ReferenceMapPopulator&) = delete;
+  ReferenceMapPopulator& operator=(const ReferenceMapPopulator&) = delete;
+
+  // Phase 10: compute values for pointer maps.
+  void PopulateReferenceMaps();
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+
+  bool SafePointsAreInOrder() const;
+
+  TopTierRegisterAllocationData* const data_;
+};
+
+class LiveRangeBoundArray;
+// Insert moves of the form
+//
+//          Operand(child_(k+1)) = Operand(child_k)
+//
+// where child_k and child_(k+1) are consecutive children of a range (so
+// child_k->next() == child_(k+1)), and Operand(...) refers to the
+// assigned operand, be it a register or a slot.
+class LiveRangeConnector final : public ZoneObject {
+ public:
+  explicit LiveRangeConnector(TopTierRegisterAllocationData* data);
+  LiveRangeConnector(const LiveRangeConnector&) = delete;
+  LiveRangeConnector& operator=(const LiveRangeConnector&) = delete;
+
+  // Phase 8: reconnect split ranges with moves, when the control flow
+  // between the ranges is trivial (no branches).
+  void ConnectRanges(Zone* local_zone);
+
+  // Phase 9: insert moves to connect ranges across basic blocks, when the
+  // control flow between them cannot be trivially resolved, such as joining
+  // branches. Also determines whether to spill at the definition or later, and
+  // adds spill moves to the gaps in the schedule.
+  void ResolveControlFlow(Zone* local_zone);
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+  InstructionSequence* code() const { return data()->code(); }
+  Zone* code_zone() const { return code()->zone(); }
+
+  bool CanEagerlyResolveControlFlow(const InstructionBlock* block) const;
+
+  int ResolveControlFlow(const InstructionBlock* block,
+                         const InstructionOperand& cur_op,
+                         const InstructionBlock* pred,
+                         const InstructionOperand& pred_op);
+
+  void CommitSpillsInDeferredBlocks(TopLevelLiveRange* range,
+                                    LiveRangeBoundArray* array,
+                                    Zone* temp_zone);
+
+  TopTierRegisterAllocationData* const data_;
+};
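+
+// Rough sketch (an editor's addition, not upstream code) of how the numbered
+// phases declared above fit together; the real driver lives in the compiler
+// pipeline and runs the allocation phases per register kind:
+//
+//   ConstraintBuilder(data).MeetRegisterConstraints();            // phase 1
+//   ConstraintBuilder(data).ResolvePhis();                        // phase 2
+//   LiveRangeBuilder(data, zone).BuildLiveRanges();               // phase 3
+//   LinearScanAllocator(data, kind, zone).AllocateRegisters();    // phase 4
+//   OperandAssigner(data).DecideSpillingMode();                   // phase 5
+//   OperandAssigner(data).AssignSpillSlots();                     // phase 6
+//   OperandAssigner(data).CommitAssignment();                     // phase 7
+//   LiveRangeConnector(data).ConnectRanges(zone);                 // phase 8
+//   LiveRangeConnector(data).ResolveControlFlow(zone);            // phase 9
+//   ReferenceMapPopulator(data).PopulateReferenceMaps();          // phase 10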
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_REGISTER_ALLOCATOR_H_
diff --git a/src/compiler/backend/s390/code-generator-s390.cc b/src/compiler/backend/s390/code-generator-s390.cc
new file mode 100644
index 0000000..4b51bb7
--- /dev/null
+++ b/src/compiler/backend/s390/code-generator-s390.cc
@@ -0,0 +1,5000 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/codegen/assembler-inl.h"
+#include "src/codegen/callable.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+#define kScratchReg ip
+
+// Adds S390-specific methods to convert InstructionOperands.
+class S390OperandConverter final : public InstructionOperandConverter {
+ public:
+  S390OperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  size_t OutputCount() { return instr_->OutputCount(); }
+
+  bool Is64BitOperand(int index) {
+    return LocationOperand::cast(instr_->InputAt(index))->representation() ==
+           MachineRepresentation::kWord64;
+  }
+
+  bool Is32BitOperand(int index) {
+    return LocationOperand::cast(instr_->InputAt(index))->representation() ==
+           MachineRepresentation::kWord32;
+  }
+
+  bool CompareLogical() const {
+    switch (instr_->flags_condition()) {
+      case kUnsignedLessThan:
+      case kUnsignedGreaterThanOrEqual:
+      case kUnsignedLessThanOrEqual:
+      case kUnsignedGreaterThan:
+        return true;
+      default:
+        return false;
+    }
+    UNREACHABLE();
+  }
+
+  Operand InputImmediate(size_t index) {
+    Constant constant = ToConstant(instr_->InputAt(index));
+    switch (constant.type()) {
+      case Constant::kInt32:
+        return Operand(constant.ToInt32());
+      case Constant::kFloat32:
+        return Operand::EmbeddedNumber(constant.ToFloat32());
+      case Constant::kFloat64:
+        return Operand::EmbeddedNumber(constant.ToFloat64().value());
+      case Constant::kInt64:
+#if V8_TARGET_ARCH_S390X
+        return Operand(constant.ToInt64());
+#endif
+      case Constant::kExternalReference:
+        return Operand(constant.ToExternalReference());
+      case Constant::kDelayedStringConstant:
+        return Operand::EmbeddedStringConstant(
+            constant.ToDelayedStringConstant());
+      case Constant::kCompressedHeapObject:
+      case Constant::kHeapObject:
+      case Constant::kRpoNumber:
+        break;
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(AddressingMode* mode, size_t* first_index) {
+    const size_t index = *first_index;
+    if (mode) *mode = AddressingModeField::decode(instr_->opcode());
+    switch (AddressingModeField::decode(instr_->opcode())) {
+      case kMode_None:
+        break;
+      case kMode_MR:
+        *first_index += 1;
+        return MemOperand(InputRegister(index + 0), 0);
+      case kMode_MRI:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
+      case kMode_MRR:
+        *first_index += 2;
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
+      case kMode_MRRI:
+        *first_index += 3;
+        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
+                          InputInt32(index + 2));
+    }
+    UNREACHABLE();
+  }
+
+  MemOperand MemoryOperand(AddressingMode* mode = nullptr,
+                           size_t first_index = 0) {
+    return MemoryOperand(mode, &first_index);
+  }
+
+  MemOperand ToMemOperand(InstructionOperand* op) const {
+    DCHECK_NOT_NULL(op);
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand SlotToMemOperand(int slot) const {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
+    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
+  }
+
+  MemOperand InputStackSlot(size_t index) {
+    InstructionOperand* op = instr_->InputAt(index);
+    return SlotToMemOperand(AllocatedOperand::cast(op)->index());
+  }
+
+  MemOperand InputStackSlot32(size_t index) {
+#if V8_TARGET_ARCH_S390X && !V8_TARGET_LITTLE_ENDIAN
+    // We want to read the low 32 bits of the 64-bit stack slot directly from memory.
+    MemOperand mem = InputStackSlot(index);
+    return MemOperand(mem.rb(), mem.rx(), mem.offset() + 4);
+#else
+    return InputStackSlot(index);
+#endif
+  }
+};
+
+static inline bool HasRegisterOutput(Instruction* instr, int index = 0) {
+  return instr->OutputCount() > 0 && instr->OutputAt(index)->IsRegister();
+}
+
+static inline bool HasFPRegisterInput(Instruction* instr, int index) {
+  return instr->InputAt(index)->IsFPRegister();
+}
+
+static inline bool HasRegisterInput(Instruction* instr, int index) {
+  return instr->InputAt(index)->IsRegister() ||
+         HasFPRegisterInput(instr, index);
+}
+
+static inline bool HasImmediateInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsImmediate();
+}
+
+static inline bool HasFPStackSlotInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsFPStackSlot();
+}
+
+static inline bool HasStackSlotInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsStackSlot() ||
+         HasFPStackSlotInput(instr, index);
+}
+
+namespace {
+
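+// Out-of-line code for the record-write (GC write) barrier: it checks whether
+// the barrier can be skipped, computes the address of the written slot and
+// calls the appropriate record-write stub, saving and restoring r14 when the
+// frame was elided.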
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register offset,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(offset),
+        offset_immediate_(0),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, int32_t offset,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode,
+                       UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        object_(object),
+        offset_(no_reg),
+        offset_immediate_(offset),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        must_save_lr_(!gen->frame_access_state()->has_frame()),
+        unwinding_info_writer_(unwinding_info_writer),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    if (COMPRESS_POINTERS_BOOL) {
+      __ DecompressTaggedPointer(value_, value_);
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, eq,
+                     exit());
+    if (offset_ == no_reg) {
+      __ AddP(scratch1_, object_, Operand(offset_immediate_));
+    } else {
+      DCHECK_EQ(0, offset_immediate_);
+      __ AddP(scratch1_, object_, offset_);
+    }
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+    if (must_save_lr_) {
+      // We need to save and restore r14 if the frame was elided.
+      __ Push(r14);
+      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
+    }
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+    if (must_save_lr_) {
+      // We need to save and restore r14 if the frame was elided.
+      __ Pop(r14);
+      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
+    }
+  }
+
+ private:
+  Register const object_;
+  Register const offset_;
+  int32_t const offset_immediate_;  // Valid if offset_ == no_reg.
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode stub_mode_;
+  bool must_save_lr_;
+  UnwindingInfoWriter* const unwinding_info_writer_;
+  Zone* zone_;
+};
+
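+// Maps a FlagsCondition to the S390 condition to branch on. For the
+// load-and-test opcodes, which compare the value against zero, the unsigned
+// comparisons degenerate into never / always / equal / not-equal.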
+Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) {
+  switch (condition) {
+    case kEqual:
+      return eq;
+    case kNotEqual:
+      return ne;
+    case kUnsignedLessThan:
+      // An unsigned number is never less than 0, so this condition never holds.
+      if (op == kS390_LoadAndTestWord32 || op == kS390_LoadAndTestWord64)
+        return CC_NOP;
+      V8_FALLTHROUGH;
+    case kSignedLessThan:
+      return lt;
+    case kUnsignedGreaterThanOrEqual:
+      // An unsigned number is always greater than or equal to 0, so this condition always holds.
+      if (op == kS390_LoadAndTestWord32 || op == kS390_LoadAndTestWord64)
+        return CC_ALWAYS;
+      V8_FALLTHROUGH;
+    case kSignedGreaterThanOrEqual:
+      return ge;
+    case kUnsignedLessThanOrEqual:
+      // An unsigned number is never less than 0, so <= 0 is equivalent to == 0.
+      if (op == kS390_LoadAndTestWord32 || op == kS390_LoadAndTestWord64)
+        return CC_EQ;
+      V8_FALLTHROUGH;
+    case kSignedLessThanOrEqual:
+      return le;
+    case kUnsignedGreaterThan:
+      // An unsigned number is always greater than or equal to 0, so > 0 is equivalent to != 0.
+      if (op == kS390_LoadAndTestWord32 || op == kS390_LoadAndTestWord64)
+        return ne;
+      V8_FALLTHROUGH;
+    case kSignedGreaterThan:
+      return gt;
+    case kOverflow:
+      // Overflow is only checked for the add, sub, abs and mul operations below.
+      switch (op) {
+        case kS390_Add32:
+        case kS390_Add64:
+        case kS390_Sub32:
+        case kS390_Sub64:
+        case kS390_Abs64:
+        case kS390_Abs32:
+        case kS390_Mul32:
+          return overflow;
+        default:
+          break;
+      }
+      break;
+    case kNotOverflow:
+      switch (op) {
+        case kS390_Add32:
+        case kS390_Add64:
+        case kS390_Sub32:
+        case kS390_Sub64:
+        case kS390_Abs64:
+        case kS390_Abs32:
+        case kS390_Mul32:
+          return nooverflow;
+        default:
+          break;
+      }
+      break;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
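+// The GET_MEMOPERAND* helpers build a MemOperand either from the encoded
+// addressing mode or, when none is encoded, from a spilled stack slot;
+// `ret` is updated to the first input index past a decoded memory operand.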
+#define GET_MEMOPERAND32(ret, fi)                                       \
+  ([&](int& ret) {                                                      \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    MemOperand mem(r0);                                                 \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = (fi);                                        \
+      mem = i.MemoryOperand(&mode, &first_index);                       \
+      ret = first_index;                                                \
+    } else {                                                            \
+      mem = i.InputStackSlot32(fi);                                     \
+    }                                                                   \
+    return mem;                                                         \
+  })(ret)
+
+#define GET_MEMOPERAND(ret, fi)                                         \
+  ([&](int& ret) {                                                      \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    MemOperand mem(r0);                                                 \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = (fi);                                        \
+      mem = i.MemoryOperand(&mode, &first_index);                       \
+      ret = first_index;                                                \
+    } else {                                                            \
+      mem = i.InputStackSlot(fi);                                       \
+    }                                                                   \
+    return mem;                                                         \
+  })(ret)
+
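+// Each *Instr helper below emits a single operation and returns an input
+// index; on 64-bit targets ASSEMBLE_BIN32_OP passes that index to
+// CHECK_AND_ZERO_EXT_OUTPUT to locate the zero-extension flag.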
+#define RRInstr(instr)                                \
+  [&]() {                                             \
+    DCHECK(i.OutputRegister() == i.InputRegister(0)); \
+    __ instr(i.OutputRegister(), i.InputRegister(1)); \
+    return 2;                                         \
+  }
+#define RIInstr(instr)                                 \
+  [&]() {                                              \
+    DCHECK(i.OutputRegister() == i.InputRegister(0));  \
+    __ instr(i.OutputRegister(), i.InputImmediate(1)); \
+    return 2;                                          \
+  }
+#define RMInstr(instr, GETMEM)                        \
+  [&]() {                                             \
+    DCHECK(i.OutputRegister() == i.InputRegister(0)); \
+    int ret = 2;                                      \
+    __ instr(i.OutputRegister(), GETMEM(ret, 1));     \
+    return ret;                                       \
+  }
+#define RM32Instr(instr) RMInstr(instr, GET_MEMOPERAND32)
+#define RM64Instr(instr) RMInstr(instr, GET_MEMOPERAND)
+
+#define RRRInstr(instr)                                                   \
+  [&]() {                                                                 \
+    __ instr(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); \
+    return 2;                                                             \
+  }
+#define RRIInstr(instr)                                                    \
+  [&]() {                                                                  \
+    __ instr(i.OutputRegister(), i.InputRegister(0), i.InputImmediate(1)); \
+    return 2;                                                              \
+  }
+#define RRMInstr(instr, GETMEM)                                       \
+  [&]() {                                                             \
+    int ret = 2;                                                      \
+    __ instr(i.OutputRegister(), i.InputRegister(0), GETMEM(ret, 1)); \
+    return ret;                                                       \
+  }
+#define RRM32Instr(instr) RRMInstr(instr, GET_MEMOPERAND32)
+#define RRM64Instr(instr) RRMInstr(instr, GET_MEMOPERAND)
+
+#define DDInstr(instr)                                            \
+  [&]() {                                                         \
+    DCHECK(i.OutputDoubleRegister() == i.InputDoubleRegister(0)); \
+    __ instr(i.OutputDoubleRegister(), i.InputDoubleRegister(1)); \
+    return 2;                                                     \
+  }
+
+#define DMInstr(instr)                                            \
+  [&]() {                                                         \
+    DCHECK(i.OutputDoubleRegister() == i.InputDoubleRegister(0)); \
+    int ret = 2;                                                  \
+    __ instr(i.OutputDoubleRegister(), GET_MEMOPERAND(ret, 1));   \
+    return ret;                                                   \
+  }
+
+#define DMTInstr(instr)                                           \
+  [&]() {                                                         \
+    DCHECK(i.OutputDoubleRegister() == i.InputDoubleRegister(0)); \
+    int ret = 2;                                                  \
+    __ instr(i.OutputDoubleRegister(), GET_MEMOPERAND(ret, 1),    \
+             kScratchDoubleReg);                                  \
+    return ret;                                                   \
+  }
+
+#define R_MInstr(instr)                                   \
+  [&]() {                                                 \
+    int ret = 2;                                          \
+    __ instr(i.OutputRegister(), GET_MEMOPERAND(ret, 0)); \
+    return ret;                                           \
+  }
+
+#define R_DInstr(instr)                                     \
+  [&]() {                                                   \
+    __ instr(i.OutputRegister(), i.InputDoubleRegister(0)); \
+    return 2;                                               \
+  }
+
+#define D_DInstr(instr)                                           \
+  [&]() {                                                         \
+    __ instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
+    return 2;                                                     \
+  }
+
+#define D_MInstr(instr)                                         \
+  [&]() {                                                       \
+    int ret = 2;                                                \
+    __ instr(i.OutputDoubleRegister(), GET_MEMOPERAND(ret, 0)); \
+    return ret;                                                 \
+  }
+
+#define D_MTInstr(instr)                                       \
+  [&]() {                                                      \
+    int ret = 2;                                               \
+    __ instr(i.OutputDoubleRegister(), GET_MEMOPERAND(ret, 0), \
+             kScratchDoubleReg);                               \
+    return ret;                                                \
+  }
+
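+// AssembleOp dispatches to the memory, register or immediate variant of an
+// operation, based on the encoded addressing mode and the kind of the last
+// input operand.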
+static int nullInstr() { UNREACHABLE(); }
+
+template <int numOfOperand, class RType, class MType, class IType>
+static inline int AssembleOp(Instruction* instr, RType r, MType m, IType i) {
+  AddressingMode mode = AddressingModeField::decode(instr->opcode());
+  if (mode != kMode_None || HasStackSlotInput(instr, numOfOperand - 1)) {
+    return m();
+  } else if (HasRegisterInput(instr, numOfOperand - 1)) {
+    return r();
+  } else if (HasImmediateInput(instr, numOfOperand - 1)) {
+    return i();
+  } else {
+    UNREACHABLE();
+  }
+}
+
+template <class _RR, class _RM, class _RI>
+static inline int AssembleBinOp(Instruction* instr, _RR _rr, _RM _rm, _RI _ri) {
+  return AssembleOp<2>(instr, _rr, _rm, _ri);
+}
+
+template <class _R, class _M, class _I>
+static inline int AssembleUnaryOp(Instruction* instr, _R _r, _M _m, _I _i) {
+  return AssembleOp<1>(instr, _r, _m, _i);
+}
+
+#define ASSEMBLE_BIN_OP(_rr, _rm, _ri) AssembleBinOp(instr, _rr, _rm, _ri)
+#define ASSEMBLE_UNARY_OP(_r, _m, _i) AssembleUnaryOp(instr, _r, _m, _i)
+
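+// On 64-bit S390, 32-bit operations may leave stale bits in the upper half of
+// the destination register, so a trailing immediate input can request an
+// explicit zero-extending reload (LoadlW) of the 32-bit result.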
+#ifdef V8_TARGET_ARCH_S390X
+#define CHECK_AND_ZERO_EXT_OUTPUT(num)                                \
+  ([&](int index) {                                                   \
+    DCHECK(HasImmediateInput(instr, (index)));                        \
+    int doZeroExt = i.InputInt32(index);                              \
+    if (doZeroExt) __ LoadlW(i.OutputRegister(), i.OutputRegister()); \
+  })(num)
+
+#define ASSEMBLE_BIN32_OP(_rr, _rm, _ri) \
+  { CHECK_AND_ZERO_EXT_OUTPUT(AssembleBinOp(instr, _rr, _rm, _ri)); }
+#else
+#define ASSEMBLE_BIN32_OP ASSEMBLE_BIN_OP
+#define CHECK_AND_ZERO_EXT_OUTPUT(num)
+#endif
+
+}  // namespace
+
+#define ASSEMBLE_FLOAT_UNOP(asm_instr)                                \
+  do {                                                                \
+    __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_BINOP(asm_instr)                              \
+  do {                                                               \
+    __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
+                 i.InputDoubleRegister(1));                          \
+  } while (0)
+
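+// Emits a compare against a memory, register, immediate or stack-slot operand,
+// using the logical (unsigned) compare when the flags condition is unsigned.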
+#define ASSEMBLE_COMPARE(cmp_instr, cmpl_instr)                         \
+  do {                                                                  \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = 1;                                           \
+      MemOperand operand = i.MemoryOperand(&mode, &first_index);        \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), operand);                     \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), operand);                      \
+      }                                                                 \
+    } else if (HasRegisterInput(instr, 1)) {                            \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputRegister(1));          \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputRegister(1));           \
+      }                                                                 \
+    } else if (HasImmediateInput(instr, 1)) {                           \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputImmediate(1));         \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputImmediate(1));          \
+      }                                                                 \
+    } else {                                                            \
+      DCHECK(HasStackSlotInput(instr, 1));                              \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputStackSlot(1));         \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputStackSlot(1));          \
+      }                                                                 \
+    }                                                                   \
+  } while (0)
+
+#define ASSEMBLE_COMPARE32(cmp_instr, cmpl_instr)                       \
+  do {                                                                  \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = 1;                                           \
+      MemOperand operand = i.MemoryOperand(&mode, &first_index);        \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), operand);                     \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), operand);                      \
+      }                                                                 \
+    } else if (HasRegisterInput(instr, 1)) {                            \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputRegister(1));          \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputRegister(1));           \
+      }                                                                 \
+    } else if (HasImmediateInput(instr, 1)) {                           \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputImmediate(1));         \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputImmediate(1));          \
+      }                                                                 \
+    } else {                                                            \
+      DCHECK(HasStackSlotInput(instr, 1));                              \
+      if (i.CompareLogical()) {                                         \
+        __ cmpl_instr(i.InputRegister(0), i.InputStackSlot32(1));       \
+      } else {                                                          \
+        __ cmp_instr(i.InputRegister(0), i.InputStackSlot32(1));        \
+      }                                                                 \
+    }                                                                   \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_COMPARE(cmp_rr_instr, cmp_rm_instr, load_instr)     \
+  do {                                                                     \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode());    \
+    if (mode != kMode_None) {                                              \
+      size_t first_index = 1;                                              \
+      MemOperand operand = i.MemoryOperand(&mode, &first_index);           \
+      __ cmp_rm_instr(i.InputDoubleRegister(0), operand);                  \
+    } else if (HasFPRegisterInput(instr, 1)) {                             \
+      __ cmp_rr_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
+    } else {                                                               \
+      USE(HasFPStackSlotInput);                                            \
+      DCHECK(HasFPStackSlotInput(instr, 1));                               \
+      MemOperand operand = i.InputStackSlot(1);                            \
+      if (operand.offset() >= 0) {                                         \
+        __ cmp_rm_instr(i.InputDoubleRegister(0), operand);                \
+      } else {                                                             \
+        __ load_instr(kScratchDoubleReg, operand);                         \
+        __ cmp_rr_instr(i.InputDoubleRegister(0), kScratchDoubleReg);      \
+      }                                                                    \
+    }                                                                      \
+  } while (0)
+
+// The divide instruction dr implicitly uses the register pair r0 & r1 below:
+// R0:R1 = R1 / divisor, with the remainder left in R0.
+// Copy the remainder to the output register.
+#define ASSEMBLE_MODULO(div_instr, shift_instr) \
+  do {                                          \
+    __ LoadRR(r0, i.InputRegister(0));          \
+    __ shift_instr(r0, Operand(32));            \
+    __ div_instr(r0, i.InputRegister(1));       \
+    __ LoadlW(i.OutputRegister(), r0);          \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MODULO()                                             \
+  do {                                                                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                             \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                       \
+                            i.InputDoubleRegister(1));                      \
+    __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2); \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                        \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                            \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 1, kScratchReg);                                \
+    __ MovToFloatParameter(i.InputDoubleRegister(0));                          \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1);    \
+    /* Move the result in the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+  } while (0)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                           \
+  do {                                                                         \
+    /* TODO(bmeurer): We should really get rid of this special instruction, */ \
+    /* and generate a CallAddress instruction instead. */                      \
+    FrameScope scope(tasm(), StackFrame::MANUAL);                              \
+    __ PrepareCallCFunction(0, 2, kScratchReg);                                \
+    __ MovToFloatParameters(i.InputDoubleRegister(0),                          \
+                            i.InputDoubleRegister(1));                         \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2);    \
+    /* Move the result in the double result register. */                       \
+    __ MovFromFloatResult(i.OutputDoubleRegister());                           \
+  } while (0)
+
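+// The floating-point max/min macros below handle NaN propagation and the
+// +0/-0 cases explicitly, since a plain compare-and-branch cannot distinguish
+// +0 from -0.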
+#define ASSEMBLE_DOUBLE_MAX()                                           \
+  do {                                                                  \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                 \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                \
+    DoubleRegister result_reg = i.OutputDoubleRegister();               \
+    Label check_zero, return_left, return_right, return_nan, done;      \
+    __ cdbr(left_reg, right_reg);                                       \
+    __ bunordered(&return_nan, Label::kNear);                           \
+    __ beq(&check_zero);                                                \
+    __ bge(&return_left, Label::kNear);                                 \
+    __ b(&return_right, Label::kNear);                                  \
+                                                                        \
+    __ bind(&check_zero);                                               \
+    __ lzdr(kDoubleRegZero);                                            \
+    __ cdbr(left_reg, kDoubleRegZero);                                  \
+    /* left == right != 0. */                                           \
+    __ bne(&return_left, Label::kNear);                                 \
+    /* At this point, both left and right are either 0 or -0. */        \
+    /* N.B. The following works because +0 + -0 == +0 */                \
+    /* For max we want logical-and of sign bit: (L + R) */              \
+    __ ldr(result_reg, left_reg);                                       \
+    __ adbr(result_reg, right_reg);                                     \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_nan);                                               \
+    /* If left or right are NaN, adbr propagates the appropriate one.*/ \
+    __ adbr(left_reg, right_reg);                                       \
+    __ b(&return_left, Label::kNear);                                   \
+                                                                        \
+    __ bind(&return_right);                                             \
+    if (right_reg != result_reg) {                                      \
+      __ ldr(result_reg, right_reg);                                    \
+    }                                                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_left);                                              \
+    if (left_reg != result_reg) {                                       \
+      __ ldr(result_reg, left_reg);                                     \
+    }                                                                   \
+    __ bind(&done);                                                     \
+  } while (0)
+
+#define ASSEMBLE_DOUBLE_MIN()                                           \
+  do {                                                                  \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                 \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                \
+    DoubleRegister result_reg = i.OutputDoubleRegister();               \
+    Label check_zero, return_left, return_right, return_nan, done;      \
+    __ cdbr(left_reg, right_reg);                                       \
+    __ bunordered(&return_nan, Label::kNear);                           \
+    __ beq(&check_zero);                                                \
+    __ ble(&return_left, Label::kNear);                                 \
+    __ b(&return_right, Label::kNear);                                  \
+                                                                        \
+    __ bind(&check_zero);                                               \
+    __ lzdr(kDoubleRegZero);                                            \
+    __ cdbr(left_reg, kDoubleRegZero);                                  \
+    /* left == right != 0. */                                           \
+    __ bne(&return_left, Label::kNear);                                 \
+    /* At this point, both left and right are either 0 or -0. */        \
+    /* N.B. The following works because +0 + -0 == +0 */                \
+    /* For min we want logical-or of sign bit: -(-L + -R) */            \
+    __ lcdbr(left_reg, left_reg);                                       \
+    __ ldr(result_reg, left_reg);                                       \
+    if (left_reg == right_reg) {                                        \
+      __ adbr(result_reg, right_reg);                                   \
+    } else {                                                            \
+      __ sdbr(result_reg, right_reg);                                   \
+    }                                                                   \
+    __ lcdbr(result_reg, result_reg);                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_nan);                                               \
+    /* If left or right are NaN, adbr propagates the appropriate one.*/ \
+    __ adbr(left_reg, right_reg);                                       \
+    __ b(&return_left, Label::kNear);                                   \
+                                                                        \
+    __ bind(&return_right);                                             \
+    if (right_reg != result_reg) {                                      \
+      __ ldr(result_reg, right_reg);                                    \
+    }                                                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_left);                                              \
+    if (left_reg != result_reg) {                                       \
+      __ ldr(result_reg, left_reg);                                     \
+    }                                                                   \
+    __ bind(&done);                                                     \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MAX()                                            \
+  do {                                                                  \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                 \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                \
+    DoubleRegister result_reg = i.OutputDoubleRegister();               \
+    Label check_zero, return_left, return_right, return_nan, done;      \
+    __ cebr(left_reg, right_reg);                                       \
+    __ bunordered(&return_nan, Label::kNear);                           \
+    __ beq(&check_zero);                                                \
+    __ bge(&return_left, Label::kNear);                                 \
+    __ b(&return_right, Label::kNear);                                  \
+                                                                        \
+    __ bind(&check_zero);                                               \
+    __ lzdr(kDoubleRegZero);                                            \
+    __ cebr(left_reg, kDoubleRegZero);                                  \
+    /* left == right != 0. */                                           \
+    __ bne(&return_left, Label::kNear);                                 \
+    /* At this point, both left and right are either 0 or -0. */        \
+    /* N.B. The following works because +0 + -0 == +0 */                \
+    /* For max we want logical-and of sign bit: (L + R) */              \
+    __ ldr(result_reg, left_reg);                                       \
+    __ aebr(result_reg, right_reg);                                     \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_nan);                                               \
+    /* If left or right are NaN, aebr propagates the appropriate one.*/ \
+    __ aebr(left_reg, right_reg);                                       \
+    __ b(&return_left, Label::kNear);                                   \
+                                                                        \
+    __ bind(&return_right);                                             \
+    if (right_reg != result_reg) {                                      \
+      __ ldr(result_reg, right_reg);                                    \
+    }                                                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_left);                                              \
+    if (left_reg != result_reg) {                                       \
+      __ ldr(result_reg, left_reg);                                     \
+    }                                                                   \
+    __ bind(&done);                                                     \
+  } while (0)
+
+#define ASSEMBLE_FLOAT_MIN()                                            \
+  do {                                                                  \
+    DoubleRegister left_reg = i.InputDoubleRegister(0);                 \
+    DoubleRegister right_reg = i.InputDoubleRegister(1);                \
+    DoubleRegister result_reg = i.OutputDoubleRegister();               \
+    Label check_zero, return_left, return_right, return_nan, done;      \
+    __ cebr(left_reg, right_reg);                                       \
+    __ bunordered(&return_nan, Label::kNear);                           \
+    __ beq(&check_zero);                                                \
+    __ ble(&return_left, Label::kNear);                                 \
+    __ b(&return_right, Label::kNear);                                  \
+                                                                        \
+    __ bind(&check_zero);                                               \
+    __ lzdr(kDoubleRegZero);                                            \
+    __ cebr(left_reg, kDoubleRegZero);                                  \
+    /* left == right != 0. */                                           \
+    __ bne(&return_left, Label::kNear);                                 \
+    /* At this point, both left and right are either 0 or -0. */        \
+    /* N.B. The following works because +0 + -0 == +0 */                \
+    /* For min we want logical-or of sign bit: -(-L + -R) */            \
+    __ lcebr(left_reg, left_reg);                                       \
+    __ ldr(result_reg, left_reg);                                       \
+    if (left_reg == right_reg) {                                        \
+      __ aebr(result_reg, right_reg);                                   \
+    } else {                                                            \
+      __ sebr(result_reg, right_reg);                                   \
+    }                                                                   \
+    __ lcebr(result_reg, result_reg);                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_nan);                                               \
+    /* If left or right are NaN, aebr propagates the appropriate one.*/ \
+    __ aebr(left_reg, right_reg);                                       \
+    __ b(&return_left, Label::kNear);                                   \
+                                                                        \
+    __ bind(&return_right);                                             \
+    if (right_reg != result_reg) {                                      \
+      __ ldr(result_reg, right_reg);                                    \
+    }                                                                   \
+    __ b(&done, Label::kNear);                                          \
+                                                                        \
+    __ bind(&return_left);                                              \
+    if (left_reg != result_reg) {                                       \
+      __ ldr(result_reg, left_reg);                                     \
+    }                                                                   \
+    __ bind(&done);                                                     \
+  } while (0)
+// Only the MRI addressing mode is available for these instructions.
+#define ASSEMBLE_LOAD_FLOAT(asm_instr)                \
+  do {                                                \
+    DoubleRegister result = i.OutputDoubleRegister(); \
+    AddressingMode mode = kMode_None;                 \
+    MemOperand operand = i.MemoryOperand(&mode);      \
+    __ asm_instr(result, operand);                    \
+  } while (0)
+
+#define ASSEMBLE_LOAD_INTEGER(asm_instr)         \
+  do {                                           \
+    Register result = i.OutputRegister();        \
+    AddressingMode mode = kMode_None;            \
+    MemOperand operand = i.MemoryOperand(&mode); \
+    __ asm_instr(result, operand);               \
+  } while (0)
+
+#define ASSEMBLE_LOADANDTEST64(asm_instr_rr, asm_instr_rm)              \
+  {                                                                     \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    Register dst = HasRegisterOutput(instr) ? i.OutputRegister() : r0;  \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = 0;                                           \
+      MemOperand operand = i.MemoryOperand(&mode, &first_index);        \
+      __ asm_instr_rm(dst, operand);                                    \
+    } else if (HasRegisterInput(instr, 0)) {                            \
+      __ asm_instr_rr(dst, i.InputRegister(0));                         \
+    } else {                                                            \
+      DCHECK(HasStackSlotInput(instr, 0));                              \
+      __ asm_instr_rm(dst, i.InputStackSlot(0));                        \
+    }                                                                   \
+  }
+
+#define ASSEMBLE_LOADANDTEST32(asm_instr_rr, asm_instr_rm)              \
+  {                                                                     \
+    AddressingMode mode = AddressingModeField::decode(instr->opcode()); \
+    Register dst = HasRegisterOutput(instr) ? i.OutputRegister() : r0;  \
+    if (mode != kMode_None) {                                           \
+      size_t first_index = 0;                                           \
+      MemOperand operand = i.MemoryOperand(&mode, &first_index);        \
+      __ asm_instr_rm(dst, operand);                                    \
+    } else if (HasRegisterInput(instr, 0)) {                            \
+      __ asm_instr_rr(dst, i.InputRegister(0));                         \
+    } else {                                                            \
+      DCHECK(HasStackSlotInput(instr, 0));                              \
+      __ asm_instr_rm(dst, i.InputStackSlot32(0));                      \
+    }                                                                   \
+  }
+
+#define ASSEMBLE_STORE_FLOAT32()                         \
+  do {                                                   \
+    size_t index = 0;                                    \
+    AddressingMode mode = kMode_None;                    \
+    MemOperand operand = i.MemoryOperand(&mode, &index); \
+    DoubleRegister value = i.InputDoubleRegister(index); \
+    __ StoreFloat32(value, operand);                     \
+  } while (0)
+
+#define ASSEMBLE_STORE_DOUBLE()                          \
+  do {                                                   \
+    size_t index = 0;                                    \
+    AddressingMode mode = kMode_None;                    \
+    MemOperand operand = i.MemoryOperand(&mode, &index); \
+    DoubleRegister value = i.InputDoubleRegister(index); \
+    __ StoreDouble(value, operand);                      \
+  } while (0)
+
+#define ASSEMBLE_STORE_INTEGER(asm_instr)                \
+  do {                                                   \
+    size_t index = 0;                                    \
+    AddressingMode mode = kMode_None;                    \
+    MemOperand operand = i.MemoryOperand(&mode, &index); \
+    Register value = i.InputRegister(index);             \
+    __ asm_instr(value, operand);                        \
+  } while (0)
+
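+// CmpAndSwap operates on aligned 32-bit words, so the byte and halfword
+// compare-exchange variants rotate the expected and new values into the
+// addressed lane of the containing word and rotate the old contents back out.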
+#define ATOMIC_COMP_EXCHANGE(start, end, shift_amount, offset)              \
+  {                                                                         \
+    __ LoadlW(temp0, MemOperand(addr, offset));                             \
+    __ llgfr(temp1, temp0);                                                 \
+    __ RotateInsertSelectBits(temp0, old_val, Operand(start), Operand(end), \
+                              Operand(shift_amount), false);                \
+    __ RotateInsertSelectBits(temp1, new_val, Operand(start), Operand(end), \
+                              Operand(shift_amount), false);                \
+    __ CmpAndSwap(temp0, temp1, MemOperand(addr, offset));                  \
+    __ RotateInsertSelectBits(output, temp0, Operand(start + shift_amount), \
+                              Operand(end + shift_amount),                  \
+                              Operand(64 - shift_amount), true);            \
+  }
+
+#ifdef V8_TARGET_BIG_ENDIAN
+#define ATOMIC_COMP_EXCHANGE_BYTE(i)                             \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 3 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 8 * idx;                          \
+    constexpr int end = start + 7;                               \
+    constexpr int shift_amount = (3 - idx) * 8;                  \
+    ATOMIC_COMP_EXCHANGE(start, end, shift_amount, -idx);        \
+  }
+#define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                         \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 1 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 16 * idx;                         \
+    constexpr int end = start + 15;                              \
+    constexpr int shift_amount = (1 - idx) * 16;                 \
+    ATOMIC_COMP_EXCHANGE(start, end, shift_amount, -idx * 2);    \
+  }
+#else
+#define ATOMIC_COMP_EXCHANGE_BYTE(i)                             \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 3 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 8 * (3 - idx);                    \
+    constexpr int end = start + 7;                               \
+    constexpr int shift_amount = idx * 8;                        \
+    ATOMIC_COMP_EXCHANGE(start, end, shift_amount, -idx);        \
+  }
+#define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                         \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 1 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 16 * (1 - idx);                   \
+    constexpr int end = start + 15;                              \
+    constexpr int shift_amount = idx * 16;                       \
+    ATOMIC_COMP_EXCHANGE(start, end, shift_amount, -idx * 2);    \
+  }
+#endif
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_BYTE(load_and_ext) \
+  do {                                                      \
+    Register old_val = i.InputRegister(0);                  \
+    Register new_val = i.InputRegister(1);                  \
+    Register output = i.OutputRegister();                   \
+    Register addr = kScratchReg;                            \
+    Register temp0 = r0;                                    \
+    Register temp1 = r1;                                    \
+    size_t index = 2;                                       \
+    AddressingMode mode = kMode_None;                       \
+    MemOperand op = i.MemoryOperand(&mode, &index);         \
+    Label three, two, one, done;                            \
+    __ lay(addr, op);                                       \
+    __ tmll(addr, Operand(3));                              \
+    __ b(Condition(1), &three);                             \
+    __ b(Condition(2), &two);                               \
+    __ b(Condition(4), &one);                               \
+    /* ending with 0b00 */                                  \
+    ATOMIC_COMP_EXCHANGE_BYTE(0);                           \
+    __ b(&done);                                            \
+    /* ending with 0b01 */                                  \
+    __ bind(&one);                                          \
+    ATOMIC_COMP_EXCHANGE_BYTE(1);                           \
+    __ b(&done);                                            \
+    /* ending with 0b10 */                                  \
+    __ bind(&two);                                          \
+    ATOMIC_COMP_EXCHANGE_BYTE(2);                           \
+    __ b(&done);                                            \
+    /* ending with 0b11 */                                  \
+    __ bind(&three);                                        \
+    ATOMIC_COMP_EXCHANGE_BYTE(3);                           \
+    __ bind(&done);                                         \
+    __ load_and_ext(output, output);                        \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_HALFWORD(load_and_ext) \
+  do {                                                          \
+    Register old_val = i.InputRegister(0);                      \
+    Register new_val = i.InputRegister(1);                      \
+    Register output = i.OutputRegister();                       \
+    Register addr = kScratchReg;                                \
+    Register temp0 = r0;                                        \
+    Register temp1 = r1;                                        \
+    size_t index = 2;                                           \
+    AddressingMode mode = kMode_None;                           \
+    MemOperand op = i.MemoryOperand(&mode, &index);             \
+    Label two, done;                                            \
+    __ lay(addr, op);                                           \
+    __ tmll(addr, Operand(3));                                  \
+    __ b(Condition(2), &two);                                   \
+    ATOMIC_COMP_EXCHANGE_HALFWORD(0);                           \
+    __ b(&done);                                                \
+    __ bind(&two);                                              \
+    ATOMIC_COMP_EXCHANGE_HALFWORD(1);                           \
+    __ bind(&done);                                             \
+    __ load_and_ext(output, output);                            \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_WORD()       \
+  do {                                                \
+    Register new_val = i.InputRegister(1);            \
+    Register output = i.OutputRegister();             \
+    Register addr = kScratchReg;                      \
+    size_t index = 2;                                 \
+    AddressingMode mode = kMode_None;                 \
+    MemOperand op = i.MemoryOperand(&mode, &index);   \
+    __ lay(addr, op);                                 \
+    __ CmpAndSwap(output, new_val, MemOperand(addr)); \
+    __ LoadlW(output, output);                        \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_BINOP_WORD(load_and_op)      \
+  do {                                               \
+    Register value = i.InputRegister(2);             \
+    Register result = i.OutputRegister(0);           \
+    Register addr = r1;                              \
+    AddressingMode mode = kMode_None;                \
+    MemOperand op = i.MemoryOperand(&mode);          \
+    __ lay(addr, op);                                \
+    __ load_and_op(result, value, MemOperand(addr)); \
+    __ LoadlW(result, result);                       \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_BINOP_WORD64(load_and_op)    \
+  do {                                               \
+    Register value = i.InputRegister(2);             \
+    Register result = i.OutputRegister(0);           \
+    Register addr = r1;                              \
+    AddressingMode mode = kMode_None;                \
+    MemOperand op = i.MemoryOperand(&mode);          \
+    __ lay(addr, op);                                \
+    __ load_and_op(result, value, MemOperand(addr)); \
+  } while (false)
+
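+// Sub-word atomic binops loop on compare-and-swap: load the containing word,
+// apply the operation to the selected byte/halfword lane and retry until the
+// CmpAndSwap succeeds.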
+#define ATOMIC_BIN_OP(bin_inst, offset, shift_amount, start, end)           \
+  do {                                                                      \
+    Label do_cs;                                                            \
+    __ LoadlW(prev, MemOperand(addr, offset));                              \
+    __ bind(&do_cs);                                                        \
+    __ RotateInsertSelectBits(temp, value, Operand(start), Operand(end),    \
+                              Operand(static_cast<intptr_t>(shift_amount)), \
+                              true);                                        \
+    __ bin_inst(new_val, prev, temp);                                       \
+    __ lr(temp, prev);                                                      \
+    __ RotateInsertSelectBits(temp, new_val, Operand(start), Operand(end),  \
+                              Operand::Zero(), false);                      \
+    __ CmpAndSwap(prev, temp, MemOperand(addr, offset));                    \
+    __ bne(&do_cs, Label::kNear);                                           \
+  } while (false)
+
+#ifdef V8_TARGET_BIG_ENDIAN
+#define ATOMIC_BIN_OP_HALFWORD(bin_inst, index, extract_result) \
+  {                                                             \
+    constexpr int offset = -(2 * index);                        \
+    constexpr int shift_amount = 16 - (index * 16);             \
+    constexpr int start = 48 - shift_amount;                    \
+    constexpr int end = start + 15;                             \
+    ATOMIC_BIN_OP(bin_inst, offset, shift_amount, start, end);  \
+    extract_result();                                           \
+  }
+#define ATOMIC_BIN_OP_BYTE(bin_inst, index, extract_result)    \
+  {                                                            \
+    constexpr int offset = -(index);                           \
+    constexpr int shift_amount = 24 - (index * 8);             \
+    constexpr int start = 56 - shift_amount;                   \
+    constexpr int end = start + 7;                             \
+    ATOMIC_BIN_OP(bin_inst, offset, shift_amount, start, end); \
+    extract_result();                                          \
+  }
+#else
+#define ATOMIC_BIN_OP_HALFWORD(bin_inst, index, extract_result) \
+  {                                                             \
+    constexpr int offset = -(2 * index);                        \
+    constexpr int shift_amount = index * 16;                    \
+    constexpr int start = 48 - shift_amount;                    \
+    constexpr int end = start + 15;                             \
+    ATOMIC_BIN_OP(bin_inst, offset, shift_amount, start, end);  \
+    extract_result();                                           \
+  }
+#define ATOMIC_BIN_OP_BYTE(bin_inst, index, extract_result)    \
+  {                                                            \
+    constexpr int offset = -(index);                           \
+    constexpr int shift_amount = index * 8;                    \
+    constexpr int start = 56 - shift_amount;                   \
+    constexpr int end = start + 7;                             \
+    ATOMIC_BIN_OP(bin_inst, offset, shift_amount, start, end); \
+    extract_result();                                          \
+  }
+#endif  // V8_TARGET_BIG_ENDIAN
+
+#define ASSEMBLE_ATOMIC_BINOP_HALFWORD(bin_inst, extract_result) \
+  do {                                                           \
+    Register value = i.InputRegister(2);                         \
+    Register result = i.OutputRegister(0);                       \
+    Register prev = i.TempRegister(0);                           \
+    Register new_val = r0;                                       \
+    Register addr = r1;                                          \
+    Register temp = kScratchReg;                                 \
+    AddressingMode mode = kMode_None;                            \
+    MemOperand op = i.MemoryOperand(&mode);                      \
+    Label two, done;                                             \
+    __ lay(addr, op);                                            \
+    __ tmll(addr, Operand(3));                                   \
+    __ b(Condition(2), &two);                                    \
+    /* word boundary */                                          \
+    ATOMIC_BIN_OP_HALFWORD(bin_inst, 0, extract_result);         \
+    __ b(&done);                                                 \
+    __ bind(&two);                                               \
+    /* halfword boundary */                                      \
+    ATOMIC_BIN_OP_HALFWORD(bin_inst, 1, extract_result);         \
+    __ bind(&done);                                              \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC_BINOP_BYTE(bin_inst, extract_result) \
+  do {                                                       \
+    Register value = i.InputRegister(2);                     \
+    Register result = i.OutputRegister(0);                   \
+    Register addr = i.TempRegister(0);                       \
+    Register prev = r0;                                      \
+    Register new_val = r1;                                   \
+    Register temp = kScratchReg;                             \
+    AddressingMode mode = kMode_None;                        \
+    MemOperand op = i.MemoryOperand(&mode);                  \
+    Label done, one, two, three;                             \
+    __ lay(addr, op);                                        \
+    __ tmll(addr, Operand(3));                               \
+    __ b(Condition(1), &three);                              \
+    __ b(Condition(2), &two);                                \
+    __ b(Condition(4), &one);                                \
+    /* ending with 0b00 (word boundary) */                   \
+    ATOMIC_BIN_OP_BYTE(bin_inst, 0, extract_result);         \
+    __ b(&done);                                             \
+    /* ending with 0b01 */                                   \
+    __ bind(&one);                                           \
+    ATOMIC_BIN_OP_BYTE(bin_inst, 1, extract_result);         \
+    __ b(&done);                                             \
+    /* ending with 0b10 (hw boundary) */                     \
+    __ bind(&two);                                           \
+    ATOMIC_BIN_OP_BYTE(bin_inst, 2, extract_result);         \
+    __ b(&done);                                             \
+    /* ending with 0b11 */                                   \
+    __ bind(&three);                                         \
+    ATOMIC_BIN_OP_BYTE(bin_inst, 3, extract_result);         \
+    __ bind(&done);                                          \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC64_COMP_EXCHANGE_WORD64()        \
+  do {                                                  \
+    Register new_val = i.InputRegister(1);              \
+    Register output = i.OutputRegister();               \
+    Register addr = kScratchReg;                        \
+    size_t index = 2;                                   \
+    AddressingMode mode = kMode_None;                   \
+    MemOperand op = i.MemoryOperand(&mode, &index);     \
+    __ lay(addr, op);                                   \
+    __ CmpAndSwap64(output, new_val, MemOperand(addr)); \
+  } while (false)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  __ LeaveFrame(StackFrame::MANUAL);
+  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ RestoreFrameStateForTailCall();
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ LoadP(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
+  __ CmpP(scratch1,
+          Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ bne(&done);
+
+  // Load the arguments count from the current arguments adaptor frame (note
+  // that it does not include the receiver).
+  Register caller_args_count_reg = scratch1;
+  __ LoadP(caller_args_count_reg,
+           MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+  __ SmiUntag(caller_args_count_reg);
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
+void FlushPendingPushRegisters(TurboAssembler* tasm,
+                               FrameAccessState* frame_access_state,
+                               ZoneVector<Register>* pending_pushes) {
+  switch (pending_pushes->size()) {
+    case 0:
+      break;
+    case 1:
+      tasm->Push((*pending_pushes)[0]);
+      break;
+    case 2:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
+      break;
+    case 3:
+      tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
+                 (*pending_pushes)[2]);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  frame_access_state->IncreaseSPDelta(pending_pushes->size());
+  pending_pushes->clear();
+}
+
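+// Adjusts sp by the difference between {new_slot_above_sp} and the current
+// number of slots above sp: a positive delta grows the stack (flushing any
+// pending pushes first), while a negative delta shrinks it only when
+// {allow_shrinkage} is set.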
+void AdjustStackPointerForTailCall(
+    TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
+    ZoneVector<Register>* pending_pushes = nullptr,
+    bool allow_shrinkage = true) {
+  int current_sp_offset = state->GetSPToFPSlotCount() +
+                          StandardFrameConstants::kFixedSlotCountAboveFp;
+  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  if (stack_slot_delta > 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->AddP(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    if (pending_pushes != nullptr) {
+      FlushPendingPushRegisters(tasm, state, pending_pushes);
+    }
+    tasm->AddP(sp, sp, Operand(-stack_slot_delta * kSystemPointerSize));
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, Instruction* instr,
+                                   S390OperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(instr->opcode()));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->AndP(value, kSpeculationPoisonRegister);
+  }
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
+
+  if (!pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot)) {
+    S390OperandConverter g(this, instr);
+    ZoneVector<Register> pending_pushes(zone());
+    for (auto move : pushes) {
+      LocationOperand destination_location(
+          LocationOperand::cast(move->destination()));
+      InstructionOperand source(move->source());
+      AdjustStackPointerForTailCall(
+          tasm(), frame_access_state(),
+          destination_location.index() - pending_pushes.size(),
+          &pending_pushes);
+      // Pushes of non-register data types are not supported.
+      DCHECK(source.IsRegister());
+      LocationOperand source_location(LocationOperand::cast(source));
+      pending_pushes.push_back(source_location.GetRegister());
+      // TODO(arm): We can push more than 3 registers at once. Add support in
+      // the macro-assembler for pushing a list of registers.
+      if (pending_pushes.size() == 3) {
+        FlushPendingPushRegisters(tasm(), frame_access_state(),
+                                  &pending_pushes);
+      }
+      move->Eliminate();
+    }
+    FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
+  }
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot, nullptr, false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
+                                first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  Register scratch = r1;
+  __ ComputeCodeStartAddress(scratch);
+  __ CmpP(scratch, kJavaScriptCallCodeStartRegister);
+  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, then the
+// generated code jumps to the CompileLazyDeoptimizedCode builtin. To do this
+// we need to:
+//    1. read from memory the word that contains the marked-for-deoptimization
+//       bit, which can be found in the flags of the referenced
+//       {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is set, jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  if (FLAG_debug_code) {
+    // Check that {kJavaScriptCallCodeStartRegister} is correct.
+    __ ComputeCodeStartAddress(ip);
+    __ CmpP(ip, kJavaScriptCallCodeStartRegister);
+    __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
+  }
+
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ LoadTaggedPointerField(
+      ip, MemOperand(kJavaScriptCallCodeStartRegister, offset), r0);
+  __ LoadW(ip,
+           FieldMemOperand(ip, CodeDataContainer::kKindSpecificFlagsOffset));
+  __ TestBit(ip, Code::kMarkedForDeoptimizationBit);
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, ne);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  Register scratch = r1;
+
+  __ ComputeCodeStartAddress(scratch);
+
+  // Calculate a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  __ LoadImmP(kSpeculationPoisonRegister, Operand::Zero());
+  __ LoadImmP(r0, Operand(-1));
+  __ CmpP(kJavaScriptCallCodeStartRegister, scratch);
+  __ LoadOnConditionP(eq, kSpeculationPoisonRegister, r0);
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ AndP(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ AndP(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
+  __ AndP(sp, sp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  S390OperandConverter i(this, instr);
+  ArchOpcode opcode = ArchOpcodeField::decode(instr->opcode());
+
+  switch (opcode) {
+    case kArchComment:
+#ifdef V8_TARGET_ARCH_S390X
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+#else
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
+#endif
+      break;
+    case kArchCallCodeObject: {
+      if (HasRegisterInput(instr, 0)) {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ CallCodeObject(reg);
+      } else {
+        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!instr->InputAt(0)->IsImmediate());
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      // We must not share code targets for calls to builtins for wasm code, as
+      // they might need to be patched individually.
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+#ifdef V8_TARGET_ARCH_S390X
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+#else
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+#endif
+        __ Call(wasm_code, constant.rmode());
+      } else {
+        __ Call(i.InputRegister(0));
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+    case kArchTailCallCodeObject: {
+      if (opcode == kArchTailCallCodeObjectFromJSFunction) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      if (HasRegisterInput(instr, 0)) {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ JumpCodeObject(reg);
+      } else {
+        // We cannot use the constant pool to load the target since
+        // we've already restored the caller's frame.
+        ConstantPoolUnavailableScope constant_pool_unavailable(tasm());
+        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      // We must not share code targets for calls to builtins for wasm code, as
+      // they might need to be patched individually.
+      if (instr->InputAt(0)->IsImmediate()) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+#ifdef V8_TARGET_ARCH_S390X
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+#else
+        Address wasm_code = static_cast<Address>(constant.ToInt32());
+#endif
+        __ Jump(wasm_code, constant.rmode());
+      } else {
+        __ Jump(i.InputRegister(0));
+      }
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!instr->InputAt(0)->IsImmediate());
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      __ Jump(reg);
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ LoadTaggedPointerField(
+            kScratchReg, FieldMemOperand(func, JSFunction::kContextOffset));
+        __ CmpP(cp, kScratchReg);
+        __ Assert(eq, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == r4, "ABI mismatch");
+      __ LoadTaggedPointerField(r4,
+                                FieldMemOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(r4);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters, kScratchReg);
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label return_location;
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        // Put the return address in a stack slot.
+        __ larl(r0, &return_location);
+        __ StoreP(r0, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
+      }
+      if (instr->InputAt(0)->IsImmediate()) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      __ bind(&return_location);
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, for certain architectures
+      // (e.g. ARM) there may be stricter alignment requirements that cause
+      // the old SP to be saved on the stack. In those cases we cannot
+      // calculate the SP delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == r3);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ stop();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchNop:
+    case kArchThrowTerminator:
+      // don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ b(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchFramePointer:
+      __ LoadRR(i.OutputRegister(), fp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ LoadP(i.OutputRegister(), MemOperand(fp, 0));
+      } else {
+        __ LoadRR(i.OutputRegister(), fp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison to consider the size difference of an optimized frame versus
+      // the contained unoptimized frames.
+
+      Register lhs_register = sp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = i.TempRegister(0);
+        __ SubP(lhs_register, sp, Operand(offset));
+      }
+
+      constexpr size_t kValueIndex = 0;
+      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
+      __ CmpLogicalP(lhs_register, i.InputRegister(kValueIndex));
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ LoadSmiLiteral(i.OutputRegister(),
+                        Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI:
+      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
+                           i.InputDoubleRegister(0), DetermineStubCallMode());
+      break;
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      Register value = i.InputRegister(2);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      OutOfLineRecordWrite* ool;
+
+      AddressingMode addressing_mode =
+          AddressingModeField::decode(instr->opcode());
+      if (addressing_mode == kMode_MRI) {
+        int32_t offset = i.InputInt32(1);
+        ool = zone()->New<OutOfLineRecordWrite>(
+            this, object, offset, value, scratch0, scratch1, mode,
+            DetermineStubCallMode(), &unwinding_info_writer_);
+        __ StoreTaggedField(value, MemOperand(object, offset), r0);
+      } else {
+        DCHECK_EQ(kMode_MRR, addressing_mode);
+        Register offset(i.InputRegister(1));
+        ool = zone()->New<OutOfLineRecordWrite>(
+            this, object, offset, value, scratch0, scratch1, mode,
+            DetermineStubCallMode(), &unwinding_info_writer_);
+        __ StoreTaggedField(value, MemOperand(object, offset));
+      }
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask, ne,
+                       ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      __ AddP(i.OutputRegister(), offset.from_stack_pointer() ? sp : fp,
+              Operand(offset.offset()));
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
+      __ AndP(i.InputRegister(0), kSpeculationPoisonRegister);
+      break;
+    case kS390_Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ LoadDouble(i.OutputDoubleRegister(), MemOperand(fp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ LoadFloat32(i.OutputFloatRegister(), MemOperand(fp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ LoadSimd128(i.OutputSimd128Register(), MemOperand(fp, offset),
+                         kScratchReg);
+        }
+      } else {
+        __ LoadP(i.OutputRegister(), MemOperand(fp, offset));
+      }
+      break;
+    }
+    case kS390_Abs32:
+      // TODO(john.yan): zero-ext
+      __ lpr(i.OutputRegister(0), i.InputRegister(0));
+      break;
+    case kS390_Abs64:
+      __ lpgr(i.OutputRegister(0), i.InputRegister(0));
+      break;
+    case kS390_And32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(nrk), RM32Instr(And), RIInstr(nilf));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(nr), RM32Instr(And), RIInstr(nilf));
+      }
+      break;
+    case kS390_And64:
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN_OP(RRRInstr(ngrk), RM64Instr(ng), nullInstr);
+      } else {
+        ASSEMBLE_BIN_OP(RRInstr(ngr), RM64Instr(ng), nullInstr);
+      }
+      break;
+    case kS390_Or32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(ork), RM32Instr(Or), RIInstr(oilf));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(or_z), RM32Instr(Or), RIInstr(oilf));
+      }
+      break;
+    case kS390_Or64:
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN_OP(RRRInstr(ogrk), RM64Instr(og), nullInstr);
+      } else {
+        ASSEMBLE_BIN_OP(RRInstr(ogr), RM64Instr(og), nullInstr);
+      }
+      break;
+    case kS390_Xor32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(xrk), RM32Instr(Xor), RIInstr(xilf));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(xr), RM32Instr(Xor), RIInstr(xilf));
+      }
+      break;
+    case kS390_Xor64:
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN_OP(RRRInstr(xgrk), RM64Instr(xg), nullInstr);
+      } else {
+        ASSEMBLE_BIN_OP(RRInstr(xgr), RM64Instr(xg), nullInstr);
+      }
+      break;
+    case kS390_ShiftLeft32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(ShiftLeft), nullInstr, RRIInstr(ShiftLeft));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(sll), nullInstr, RIInstr(sll));
+      }
+      break;
+    case kS390_ShiftLeft64:
+      ASSEMBLE_BIN_OP(RRRInstr(sllg), nullInstr, RRIInstr(sllg));
+      break;
+    case kS390_ShiftRight32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(srlk), nullInstr, RRIInstr(srlk));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(srl), nullInstr, RIInstr(srl));
+      }
+      break;
+    case kS390_ShiftRight64:
+      ASSEMBLE_BIN_OP(RRRInstr(srlg), nullInstr, RRIInstr(srlg));
+      break;
+    case kS390_ShiftRightArith32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(srak), nullInstr, RRIInstr(srak));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(sra), nullInstr, RIInstr(sra));
+      }
+      break;
+    case kS390_ShiftRightArith64:
+      ASSEMBLE_BIN_OP(RRRInstr(srag), nullInstr, RRIInstr(srag));
+      break;
+#if !V8_TARGET_ARCH_S390X
+    case kS390_AddPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ AddLogical32(i.OutputRegister(0), i.InputRegister(0),
+                      i.InputRegister(2));
+      __ AddLogicalWithCarry32(i.OutputRegister(1), i.InputRegister(1),
+                               i.InputRegister(3));
+      break;
+    case kS390_SubPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
+      __ SubLogical32(i.OutputRegister(0), i.InputRegister(0),
+                      i.InputRegister(2));
+      __ SubLogicalWithBorrow32(i.OutputRegister(1), i.InputRegister(1),
+                                i.InputRegister(3));
+      break;
+    case kS390_MulPair:
+      // i.InputRegister(0) ... left low word.
+      // i.InputRegister(1) ... left high word.
+      // i.InputRegister(2) ... right low word.
+      // i.InputRegister(3) ... right high word.
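+      // Reassemble each 64-bit operand from its {high, low} register pair,
+      // multiply them with a 64-bit multiply, and split the product back into
+      // the low and high output words.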
+      __ sllg(r0, i.InputRegister(1), Operand(32));
+      __ sllg(r1, i.InputRegister(3), Operand(32));
+      __ lr(r0, i.InputRegister(0));
+      __ lr(r1, i.InputRegister(2));
+      __ msgr(r1, r0);
+      __ lr(i.OutputRegister(0), r1);
+      __ srag(i.OutputRegister(1), r1, Operand(32));
+      break;
+    case kS390_ShiftLeftPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftLeftPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                         i.InputRegister(1), i.InputInt32(2));
+      } else {
+        __ ShiftLeftPair(i.OutputRegister(0), second_output, i.InputRegister(0),
+                         i.InputRegister(1), kScratchReg, i.InputRegister(2));
+      }
+      break;
+    }
+    case kS390_ShiftRightPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftRightPair(i.OutputRegister(0), second_output,
+                          i.InputRegister(0), i.InputRegister(1),
+                          i.InputInt32(2));
+      } else {
+        __ ShiftRightPair(i.OutputRegister(0), second_output,
+                          i.InputRegister(0), i.InputRegister(1), kScratchReg,
+                          i.InputRegister(2));
+      }
+      break;
+    }
+    case kS390_ShiftRightArithPair: {
+      Register second_output =
+          instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
+      if (instr->InputAt(2)->IsImmediate()) {
+        __ ShiftRightArithPair(i.OutputRegister(0), second_output,
+                               i.InputRegister(0), i.InputRegister(1),
+                               i.InputInt32(2));
+      } else {
+        __ ShiftRightArithPair(i.OutputRegister(0), second_output,
+                               i.InputRegister(0), i.InputRegister(1),
+                               kScratchReg, i.InputRegister(2));
+      }
+      break;
+    }
+#endif
+    case kS390_RotRight32: {
+      // zero-ext
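+      // Rotate right by n is implemented as a rotate left by 32 - n (the
+      // register variant rotates left by the negated amount).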
+      if (HasRegisterInput(instr, 1)) {
+        __ LoadComplementRR(kScratchReg, i.InputRegister(1));
+        __ rll(i.OutputRegister(), i.InputRegister(0), kScratchReg);
+      } else {
+        __ rll(i.OutputRegister(), i.InputRegister(0),
+               Operand(32 - i.InputInt32(1)));
+      }
+      CHECK_AND_ZERO_EXT_OUTPUT(2);
+      break;
+    }
+    case kS390_RotRight64:
+      if (HasRegisterInput(instr, 1)) {
+        __ lcgr(kScratchReg, i.InputRegister(1));
+        __ rllg(i.OutputRegister(), i.InputRegister(0), kScratchReg);
+      } else {
+        DCHECK(HasImmediateInput(instr, 1));
+        __ rllg(i.OutputRegister(), i.InputRegister(0),
+                Operand(64 - i.InputInt32(1)));
+      }
+      break;
+    // TODO(john.yan): clean up kS390_RotLeftAnd...
+    case kS390_RotLeftAndClear64:
+      if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
+        int shiftAmount = i.InputInt32(1);
+        int endBit = 63 - shiftAmount;
+        int startBit = 63 - i.InputInt32(2);
+        __ RotateInsertSelectBits(i.OutputRegister(), i.InputRegister(0),
+                                  Operand(startBit), Operand(endBit),
+                                  Operand(shiftAmount), true);
+      } else {
+        int shiftAmount = i.InputInt32(1);
+        int clearBit = 63 - i.InputInt32(2);
+        __ rllg(i.OutputRegister(), i.InputRegister(0), Operand(shiftAmount));
+        __ sllg(i.OutputRegister(), i.OutputRegister(), Operand(clearBit));
+        __ srlg(i.OutputRegister(), i.OutputRegister(),
+                Operand(clearBit + shiftAmount));
+        __ sllg(i.OutputRegister(), i.OutputRegister(), Operand(shiftAmount));
+      }
+      break;
+    case kS390_RotLeftAndClearLeft64:
+      if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
+        int shiftAmount = i.InputInt32(1);
+        int endBit = 63;
+        int startBit = 63 - i.InputInt32(2);
+        __ RotateInsertSelectBits(i.OutputRegister(), i.InputRegister(0),
+                                  Operand(startBit), Operand(endBit),
+                                  Operand(shiftAmount), true);
+      } else {
+        int shiftAmount = i.InputInt32(1);
+        int clearBit = 63 - i.InputInt32(2);
+        __ rllg(i.OutputRegister(), i.InputRegister(0), Operand(shiftAmount));
+        __ sllg(i.OutputRegister(), i.OutputRegister(), Operand(clearBit));
+        __ srlg(i.OutputRegister(), i.OutputRegister(), Operand(clearBit));
+      }
+      break;
+    case kS390_RotLeftAndClearRight64:
+      if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
+        int shiftAmount = i.InputInt32(1);
+        int endBit = 63 - i.InputInt32(2);
+        int startBit = 0;
+        __ RotateInsertSelectBits(i.OutputRegister(), i.InputRegister(0),
+                                  Operand(startBit), Operand(endBit),
+                                  Operand(shiftAmount), true);
+      } else {
+        int shiftAmount = i.InputInt32(1);
+        int clearBit = i.InputInt32(2);
+        __ rllg(i.OutputRegister(), i.InputRegister(0), Operand(shiftAmount));
+        __ srlg(i.OutputRegister(), i.OutputRegister(), Operand(clearBit));
+        __ sllg(i.OutputRegister(), i.OutputRegister(), Operand(clearBit));
+      }
+      break;
+    case kS390_Add32: {
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(ark), RM32Instr(Add32), RRIInstr(Add32));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(ar), RM32Instr(Add32), RIInstr(Add32));
+      }
+      break;
+    }
+    case kS390_Add64:
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN_OP(RRRInstr(agrk), RM64Instr(ag), RRIInstr(AddP));
+      } else {
+        ASSEMBLE_BIN_OP(RRInstr(agr), RM64Instr(ag), RIInstr(agfi));
+      }
+      break;
+    case kS390_AddFloat:
+      ASSEMBLE_BIN_OP(DDInstr(aebr), DMTInstr(AddFloat32), nullInstr);
+      break;
+    case kS390_AddDouble:
+      ASSEMBLE_BIN_OP(DDInstr(adbr), DMTInstr(AddFloat64), nullInstr);
+      break;
+    case kS390_Sub32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(srk), RM32Instr(Sub32), RRIInstr(Sub32));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(sr), RM32Instr(Sub32), RIInstr(Sub32));
+      }
+      break;
+    case kS390_Sub64:
+      if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
+        ASSEMBLE_BIN_OP(RRRInstr(sgrk), RM64Instr(sg), RRIInstr(SubP));
+      } else {
+        ASSEMBLE_BIN_OP(RRInstr(sgr), RM64Instr(sg), RIInstr(SubP));
+      }
+      break;
+    case kS390_SubFloat:
+      ASSEMBLE_BIN_OP(DDInstr(sebr), DMTInstr(SubFloat32), nullInstr);
+      break;
+    case kS390_SubDouble:
+      ASSEMBLE_BIN_OP(DDInstr(sdbr), DMTInstr(SubFloat64), nullInstr);
+      break;
+    case kS390_Mul32:
+      // zero-ext
+      if (CpuFeatures::IsSupported(MISC_INSTR_EXT2)) {
+        ASSEMBLE_BIN32_OP(RRRInstr(msrkc), RM32Instr(msc), RIInstr(Mul32));
+      } else {
+        ASSEMBLE_BIN32_OP(RRInstr(Mul32), RM32Instr(Mul32), RIInstr(Mul32));
+      }
+      break;
+    case kS390_Mul32WithOverflow:
+      // zero-ext
+      ASSEMBLE_BIN32_OP(RRRInstr(Mul32WithOverflowIfCCUnequal),
+                        RRM32Instr(Mul32WithOverflowIfCCUnequal),
+                        RRIInstr(Mul32WithOverflowIfCCUnequal));
+      break;
+    case kS390_Mul64:
+      ASSEMBLE_BIN_OP(RRInstr(Mul64), RM64Instr(Mul64), RIInstr(Mul64));
+      break;
+    case kS390_MulHigh32:
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(MulHigh32), RRM32Instr(MulHigh32),
+                      RRIInstr(MulHigh32));
+      break;
+    case kS390_MulHighU32:
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(MulHighU32), RRM32Instr(MulHighU32),
+                      RRIInstr(MulHighU32));
+      break;
+    case kS390_MulFloat:
+      ASSEMBLE_BIN_OP(DDInstr(meebr), DMTInstr(MulFloat32), nullInstr);
+      break;
+    case kS390_MulDouble:
+      ASSEMBLE_BIN_OP(DDInstr(mdbr), DMTInstr(MulFloat64), nullInstr);
+      break;
+    case kS390_Div64:
+      ASSEMBLE_BIN_OP(RRRInstr(Div64), RRM64Instr(Div64), nullInstr);
+      break;
+    case kS390_Div32: {
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(Div32), RRM32Instr(Div32), nullInstr);
+      break;
+    }
+    case kS390_DivU64:
+      ASSEMBLE_BIN_OP(RRRInstr(DivU64), RRM64Instr(DivU64), nullInstr);
+      break;
+    case kS390_DivU32: {
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(DivU32), RRM32Instr(DivU32), nullInstr);
+      break;
+    }
+    case kS390_DivFloat:
+      ASSEMBLE_BIN_OP(DDInstr(debr), DMTInstr(DivFloat32), nullInstr);
+      break;
+    case kS390_DivDouble:
+      ASSEMBLE_BIN_OP(DDInstr(ddbr), DMTInstr(DivFloat64), nullInstr);
+      break;
+    case kS390_Mod32:
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(Mod32), RRM32Instr(Mod32), nullInstr);
+      break;
+    case kS390_ModU32:
+      // zero-ext
+      ASSEMBLE_BIN_OP(RRRInstr(ModU32), RRM32Instr(ModU32), nullInstr);
+      break;
+    case kS390_Mod64:
+      ASSEMBLE_BIN_OP(RRRInstr(Mod64), RRM64Instr(Mod64), nullInstr);
+      break;
+    case kS390_ModU64:
+      ASSEMBLE_BIN_OP(RRRInstr(ModU64), RRM64Instr(ModU64), nullInstr);
+      break;
+    case kS390_AbsFloat:
+      __ lpebr(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_SqrtFloat:
+      ASSEMBLE_UNARY_OP(D_DInstr(sqebr), nullInstr, nullInstr);
+      break;
+    case kS390_SqrtDouble:
+      ASSEMBLE_UNARY_OP(D_DInstr(sqdbr), nullInstr, nullInstr);
+      break;
+    case kS390_FloorFloat:
+      __ fiebra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_NEG_INF,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_CeilFloat:
+      __ fiebra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_POS_INF,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_TruncateFloat:
+      __ fiebra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_0,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    //  Double operations
+    case kS390_ModDouble:
+      ASSEMBLE_FLOAT_MODULO();
+      break;
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kS390_Neg32:
+      __ lcr(i.OutputRegister(), i.InputRegister(0));
+      CHECK_AND_ZERO_EXT_OUTPUT(1);
+      break;
+    case kS390_Neg64:
+      __ lcgr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_MaxFloat:
+      ASSEMBLE_FLOAT_MAX();
+      break;
+    case kS390_MaxDouble:
+      ASSEMBLE_DOUBLE_MAX();
+      break;
+    case kS390_MinFloat:
+      ASSEMBLE_FLOAT_MIN();
+      break;
+    case kS390_MinDouble:
+      ASSEMBLE_DOUBLE_MIN();
+      break;
+    case kS390_AbsDouble:
+      __ lpdbr(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_FloorDouble:
+      __ fidbra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_NEG_INF,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_CeilDouble:
+      __ fidbra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_POS_INF,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_TruncateDouble:
+      __ fidbra(v8::internal::Assembler::FIDBRA_ROUND_TOWARD_0,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_RoundDouble:
+      __ fidbra(v8::internal::Assembler::FIDBRA_ROUND_TO_NEAREST_AWAY_FROM_0,
+                i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_NegFloat:
+      ASSEMBLE_UNARY_OP(D_DInstr(lcebr), nullInstr, nullInstr);
+      break;
+    case kS390_NegDouble:
+      ASSEMBLE_UNARY_OP(D_DInstr(lcdbr), nullInstr, nullInstr);
+      break;
+    case kS390_Cntlz32: {
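+      // flogr counts leading zeros of a 64-bit value, so zero-extend the
+      // 32-bit input first and then subtract the 32 extra leading zeros.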
+      __ llgfr(i.OutputRegister(), i.InputRegister(0));
+      __ flogr(r0, i.OutputRegister());
+      __ Add32(i.OutputRegister(), r0, Operand(-32));
+      // No need to zero-ext b/c llgfr is done already
+      break;
+    }
+#if V8_TARGET_ARCH_S390X
+    case kS390_Cntlz64: {
+      __ flogr(r0, i.InputRegister(0));
+      __ LoadRR(i.OutputRegister(), r0);
+      break;
+    }
+#endif
+    case kS390_Popcnt32:
+      __ Popcnt32(i.OutputRegister(), i.InputRegister(0));
+      break;
+#if V8_TARGET_ARCH_S390X
+    case kS390_Popcnt64:
+      __ Popcnt64(i.OutputRegister(), i.InputRegister(0));
+      break;
+#endif
+    case kS390_Cmp32:
+      ASSEMBLE_COMPARE32(Cmp32, CmpLogical32);
+      break;
+#if V8_TARGET_ARCH_S390X
+    case kS390_Cmp64:
+      ASSEMBLE_COMPARE(CmpP, CmpLogicalP);
+      break;
+#endif
+    case kS390_CmpFloat:
+      ASSEMBLE_FLOAT_COMPARE(cebr, ceb, ley);
+      // __ cebr(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      break;
+    case kS390_CmpDouble:
+      ASSEMBLE_FLOAT_COMPARE(cdbr, cdb, ldy);
+      // __ cdbr(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      break;
+    case kS390_Tst32:
+      if (HasRegisterInput(instr, 1)) {
+        __ And(r0, i.InputRegister(0), i.InputRegister(1));
+      } else {
+        // detect tmlh/tmhl/tmhh case
+        Operand opnd = i.InputImmediate(1);
+        if (is_uint16(opnd.immediate())) {
+          __ tmll(i.InputRegister(0), opnd);
+        } else {
+          __ lr(r0, i.InputRegister(0));
+          __ nilf(r0, opnd);
+        }
+      }
+      break;
+    case kS390_Tst64:
+      if (HasRegisterInput(instr, 1)) {
+        __ AndP(r0, i.InputRegister(0), i.InputRegister(1));
+      } else {
+        Operand opnd = i.InputImmediate(1);
+        if (is_uint16(opnd.immediate())) {
+          __ tmll(i.InputRegister(0), opnd);
+        } else {
+          __ AndP(r0, i.InputRegister(0), opnd);
+        }
+      }
+      break;
+    case kS390_Float64SilenceNaN: {
+      DoubleRegister value = i.InputDoubleRegister(0);
+      DoubleRegister result = i.OutputDoubleRegister();
+      __ CanonicalizeNaN(result, value);
+      break;
+    }
+    case kS390_StackClaim: {
+      int num_slots = i.InputInt32(0);
+      __ lay(sp, MemOperand(sp, -num_slots * kSystemPointerSize));
+      frame_access_state()->IncreaseSPDelta(num_slots);
+      break;
+    }
+    case kS390_Push:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        switch (op->representation()) {
+          case MachineRepresentation::kFloat32:
+            __ lay(sp, MemOperand(sp, -kSystemPointerSize));
+            __ StoreFloat32(i.InputDoubleRegister(0), MemOperand(sp));
+            break;
+          case MachineRepresentation::kFloat64:
+            __ lay(sp, MemOperand(sp, -kDoubleSize));
+            __ StoreDouble(i.InputDoubleRegister(0), MemOperand(sp));
+            frame_access_state()->IncreaseSPDelta(kDoubleSize /
+                                                  kSystemPointerSize);
+            break;
+          case MachineRepresentation::kSimd128: {
+            __ lay(sp, MemOperand(sp, -kSimd128Size));
+            __ StoreSimd128(i.InputDoubleRegister(0), MemOperand(sp),
+                            kScratchReg);
+            frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                                  kSystemPointerSize);
+            break;
+          }
+          default:
+            UNREACHABLE();
+            break;
+        }
+      } else {
+        __ Push(i.InputRegister(0));
+        frame_access_state()->IncreaseSPDelta(1);
+      }
+      break;
+    case kS390_PushFrame: {
+      int num_slots = i.InputInt32(1);
+      __ lay(sp, MemOperand(sp, -num_slots * kSystemPointerSize));
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDouble(i.InputDoubleRegister(0), MemOperand(sp));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+          __ StoreFloat32(i.InputDoubleRegister(0), MemOperand(sp));
+        }
+      } else {
+        __ StoreP(i.InputRegister(0), MemOperand(sp));
+      }
+      break;
+    }
+    case kS390_StoreToStackSlot: {
+      int slot = i.InputInt32(1);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDouble(i.InputDoubleRegister(0),
+                         MemOperand(sp, slot * kSystemPointerSize));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ StoreFloat32(i.InputDoubleRegister(0),
+                          MemOperand(sp, slot * kSystemPointerSize));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ StoreSimd128(i.InputDoubleRegister(0),
+                          MemOperand(sp, slot * kSystemPointerSize),
+                          kScratchReg);
+        }
+      } else {
+        __ StoreP(i.InputRegister(0),
+                  MemOperand(sp, slot * kSystemPointerSize));
+      }
+      break;
+    }
+    case kS390_SignExtendWord8ToInt32:
+      __ lbr(i.OutputRegister(), i.InputRegister(0));
+      CHECK_AND_ZERO_EXT_OUTPUT(1);
+      break;
+    case kS390_SignExtendWord16ToInt32:
+      __ lhr(i.OutputRegister(), i.InputRegister(0));
+      CHECK_AND_ZERO_EXT_OUTPUT(1);
+      break;
+    case kS390_SignExtendWord8ToInt64:
+      __ lgbr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_SignExtendWord16ToInt64:
+      __ lghr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_SignExtendWord32ToInt64:
+      __ lgfr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_Uint32ToUint64:
+      // Zero extend
+      __ llgfr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_Int64ToInt32:
+      // sign extend
+      __ lgfr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    // Convert Fixed to Floating Point
+    case kS390_Int64ToFloat32:
+      __ ConvertInt64ToFloat(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    case kS390_Int64ToDouble:
+      __ ConvertInt64ToDouble(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    case kS390_Uint64ToFloat32:
+      __ ConvertUnsignedInt64ToFloat(i.OutputDoubleRegister(),
+                                     i.InputRegister(0));
+      break;
+    case kS390_Uint64ToDouble:
+      __ ConvertUnsignedInt64ToDouble(i.OutputDoubleRegister(),
+                                      i.InputRegister(0));
+      break;
+    case kS390_Int32ToFloat32:
+      __ ConvertIntToFloat(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    case kS390_Int32ToDouble:
+      __ ConvertIntToDouble(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+    case kS390_Uint32ToFloat32:
+      __ ConvertUnsignedIntToFloat(i.OutputDoubleRegister(),
+                                   i.InputRegister(0));
+      break;
+    case kS390_Uint32ToDouble:
+      __ ConvertUnsignedIntToDouble(i.OutputDoubleRegister(),
+                                    i.InputRegister(0));
+      break;
+    case kS390_DoubleToInt32: {
+      Label done;
+      __ ConvertDoubleToInt32(i.OutputRegister(0), i.InputDoubleRegister(0),
+                              kRoundToNearest);
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      __ lghi(i.OutputRegister(0), Operand::Zero());
+      __ bind(&done);
+      break;
+    }
+    case kS390_DoubleToUint32: {
+      Label done;
+      __ ConvertDoubleToUnsignedInt32(i.OutputRegister(0),
+                                      i.InputDoubleRegister(0));
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      __ lghi(i.OutputRegister(0), Operand::Zero());
+      __ bind(&done);
+      break;
+    }
+    case kS390_DoubleToInt64: {
+      Label done;
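+      // When a second output is requested it serves as a success flag:
+      // preset it to 1 here and clear it below if the conversion does not
+      // complete normally.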
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand(1));
+      }
+      __ ConvertDoubleToInt64(i.OutputRegister(0), i.InputDoubleRegister(0));
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand::Zero());
+      } else {
+        __ lghi(i.OutputRegister(0), Operand::Zero());
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_DoubleToUint64: {
+      Label done;
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand(1));
+      }
+      __ ConvertDoubleToUnsignedInt64(i.OutputRegister(0),
+                                      i.InputDoubleRegister(0));
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand::Zero());
+      } else {
+        __ lghi(i.OutputRegister(0), Operand::Zero());
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_Float32ToInt32: {
+      Label done;
+      __ ConvertFloat32ToInt32(i.OutputRegister(0), i.InputDoubleRegister(0),
+                               kRoundToZero);
+      bool set_overflow_to_min_i32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_i32) {
+        // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
+        // because INT32_MIN allows easier out-of-bounds detection.
+        __ b(Condition(0xE), &done, Label::kNear);  // normal case
+        __ llilh(i.OutputRegister(0), Operand(0x8000));
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_Float32ToUint32: {
+      Label done;
+      __ ConvertFloat32ToUnsignedInt32(i.OutputRegister(0),
+                                       i.InputDoubleRegister(0));
+      bool set_overflow_to_min_u32 = MiscField::decode(instr->opcode());
+      if (set_overflow_to_min_u32) {
+        // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
+        // because 0 allows easier out-of-bounds detection.
+        __ b(Condition(0xE), &done, Label::kNear);  // normal case
+        __ lghi(i.OutputRegister(0), Operand::Zero());
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_Float32ToUint64: {
+      Label done;
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand(1));
+      }
+      __ ConvertFloat32ToUnsignedInt64(i.OutputRegister(0),
+                                       i.InputDoubleRegister(0));
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand::Zero());
+      } else {
+        __ lghi(i.OutputRegister(0), Operand::Zero());
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_Float32ToInt64: {
+      Label done;
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand(1));
+      }
+      __ ConvertFloat32ToInt64(i.OutputRegister(0), i.InputDoubleRegister(0));
+      __ b(Condition(0xE), &done, Label::kNear);  // normal case
+      if (i.OutputCount() > 1) {
+        __ lghi(i.OutputRegister(1), Operand::Zero());
+      } else {
+        __ lghi(i.OutputRegister(0), Operand::Zero());
+      }
+      __ bind(&done);
+      break;
+    }
+    case kS390_DoubleToFloat32:
+      ASSEMBLE_UNARY_OP(D_DInstr(ledbr), nullInstr, nullInstr);
+      break;
+    case kS390_Float32ToDouble:
+      ASSEMBLE_UNARY_OP(D_DInstr(ldebr), D_MTInstr(LoadFloat32ToDouble),
+                        nullInstr);
+      break;
+    case kS390_DoubleExtractLowWord32:
+      __ lgdr(i.OutputRegister(), i.InputDoubleRegister(0));
+      __ llgfr(i.OutputRegister(), i.OutputRegister());
+      break;
+    case kS390_DoubleExtractHighWord32:
+      __ lgdr(i.OutputRegister(), i.InputDoubleRegister(0));
+      __ srlg(i.OutputRegister(), i.OutputRegister(), Operand(32));
+      break;
+    case kS390_DoubleInsertLowWord32:
+      __ lgdr(kScratchReg, i.InputDoubleRegister(0));
+      __ lr(kScratchReg, i.InputRegister(1));
+      __ ldgr(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    case kS390_DoubleInsertHighWord32:
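+      // Place the new high word in the upper 32 bits of the scratch register,
+      // copy the low word of the existing double into the lower 32 bits, and
+      // move the combined 64-bit pattern back into the FP register.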
+      __ sllg(kScratchReg, i.InputRegister(1), Operand(32));
+      __ lgdr(r0, i.InputDoubleRegister(0));
+      __ lr(kScratchReg, r0);
+      __ ldgr(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    case kS390_DoubleConstruct:
+      __ sllg(kScratchReg, i.InputRegister(0), Operand(32));
+      __ lr(kScratchReg, i.InputRegister(1));
+
+      // Bitwise convert from GPR to FPR
+      __ ldgr(i.OutputDoubleRegister(), kScratchReg);
+      break;
+    case kS390_LoadWordS8:
+      ASSEMBLE_LOAD_INTEGER(LoadB);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_BitcastFloat32ToInt32:
+      ASSEMBLE_UNARY_OP(R_DInstr(MovFloatToInt), R_MInstr(LoadlW), nullInstr);
+      break;
+    case kS390_BitcastInt32ToFloat32:
+      __ MovIntToFloat(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+#if V8_TARGET_ARCH_S390X
+    case kS390_BitcastDoubleToInt64:
+      __ MovDoubleToInt64(i.OutputRegister(), i.InputDoubleRegister(0));
+      break;
+    case kS390_BitcastInt64ToDouble:
+      __ MovInt64ToDouble(i.OutputDoubleRegister(), i.InputRegister(0));
+      break;
+#endif
+    case kS390_LoadWordU8:
+      ASSEMBLE_LOAD_INTEGER(LoadlB);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadWordU16:
+      ASSEMBLE_LOAD_INTEGER(LoadLogicalHalfWordP);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadWordS16:
+      ASSEMBLE_LOAD_INTEGER(LoadHalfWordP);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadWordU32:
+      ASSEMBLE_LOAD_INTEGER(LoadlW);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadWordS32:
+      ASSEMBLE_LOAD_INTEGER(LoadW);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadReverse16:
+      ASSEMBLE_LOAD_INTEGER(lrvh);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadReverse32:
+      ASSEMBLE_LOAD_INTEGER(lrv);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadReverse64:
+      ASSEMBLE_LOAD_INTEGER(lrvg);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadReverse16RR:
+      __ lrvr(i.OutputRegister(), i.InputRegister(0));
+      __ rll(i.OutputRegister(), i.OutputRegister(), Operand(16));
+      break;
+    case kS390_LoadReverse32RR:
+      __ lrvr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_LoadReverse64RR:
+      __ lrvgr(i.OutputRegister(), i.InputRegister(0));
+      break;
+    case kS390_LoadReverseSimd128RR:
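+      // Byte-reverse the full 128-bit value: extract both doublewords into
+      // GPRs, byte-reverse each one, and re-insert them with their lanes
+      // swapped.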
+      __ vlgv(r0, i.InputSimd128Register(0), MemOperand(r0, 0), Condition(3));
+      __ vlgv(r1, i.InputSimd128Register(0), MemOperand(r0, 1), Condition(3));
+      __ lrvgr(r0, r0);
+      __ lrvgr(r1, r1);
+      __ vlvg(i.OutputSimd128Register(), r0, MemOperand(r0, 1), Condition(3));
+      __ vlvg(i.OutputSimd128Register(), r1, MemOperand(r0, 0), Condition(3));
+      break;
+    case kS390_LoadReverseSimd128: {
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode);
+      if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
+          is_uint12(operand.offset())) {
+        __ vlbr(i.OutputSimd128Register(), operand, Condition(4));
+      } else {
+        __ lrvg(r0, operand);
+        __ lrvg(r1, MemOperand(operand.rx(), operand.rb(),
+                               operand.offset() + kBitsPerByte));
+        __ vlvgp(i.OutputSimd128Register(), r1, r0);
+      }
+      break;
+    }
+    case kS390_LoadWord64:
+      ASSEMBLE_LOAD_INTEGER(lg);
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    case kS390_LoadAndTestWord32: {
+      ASSEMBLE_LOADANDTEST32(ltr, lt_z);
+      break;
+    }
+    case kS390_LoadAndTestWord64: {
+      ASSEMBLE_LOADANDTEST64(ltgr, ltg);
+      break;
+    }
+    case kS390_LoadFloat32:
+      ASSEMBLE_LOAD_FLOAT(LoadFloat32);
+      break;
+    case kS390_LoadDouble:
+      ASSEMBLE_LOAD_FLOAT(LoadDouble);
+      break;
+    case kS390_LoadSimd128: {
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode);
+      __ vl(i.OutputSimd128Register(), operand, Condition(0));
+      EmitWordLoadPoisoningIfNeeded(this, instr, i);
+      break;
+    }
+    case kS390_StoreWord8:
+      ASSEMBLE_STORE_INTEGER(StoreByte);
+      break;
+    case kS390_StoreWord16:
+      ASSEMBLE_STORE_INTEGER(StoreHalfWord);
+      break;
+    case kS390_StoreWord32:
+      ASSEMBLE_STORE_INTEGER(StoreW);
+      break;
+#if V8_TARGET_ARCH_S390X
+    case kS390_StoreWord64:
+      ASSEMBLE_STORE_INTEGER(StoreP);
+      break;
+#endif
+    case kS390_StoreReverse16:
+      ASSEMBLE_STORE_INTEGER(strvh);
+      break;
+    case kS390_StoreReverse32:
+      ASSEMBLE_STORE_INTEGER(strv);
+      break;
+    case kS390_StoreReverse64:
+      ASSEMBLE_STORE_INTEGER(strvg);
+      break;
+    case kS390_StoreReverseSimd128: {
+      size_t index = 0;
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode, &index);
+      if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
+          is_uint12(operand.offset())) {
+        __ vstbr(i.InputSimd128Register(index), operand, Condition(4));
+      } else {
+        __ vlgv(r0, i.InputSimd128Register(index), MemOperand(r0, 1),
+                Condition(3));
+        __ vlgv(r1, i.InputSimd128Register(index), MemOperand(r0, 0),
+                Condition(3));
+        __ strvg(r0, operand);
+        __ strvg(r1, MemOperand(operand.rx(), operand.rb(),
+                                operand.offset() + kBitsPerByte));
+      }
+      break;
+    }
+    case kS390_StoreFloat32:
+      ASSEMBLE_STORE_FLOAT32();
+      break;
+    case kS390_StoreDouble:
+      ASSEMBLE_STORE_DOUBLE();
+      break;
+    case kS390_StoreSimd128: {
+      size_t index = 0;
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode, &index);
+      __ vst(i.InputSimd128Register(index), operand, Condition(0));
+      break;
+    }
+    case kS390_Lay:
+      __ lay(i.OutputRegister(), i.MemoryOperand());
+      break;
+//         0x aa bb cc dd
+// index =    3..2..1..0
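+// ATOMIC_EXCHANGE swaps one byte/halfword lane within an aligned 32-bit word:
+// load the word, rotate the new value into the selected bit range, retry the
+// compare-and-swap (csy) until it succeeds, then shift the previous lane value
+// down into the output register.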
+#define ATOMIC_EXCHANGE(start, end, shift_amount, offset)              \
+  {                                                                    \
+    Label do_cs;                                                       \
+    __ LoadlW(output, MemOperand(r1, offset));                         \
+    __ bind(&do_cs);                                                   \
+    __ llgfr(r0, output);                                              \
+    __ RotateInsertSelectBits(r0, value, Operand(start), Operand(end), \
+                              Operand(shift_amount), false);           \
+    __ csy(output, r0, MemOperand(r1, offset));                        \
+    __ bne(&do_cs, Label::kNear);                                      \
+    __ srl(output, Operand(shift_amount));                             \
+  }
+#ifdef V8_TARGET_BIG_ENDIAN
+#define ATOMIC_EXCHANGE_BYTE(i)                                  \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 3 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 8 * idx;                          \
+    constexpr int end = start + 7;                               \
+    constexpr int shift_amount = (3 - idx) * 8;                  \
+    ATOMIC_EXCHANGE(start, end, shift_amount, -idx);             \
+  }
+#define ATOMIC_EXCHANGE_HALFWORD(i)                              \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 1 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 16 * idx;                         \
+    constexpr int end = start + 15;                              \
+    constexpr int shift_amount = (1 - idx) * 16;                 \
+    ATOMIC_EXCHANGE(start, end, shift_amount, -idx * 2);         \
+  }
+#else
+#define ATOMIC_EXCHANGE_BYTE(i)                                  \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 3 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 8 * (3 - idx);                    \
+    constexpr int end = start + 7;                               \
+    constexpr int shift_amount = idx * 8;                        \
+    ATOMIC_EXCHANGE(start, end, shift_amount, -idx);             \
+  }
+#define ATOMIC_EXCHANGE_HALFWORD(i)                              \
+  {                                                              \
+    constexpr int idx = (i);                                     \
+    static_assert(idx <= 1 && idx >= 0, "idx is out of range!"); \
+    constexpr int start = 32 + 16 * (1 - idx);                   \
+    constexpr int end = start + 15;                              \
+    constexpr int shift_amount = idx * 16;                       \
+    ATOMIC_EXCHANGE(start, end, shift_amount, -idx * 2);         \
+  }
+#endif
+    case kS390_Word64AtomicExchangeUint8:
+    case kWord32AtomicExchangeInt8:
+    case kWord32AtomicExchangeUint8: {
+      Register base = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register output = i.OutputRegister();
+      Label three, two, one, done;
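+      // Form the byte address in r1 and dispatch on its low two bits to the
+      // handler for the matching lane.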
+      __ la(r1, MemOperand(base, index));
+      __ tmll(r1, Operand(3));
+      __ b(Condition(1), &three);
+      __ b(Condition(2), &two);
+      __ b(Condition(4), &one);
+
+      // address ends with 0b00
+      ATOMIC_EXCHANGE_BYTE(0);
+      __ b(&done);
+
+      // address ends with 0b01
+      __ bind(&one);
+      ATOMIC_EXCHANGE_BYTE(1);
+      __ b(&done);
+
+      // address ends with 0b10
+      __ bind(&two);
+      ATOMIC_EXCHANGE_BYTE(2);
+      __ b(&done);
+
+      // address ends with 0b11
+      __ bind(&three);
+      ATOMIC_EXCHANGE_BYTE(3);
+
+      __ bind(&done);
+      if (opcode == kWord32AtomicExchangeInt8) {
+        __ lgbr(output, output);
+      } else {
+        __ llgcr(output, output);
+      }
+      break;
+    }
+    case kS390_Word64AtomicExchangeUint16:
+    case kWord32AtomicExchangeInt16:
+    case kWord32AtomicExchangeUint16: {
+      Register base = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register output = i.OutputRegister();
+      Label two, done;
+      __ la(r1, MemOperand(base, index));
+      __ tmll(r1, Operand(3));
+      __ b(Condition(2), &two);
+
+      // address ends with 0b00
+      ATOMIC_EXCHANGE_HALFWORD(0);
+      __ b(&done);
+
+      // address ends with 0b10
+      __ bind(&two);
+      ATOMIC_EXCHANGE_HALFWORD(1);
+
+      __ bind(&done);
+      if (opcode == kWord32AtomicExchangeInt16) {
+        __ lghr(output, output);
+      } else {
+        __ llghr(output, output);
+      }
+      break;
+    }
+    case kS390_Word64AtomicExchangeUint32:
+    case kWord32AtomicExchangeWord32: {
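+      // Word-sized operands need no lane extraction: load the old value and
+      // retry the compare-and-swap until it succeeds.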
+      Register base = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register output = i.OutputRegister();
+      Label do_cs;
+      __ lay(r1, MemOperand(base, index));
+      __ LoadlW(output, MemOperand(r1));
+      __ bind(&do_cs);
+      __ cs(output, value, MemOperand(r1));
+      __ bne(&do_cs, Label::kNear);
+      break;
+    }
+    case kWord32AtomicCompareExchangeInt8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_BYTE(LoadB);
+      break;
+    case kS390_Word64AtomicCompareExchangeUint8:
+    case kWord32AtomicCompareExchangeUint8:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_BYTE(LoadlB);
+      break;
+    case kWord32AtomicCompareExchangeInt16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_HALFWORD(LoadHalfWordP);
+      break;
+    case kS390_Word64AtomicCompareExchangeUint16:
+    case kWord32AtomicCompareExchangeUint16:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_HALFWORD(LoadLogicalHalfWordP);
+      break;
+    case kS390_Word64AtomicCompareExchangeUint32:
+    case kWord32AtomicCompareExchangeWord32:
+      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_WORD();
+      break;
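+// ATOMIC_BINOP_CASE expands the sub-word atomic read-modify-write cases. The
+// trailing lambda extracts the previous lane value from the aligned word:
+// shift-and-sign-extend for the signed variants, rotate-and-select with zero
+// fill for the unsigned ones.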
+#define ATOMIC_BINOP_CASE(op, inst)                                          \
+  case kWord32Atomic##op##Int8:                                              \
+    ASSEMBLE_ATOMIC_BINOP_BYTE(inst, [&]() {                                 \
+      intptr_t shift_right = static_cast<intptr_t>(shift_amount);            \
+      __ srlk(result, prev, Operand(shift_right));                           \
+      __ LoadB(result, result);                                              \
+    });                                                                      \
+    break;                                                                   \
+  case kS390_Word64Atomic##op##Uint8:                                        \
+  case kWord32Atomic##op##Uint8:                                             \
+    ASSEMBLE_ATOMIC_BINOP_BYTE(inst, [&]() {                                 \
+      int rotate_left = shift_amount == 0 ? 0 : 64 - shift_amount;           \
+      __ RotateInsertSelectBits(result, prev, Operand(56), Operand(63),      \
+                                Operand(static_cast<intptr_t>(rotate_left)), \
+                                true);                                       \
+    });                                                                      \
+    break;                                                                   \
+  case kWord32Atomic##op##Int16:                                             \
+    ASSEMBLE_ATOMIC_BINOP_HALFWORD(inst, [&]() {                             \
+      intptr_t shift_right = static_cast<intptr_t>(shift_amount);            \
+      __ srlk(result, prev, Operand(shift_right));                           \
+      __ LoadHalfWordP(result, result);                                      \
+    });                                                                      \
+    break;                                                                   \
+  case kS390_Word64Atomic##op##Uint16:                                       \
+  case kWord32Atomic##op##Uint16:                                            \
+    ASSEMBLE_ATOMIC_BINOP_HALFWORD(inst, [&]() {                             \
+      int rotate_left = shift_amount == 0 ? 0 : 64 - shift_amount;           \
+      __ RotateInsertSelectBits(result, prev, Operand(48), Operand(63),      \
+                                Operand(static_cast<intptr_t>(rotate_left)), \
+                                true);                                       \
+    });                                                                      \
+    break;
+      ATOMIC_BINOP_CASE(Add, Add32)
+      ATOMIC_BINOP_CASE(Sub, Sub32)
+      ATOMIC_BINOP_CASE(And, And)
+      ATOMIC_BINOP_CASE(Or, Or)
+      ATOMIC_BINOP_CASE(Xor, Xor)
+#undef ATOMIC_BINOP_CASE
+    case kS390_Word64AtomicAddUint32:
+    case kWord32AtomicAddWord32:
+      ASSEMBLE_ATOMIC_BINOP_WORD(laa);
+      break;
+    case kS390_Word64AtomicSubUint32:
+    case kWord32AtomicSubWord32:
+      ASSEMBLE_ATOMIC_BINOP_WORD(LoadAndSub32);
+      break;
+    case kS390_Word64AtomicAndUint32:
+    case kWord32AtomicAndWord32:
+      ASSEMBLE_ATOMIC_BINOP_WORD(lan);
+      break;
+    case kS390_Word64AtomicOrUint32:
+    case kWord32AtomicOrWord32:
+      ASSEMBLE_ATOMIC_BINOP_WORD(lao);
+      break;
+    case kS390_Word64AtomicXorUint32:
+    case kWord32AtomicXorWord32:
+      ASSEMBLE_ATOMIC_BINOP_WORD(lax);
+      break;
+    case kS390_Word64AtomicAddUint64:
+      ASSEMBLE_ATOMIC_BINOP_WORD64(laag);
+      break;
+    case kS390_Word64AtomicSubUint64:
+      ASSEMBLE_ATOMIC_BINOP_WORD64(LoadAndSub64);
+      break;
+    case kS390_Word64AtomicAndUint64:
+      ASSEMBLE_ATOMIC_BINOP_WORD64(lang);
+      break;
+    case kS390_Word64AtomicOrUint64:
+      ASSEMBLE_ATOMIC_BINOP_WORD64(laog);
+      break;
+    case kS390_Word64AtomicXorUint64:
+      ASSEMBLE_ATOMIC_BINOP_WORD64(laxg);
+      break;
+    case kS390_Word64AtomicExchangeUint64: {
+      Register base = i.InputRegister(0);
+      Register index = i.InputRegister(1);
+      Register value = i.InputRegister(2);
+      Register output = i.OutputRegister();
+      Label do_cs;
+      __ la(r1, MemOperand(base, index));
+      __ lg(output, MemOperand(r1));
+      __ bind(&do_cs);
+      __ csg(output, value, MemOperand(r1));
+      __ bne(&do_cs, Label::kNear);
+      break;
+    }
+    case kS390_Word64AtomicCompareExchangeUint64:
+      ASSEMBLE_ATOMIC64_COMP_EXCHANGE_WORD64();
+      break;
+    // vector replicate element
+    case kS390_F64x2Splat: {
+      __ vrep(i.OutputSimd128Register(), i.InputDoubleRegister(0), Operand(0),
+              Condition(3));
+      break;
+    }
+    case kS390_F32x4Splat: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vrep(i.OutputSimd128Register(), i.InputDoubleRegister(0), Operand(0),
+              Condition(2));
+#else
+      __ vrep(i.OutputSimd128Register(), i.InputDoubleRegister(0), Operand(1),
+              Condition(2));
+#endif
+      break;
+    }
+    case kS390_I64x2Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlvg(dst, i.InputRegister(0), MemOperand(r0, 0), Condition(3));
+      __ vrep(dst, dst, Operand(0), Condition(3));
+      break;
+    }
+    case kS390_I32x4Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlvg(dst, i.InputRegister(0), MemOperand(r0, 0), Condition(2));
+      __ vrep(dst, dst, Operand(0), Condition(2));
+      break;
+    }
+    case kS390_I16x8Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlvg(dst, i.InputRegister(0), MemOperand(r0, 0), Condition(1));
+      __ vrep(dst, dst, Operand(0), Condition(1));
+      break;
+    }
+    case kS390_I8x16Splat: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlvg(dst, i.InputRegister(0), MemOperand(r0, 0), Condition(0));
+      __ vrep(dst, dst, Operand(0), Condition(0));
+      break;
+    }
+    // vector extract element
+    case kS390_F64x2ExtractLane: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
+              Operand(1 - i.InputInt8(1)), Condition(3));
+#else
+      __ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
+              Operand(i.InputInt8(1)), Condition(3));
+#endif
+      break;
+    }
+    case kS390_F32x4ExtractLane: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
+              Operand(3 - i.InputInt8(1)), Condition(2));
+#else
+      __ vrep(i.OutputDoubleRegister(), i.InputSimd128Register(0),
+              Operand(i.InputInt8(1)), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I64x2ExtractLane: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
+#else
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(3));
+#endif
+      break;
+    }
+    case kS390_I32x4ExtractLane: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
+#else
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8ExtractLaneU: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
+#else
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(1));
+#endif
+      break;
+    }
+    case kS390_I16x8ExtractLaneS: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(kScratchReg, i.InputSimd128Register(0),
+              MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
+#else
+      __ vlgv(kScratchReg, i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(1));
+#endif
+      __ lghr(i.OutputRegister(), kScratchReg);
+      break;
+    }
+    case kS390_I8x16ExtractLaneU: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
+#else
+      __ vlgv(i.OutputRegister(), i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(0));
+#endif
+      break;
+    }
+    case kS390_I8x16ExtractLaneS: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(kScratchReg, i.InputSimd128Register(0),
+              MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
+#else
+      __ vlgv(kScratchReg, i.InputSimd128Register(0),
+              MemOperand(r0, i.InputInt8(1)), Condition(0));
+#endif
+      __ lgbr(i.OutputRegister(), kScratchReg);
+      break;
+    }
+    // vector replace element
+    case kS390_F64x2ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
+      __ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
+              Condition(3));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlvg(kScratchDoubleReg, kScratchReg,
+              MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
+#else
+      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, i.InputInt8(1)),
+              Condition(3));
+#endif
+      __ vlr(dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_F32x4ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 0),
+              Condition(2));
+      __ vlvg(kScratchDoubleReg, kScratchReg,
+              MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
+#else
+      __ vlgv(kScratchReg, i.InputDoubleRegister(2), MemOperand(r0, 1),
+              Condition(2));
+      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, i.InputInt8(1)),
+              Condition(2));
+#endif
+      __ vlr(dst, kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I64x2ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ vlr(dst, src, Condition(0), Condition(0), Condition(0));
+      }
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, 1 - i.InputInt8(1)), Condition(3));
+#else
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, i.InputInt8(1)), Condition(3));
+#endif
+      break;
+    }
+    case kS390_I32x4ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ vlr(dst, src, Condition(0), Condition(0), Condition(0));
+      }
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, 3 - i.InputInt8(1)), Condition(2));
+#else
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, i.InputInt8(1)), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ vlr(dst, src, Condition(0), Condition(0), Condition(0));
+      }
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, 7 - i.InputInt8(1)), Condition(1));
+#else
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, i.InputInt8(1)), Condition(1));
+#endif
+      break;
+    }
+    case kS390_I8x16ReplaceLane: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      if (src != dst) {
+        __ vlr(dst, src, Condition(0), Condition(0), Condition(0));
+      }
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, 15 - i.InputInt8(1)), Condition(0));
+#else
+      __ vlvg(i.OutputSimd128Register(), i.InputRegister(2),
+              MemOperand(r0, i.InputInt8(1)), Condition(0));
+#endif
+      break;
+    }
+    // vector binops
+    case kS390_F64x2Add: {
+      __ vfa(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(3));
+      break;
+    }
+    case kS390_F64x2Sub: {
+      __ vfs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(3));
+      break;
+    }
+    case kS390_F64x2Mul: {
+      __ vfm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(3));
+      break;
+    }
+    case kS390_F64x2Div: {
+      __ vfd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(3));
+      break;
+    }
+    case kS390_F64x2Min: {
+      __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(1), Condition(0),
+               Condition(3));
+      break;
+    }
+    case kS390_F64x2Max: {
+      __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(1), Condition(0),
+               Condition(3));
+      break;
+    }
+    case kS390_F64x2Qfma: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vfma(dst, src1, src2, src0, Condition(3), Condition(0));
+      break;
+    }
+    case kS390_F64x2Qfms: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vfnms(dst, src1, src2, src0, Condition(3), Condition(0));
+      break;
+    }
+    case kS390_F32x4Add: {
+      __ vfa(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
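+// FLOAT_ADD_HORIZ packs the low 32-bit halves of the doubleword lanes of both
+// sources into one vector and the high halves (after a 32-bit shift) into
+// another, then adds the two to produce the pairwise sums.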
+#define FLOAT_ADD_HORIZ(src0, src1, scratch0, scratch1, add0, add1)         \
+  __ vpk(dst, src0, src1, Condition(0), Condition(0), Condition(3));        \
+  __ vesrl(scratch0, src0, MemOperand(r0, shift_bits), Condition(3));       \
+  __ vesrl(scratch1, src1, MemOperand(r0, shift_bits), Condition(3));       \
+  __ vpk(kScratchDoubleReg, scratch0, scratch1, Condition(0), Condition(0), \
+         Condition(3));                                                     \
+  __ vfa(dst, add0, add1, Condition(0), Condition(0), Condition(2));
+    case kS390_F32x4AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      DoubleRegister tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      DoubleRegister tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
+      constexpr int shift_bits = 32;
+#ifdef V8_TARGET_BIG_ENDIAN
+      FLOAT_ADD_HORIZ(src1, src0, tempFPReg2, tempFPReg1, kScratchDoubleReg,
+                      dst)
+#else
+      FLOAT_ADD_HORIZ(src0, src1, tempFPReg1, tempFPReg2, dst,
+                      kScratchDoubleReg)
+#endif
+#undef FLOAT_ADD_HORIZ
+      break;
+    }
+    case kS390_F32x4Sub: {
+      __ vfs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_F32x4Mul: {
+      __ vfm(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_F32x4Div: {
+      __ vfd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_F32x4Min: {
+      __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(1), Condition(0),
+               Condition(2));
+      break;
+    }
+    case kS390_F32x4Max: {
+      __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(1), Condition(0),
+               Condition(2));
+      break;
+    }
+    case kS390_F32x4Qfma: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vfma(dst, src1, src2, src0, Condition(2), Condition(0));
+      break;
+    }
+    case kS390_F32x4Qfms: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vfnms(dst, src1, src2, src0, Condition(2), Condition(0));
+      break;
+    }
+    case kS390_I64x2Add: {
+      __ va(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(3));
+      break;
+    }
+    case kS390_I64x2Sub: {
+      __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(3));
+      break;
+    }
+    case kS390_I64x2Mul: {
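+      // Multiply the two 64-bit lanes in scalar registers (the products end up
+      // in r0 and r1) and pack them back into the output vector.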
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Register scratch_0 = r0;
+      Register scratch_1 = r1;
+      for (int i = 0; i < 2; i++) {
+        __ vlgv(scratch_0, src0, MemOperand(r0, i), Condition(3));
+        __ vlgv(scratch_1, src1, MemOperand(r0, i), Condition(3));
+        __ Mul64(scratch_0, scratch_1);
+        scratch_0 = r1;
+        scratch_1 = ip;
+      }
+      __ vlvgp(i.OutputSimd128Register(), r0, r1);
+      break;
+    }
+    case kS390_I32x4Add: {
+      __ va(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(2));
+      break;
+    }
+    case kS390_I32x4AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vs(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(2));
+      __ vsumg(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
+               Condition(2));
+      __ vsumg(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
+               Condition(0), Condition(2));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
+             Condition(3));
+#else
+      __ vpk(dst, dst, kScratchDoubleReg, Condition(0), Condition(0),
+             Condition(3));
+#endif
+      break;
+    }
+    case kS390_I32x4Sub: {
+      __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(2));
+      break;
+    }
+    case kS390_I32x4Mul: {
+      __ vml(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_I16x8Add: {
+      __ va(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(1));
+      break;
+    }
+    case kS390_I16x8AddHoriz: {
+      Simd128Register src0 = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vs(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(1));
+      __ vsum(dst, src0, kScratchDoubleReg, Condition(0), Condition(0),
+              Condition(1));
+      __ vsum(kScratchDoubleReg, src1, kScratchDoubleReg, Condition(0),
+              Condition(0), Condition(1));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpk(dst, kScratchDoubleReg, dst, Condition(0), Condition(0),
+             Condition(2));
+#else
+      __ vpk(dst, dst, kScratchDoubleReg, Condition(0), Condition(0),
+             Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8Sub: {
+      __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(1));
+      break;
+    }
+    case kS390_I16x8Mul: {
+      __ vml(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(1));
+      break;
+    }
+    case kS390_I8x16Add: {
+      __ va(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(0));
+      break;
+    }
+    case kS390_I8x16Sub: {
+      __ vs(i.OutputSimd128Register(), i.InputSimd128Register(0),
+            i.InputSimd128Register(1), Condition(0), Condition(0),
+            Condition(0));
+      break;
+    }
+    case kS390_I8x16Mul: {
+      __ vml(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(0));
+      break;
+    }
+    case kS390_I16x8RoundingAverageU: {
+      __ vavgl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(0), Condition(0),
+               Condition(1));
+      break;
+    }
+    case kS390_I8x16RoundingAverageU: {
+      __ vavgl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(0), Condition(0),
+               Condition(0));
+      break;
+    }
+    // vector comparisons
+    case kS390_F64x2Eq: {
+      __ vfce(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(3));
+      break;
+    }
+    case kS390_F64x2Ne: {
+      __ vfce(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(3));
+      __ vno(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg,
+             Condition(0), Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2Le: {
+      __ vfche(i.OutputSimd128Register(), i.InputSimd128Register(1),
+               i.InputSimd128Register(0), Condition(0), Condition(0),
+               Condition(3));
+      break;
+    }
+    case kS390_F64x2Lt: {
+      __ vfch(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0), Condition(0), Condition(0),
+              Condition(3));
+      break;
+    }
+    case kS390_I32x4MinS: {
+      __ vmn(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_I32x4MinU: {
+      __ vmnl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(2));
+      break;
+    }
+    case kS390_I16x8MinS: {
+      __ vmn(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(1));
+      break;
+    }
+    case kS390_I16x8MinU: {
+      __ vmnl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(1));
+      break;
+    }
+    case kS390_I8x16MinS: {
+      __ vmn(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(0));
+      break;
+    }
+    case kS390_I8x16MinU: {
+      __ vmnl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(0));
+      break;
+    }
+    case kS390_I32x4MaxS: {
+      __ vmx(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_I32x4MaxU: {
+      __ vmxl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(2));
+      break;
+    }
+    case kS390_I16x8MaxS: {
+      __ vmx(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(1));
+      break;
+    }
+    case kS390_I16x8MaxU: {
+      __ vmxl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(1));
+      break;
+    }
+    case kS390_I8x16MaxS: {
+      __ vmx(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(0));
+      break;
+    }
+    case kS390_I8x16MaxU: {
+      __ vmxl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(0));
+      break;
+    }
+    case kS390_F32x4Eq: {
+      __ vfce(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(2));
+      break;
+    }
+    case kS390_I64x2Eq: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(3));
+      break;
+    }
+    case kS390_I32x4Eq: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I16x8Eq: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I8x16Eq: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_F32x4Ne: {
+      __ vfce(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0),
+              Condition(2));
+      __ vno(i.OutputSimd128Register(), kScratchDoubleReg, kScratchDoubleReg,
+             Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I32x4Ne: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      __ vno(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             i.OutputSimd128Register(), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_I16x8Ne: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      __ vno(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             i.OutputSimd128Register(), Condition(0), Condition(0),
+             Condition(1));
+      break;
+    }
+    case kS390_I8x16Ne: {
+      __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      __ vno(i.OutputSimd128Register(), i.OutputSimd128Register(),
+             i.OutputSimd128Register(), Condition(0), Condition(0),
+             Condition(0));
+      break;
+    }
+    case kS390_F32x4Lt: {
+      __ vfch(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0), Condition(0), Condition(0),
+              Condition(2));
+      break;
+    }
+    case kS390_F32x4Le: {
+      __ vfche(i.OutputSimd128Register(), i.InputSimd128Register(1),
+               i.InputSimd128Register(0), Condition(0), Condition(0),
+               Condition(2));
+      break;
+    }
+    case kS390_I32x4GtS: {
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I32x4GeS: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(2));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I32x4GtU: {
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I32x4GeU: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I16x8GtS: {
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I16x8GeS: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(1));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I16x8GtU: {
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I16x8GeU: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I8x16GtS: {
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I8x16GeS: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      __ vch(i.OutputSimd128Register(), i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I8x16GtU: {
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I8x16GeU: {
+      __ vceq(kScratchDoubleReg, i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      __ vchl(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(0));
+      __ vo(i.OutputSimd128Register(), i.OutputSimd128Register(),
+            kScratchDoubleReg, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    // vector shifts
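+// VECTOR_SHIFT broadcasts the scalar shift amount from a GPR into every lane
+// of kScratchDoubleReg (vlvg + vrep) and then shifts each lane of the input by
+// the corresponding lane of that vector.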
+#define VECTOR_SHIFT(op, mode)                                             \
+  {                                                                        \
+    __ vlvg(kScratchDoubleReg, i.InputRegister(1), MemOperand(r0, 0),      \
+            Condition(mode));                                              \
+    __ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0),              \
+            Condition(mode));                                              \
+    __ op(i.OutputSimd128Register(), i.InputSimd128Register(0),            \
+          kScratchDoubleReg, Condition(0), Condition(0), Condition(mode)); \
+  }
+    case kS390_I64x2Shl: {
+      VECTOR_SHIFT(veslv, 3);
+      break;
+    }
+    case kS390_I64x2ShrS: {
+      VECTOR_SHIFT(vesrav, 3);
+      break;
+    }
+    case kS390_I64x2ShrU: {
+      VECTOR_SHIFT(vesrlv, 3);
+      break;
+    }
+    case kS390_I32x4Shl: {
+      VECTOR_SHIFT(veslv, 2);
+      break;
+    }
+    case kS390_I32x4ShrS: {
+      VECTOR_SHIFT(vesrav, 2);
+      break;
+    }
+    case kS390_I32x4ShrU: {
+      VECTOR_SHIFT(vesrlv, 2);
+      break;
+    }
+    case kS390_I16x8Shl: {
+      VECTOR_SHIFT(veslv, 1);
+      break;
+    }
+    case kS390_I16x8ShrS: {
+      VECTOR_SHIFT(vesrav, 1);
+      break;
+    }
+    case kS390_I16x8ShrU: {
+      VECTOR_SHIFT(vesrlv, 1);
+      break;
+    }
+    case kS390_I8x16Shl: {
+      VECTOR_SHIFT(veslv, 0);
+      break;
+    }
+    case kS390_I8x16ShrS: {
+      VECTOR_SHIFT(vesrav, 0);
+      break;
+    }
+    case kS390_I8x16ShrU: {
+      VECTOR_SHIFT(vesrlv, 0);
+      break;
+    }
+    // vector unary ops
+    case kS390_F64x2Abs: {
+      __ vfpso(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               Condition(2), Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2Neg: {
+      __ vfpso(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               Condition(0), Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2Sqrt: {
+      __ vfsq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              Condition(0), Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F32x4Abs: {
+      __ vfpso(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               Condition(2), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_F32x4Neg: {
+      __ vfpso(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I64x2Neg: {
+      __ vlc(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(3));
+      break;
+    }
+    case kS390_I32x4Neg: {
+      __ vlc(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I16x8Neg: {
+      __ vlc(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I8x16Neg: {
+      __ vlc(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(0));
+      break;
+    }
+    case kS390_F32x4RecipApprox: {
+      __ lgfi(kScratchReg, Operand(1));
+      __ ConvertIntToFloat(kScratchDoubleReg, kScratchReg);
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(2));
+#else
+      __ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(1), Condition(2));
+#endif
+      __ vfd(i.OutputSimd128Register(), kScratchDoubleReg,
+             i.InputSimd128Register(0), Condition(0), Condition(0),
+             Condition(2));
+      break;
+    }
+    case kS390_F32x4RecipSqrtApprox: {
+      DoubleRegister tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ vfsq(tempFPReg1, i.InputSimd128Register(0), Condition(0), Condition(0),
+              Condition(2));
+      __ lgfi(kScratchReg, Operand(1));
+      __ ConvertIntToFloat(kScratchDoubleReg, kScratchReg);
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(0), Condition(2));
+#else
+      __ vrep(kScratchDoubleReg, kScratchDoubleReg, Operand(1), Condition(2));
+#endif
+      __ vfd(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+             Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_F32x4Sqrt: {
+      __ vfsq(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_S128Not: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vno(dst, src, src, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I8x16Abs: {
+      __ vlp(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I16x8Abs: {
+      __ vlp(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(1));
+      break;
+    }
+    case kS390_I32x4Abs: {
+      __ vlp(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0),
+             Condition(0), Condition(2));
+      break;
+    }
+    // vector boolean unops
+    case kS390_V32x4AnyTrue:
+    case kS390_V16x8AnyTrue:
+    case kS390_V8x16AnyTrue: {
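+      // dst is preloaded with 1 and reset to 0 (locgr on CC0) only when vtm
+      // reports an all-zero source vector.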
+      Simd128Register src = i.InputSimd128Register(0);
+      Register dst = i.OutputRegister();
+      Register temp = i.TempRegister(0);
+      __ lgfi(dst, Operand(1));
+      __ xgr(temp, temp);
+      __ vtm(src, src, Condition(0), Condition(0), Condition(0));
+      __ locgr(Condition(8), dst, temp);
+      break;
+    }
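+// SIMD_ALL_TRUE compares every lane of the source against zero; vtm sets CC0
+// only when no lane compared equal (i.e. all lanes are non-zero), and locgr
+// then moves the preloaded 1 into dst.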
+#define SIMD_ALL_TRUE(mode)                                                    \
+  Simd128Register src = i.InputSimd128Register(0);                             \
+  Register dst = i.OutputRegister();                                           \
+  Register temp = i.TempRegister(0);                                           \
+  __ lgfi(temp, Operand(1));                                                   \
+  __ xgr(dst, dst);                                                            \
+  __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg, Condition(0), \
+        Condition(0), Condition(2));                                           \
+  __ vceq(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),             \
+          Condition(mode));                                                    \
+  __ vtm(kScratchDoubleReg, kScratchDoubleReg, Condition(0), Condition(0),     \
+         Condition(0));                                                        \
+  __ locgr(Condition(8), dst, temp);
+    case kS390_V32x4AllTrue: {
+      SIMD_ALL_TRUE(2)
+      break;
+    }
+    case kS390_V16x8AllTrue: {
+      SIMD_ALL_TRUE(1)
+      break;
+    }
+    case kS390_V8x16AllTrue: {
+      SIMD_ALL_TRUE(0)
+      break;
+    }
+#undef SIMD_ALL_TRUE
+    // vector bitwise ops
+    case kS390_S128And: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vn(dst, i.InputSimd128Register(0), src, Condition(0), Condition(0),
+            Condition(0));
+      break;
+    }
+    case kS390_S128Or: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vo(dst, i.InputSimd128Register(0), src, Condition(0), Condition(0),
+            Condition(0));
+      break;
+    }
+    case kS390_S128Xor: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vx(dst, i.InputSimd128Register(0), src, Condition(0), Condition(0),
+            Condition(0));
+      break;
+    }
+    case kS390_S128Const: {
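+      // Build the two 64-bit halves of the constant in r0 and ip from the four
+      // 32-bit immediates (lgfi fills the low word, iihf the high word), then
+      // combine them with vlvgp.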
+#ifdef V8_TARGET_BIG_ENDIAN
+      for (int index = 0, j = 0; index < 2; index++, j += 2) {
+        __ lgfi(index < 1 ? ip : r0, Operand(i.InputInt32(j)));
+        __ iihf(index < 1 ? ip : r0, Operand(i.InputInt32(j + 1)));
+      }
+#else
+      for (int index = 0, j = 0; index < 2; index++, j += 2) {
+        __ lgfi(index < 1 ? r0 : ip, Operand(i.InputInt32(j)));
+        __ iihf(index < 1 ? r0 : ip, Operand(i.InputInt32(j + 1)));
+      }
+#endif
+      __ vlvgp(i.OutputSimd128Register(), r0, ip);
+      break;
+    }
+    case kS390_S128Zero: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
+      break;
+    }
+    case kS390_S128AllOnes: {
+      Simd128Register dst = i.OutputSimd128Register();
+      __ vceq(dst, dst, dst, Condition(0), Condition(3));
+      break;
+    }
+    case kS390_S128Select: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register mask = i.InputSimd128Register(0);
+      Simd128Register src1 = i.InputSimd128Register(1);
+      Simd128Register src2 = i.InputSimd128Register(2);
+      __ vsel(dst, src1, src2, mask, Condition(0), Condition(0));
+      break;
+    }
+    case kS390_S128AndNot: {
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(1);
+      __ vnc(dst, i.InputSimd128Register(0), src, Condition(0), Condition(0),
+             Condition(0));
+      break;
+    }
+    // vector conversions
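+// CONVERT_FLOAT_TO_INT32 converts the cleaned-up value in kScratchDoubleReg
+// lane by lane: move each 32-bit lane through a GPR into a scalar FPR, convert
+// with truncation, and insert the integer result into dst.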
+#define CONVERT_FLOAT_TO_INT32(convert)                             \
+  for (int index = 0; index < 4; index++) {                         \
+    __ vlgv(kScratchReg, kScratchDoubleReg, MemOperand(r0, index),  \
+            Condition(2));                                          \
+    __ MovIntToFloat(tempFPReg1, kScratchReg);                      \
+    __ convert(kScratchReg, tempFPReg1, kRoundToZero);              \
+    __ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
+  }
+    case kS390_I32x4SConvertF32x4: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      // NaN to 0
+      __ vlr(kScratchDoubleReg, src, Condition(0), Condition(0), Condition(0));
+      __ vfce(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+              Condition(0), Condition(0), Condition(2));
+      __ vn(kScratchDoubleReg, src, kScratchDoubleReg, Condition(0),
+            Condition(0), Condition(0));
+      CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32)
+      break;
+    }
+    case kS390_I32x4UConvertF32x4: {
+      Simd128Register src = i.InputSimd128Register(0);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      // NaN to 0, negative to 0
+      __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(0));
+      __ vfmax(kScratchDoubleReg, src, kScratchDoubleReg, Condition(1),
+               Condition(0), Condition(2));
+      CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32)
+      break;
+    }
+#undef CONVERT_FLOAT_TO_INT32
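+// CONVERT_INT32_TO_FLOAT is the scalar counterpart for the opposite direction:
+// extract each 32-bit integer lane into a GPR, convert it in a scalar FPR, and
+// move the float bits back into the corresponding lane of dst.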
+#define CONVERT_INT32_TO_FLOAT(convert, double_index)               \
+  Simd128Register src = i.InputSimd128Register(0);                  \
+  Simd128Register dst = i.OutputSimd128Register();                  \
+  for (int index = 0; index < 4; index++) {                         \
+    __ vlgv(kScratchReg, src, MemOperand(r0, index), Condition(2)); \
+    __ convert(kScratchDoubleReg, kScratchReg);                     \
+    __ MovFloatToInt(kScratchReg, kScratchDoubleReg);               \
+    __ vlvg(dst, kScratchReg, MemOperand(r0, index), Condition(2)); \
+  }
+    case kS390_F32x4SConvertI32x4: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, 0)
+#else
+      CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, 1)
+#endif
+      break;
+    }
+    case kS390_F32x4UConvertI32x4: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, 0)
+#else
+      CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, 1)
+#endif
+      break;
+    }
+#undef CONVERT_INT32_TO_FLOAT
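+// VECTOR_UNPACK widens the lanes in the high or low half of the source vector
+// to the next wider lane size, sign-extending (vuph/vupl) or zero-extending
+// (vuplh/vupll).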
+#define VECTOR_UNPACK(op, mode)                                             \
+  __ op(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(0), \
+        Condition(0), Condition(mode));
+    case kS390_I32x4SConvertI16x8Low: {
+      VECTOR_UNPACK(vupl, 1)
+      break;
+    }
+    case kS390_I32x4SConvertI16x8High: {
+      VECTOR_UNPACK(vuph, 1)
+      break;
+    }
+    case kS390_I32x4UConvertI16x8Low: {
+      VECTOR_UNPACK(vupll, 1)
+      break;
+    }
+    case kS390_I32x4UConvertI16x8High: {
+      VECTOR_UNPACK(vuplh, 1)
+      break;
+    }
+    case kS390_I16x8SConvertI8x16Low: {
+      VECTOR_UNPACK(vupl, 0)
+      break;
+    }
+    case kS390_I16x8SConvertI8x16High: {
+      VECTOR_UNPACK(vuph, 0)
+      break;
+    }
+    case kS390_I16x8UConvertI8x16Low: {
+      VECTOR_UNPACK(vupll, 0)
+      break;
+    }
+    case kS390_I16x8UConvertI8x16High: {
+      VECTOR_UNPACK(vuplh, 0)
+      break;
+    }
+#undef VECTOR_UNPACK
+    case kS390_I16x8SConvertI32x4:
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0), Condition(0), Condition(2));
+#else
+      __ vpks(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(2));
+#endif
+      break;
+    case kS390_I8x16SConvertI16x8:
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), i.InputSimd128Register(1),
+              i.InputSimd128Register(0), Condition(0), Condition(1));
+#else
+      __ vpks(i.OutputSimd128Register(), i.InputSimd128Register(0),
+              i.InputSimd128Register(1), Condition(0), Condition(1));
+#endif
+      break;
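+// VECTOR_PACK_UNSIGNED clamps negative lanes to zero (vmx against an all-zero
+// vector) in both inputs so that the unsigned saturating pack below produces
+// the expected result for signed inputs.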
+#define VECTOR_PACK_UNSIGNED(mode)                                             \
+  Simd128Register tempFPReg = i.ToSimd128Register(instr->TempAt(0));           \
+  __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg, Condition(0), \
+        Condition(0), Condition(mode));                                        \
+  __ vmx(tempFPReg, i.InputSimd128Register(0), kScratchDoubleReg,              \
+         Condition(0), Condition(0), Condition(mode));                         \
+  __ vmx(kScratchDoubleReg, i.InputSimd128Register(1), kScratchDoubleReg,      \
+         Condition(0), Condition(0), Condition(mode));
+    case kS390_I16x8UConvertI32x4: {
+      // treat inputs as signed, and saturate to unsigned (negative to 0)
+      VECTOR_PACK_UNSIGNED(2)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg,
+               Condition(0), Condition(2));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg, kScratchDoubleReg,
+               Condition(0), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I8x16UConvertI16x8: {
+      // treat inputs as signed, and saturate to unsigned (negative to 0)
+      VECTOR_PACK_UNSIGNED(1)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg,
+               Condition(0), Condition(1));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg, kScratchDoubleReg,
+               Condition(0), Condition(1));
+#endif
+      break;
+    }
+#undef VECTOR_PACK_UNSIGNED
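+// Widens both inputs to the next element size with the extract_high/
+// extract_low (unpack) pair, applies op at the wider width, and leaves the
+// high-half results in kScratchDoubleReg and the low-half results in
+// tempFPReg1 for the saturating pack emitted by each case.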
+#define BINOP_EXTRACT(op, extract_high, extract_low, mode)              \
+  Simd128Register src1 = i.InputSimd128Register(0);                     \
+  Simd128Register src2 = i.InputSimd128Register(1);                     \
+  Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));   \
+  Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));   \
+  __ extract_high(kScratchDoubleReg, src1, Condition(0), Condition(0),  \
+                  Condition(mode));                                     \
+  __ extract_high(tempFPReg1, src2, Condition(0), Condition(0),         \
+                  Condition(mode));                                     \
+  __ op(kScratchDoubleReg, kScratchDoubleReg, tempFPReg1, Condition(0), \
+        Condition(0), Condition(mode + 1));                             \
+  __ extract_low(tempFPReg1, src1, Condition(0), Condition(0),          \
+                 Condition(mode));                                      \
+  __ extract_low(tempFPReg2, src2, Condition(0), Condition(0),          \
+                 Condition(mode));                                      \
+  __ op(tempFPReg1, tempFPReg1, tempFPReg2, Condition(0), Condition(0), \
+        Condition(mode + 1));
+    case kS390_I16x8AddSatS: {
+      BINOP_EXTRACT(va, vuph, vupl, 1)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+              Condition(0), Condition(2));
+#else
+      __ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+              Condition(0), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8SubSatS: {
+      BINOP_EXTRACT(vs, vuph, vupl, 1)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+              Condition(0), Condition(2));
+#else
+      __ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+              Condition(0), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8AddSatU: {
+      BINOP_EXTRACT(va, vuplh, vupll, 1)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+               Condition(0), Condition(2));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+               Condition(0), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I16x8SubSatU: {
+      BINOP_EXTRACT(vs, vuplh, vupll, 1)
+      // Clamp negative wide results to 0 before the unsigned saturating pack.
+      __ vx(tempFPReg2, tempFPReg2, tempFPReg2, Condition(0), Condition(0),
+            Condition(0));
+      __ vmx(kScratchDoubleReg, tempFPReg2, kScratchDoubleReg, Condition(0),
+             Condition(0), Condition(2));
+      __ vmx(tempFPReg1, tempFPReg2, tempFPReg1, Condition(0), Condition(0),
+             Condition(2));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+               Condition(0), Condition(2));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+               Condition(0), Condition(2));
+#endif
+      break;
+    }
+    case kS390_I8x16AddSatS: {
+      BINOP_EXTRACT(va, vuph, vupl, 0)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+              Condition(0), Condition(1));
+#else
+      __ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+              Condition(0), Condition(1));
+#endif
+      break;
+    }
+    case kS390_I8x16SubSatS: {
+      BINOP_EXTRACT(vs, vuph, vupl, 0)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+              Condition(0), Condition(1));
+#else
+      __ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+              Condition(0), Condition(1));
+#endif
+      break;
+    }
+    case kS390_I8x16AddSatU: {
+      BINOP_EXTRACT(va, vuplh, vupll, 0)
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+               Condition(0), Condition(1));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+               Condition(0), Condition(1));
+#endif
+      break;
+    }
+    case kS390_I8x16SubSatU: {
+      BINOP_EXTRACT(vs, vuplh, vupll, 0)
+      // Clamp negative wide results to 0 before the unsigned saturating pack.
+      __ vx(tempFPReg2, tempFPReg2, tempFPReg2, Condition(0), Condition(0),
+            Condition(0));
+      __ vmx(kScratchDoubleReg, tempFPReg2, kScratchDoubleReg, Condition(0),
+             Condition(0), Condition(1));
+      __ vmx(tempFPReg1, tempFPReg2, tempFPReg1, Condition(0), Condition(0),
+             Condition(1));
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+               Condition(0), Condition(1));
+#else
+      __ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
+               Condition(0), Condition(1));
+#endif
+      break;
+    }
+#undef BINOP_EXTRACT
+    case kS390_I8x16Shuffle: {
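+      // The 16 byte-lane indices of the shuffle arrive packed into four
+      // 32-bit immediates (inputs 2..5) and are assembled into a vperm mask.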
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      int32_t k8x16_indices[] = {i.InputInt32(2), i.InputInt32(3),
+                                 i.InputInt32(4), i.InputInt32(5)};
+      // Assemble the indices into two 8-byte halves of the permute mask.
+      for (int i = 0, j = 0; i < 2; i++, j += 2) {
+#ifdef V8_TARGET_BIG_ENDIAN
+        __ lgfi(i < 1 ? ip : r0, Operand(k8x16_indices[j]));
+        __ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1]));
+#else
+        __ lgfi(i < 1 ? r0 : ip, Operand(k8x16_indices[j]));
+        __ iihf(i < 1 ? r0 : ip, Operand(k8x16_indices[j + 1]));
+#endif
+      }
+      __ vlvgp(kScratchDoubleReg, r0, ip);
+      __ vperm(dst, src0, src1, kScratchDoubleReg, Condition(0), Condition(0));
+      break;
+    }
+    case kS390_I8x16Swizzle: {
+      Simd128Register dst = i.OutputSimd128Register(),
+                      src0 = i.InputSimd128Register(0),
+                      src1 = i.InputSimd128Register(1);
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      // Saturate the indices to 5 bits. Input indices more than 31 should
+      // return 0.
+      __ vrepi(kScratchDoubleReg, Operand(31), Condition(0));
+      __ vmnl(tempFPReg1, src1, kScratchDoubleReg, Condition(0), Condition(0),
+              Condition(0));
+#ifdef V8_TARGET_BIG_ENDIAN
+      // The input vector needs to be byte-reversed.
+      __ vlgv(r0, src0, MemOperand(r0, 0), Condition(3));
+      __ vlgv(r1, src0, MemOperand(r0, 1), Condition(3));
+      __ lrvgr(r0, r0);
+      __ lrvgr(r1, r1);
+      __ vlvgp(dst, r1, r0);
+      // clear scratch
+      __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(0));
+      __ vperm(dst, dst, kScratchDoubleReg, tempFPReg1, Condition(0),
+               Condition(0));
+#else
+      __ vx(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg,
+            Condition(0), Condition(0), Condition(0));
+      __ vperm(dst, src0, kScratchDoubleReg, tempFPReg1, Condition(0),
+               Condition(0));
+#endif
+      break;
+    }
+    case kS390_I32x4BitMask: {
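+      // Build a bit-permute mask whose byte entries name the bit position of
+      // each lane's sign bit; vbperm gathers those bits into the scratch
+      // vector and vlgv reads the resulting lane mask into the output.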
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ lgfi(kScratchReg, Operand(0x204060));
+      // 0x80 entries select out-of-range bits, zeroing the unused high bits.
+      __ iihf(kScratchReg, Operand(0x80808080));
+#else
+      __ lgfi(kScratchReg, Operand(0x80808080));
+      __ iihf(kScratchReg, Operand(0x60402000));
+#endif
+      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
+      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+                Condition(0), Condition(0), Condition(0));
+      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
+              Condition(0));
+      break;
+    }
+    case kS390_I16x8BitMask: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ lgfi(kScratchReg, Operand(0x40506070));
+      __ iihf(kScratchReg, Operand(0x102030));
+#else
+      __ lgfi(kScratchReg, Operand(0x30201000));
+      __ iihf(kScratchReg, Operand(0x70605040));
+#endif
+      __ vlvg(kScratchDoubleReg, kScratchReg, MemOperand(r0, 1), Condition(3));
+      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+                Condition(0), Condition(0), Condition(0));
+      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 7),
+              Condition(0));
+      break;
+    }
+    case kS390_I8x16BitMask: {
+#ifdef V8_TARGET_BIG_ENDIAN
+      __ lgfi(r0, Operand(0x60687078));
+      __ iihf(r0, Operand(0x40485058));
+      __ lgfi(ip, Operand(0x20283038));
+      __ iihf(ip, Operand(0x81018));
+#else
+      __ lgfi(ip, Operand(0x58504840));
+      __ iihf(ip, Operand(0x78706860));
+      __ lgfi(r0, Operand(0x18100800));
+      __ iihf(r0, Operand(0x38302820));
+#endif
+      __ vlvgp(kScratchDoubleReg, ip, r0);
+      __ vbperm(kScratchDoubleReg, i.InputSimd128Register(0), kScratchDoubleReg,
+                Condition(0), Condition(0), Condition(0));
+      __ vlgv(i.OutputRegister(), kScratchDoubleReg, MemOperand(r0, 3),
+              Condition(1));
+      break;
+    }
+    case kS390_F32x4Pmin: {
+      __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(3), Condition(0),
+               Condition(2));
+      break;
+    }
+    case kS390_F32x4Pmax: {
+      __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(3), Condition(0),
+               Condition(2));
+      break;
+    }
+    case kS390_F64x2Pmin: {
+      __ vfmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(3), Condition(0),
+               Condition(3));
+      break;
+    }
+    case kS390_F64x2Pmax: {
+      __ vfmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
+               i.InputSimd128Register(1), Condition(3), Condition(0),
+               Condition(3));
+      break;
+    }
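+    // The rounding cases below use VECTOR FP LOAD ROUNDED (vfi): the first
+    // Condition selects the rounding mode (6 = toward +inf, 7 = toward -inf,
+    // 5 = toward zero, 4 = nearest, ties to even) and the last Condition
+    // selects the element size (2 = float, 3 = double).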
+    case kS390_F64x2Ceil: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6),
+             Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2Floor: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7),
+             Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2Trunc: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5),
+             Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F64x2NearestInt: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4),
+             Condition(0), Condition(3));
+      break;
+    }
+    case kS390_F32x4Ceil: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(6),
+             Condition(0), Condition(2));
+      break;
+    }
+    case kS390_F32x4Floor: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(7),
+             Condition(0), Condition(2));
+      break;
+    }
+    case kS390_F32x4Trunc: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(5),
+             Condition(0), Condition(2));
+      break;
+    }
+    case kS390_F32x4NearestInt: {
+      __ vfi(i.OutputSimd128Register(), i.InputSimd128Register(0), Condition(4),
+             Condition(0), Condition(2));
+      break;
+    }
+    case kS390_I32x4DotI16x8S: {
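+      // vme/vmo multiply the even/odd 16-bit lanes into 32-bit products;
+      // adding them (va) yields the pairwise dot product in each 32-bit lane.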
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ vme(kScratchDoubleReg, i.InputSimd128Register(0),
+             i.InputSimd128Register(1), Condition(0), Condition(0),
+             Condition(1));
+      __ vmo(tempFPReg1, i.InputSimd128Register(0), i.InputSimd128Register(1),
+             Condition(0), Condition(0), Condition(1));
+      __ va(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+            Condition(0), Condition(0), Condition(2));
+      break;
+    }
+    case kS390_StoreCompressTagged: {
+      CHECK(!instr->HasOutput());
+      size_t index = 0;
+      AddressingMode mode = kMode_None;
+      MemOperand operand = i.MemoryOperand(&mode, &index);
+      Register value = i.InputRegister(index);
+      __ StoreTaggedField(value, operand, r1);
+      break;
+    }
+    case kS390_LoadDecompressTaggedSigned: {
+      CHECK(instr->HasOutput());
+      __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
+      break;
+    }
+    case kS390_LoadDecompressTaggedPointer: {
+      CHECK(instr->HasOutput());
+      __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
+      break;
+    }
+    case kS390_LoadDecompressAnyTagged: {
+      CHECK(instr->HasOutput());
+      __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
+      break;
+    }
+    default:
+      UNREACHABLE();
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+// Assembles branches after an instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  S390OperandConverter i(this, instr);
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  ArchOpcode op = instr->arch_opcode();
+  FlagsCondition condition = branch->condition;
+
+  Condition cond = FlagsConditionToCondition(condition, op);
+  if (op == kS390_CmpFloat || op == kS390_CmpDouble) {
+    // Check for unordered if necessary; branch to flabel/tlabel according to
+    // what the tests expect.
+    if (cond == le || cond == eq || cond == lt) {
+      __ bunordered(flabel);
+    } else if (cond == gt || cond == ne || cond == ge) {
+      __ bunordered(tlabel);
+    }
+  }
+  __ b(cond, tlabel);
+  if (!branch->fallthru) __ b(flabel);  // no fallthru to flabel.
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(John) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual ||
+      condition == kOverflow || condition == kNotOverflow) {
+    return;
+  }
+
+  condition = NegateFlagsCondition(condition);
+  __ LoadImmP(r0, Operand::Zero());
+  __ LoadOnConditionP(FlagsConditionToCondition(condition, kArchNop),
+                      kSpeculationPoisonRegister, r0);
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  AssembleArchBranch(instr, branch);
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  class OutOfLineTrap final : public OutOfLineCode {
+   public:
+    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
+
+    void Generate() final {
+      S390OperandConverter i(gen_, instr_);
+      TrapId trap_id =
+          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+      GenerateCallToTrap(trap_id);
+    }
+
+   private:
+    void GenerateCallToTrap(TrapId trap_id) {
+      if (trap_id == TrapId::kInvalid) {
+        // We cannot test calls to the runtime in cctest/test-run-wasm.
+        // Therefore we emit a call to C here instead of a call to the runtime.
+        // We use the context register as the scratch register, because we do
+        // not have a context here.
+        __ PrepareCallCFunction(0, 0, cp);
+        __ CallCFunction(
+            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
+        __ LeaveFrame(StackFrame::WASM);
+        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+        int pop_count =
+            static_cast<int>(call_descriptor->StackParameterCount());
+        __ Drop(pop_count);
+        __ Ret();
+      } else {
+        gen_->AssembleSourcePosition(instr_);
+        // A direct call to a wasm runtime stub defined in this module.
+        // Just encode the stub index. This will be patched when the code
+        // is added to the native module and copied into wasm code space.
+        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+        ReferenceMap* reference_map =
+            gen_->zone()->New<ReferenceMap>(gen_->zone());
+        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+        if (FLAG_debug_code) {
+          __ stop();
+        }
+      }
+    }
+
+    Instruction* instr_;
+    CodeGenerator* gen_;
+  };
+  auto ool = zone()->New<OutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Label end;
+
+  ArchOpcode op = instr->arch_opcode();
+  Condition cond = FlagsConditionToCondition(condition, op);
+  if (op == kS390_CmpFloat || op == kS390_CmpDouble) {
+    // check for unordered if necessary
+    if (cond == le || cond == eq || cond == lt) {
+      __ bunordered(&end);
+    } else if (cond == gt || cond == ne || cond == ge) {
+      __ bunordered(tlabel);
+    }
+  }
+  __ b(cond, tlabel);
+  __ bind(&end);
+}
+
+// Assembles boolean materializations after an instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  S390OperandConverter i(this, instr);
+  ArchOpcode op = instr->arch_opcode();
+  bool check_unordered = (op == kS390_CmpDouble || op == kS390_CmpFloat);
+
+  // Overflow checked for add/sub only.
+  DCHECK((condition != kOverflow && condition != kNotOverflow) ||
+         (op == kS390_Add32 || op == kS390_Add64 || op == kS390_Sub32 ||
+          op == kS390_Sub64 || op == kS390_Mul32));
+
+  // Materialize a full 32-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
+  Condition cond = FlagsConditionToCondition(condition, op);
+  Label done;
+  if (check_unordered) {
+    __ LoadImmP(reg, (cond == eq || cond == le || cond == lt) ? Operand::Zero()
+                                                              : Operand(1));
+    __ bunordered(&done);
+  }
+
+  // TODO(john.yan): use load imm high on condition here
+  __ LoadImmP(reg, Operand::Zero());
+  __ LoadImmP(kScratchReg, Operand(1));
+  // locr is sufficient since reg's upper 32 bits are guaranteed to be 0.
+  __ locr(cond, reg, kScratchReg);
+  __ bind(&done);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  S390OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  S390OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
+  Label** cases = zone()->NewArray<Label*>(case_count);
+  for (int32_t index = 0; index < case_count; ++index) {
+    cases[index] = GetLabel(i.InputRpo(index + 2));
+  }
+  Label* const table = AddJumpTable(cases, case_count);
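+  // Bounds-check the input against the case count (branching to the default
+  // target if out of range), then load the entry address from the jump table
+  // and branch to it.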
+  __ CmpLogicalP(input, Operand(case_count));
+  __ bge(GetLabel(i.InputRpo(1)));
+  __ larl(kScratchReg, table);
+  __ ShiftLeftP(r1, input, Operand(kSystemPointerSizeLog2));
+  __ LoadP(kScratchReg, MemOperand(kScratchReg, r1));
+  __ Jump(kScratchReg);
+}
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  const RegList double_saves = call_descriptor->CalleeSavedFPRegisters();
+
+  // Save callee-saved Double registers.
+  if (double_saves != 0) {
+    frame->AlignSavedCalleeRegisterSlots();
+    DCHECK_EQ(kNumCalleeSavedDoubles,
+              base::bits::CountPopulation(double_saves));
+    frame->AllocateSavedCalleeRegisterSlots(kNumCalleeSavedDoubles *
+                                            (kDoubleSize / kSystemPointerSize));
+  }
+  // Save callee-saved registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    // register save area does not include the fp or constant pool pointer.
+    const int num_saves = kNumCalleeSaved - 1;
+    DCHECK(num_saves == base::bits::CountPopulation(saves));
+    frame->AllocateSavedCalleeRegisterSlots(num_saves);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  if (frame_access_state()->has_frame()) {
+    if (call_descriptor->IsCFunctionCall()) {
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ StubPrologue(StackFrame::C_WASM_ENTRY);
+        // Reserve stack space for saving the c_entry_fp later.
+        __ lay(sp, MemOperand(sp, -kSystemPointerSize));
+      } else {
+        __ Push(r14, fp);
+        __ LoadRR(fp, sp);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue(ip);
+    } else {
+      StackFrame::Type type = info()->GetOutputStackFrameType();
+      // TODO(mbrandy): Detect cases where ip is the entrypoint (for
+      // efficient initialization of the constant pool pointer register).
+      __ StubPrologue(type);
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ Push(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ LoadTaggedPointerField(
+            kJSFunctionRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset), r0);
+        __ LoadTaggedPointerField(
+            kWasmInstanceRegister,
+            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset), r0);
+        __ Push(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ lay(sp, MemOperand(sp, -kSystemPointerSize));
+        }
+      }
+    }
+    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
+  }
+
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= osr_helper()->UnoptimizedFrameSlots();
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+
+  if (required_slots > 0) {
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if ((required_slots * kSystemPointerSize) < (FLAG_stack_size * 1024)) {
+        Register scratch = r1;
+        __ LoadP(
+            scratch,
+            FieldMemOperand(kWasmInstanceRegister,
+                            WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ LoadP(scratch, MemOperand(scratch));
+        __ AddP(scratch, scratch, Operand(required_slots * kSystemPointerSize));
+        __ CmpLogicalP(sp, scratch);
+        __ bge(&done);
+      }
+
+      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
+      // We come from WebAssembly, there are no references for the GC.
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      if (FLAG_debug_code) {
+        __ stop();
+      }
+
+      __ bind(&done);
+    }
+
+    // Skip callee-saved and return slots, which are pushed below.
+    required_slots -= base::bits::CountPopulation(saves);
+    required_slots -= frame()->GetReturnSlotCount();
+    required_slots -= (kDoubleSize / kSystemPointerSize) *
+                      base::bits::CountPopulation(saves_fp);
+    __ lay(sp, MemOperand(sp, -required_slots * kSystemPointerSize));
+  }
+
+  // Save callee-saved Double registers.
+  if (saves_fp != 0) {
+    __ MultiPushDoubles(saves_fp);
+    DCHECK_EQ(kNumCalleeSavedDoubles, base::bits::CountPopulation(saves_fp));
+  }
+
+  // Save callee-saved registers.
+  if (saves != 0) {
+    __ MultiPush(saves);
+    // register save area does not include the fp or constant pool pointer.
+  }
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Create space for returns.
+    __ lay(sp, MemOperand(sp, -returns * kSystemPointerSize));
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  int pop_count = static_cast<int>(call_descriptor->StackParameterCount());
+
+  const int returns = frame()->GetReturnSlotCount();
+  if (returns != 0) {
+    // Create space for returns.
+    __ lay(sp, MemOperand(sp, returns * kSystemPointerSize));
+  }
+
+  // Restore registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    __ MultiPop(saves);
+  }
+
+  // Restore double registers.
+  const RegList double_saves = call_descriptor->CalleeSavedFPRegisters();
+  if (double_saves != 0) {
+    __ MultiPopDoubles(double_saves);
+  }
+
+  unwinding_info_writer_.MarkBlockWillExit();
+
+  S390OperandConverter g(this, nullptr);
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    // Canonicalize JSFunction return sites for now unless they have a
+    // variable number of stack slot pops.
+    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
+      if (return_label_.is_bound()) {
+        __ b(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+        AssembleDeconstructFrame();
+      }
+    } else {
+      AssembleDeconstructFrame();
+    }
+  }
+  if (pop->IsImmediate()) {
+    pop_count += g.ToConstant(pop).ToInt32();
+  } else {
+    __ Drop(g.ToRegister(pop));
+  }
+  __ Drop(pop_count);
+  __ Ret();
+}
+
+void CodeGenerator::FinishCode() {}
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  S390OperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  if (source->IsRegister()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ Move(g.ToRegister(destination), src);
+    } else {
+      __ StoreP(src, g.ToMemOperand(destination));
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsRegister() || destination->IsStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsRegister()) {
+      __ LoadP(g.ToRegister(destination), src);
+    } else {
+      Register temp = kScratchReg;
+      __ LoadP(temp, src, r0);
+      __ StoreP(temp, g.ToMemOperand(destination));
+    }
+  } else if (source->IsConstant()) {
+    Constant src = g.ToConstant(source);
+    if (destination->IsRegister() || destination->IsStackSlot()) {
+      Register dst =
+          destination->IsRegister() ? g.ToRegister(destination) : kScratchReg;
+      switch (src.type()) {
+        case Constant::kInt32:
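+          // On 64-bit S390X the 32-bit constant path never carries Wasm
+          // relocation info, hence the constant-false branch below.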
+#if V8_TARGET_ARCH_S390X
+          if (false) {
+#else
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+#endif
+            __ mov(dst, Operand(src.ToInt32(), src.rmode()));
+          } else {
+            __ Load(dst, Operand(src.ToInt32()));
+          }
+          break;
+        case Constant::kInt64:
+#if V8_TARGET_ARCH_S390X
+          if (RelocInfo::IsWasmReference(src.rmode())) {
+            __ mov(dst, Operand(src.ToInt64(), src.rmode()));
+          } else {
+            __ Load(dst, Operand(src.ToInt64()));
+          }
+#else
+          __ mov(dst, Operand(src.ToInt64()));
+#endif  // V8_TARGET_ARCH_S390X
+          break;
+        case Constant::kFloat32:
+          __ mov(dst, Operand::EmbeddedNumber(src.ToFloat32()));
+          break;
+        case Constant::kFloat64:
+          __ mov(dst, Operand::EmbeddedNumber(src.ToFloat64().value()));
+          break;
+        case Constant::kExternalReference:
+          __ Move(dst, src.ToExternalReference());
+          break;
+        case Constant::kDelayedStringConstant:
+          __ mov(dst, Operand::EmbeddedStringConstant(
+                          src.ToDelayedStringConstant()));
+          break;
+        case Constant::kHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ Move(dst, src_object);
+          }
+          break;
+        }
+        case Constant::kCompressedHeapObject: {
+          Handle<HeapObject> src_object = src.ToHeapObject();
+          RootIndex index;
+          if (IsMaterializableFromRoot(src_object, &index)) {
+            __ LoadRoot(dst, index);
+          } else {
+            __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
+          }
+          break;
+        }
+        case Constant::kRpoNumber:
+          UNREACHABLE();  // TODO(dcarney): loading RPO constants on S390.
+          break;
+      }
+      if (destination->IsStackSlot()) {
+        __ StoreP(dst, g.ToMemOperand(destination), r0);
+      }
+    } else {
+      DoubleRegister dst = destination->IsFPRegister()
+                               ? g.ToDoubleRegister(destination)
+                               : kScratchDoubleReg;
+      double value = (src.type() == Constant::kFloat32)
+                         ? src.ToFloat32()
+                         : src.ToFloat64().value();
+      if (src.type() == Constant::kFloat32) {
+        __ LoadFloat32Literal(dst, src.ToFloat32(), kScratchReg);
+      } else {
+        __ LoadDoubleLiteral(dst, value, kScratchReg);
+      }
+
+      if (destination->IsFloatStackSlot()) {
+        __ StoreFloat32(dst, g.ToMemOperand(destination));
+      } else if (destination->IsDoubleStackSlot()) {
+        __ StoreDouble(dst, g.ToMemOperand(destination));
+      }
+    }
+  } else if (source->IsFPRegister()) {
+    MachineRepresentation rep = LocationOperand::cast(source)->representation();
+    if (rep == MachineRepresentation::kSimd128) {
+      if (destination->IsSimd128Register()) {
+        __ vlr(g.ToSimd128Register(destination), g.ToSimd128Register(source),
+               Condition(0), Condition(0), Condition(0));
+      } else {
+        DCHECK(destination->IsSimd128StackSlot());
+        __ StoreSimd128(g.ToSimd128Register(source),
+                        g.ToMemOperand(destination), kScratchReg);
+      }
+    } else {
+      DoubleRegister src = g.ToDoubleRegister(source);
+      if (destination->IsFPRegister()) {
+        DoubleRegister dst = g.ToDoubleRegister(destination);
+        __ Move(dst, src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        LocationOperand* op = LocationOperand::cast(source);
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ StoreDouble(src, g.ToMemOperand(destination));
+        } else {
+          __ StoreFloat32(src, g.ToMemOperand(destination));
+        }
+      }
+    }
+  } else if (source->IsFPStackSlot()) {
+    DCHECK(destination->IsFPRegister() || destination->IsFPStackSlot());
+    MemOperand src = g.ToMemOperand(source);
+    if (destination->IsFPRegister()) {
+      LocationOperand* op = LocationOperand::cast(source);
+      if (op->representation() == MachineRepresentation::kFloat64) {
+        __ LoadDouble(g.ToDoubleRegister(destination), src);
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
+        __ LoadFloat32(g.ToDoubleRegister(destination), src);
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        __ LoadSimd128(g.ToSimd128Register(destination), g.ToMemOperand(source),
+                       kScratchReg);
+      }
+    } else {
+      LocationOperand* op = LocationOperand::cast(source);
+      DoubleRegister temp = kScratchDoubleReg;
+      if (op->representation() == MachineRepresentation::kFloat64) {
+        __ LoadDouble(temp, src);
+        __ StoreDouble(temp, g.ToMemOperand(destination));
+      } else if (op->representation() == MachineRepresentation::kFloat32) {
+        __ LoadFloat32(temp, src);
+        __ StoreFloat32(temp, g.ToMemOperand(destination));
+      } else {
+        DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+        __ LoadSimd128(kScratchDoubleReg, g.ToMemOperand(source), kScratchReg);
+        __ StoreSimd128(kScratchDoubleReg, g.ToMemOperand(destination),
+                        kScratchReg);
+      }
+    }
+  } else {
+    UNREACHABLE();
+  }
+}
+
+// Swaps the contents of source and destination.
+// source and destination could be:
+//   Register,
+//   FloatRegister,
+//   DoubleRegister,
+//   StackSlot,
+//   FloatStackSlot,
+//   or DoubleStackSlot
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  S390OperandConverter g(this, nullptr);
+  if (source->IsRegister()) {
+    Register src = g.ToRegister(source);
+    if (destination->IsRegister()) {
+      __ SwapP(src, g.ToRegister(destination), kScratchReg);
+    } else {
+      DCHECK(destination->IsStackSlot());
+      __ SwapP(src, g.ToMemOperand(destination), kScratchReg);
+    }
+  } else if (source->IsStackSlot()) {
+    DCHECK(destination->IsStackSlot());
+    __ SwapP(g.ToMemOperand(source), g.ToMemOperand(destination), kScratchReg,
+             r0);
+  } else if (source->IsFloatRegister()) {
+    DoubleRegister src = g.ToDoubleRegister(source);
+    if (destination->IsFloatRegister()) {
+      __ SwapFloat32(src, g.ToDoubleRegister(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsFloatStackSlot());
+      __ SwapFloat32(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsDoubleRegister()) {
+    DoubleRegister src = g.ToDoubleRegister(source);
+    if (destination->IsDoubleRegister()) {
+      __ SwapDouble(src, g.ToDoubleRegister(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsDoubleStackSlot());
+      __ SwapDouble(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsFloatStackSlot()) {
+    DCHECK(destination->IsFloatStackSlot());
+    __ SwapFloat32(g.ToMemOperand(source), g.ToMemOperand(destination),
+                   kScratchDoubleReg);
+  } else if (source->IsDoubleStackSlot()) {
+    DCHECK(destination->IsDoubleStackSlot());
+    __ SwapDouble(g.ToMemOperand(source), g.ToMemOperand(destination),
+                  kScratchDoubleReg);
+  } else if (source->IsSimd128Register()) {
+    Simd128Register src = g.ToSimd128Register(source);
+    if (destination->IsSimd128Register()) {
+      __ SwapSimd128(src, g.ToSimd128Register(destination), kScratchDoubleReg);
+    } else {
+      DCHECK(destination->IsSimd128StackSlot());
+      __ SwapSimd128(src, g.ToMemOperand(destination), kScratchDoubleReg);
+    }
+  } else if (source->IsSimd128StackSlot()) {
+    DCHECK(destination->IsSimd128StackSlot());
+    __ SwapSimd128(g.ToMemOperand(source), g.ToMemOperand(destination),
+                   kScratchDoubleReg);
+  } else {
+    UNREACHABLE();
+  }
+}
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  for (size_t index = 0; index < target_count; ++index) {
+    __ emit_label_addr(targets[index]);
+  }
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/s390/instruction-codes-s390.h b/src/compiler/backend/s390/instruction-codes-s390.h
new file mode 100644
index 0000000..f7d3370
--- /dev/null
+++ b/src/compiler/backend/s390/instruction-codes-s390.h
@@ -0,0 +1,406 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_S390_INSTRUCTION_CODES_S390_H_
+#define V8_COMPILER_BACKEND_S390_INSTRUCTION_CODES_S390_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// S390-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V)          \
+  V(S390_Peek)                              \
+  V(S390_Abs32)                             \
+  V(S390_Abs64)                             \
+  V(S390_And32)                             \
+  V(S390_And64)                             \
+  V(S390_Or32)                              \
+  V(S390_Or64)                              \
+  V(S390_Xor32)                             \
+  V(S390_Xor64)                             \
+  V(S390_ShiftLeft32)                       \
+  V(S390_ShiftLeft64)                       \
+  V(S390_ShiftLeftPair)                     \
+  V(S390_ShiftRight32)                      \
+  V(S390_ShiftRight64)                      \
+  V(S390_ShiftRightPair)                    \
+  V(S390_ShiftRightArith32)                 \
+  V(S390_ShiftRightArith64)                 \
+  V(S390_ShiftRightArithPair)               \
+  V(S390_RotRight32)                        \
+  V(S390_RotRight64)                        \
+  V(S390_Not32)                             \
+  V(S390_Not64)                             \
+  V(S390_RotLeftAndClear64)                 \
+  V(S390_RotLeftAndClearLeft64)             \
+  V(S390_RotLeftAndClearRight64)            \
+  V(S390_Lay)                               \
+  V(S390_Add32)                             \
+  V(S390_Add64)                             \
+  V(S390_AddPair)                           \
+  V(S390_AddFloat)                          \
+  V(S390_AddDouble)                         \
+  V(S390_Sub32)                             \
+  V(S390_Sub64)                             \
+  V(S390_SubFloat)                          \
+  V(S390_SubDouble)                         \
+  V(S390_SubPair)                           \
+  V(S390_MulPair)                           \
+  V(S390_Mul32)                             \
+  V(S390_Mul32WithOverflow)                 \
+  V(S390_Mul64)                             \
+  V(S390_MulHigh32)                         \
+  V(S390_MulHighU32)                        \
+  V(S390_MulFloat)                          \
+  V(S390_MulDouble)                         \
+  V(S390_Div32)                             \
+  V(S390_Div64)                             \
+  V(S390_DivU32)                            \
+  V(S390_DivU64)                            \
+  V(S390_DivFloat)                          \
+  V(S390_DivDouble)                         \
+  V(S390_Mod32)                             \
+  V(S390_Mod64)                             \
+  V(S390_ModU32)                            \
+  V(S390_ModU64)                            \
+  V(S390_ModDouble)                         \
+  V(S390_Neg32)                             \
+  V(S390_Neg64)                             \
+  V(S390_NegDouble)                         \
+  V(S390_NegFloat)                          \
+  V(S390_SqrtFloat)                         \
+  V(S390_FloorFloat)                        \
+  V(S390_CeilFloat)                         \
+  V(S390_TruncateFloat)                     \
+  V(S390_AbsFloat)                          \
+  V(S390_SqrtDouble)                        \
+  V(S390_FloorDouble)                       \
+  V(S390_CeilDouble)                        \
+  V(S390_TruncateDouble)                    \
+  V(S390_RoundDouble)                       \
+  V(S390_MaxFloat)                          \
+  V(S390_MaxDouble)                         \
+  V(S390_MinFloat)                          \
+  V(S390_MinDouble)                         \
+  V(S390_AbsDouble)                         \
+  V(S390_Cntlz32)                           \
+  V(S390_Cntlz64)                           \
+  V(S390_Popcnt32)                          \
+  V(S390_Popcnt64)                          \
+  V(S390_Cmp32)                             \
+  V(S390_Cmp64)                             \
+  V(S390_CmpFloat)                          \
+  V(S390_CmpDouble)                         \
+  V(S390_Tst32)                             \
+  V(S390_Tst64)                             \
+  V(S390_Push)                              \
+  V(S390_PushFrame)                         \
+  V(S390_StackClaim)                        \
+  V(S390_StoreToStackSlot)                  \
+  V(S390_SignExtendWord8ToInt32)            \
+  V(S390_SignExtendWord16ToInt32)           \
+  V(S390_SignExtendWord8ToInt64)            \
+  V(S390_SignExtendWord16ToInt64)           \
+  V(S390_SignExtendWord32ToInt64)           \
+  V(S390_Uint32ToUint64)                    \
+  V(S390_Int64ToInt32)                      \
+  V(S390_Int64ToFloat32)                    \
+  V(S390_Int64ToDouble)                     \
+  V(S390_Uint64ToFloat32)                   \
+  V(S390_Uint64ToDouble)                    \
+  V(S390_Int32ToFloat32)                    \
+  V(S390_Int32ToDouble)                     \
+  V(S390_Uint32ToFloat32)                   \
+  V(S390_Uint32ToDouble)                    \
+  V(S390_Float32ToInt64)                    \
+  V(S390_Float32ToUint64)                   \
+  V(S390_Float32ToInt32)                    \
+  V(S390_Float32ToUint32)                   \
+  V(S390_Float32ToDouble)                   \
+  V(S390_Float64SilenceNaN)                 \
+  V(S390_DoubleToInt32)                     \
+  V(S390_DoubleToUint32)                    \
+  V(S390_DoubleToInt64)                     \
+  V(S390_DoubleToUint64)                    \
+  V(S390_DoubleToFloat32)                   \
+  V(S390_DoubleExtractLowWord32)            \
+  V(S390_DoubleExtractHighWord32)           \
+  V(S390_DoubleInsertLowWord32)             \
+  V(S390_DoubleInsertHighWord32)            \
+  V(S390_DoubleConstruct)                   \
+  V(S390_BitcastInt32ToFloat32)             \
+  V(S390_BitcastFloat32ToInt32)             \
+  V(S390_BitcastInt64ToDouble)              \
+  V(S390_BitcastDoubleToInt64)              \
+  V(S390_LoadWordS8)                        \
+  V(S390_LoadWordU8)                        \
+  V(S390_LoadWordS16)                       \
+  V(S390_LoadWordU16)                       \
+  V(S390_LoadWordS32)                       \
+  V(S390_LoadWordU32)                       \
+  V(S390_LoadAndTestWord32)                 \
+  V(S390_LoadAndTestWord64)                 \
+  V(S390_LoadAndTestFloat32)                \
+  V(S390_LoadAndTestFloat64)                \
+  V(S390_LoadReverse16RR)                   \
+  V(S390_LoadReverse32RR)                   \
+  V(S390_LoadReverse64RR)                   \
+  V(S390_LoadReverseSimd128RR)              \
+  V(S390_LoadReverseSimd128)                \
+  V(S390_LoadReverse16)                     \
+  V(S390_LoadReverse32)                     \
+  V(S390_LoadReverse64)                     \
+  V(S390_LoadWord64)                        \
+  V(S390_LoadFloat32)                       \
+  V(S390_LoadDouble)                        \
+  V(S390_StoreWord8)                        \
+  V(S390_StoreWord16)                       \
+  V(S390_StoreWord32)                       \
+  V(S390_StoreWord64)                       \
+  V(S390_StoreReverse16)                    \
+  V(S390_StoreReverse32)                    \
+  V(S390_StoreReverse64)                    \
+  V(S390_StoreReverseSimd128)               \
+  V(S390_StoreFloat32)                      \
+  V(S390_StoreDouble)                       \
+  V(S390_CompressSigned)                    \
+  V(S390_CompressPointer)                   \
+  V(S390_CompressAny)                       \
+  V(S390_Word64AtomicExchangeUint8)         \
+  V(S390_Word64AtomicExchangeUint16)        \
+  V(S390_Word64AtomicExchangeUint32)        \
+  V(S390_Word64AtomicExchangeUint64)        \
+  V(S390_Word64AtomicCompareExchangeUint8)  \
+  V(S390_Word64AtomicCompareExchangeUint16) \
+  V(S390_Word64AtomicCompareExchangeUint32) \
+  V(S390_Word64AtomicCompareExchangeUint64) \
+  V(S390_Word64AtomicAddUint8)              \
+  V(S390_Word64AtomicAddUint16)             \
+  V(S390_Word64AtomicAddUint32)             \
+  V(S390_Word64AtomicAddUint64)             \
+  V(S390_Word64AtomicSubUint8)              \
+  V(S390_Word64AtomicSubUint16)             \
+  V(S390_Word64AtomicSubUint32)             \
+  V(S390_Word64AtomicSubUint64)             \
+  V(S390_Word64AtomicAndUint8)              \
+  V(S390_Word64AtomicAndUint16)             \
+  V(S390_Word64AtomicAndUint32)             \
+  V(S390_Word64AtomicAndUint64)             \
+  V(S390_Word64AtomicOrUint8)               \
+  V(S390_Word64AtomicOrUint16)              \
+  V(S390_Word64AtomicOrUint32)              \
+  V(S390_Word64AtomicOrUint64)              \
+  V(S390_Word64AtomicXorUint8)              \
+  V(S390_Word64AtomicXorUint16)             \
+  V(S390_Word64AtomicXorUint32)             \
+  V(S390_Word64AtomicXorUint64)             \
+  V(S390_F64x2Splat)                        \
+  V(S390_F64x2ReplaceLane)                  \
+  V(S390_F64x2Abs)                          \
+  V(S390_F64x2Neg)                          \
+  V(S390_F64x2Sqrt)                         \
+  V(S390_F64x2Add)                          \
+  V(S390_F64x2Sub)                          \
+  V(S390_F64x2Mul)                          \
+  V(S390_F64x2Div)                          \
+  V(S390_F64x2Eq)                           \
+  V(S390_F64x2Ne)                           \
+  V(S390_F64x2Lt)                           \
+  V(S390_F64x2Le)                           \
+  V(S390_F64x2Min)                          \
+  V(S390_F64x2Max)                          \
+  V(S390_F64x2ExtractLane)                  \
+  V(S390_F64x2Qfma)                         \
+  V(S390_F64x2Qfms)                         \
+  V(S390_F64x2Pmin)                         \
+  V(S390_F64x2Pmax)                         \
+  V(S390_F64x2Ceil)                         \
+  V(S390_F64x2Floor)                        \
+  V(S390_F64x2Trunc)                        \
+  V(S390_F64x2NearestInt)                   \
+  V(S390_F32x4Splat)                        \
+  V(S390_F32x4ExtractLane)                  \
+  V(S390_F32x4ReplaceLane)                  \
+  V(S390_F32x4Add)                          \
+  V(S390_F32x4AddHoriz)                     \
+  V(S390_F32x4Sub)                          \
+  V(S390_F32x4Mul)                          \
+  V(S390_F32x4Eq)                           \
+  V(S390_F32x4Ne)                           \
+  V(S390_F32x4Lt)                           \
+  V(S390_F32x4Le)                           \
+  V(S390_F32x4Abs)                          \
+  V(S390_F32x4Neg)                          \
+  V(S390_F32x4RecipApprox)                  \
+  V(S390_F32x4RecipSqrtApprox)              \
+  V(S390_F32x4SConvertI32x4)                \
+  V(S390_F32x4UConvertI32x4)                \
+  V(S390_F32x4Sqrt)                         \
+  V(S390_F32x4Div)                          \
+  V(S390_F32x4Min)                          \
+  V(S390_F32x4Max)                          \
+  V(S390_F32x4Qfma)                         \
+  V(S390_F32x4Qfms)                         \
+  V(S390_F32x4Pmin)                         \
+  V(S390_F32x4Pmax)                         \
+  V(S390_F32x4Ceil)                         \
+  V(S390_F32x4Floor)                        \
+  V(S390_F32x4Trunc)                        \
+  V(S390_F32x4NearestInt)                   \
+  V(S390_I64x2Neg)                          \
+  V(S390_I64x2Add)                          \
+  V(S390_I64x2Sub)                          \
+  V(S390_I64x2Shl)                          \
+  V(S390_I64x2ShrS)                         \
+  V(S390_I64x2ShrU)                         \
+  V(S390_I64x2Mul)                          \
+  V(S390_I64x2Splat)                        \
+  V(S390_I64x2ReplaceLane)                  \
+  V(S390_I64x2ExtractLane)                  \
+  V(S390_I64x2Eq)                           \
+  V(S390_I32x4Splat)                        \
+  V(S390_I32x4ExtractLane)                  \
+  V(S390_I32x4ReplaceLane)                  \
+  V(S390_I32x4Add)                          \
+  V(S390_I32x4AddHoriz)                     \
+  V(S390_I32x4Sub)                          \
+  V(S390_I32x4Mul)                          \
+  V(S390_I32x4MinS)                         \
+  V(S390_I32x4MinU)                         \
+  V(S390_I32x4MaxS)                         \
+  V(S390_I32x4MaxU)                         \
+  V(S390_I32x4Eq)                           \
+  V(S390_I32x4Ne)                           \
+  V(S390_I32x4GtS)                          \
+  V(S390_I32x4GeS)                          \
+  V(S390_I32x4GtU)                          \
+  V(S390_I32x4GeU)                          \
+  V(S390_I32x4Neg)                          \
+  V(S390_I32x4Shl)                          \
+  V(S390_I32x4ShrS)                         \
+  V(S390_I32x4ShrU)                         \
+  V(S390_I32x4SConvertF32x4)                \
+  V(S390_I32x4UConvertF32x4)                \
+  V(S390_I32x4SConvertI16x8Low)             \
+  V(S390_I32x4SConvertI16x8High)            \
+  V(S390_I32x4UConvertI16x8Low)             \
+  V(S390_I32x4UConvertI16x8High)            \
+  V(S390_I32x4Abs)                          \
+  V(S390_I32x4BitMask)                      \
+  V(S390_I32x4DotI16x8S)                    \
+  V(S390_I16x8Splat)                        \
+  V(S390_I16x8ExtractLaneU)                 \
+  V(S390_I16x8ExtractLaneS)                 \
+  V(S390_I16x8ReplaceLane)                  \
+  V(S390_I16x8Add)                          \
+  V(S390_I16x8AddHoriz)                     \
+  V(S390_I16x8Sub)                          \
+  V(S390_I16x8Mul)                          \
+  V(S390_I16x8MinS)                         \
+  V(S390_I16x8MinU)                         \
+  V(S390_I16x8MaxS)                         \
+  V(S390_I16x8MaxU)                         \
+  V(S390_I16x8Eq)                           \
+  V(S390_I16x8Ne)                           \
+  V(S390_I16x8GtS)                          \
+  V(S390_I16x8GeS)                          \
+  V(S390_I16x8GtU)                          \
+  V(S390_I16x8GeU)                          \
+  V(S390_I16x8Shl)                          \
+  V(S390_I16x8ShrS)                         \
+  V(S390_I16x8ShrU)                         \
+  V(S390_I16x8Neg)                          \
+  V(S390_I16x8SConvertI32x4)                \
+  V(S390_I16x8UConvertI32x4)                \
+  V(S390_I16x8SConvertI8x16Low)             \
+  V(S390_I16x8SConvertI8x16High)            \
+  V(S390_I16x8UConvertI8x16Low)             \
+  V(S390_I16x8UConvertI8x16High)            \
+  V(S390_I16x8AddSatS)                      \
+  V(S390_I16x8SubSatS)                      \
+  V(S390_I16x8AddSatU)                      \
+  V(S390_I16x8SubSatU)                      \
+  V(S390_I16x8RoundingAverageU)             \
+  V(S390_I16x8Abs)                          \
+  V(S390_I16x8BitMask)                      \
+  V(S390_I8x16Splat)                        \
+  V(S390_I8x16ExtractLaneU)                 \
+  V(S390_I8x16ExtractLaneS)                 \
+  V(S390_I8x16ReplaceLane)                  \
+  V(S390_I8x16Add)                          \
+  V(S390_I8x16Sub)                          \
+  V(S390_I8x16Mul)                          \
+  V(S390_I8x16MinS)                         \
+  V(S390_I8x16MinU)                         \
+  V(S390_I8x16MaxS)                         \
+  V(S390_I8x16MaxU)                         \
+  V(S390_I8x16Eq)                           \
+  V(S390_I8x16Ne)                           \
+  V(S390_I8x16GtS)                          \
+  V(S390_I8x16GeS)                          \
+  V(S390_I8x16GtU)                          \
+  V(S390_I8x16GeU)                          \
+  V(S390_I8x16Shl)                          \
+  V(S390_I8x16ShrS)                         \
+  V(S390_I8x16ShrU)                         \
+  V(S390_I8x16Neg)                          \
+  V(S390_I8x16SConvertI16x8)                \
+  V(S390_I8x16UConvertI16x8)                \
+  V(S390_I8x16AddSatS)                      \
+  V(S390_I8x16SubSatS)                      \
+  V(S390_I8x16AddSatU)                      \
+  V(S390_I8x16SubSatU)                      \
+  V(S390_I8x16RoundingAverageU)             \
+  V(S390_I8x16Abs)                          \
+  V(S390_I8x16BitMask)                      \
+  V(S390_I8x16Shuffle)                      \
+  V(S390_I8x16Swizzle)                      \
+  V(S390_V32x4AnyTrue)                      \
+  V(S390_V16x8AnyTrue)                      \
+  V(S390_V8x16AnyTrue)                      \
+  V(S390_V32x4AllTrue)                      \
+  V(S390_V16x8AllTrue)                      \
+  V(S390_V8x16AllTrue)                      \
+  V(S390_S128And)                           \
+  V(S390_S128Or)                            \
+  V(S390_S128Xor)                           \
+  V(S390_S128Const)                         \
+  V(S390_S128Zero)                          \
+  V(S390_S128AllOnes)                       \
+  V(S390_S128Not)                           \
+  V(S390_S128Select)                        \
+  V(S390_S128AndNot)                        \
+  V(S390_StoreSimd128)                      \
+  V(S390_LoadSimd128)                       \
+  V(S390_StoreCompressTagged)               \
+  V(S390_LoadDecompressTaggedSigned)        \
+  V(S390_LoadDecompressTaggedPointer)       \
+  V(S390_LoadDecompressAnyTagged)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// R = register
+// O = register or stack slot
+// D = double register
+// I = immediate (handle, external, int32)
+// MRI = [register + immediate]
+// MRR = [register + register]
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MR)   /* [%r0          ] */        \
+  V(MRI)  /* [%r0       + K] */        \
+  V(MRR)  /* [%r0 + %r1    ] */        \
+  V(MRRI) /* [%r0 + %r1 + K] */
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_S390_INSTRUCTION_CODES_S390_H_
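The header comment above describes how an addressing mode is folded into an InstructionCode and decoded again after register allocation. A rough, self-contained analogue of that encode/decode round trip (EncodeMode, DecodeMode, kModeShift and the 4-bit field are invented for illustration and are not V8's actual bit layout):

#include <cassert>
#include <cstdint>

// Illustrative only: a 4-bit "addressing mode" field ORed into an opcode word,
// mirroring how AddressingModeField::encode()/decode() are used in this patch.
enum AddressingMode : uint32_t { kMode_MR, kMode_MRI, kMode_MRR, kMode_MRRI };

constexpr uint32_t kModeShift = 24;  // assumed position, for illustration only
constexpr uint32_t kModeMask = 0xFu << kModeShift;

constexpr uint32_t EncodeMode(uint32_t opcode, AddressingMode mode) {
  return (opcode & ~kModeMask) | (static_cast<uint32_t>(mode) << kModeShift);
}

constexpr AddressingMode DecodeMode(uint32_t instruction_code) {
  return static_cast<AddressingMode>((instruction_code & kModeMask) >> kModeShift);
}

int main() {
  // Tag a hypothetical opcode value with the [reg + reg + imm] mode and
  // recover it again, as the code generator would after register allocation.
  uint32_t code = EncodeMode(/*opcode=*/42u, kMode_MRRI);
  assert(DecodeMode(code) == kMode_MRRI);
  return 0;
}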
diff --git a/src/compiler/backend/s390/instruction-scheduler-s390.cc b/src/compiler/backend/s390/instruction-scheduler-s390.cc
new file mode 100644
index 0000000..be0b14c
--- /dev/null
+++ b/src/compiler/backend/s390/instruction-scheduler-s390.cc
@@ -0,0 +1,408 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kS390_Abs32:
+    case kS390_Abs64:
+    case kS390_And32:
+    case kS390_And64:
+    case kS390_Or32:
+    case kS390_Or64:
+    case kS390_Xor32:
+    case kS390_Xor64:
+    case kS390_ShiftLeft32:
+    case kS390_ShiftLeft64:
+    case kS390_ShiftLeftPair:
+    case kS390_ShiftRight32:
+    case kS390_ShiftRight64:
+    case kS390_ShiftRightPair:
+    case kS390_ShiftRightArith32:
+    case kS390_ShiftRightArith64:
+    case kS390_ShiftRightArithPair:
+    case kS390_RotRight32:
+    case kS390_RotRight64:
+    case kS390_Not32:
+    case kS390_Not64:
+    case kS390_RotLeftAndClear64:
+    case kS390_RotLeftAndClearLeft64:
+    case kS390_RotLeftAndClearRight64:
+    case kS390_Lay:
+    case kS390_Add32:
+    case kS390_Add64:
+    case kS390_AddPair:
+    case kS390_AddFloat:
+    case kS390_AddDouble:
+    case kS390_Sub32:
+    case kS390_Sub64:
+    case kS390_SubPair:
+    case kS390_MulPair:
+    case kS390_SubFloat:
+    case kS390_SubDouble:
+    case kS390_Mul32:
+    case kS390_Mul32WithOverflow:
+    case kS390_Mul64:
+    case kS390_MulHigh32:
+    case kS390_MulHighU32:
+    case kS390_MulFloat:
+    case kS390_MulDouble:
+    case kS390_Div32:
+    case kS390_Div64:
+    case kS390_DivU32:
+    case kS390_DivU64:
+    case kS390_DivFloat:
+    case kS390_DivDouble:
+    case kS390_Mod32:
+    case kS390_Mod64:
+    case kS390_ModU32:
+    case kS390_ModU64:
+    case kS390_ModDouble:
+    case kS390_Neg32:
+    case kS390_Neg64:
+    case kS390_NegDouble:
+    case kS390_NegFloat:
+    case kS390_SqrtFloat:
+    case kS390_FloorFloat:
+    case kS390_CeilFloat:
+    case kS390_TruncateFloat:
+    case kS390_AbsFloat:
+    case kS390_SqrtDouble:
+    case kS390_FloorDouble:
+    case kS390_CeilDouble:
+    case kS390_TruncateDouble:
+    case kS390_RoundDouble:
+    case kS390_MaxFloat:
+    case kS390_MaxDouble:
+    case kS390_MinFloat:
+    case kS390_MinDouble:
+    case kS390_AbsDouble:
+    case kS390_Cntlz32:
+    case kS390_Cntlz64:
+    case kS390_Popcnt32:
+    case kS390_Popcnt64:
+    case kS390_Cmp32:
+    case kS390_Cmp64:
+    case kS390_CmpFloat:
+    case kS390_CmpDouble:
+    case kS390_Tst32:
+    case kS390_Tst64:
+    case kS390_SignExtendWord8ToInt32:
+    case kS390_SignExtendWord16ToInt32:
+    case kS390_SignExtendWord8ToInt64:
+    case kS390_SignExtendWord16ToInt64:
+    case kS390_SignExtendWord32ToInt64:
+    case kS390_Uint32ToUint64:
+    case kS390_Int64ToInt32:
+    case kS390_Int64ToFloat32:
+    case kS390_Int64ToDouble:
+    case kS390_Uint64ToFloat32:
+    case kS390_Uint64ToDouble:
+    case kS390_Int32ToFloat32:
+    case kS390_Int32ToDouble:
+    case kS390_Uint32ToFloat32:
+    case kS390_Uint32ToDouble:
+    case kS390_Float32ToInt32:
+    case kS390_Float32ToUint32:
+    case kS390_Float32ToUint64:
+    case kS390_Float32ToDouble:
+    case kS390_Float64SilenceNaN:
+    case kS390_DoubleToInt32:
+    case kS390_DoubleToUint32:
+    case kS390_Float32ToInt64:
+    case kS390_DoubleToInt64:
+    case kS390_DoubleToUint64:
+    case kS390_DoubleToFloat32:
+    case kS390_DoubleExtractLowWord32:
+    case kS390_DoubleExtractHighWord32:
+    case kS390_DoubleInsertLowWord32:
+    case kS390_DoubleInsertHighWord32:
+    case kS390_DoubleConstruct:
+    case kS390_BitcastInt32ToFloat32:
+    case kS390_BitcastFloat32ToInt32:
+    case kS390_BitcastInt64ToDouble:
+    case kS390_BitcastDoubleToInt64:
+    case kS390_LoadReverse16RR:
+    case kS390_LoadReverse32RR:
+    case kS390_LoadReverse64RR:
+    case kS390_LoadReverseSimd128RR:
+    case kS390_LoadReverseSimd128:
+    case kS390_LoadAndTestWord32:
+    case kS390_LoadAndTestWord64:
+    case kS390_LoadAndTestFloat32:
+    case kS390_LoadAndTestFloat64:
+    case kS390_CompressSigned:
+    case kS390_CompressPointer:
+    case kS390_CompressAny:
+    case kS390_F64x2Splat:
+    case kS390_F64x2ReplaceLane:
+    case kS390_F64x2Abs:
+    case kS390_F64x2Neg:
+    case kS390_F64x2Sqrt:
+    case kS390_F64x2Add:
+    case kS390_F64x2Sub:
+    case kS390_F64x2Mul:
+    case kS390_F64x2Div:
+    case kS390_F64x2Eq:
+    case kS390_F64x2Ne:
+    case kS390_F64x2Lt:
+    case kS390_F64x2Le:
+    case kS390_F64x2Min:
+    case kS390_F64x2Max:
+    case kS390_F64x2ExtractLane:
+    case kS390_F64x2Qfma:
+    case kS390_F64x2Qfms:
+    case kS390_F64x2Pmin:
+    case kS390_F64x2Pmax:
+    case kS390_F64x2Ceil:
+    case kS390_F64x2Floor:
+    case kS390_F64x2Trunc:
+    case kS390_F64x2NearestInt:
+    case kS390_F32x4Splat:
+    case kS390_F32x4ExtractLane:
+    case kS390_F32x4ReplaceLane:
+    case kS390_F32x4Add:
+    case kS390_F32x4AddHoriz:
+    case kS390_F32x4Sub:
+    case kS390_F32x4Mul:
+    case kS390_F32x4Eq:
+    case kS390_F32x4Ne:
+    case kS390_F32x4Lt:
+    case kS390_F32x4Le:
+    case kS390_F32x4Abs:
+    case kS390_F32x4Neg:
+    case kS390_F32x4RecipApprox:
+    case kS390_F32x4RecipSqrtApprox:
+    case kS390_F32x4SConvertI32x4:
+    case kS390_F32x4UConvertI32x4:
+    case kS390_F32x4Sqrt:
+    case kS390_F32x4Div:
+    case kS390_F32x4Min:
+    case kS390_F32x4Max:
+    case kS390_F32x4Qfma:
+    case kS390_F32x4Qfms:
+    case kS390_F32x4Pmin:
+    case kS390_F32x4Pmax:
+    case kS390_F32x4Ceil:
+    case kS390_F32x4Floor:
+    case kS390_F32x4Trunc:
+    case kS390_F32x4NearestInt:
+    case kS390_I64x2Neg:
+    case kS390_I64x2Add:
+    case kS390_I64x2Sub:
+    case kS390_I64x2Shl:
+    case kS390_I64x2ShrS:
+    case kS390_I64x2ShrU:
+    case kS390_I64x2Mul:
+    case kS390_I64x2Splat:
+    case kS390_I64x2ReplaceLane:
+    case kS390_I64x2ExtractLane:
+    case kS390_I64x2Eq:
+    case kS390_I32x4Splat:
+    case kS390_I32x4ExtractLane:
+    case kS390_I32x4ReplaceLane:
+    case kS390_I32x4Add:
+    case kS390_I32x4AddHoriz:
+    case kS390_I32x4Sub:
+    case kS390_I32x4Mul:
+    case kS390_I32x4MinS:
+    case kS390_I32x4MinU:
+    case kS390_I32x4MaxS:
+    case kS390_I32x4MaxU:
+    case kS390_I32x4Eq:
+    case kS390_I32x4Ne:
+    case kS390_I32x4GtS:
+    case kS390_I32x4GeS:
+    case kS390_I32x4GtU:
+    case kS390_I32x4GeU:
+    case kS390_I32x4Shl:
+    case kS390_I32x4ShrS:
+    case kS390_I32x4ShrU:
+    case kS390_I32x4Neg:
+    case kS390_I32x4SConvertF32x4:
+    case kS390_I32x4UConvertF32x4:
+    case kS390_I32x4SConvertI16x8Low:
+    case kS390_I32x4SConvertI16x8High:
+    case kS390_I32x4UConvertI16x8Low:
+    case kS390_I32x4UConvertI16x8High:
+    case kS390_I32x4Abs:
+    case kS390_I32x4BitMask:
+    case kS390_I32x4DotI16x8S:
+    case kS390_I16x8Splat:
+    case kS390_I16x8ExtractLaneU:
+    case kS390_I16x8ExtractLaneS:
+    case kS390_I16x8ReplaceLane:
+    case kS390_I16x8Add:
+    case kS390_I16x8AddHoriz:
+    case kS390_I16x8Sub:
+    case kS390_I16x8Mul:
+    case kS390_I16x8MinS:
+    case kS390_I16x8MinU:
+    case kS390_I16x8MaxS:
+    case kS390_I16x8MaxU:
+    case kS390_I16x8Eq:
+    case kS390_I16x8Ne:
+    case kS390_I16x8GtS:
+    case kS390_I16x8GeS:
+    case kS390_I16x8GtU:
+    case kS390_I16x8GeU:
+    case kS390_I16x8Shl:
+    case kS390_I16x8ShrS:
+    case kS390_I16x8ShrU:
+    case kS390_I16x8Neg:
+    case kS390_I16x8SConvertI32x4:
+    case kS390_I16x8UConvertI32x4:
+    case kS390_I16x8SConvertI8x16Low:
+    case kS390_I16x8SConvertI8x16High:
+    case kS390_I16x8UConvertI8x16Low:
+    case kS390_I16x8UConvertI8x16High:
+    case kS390_I16x8AddSatS:
+    case kS390_I16x8SubSatS:
+    case kS390_I16x8AddSatU:
+    case kS390_I16x8SubSatU:
+    case kS390_I16x8RoundingAverageU:
+    case kS390_I16x8Abs:
+    case kS390_I16x8BitMask:
+    case kS390_I8x16Splat:
+    case kS390_I8x16ExtractLaneU:
+    case kS390_I8x16ExtractLaneS:
+    case kS390_I8x16ReplaceLane:
+    case kS390_I8x16Add:
+    case kS390_I8x16Sub:
+    case kS390_I8x16Mul:
+    case kS390_I8x16MinS:
+    case kS390_I8x16MinU:
+    case kS390_I8x16MaxS:
+    case kS390_I8x16MaxU:
+    case kS390_I8x16Eq:
+    case kS390_I8x16Ne:
+    case kS390_I8x16GtS:
+    case kS390_I8x16GeS:
+    case kS390_I8x16GtU:
+    case kS390_I8x16GeU:
+    case kS390_I8x16Shl:
+    case kS390_I8x16ShrS:
+    case kS390_I8x16ShrU:
+    case kS390_I8x16Neg:
+    case kS390_I8x16SConvertI16x8:
+    case kS390_I8x16UConvertI16x8:
+    case kS390_I8x16AddSatS:
+    case kS390_I8x16SubSatS:
+    case kS390_I8x16AddSatU:
+    case kS390_I8x16SubSatU:
+    case kS390_I8x16RoundingAverageU:
+    case kS390_I8x16Abs:
+    case kS390_I8x16BitMask:
+    case kS390_I8x16Shuffle:
+    case kS390_I8x16Swizzle:
+    case kS390_V32x4AnyTrue:
+    case kS390_V16x8AnyTrue:
+    case kS390_V8x16AnyTrue:
+    case kS390_V32x4AllTrue:
+    case kS390_V16x8AllTrue:
+    case kS390_V8x16AllTrue:
+    case kS390_S128And:
+    case kS390_S128Or:
+    case kS390_S128Xor:
+    case kS390_S128Const:
+    case kS390_S128Zero:
+    case kS390_S128AllOnes:
+    case kS390_S128Not:
+    case kS390_S128Select:
+    case kS390_S128AndNot:
+      return kNoOpcodeFlags;
+
+    case kS390_LoadWordS8:
+    case kS390_LoadWordU8:
+    case kS390_LoadWordS16:
+    case kS390_LoadWordU16:
+    case kS390_LoadWordS32:
+    case kS390_LoadWordU32:
+    case kS390_LoadWord64:
+    case kS390_LoadFloat32:
+    case kS390_LoadDouble:
+    case kS390_LoadSimd128:
+    case kS390_LoadReverse16:
+    case kS390_LoadReverse32:
+    case kS390_LoadReverse64:
+    case kS390_Peek:
+    case kS390_LoadDecompressTaggedSigned:
+    case kS390_LoadDecompressTaggedPointer:
+    case kS390_LoadDecompressAnyTagged:
+      return kIsLoadOperation;
+
+    case kS390_StoreWord8:
+    case kS390_StoreWord16:
+    case kS390_StoreWord32:
+    case kS390_StoreWord64:
+    case kS390_StoreReverseSimd128:
+    case kS390_StoreReverse16:
+    case kS390_StoreReverse32:
+    case kS390_StoreReverse64:
+    case kS390_StoreFloat32:
+    case kS390_StoreDouble:
+    case kS390_StoreSimd128:
+    case kS390_StoreCompressTagged:
+    case kS390_Push:
+    case kS390_PushFrame:
+    case kS390_StoreToStackSlot:
+    case kS390_StackClaim:
+      return kHasSideEffect;
+
+    case kS390_Word64AtomicExchangeUint8:
+    case kS390_Word64AtomicExchangeUint16:
+    case kS390_Word64AtomicExchangeUint32:
+    case kS390_Word64AtomicExchangeUint64:
+    case kS390_Word64AtomicCompareExchangeUint8:
+    case kS390_Word64AtomicCompareExchangeUint16:
+    case kS390_Word64AtomicCompareExchangeUint32:
+    case kS390_Word64AtomicCompareExchangeUint64:
+    case kS390_Word64AtomicAddUint8:
+    case kS390_Word64AtomicAddUint16:
+    case kS390_Word64AtomicAddUint32:
+    case kS390_Word64AtomicAddUint64:
+    case kS390_Word64AtomicSubUint8:
+    case kS390_Word64AtomicSubUint16:
+    case kS390_Word64AtomicSubUint32:
+    case kS390_Word64AtomicSubUint64:
+    case kS390_Word64AtomicAndUint8:
+    case kS390_Word64AtomicAndUint16:
+    case kS390_Word64AtomicAndUint32:
+    case kS390_Word64AtomicAndUint64:
+    case kS390_Word64AtomicOrUint8:
+    case kS390_Word64AtomicOrUint16:
+    case kS390_Word64AtomicOrUint32:
+    case kS390_Word64AtomicOrUint64:
+    case kS390_Word64AtomicXorUint8:
+    case kS390_Word64AtomicXorUint16:
+    case kS390_Word64AtomicXorUint32:
+    case kS390_Word64AtomicXorUint64:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // TODO(all): Add instruction cost modeling.
+  return 1;
+}
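The TODO above leaves the latency estimate flat at 1. One possible shape such a cost model could take, assuming it sits next to this file's ArchOpcode definitions; EstimatedLatency and every latency number here are placeholders, not upstream code or measured values:

// Hypothetical helper, not part of the upstream file: divides and loads get a
// higher estimate than simple ALU operations.
int EstimatedLatency(ArchOpcode opcode) {
  switch (opcode) {
    case kS390_Div32:
    case kS390_Div64:
    case kS390_DivU32:
    case kS390_DivU64:
    case kS390_Mod32:
    case kS390_Mod64:
      return 20;  // placeholder: division is far slower than an add
    case kS390_LoadWord64:
    case kS390_LoadWordU32:
    case kS390_LoadFloat32:
    case kS390_LoadDouble:
      return 4;   // placeholder: L1-hit load latency
    default:
      return 1;   // keep the current flat estimate for everything else
  }
}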
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/s390/instruction-selector-s390.cc b/src/compiler/backend/s390/instruction-selector-s390.cc
new file mode 100644
index 0000000..124193f
--- /dev/null
+++ b/src/compiler/backend/s390/instruction-selector-s390.cc
@@ -0,0 +1,2968 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/execution/frame-constants.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+enum class OperandMode : uint32_t {
+  kNone = 0u,
+  // Immediate mode
+  kShift32Imm = 1u << 0,
+  kShift64Imm = 1u << 1,
+  kInt32Imm = 1u << 2,
+  kInt32Imm_Negate = 1u << 3,
+  kUint32Imm = 1u << 4,
+  kInt20Imm = 1u << 5,
+  kUint12Imm = 1u << 6,
+  // Instr format
+  kAllowRRR = 1u << 7,
+  kAllowRM = 1u << 8,
+  kAllowRI = 1u << 9,
+  kAllowRRI = 1u << 10,
+  kAllowRRM = 1u << 11,
+  // Useful combinations
+  kAllowImmediate = kAllowRI | kAllowRRI,
+  kAllowMemoryOperand = kAllowRM | kAllowRRM,
+  kAllowDistinctOps = kAllowRRR | kAllowRRI | kAllowRRM,
+  kBitWiseCommonMode = kAllowRI,
+  kArithmeticCommonMode = kAllowRM | kAllowRI
+};
+
+using OperandModes = base::Flags<OperandMode, uint32_t>;
+DEFINE_OPERATORS_FOR_FLAGS(OperandModes)
+OperandModes immediateModeMask =
+    OperandMode::kShift32Imm | OperandMode::kShift64Imm |
+    OperandMode::kInt32Imm | OperandMode::kInt32Imm_Negate |
+    OperandMode::kUint32Imm | OperandMode::kInt20Imm;
+
+#define AndCommonMode                                                \
+  ((OperandMode::kAllowRM |                                          \
+    (CpuFeatures::IsSupported(DISTINCT_OPS) ? OperandMode::kAllowRRR \
+                                            : OperandMode::kNone)))
+#define And64OperandMode AndCommonMode
+#define Or64OperandMode And64OperandMode
+#define Xor64OperandMode And64OperandMode
+
+#define And32OperandMode \
+  (AndCommonMode | OperandMode::kAllowRI | OperandMode::kUint32Imm)
+#define Or32OperandMode And32OperandMode
+#define Xor32OperandMode And32OperandMode
+
+#define Shift32OperandMode                                   \
+  ((OperandMode::kAllowRI | OperandMode::kShift64Imm |       \
+    (CpuFeatures::IsSupported(DISTINCT_OPS)                  \
+         ? (OperandMode::kAllowRRR | OperandMode::kAllowRRI) \
+         : OperandMode::kNone)))
+
+#define Shift64OperandMode                             \
+  ((OperandMode::kAllowRI | OperandMode::kShift64Imm | \
+    OperandMode::kAllowRRR | OperandMode::kAllowRRI))
+
+#define AddOperandMode                                            \
+  ((OperandMode::kArithmeticCommonMode | OperandMode::kInt32Imm | \
+    (CpuFeatures::IsSupported(DISTINCT_OPS)                       \
+         ? (OperandMode::kAllowRRR | OperandMode::kAllowRRI)      \
+         : OperandMode::kArithmeticCommonMode)))
+#define SubOperandMode                                                   \
+  ((OperandMode::kArithmeticCommonMode | OperandMode::kInt32Imm_Negate | \
+    (CpuFeatures::IsSupported(DISTINCT_OPS)                              \
+         ? (OperandMode::kAllowRRR | OperandMode::kAllowRRI)             \
+         : OperandMode::kArithmeticCommonMode)))
+#define MulOperandMode \
+  (OperandMode::kArithmeticCommonMode | OperandMode::kInt32Imm)
+
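A small usage sketch of the flag composition above (it assumes the OperandMode/OperandModes definitions in this file; the two helper functions are invented names): composed modes are ordinary bitmasks, so a visitor can test individual capabilities with operator&, exactly as the helpers further down in this file do.

// Sketch only; AddAllowsImmediateRight/AddAllowsMemoryRight are illustrative.
bool AddAllowsImmediateRight() {
  OperandModes mode = AddOperandMode;
  if (mode & OperandMode::kAllowImmediate) return true;  // RI/RRI forms allowed
  return false;
}

bool AddAllowsMemoryRight() {
  OperandModes mode = AddOperandMode;
  if (mode & OperandMode::kAllowMemoryOperand) return true;  // a load may be folded in (RM/RRM)
  return false;
}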
+// Adds S390-specific methods for generating operands.
+class S390OperandGenerator final : public OperandGenerator {
+ public:
+  explicit S390OperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  InstructionOperand UseOperand(Node* node, OperandModes mode) {
+    if (CanBeImmediate(node, mode)) {
+      return UseImmediate(node);
+    }
+    return UseRegister(node);
+  }
+
+  InstructionOperand UseAnyExceptImmediate(Node* node) {
+    if (NodeProperties::IsConstant(node))
+      return UseRegister(node);
+    else
+      return Use(node);
+  }
+
+  int64_t GetImmediate(Node* node) {
+    if (node->opcode() == IrOpcode::kInt32Constant)
+      return OpParameter<int32_t>(node->op());
+    else if (node->opcode() == IrOpcode::kInt64Constant)
+      return OpParameter<int64_t>(node->op());
+    else
+      UNIMPLEMENTED();
+    return 0L;
+  }
+
+  bool CanBeImmediate(Node* node, OperandModes mode) {
+    int64_t value;
+    if (node->opcode() == IrOpcode::kInt32Constant)
+      value = OpParameter<int32_t>(node->op());
+    else if (node->opcode() == IrOpcode::kInt64Constant)
+      value = OpParameter<int64_t>(node->op());
+    else
+      return false;
+    return CanBeImmediate(value, mode);
+  }
+
+  bool CanBeImmediate(int64_t value, OperandModes mode) {
+    if (mode & OperandMode::kShift32Imm)
+      return 0 <= value && value < 32;
+    else if (mode & OperandMode::kShift64Imm)
+      return 0 <= value && value < 64;
+    else if (mode & OperandMode::kInt32Imm)
+      return is_int32(value);
+    else if (mode & OperandMode::kInt32Imm_Negate)
+      return is_int32(-value);
+    else if (mode & OperandMode::kUint32Imm)
+      return is_uint32(value);
+    else if (mode & OperandMode::kInt20Imm)
+      return is_int20(value);
+    else if (mode & OperandMode::kUint12Imm)
+      return is_uint12(value);
+    else
+      return false;
+  }
+
+  bool CanBeMemoryOperand(InstructionCode opcode, Node* user, Node* input,
+                          int effect_level) {
+    if (input->opcode() != IrOpcode::kLoad ||
+        !selector()->CanCover(user, input)) {
+      return false;
+    }
+
+    if (effect_level != selector()->GetEffectLevel(input)) {
+      return false;
+    }
+
+    MachineRepresentation rep =
+        LoadRepresentationOf(input->op()).representation();
+    switch (opcode) {
+      case kS390_Cmp64:
+      case kS390_LoadAndTestWord64:
+        return rep == MachineRepresentation::kWord64 ||
+               (!COMPRESS_POINTERS_BOOL && IsAnyTagged(rep));
+      case kS390_LoadAndTestWord32:
+      case kS390_Cmp32:
+        return rep == MachineRepresentation::kWord32 ||
+               (COMPRESS_POINTERS_BOOL && IsAnyTagged(rep));
+      default:
+        break;
+    }
+    return false;
+  }
+
+  AddressingMode GenerateMemoryOperandInputs(Node* index, Node* base,
+                                             Node* displacement,
+                                             DisplacementMode displacement_mode,
+                                             InstructionOperand inputs[],
+                                             size_t* input_count) {
+    AddressingMode mode = kMode_MRI;
+    if (base != nullptr) {
+      inputs[(*input_count)++] = UseRegister(base);
+      if (index != nullptr) {
+        inputs[(*input_count)++] = UseRegister(index);
+        if (displacement != nullptr) {
+          inputs[(*input_count)++] = displacement_mode
+                                         ? UseNegatedImmediate(displacement)
+                                         : UseImmediate(displacement);
+          mode = kMode_MRRI;
+        } else {
+          mode = kMode_MRR;
+        }
+      } else {
+        if (displacement == nullptr) {
+          mode = kMode_MR;
+        } else {
+          inputs[(*input_count)++] = displacement_mode == kNegativeDisplacement
+                                         ? UseNegatedImmediate(displacement)
+                                         : UseImmediate(displacement);
+          mode = kMode_MRI;
+        }
+      }
+    } else {
+      DCHECK_NOT_NULL(index);
+      inputs[(*input_count)++] = UseRegister(index);
+      if (displacement != nullptr) {
+        inputs[(*input_count)++] = displacement_mode == kNegativeDisplacement
+                                       ? UseNegatedImmediate(displacement)
+                                       : UseImmediate(displacement);
+        mode = kMode_MRI;
+      } else {
+        mode = kMode_MR;
+      }
+    }
+    return mode;
+  }
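For reference, the addressing modes the function above selects for a few hypothetical input combinations (register names are arbitrary; a displacement is negated first when displacement_mode requests it):

// Worked examples (hypothetical nodes; rB/rX stand for whatever registers the
// register allocator ends up assigning, d for an immediate displacement):
//   base = rB,   index = null, displacement = null  ->  kMode_MR,   inputs: [rB]
//   base = rB,   index = null, displacement = d     ->  kMode_MRI,  inputs: [rB, #d]
//   base = rB,   index = rX,   displacement = null  ->  kMode_MRR,  inputs: [rB, rX]
//   base = rB,   index = rX,   displacement = d     ->  kMode_MRRI, inputs: [rB, rX, #d]
//   base = null, index = rX,   displacement = d     ->  kMode_MRI,  inputs: [rX, #d]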
+
+  AddressingMode GetEffectiveAddressMemoryOperand(
+      Node* operand, InstructionOperand inputs[], size_t* input_count,
+      OperandModes immediate_mode = OperandMode::kInt20Imm) {
+#if V8_TARGET_ARCH_S390X
+    BaseWithIndexAndDisplacement64Matcher m(operand,
+                                            AddressOption::kAllowInputSwap);
+#else
+    BaseWithIndexAndDisplacement32Matcher m(operand,
+                                            AddressOption::kAllowInputSwap);
+#endif
+    DCHECK(m.matches());
+    if ((m.displacement() == nullptr ||
+         CanBeImmediate(m.displacement(), immediate_mode))) {
+      DCHECK_EQ(0, m.scale());
+      return GenerateMemoryOperandInputs(m.index(), m.base(), m.displacement(),
+                                         m.displacement_mode(), inputs,
+                                         input_count);
+    } else {
+      inputs[(*input_count)++] = UseRegister(operand->InputAt(0));
+      inputs[(*input_count)++] = UseRegister(operand->InputAt(1));
+      return kMode_MRR;
+    }
+  }
+
+  bool CanBeBetterLeftOperand(Node* node) const {
+    return !selector()->IsLive(node);
+  }
+
+  MachineRepresentation GetRepresentation(Node* node) {
+    return sequence()->GetRepresentation(selector()->GetVirtualRegister(node));
+  }
+
+  bool Is64BitOperand(Node* node) {
+    return MachineRepresentation::kWord64 == GetRepresentation(node);
+  }
+};
+
+namespace {
+
+bool S390OpcodeOnlySupport12BitDisp(ArchOpcode opcode) {
+  switch (opcode) {
+    case kS390_AddFloat:
+    case kS390_AddDouble:
+    case kS390_CmpFloat:
+    case kS390_CmpDouble:
+    case kS390_Float32ToDouble:
+      return true;
+    default:
+      return false;
+  }
+}
+
+bool S390OpcodeOnlySupport12BitDisp(InstructionCode op) {
+  ArchOpcode opcode = ArchOpcodeField::decode(op);
+  return S390OpcodeOnlySupport12BitDisp(opcode);
+}
+
+#define OpcodeImmMode(op)                                       \
+  (S390OpcodeOnlySupport12BitDisp(op) ? OperandMode::kUint12Imm \
+                                      : OperandMode::kInt20Imm)
+
+ArchOpcode SelectLoadOpcode(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kS390_LoadFloat32;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kS390_LoadDouble;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsSigned() ? kS390_LoadWordS8 : kS390_LoadWordU8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kS390_LoadWordS16 : kS390_LoadWordU16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kS390_LoadWordU32;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+      opcode = kS390_LoadWordS32;
+      break;
+#else
+      UNREACHABLE();
+#endif
+#ifdef V8_COMPRESS_POINTERS
+    case MachineRepresentation::kTaggedSigned:
+      opcode = kS390_LoadDecompressTaggedSigned;
+      break;
+    case MachineRepresentation::kTaggedPointer:
+      opcode = kS390_LoadDecompressTaggedPointer;
+      break;
+    case MachineRepresentation::kTagged:
+      opcode = kS390_LoadDecompressAnyTagged;
+      break;
+#else
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+#endif
+    case MachineRepresentation::kWord64:
+      opcode = kS390_LoadWord64;
+      break;
+    case MachineRepresentation::kSimd128:
+      opcode = kS390_LoadSimd128;
+      break;
+    case MachineRepresentation::kNone:
+    default:
+      UNREACHABLE();
+  }
+  return opcode;
+}
+
+#define RESULT_IS_WORD32_LIST(V)   \
+  /* Float unary op*/              \
+  V(BitcastFloat32ToInt32)         \
+  /* V(TruncateFloat64ToWord32) */ \
+  V(RoundFloat64ToInt32)           \
+  V(TruncateFloat32ToInt32)        \
+  V(TruncateFloat32ToUint32)       \
+  V(TruncateFloat64ToUint32)       \
+  V(ChangeFloat64ToInt32)          \
+  V(ChangeFloat64ToUint32)         \
+  /* Word32 unary op */            \
+  V(Word32Clz)                     \
+  V(Word32Popcnt)                  \
+  V(Float64ExtractLowWord32)       \
+  V(Float64ExtractHighWord32)      \
+  V(SignExtendWord8ToInt32)        \
+  V(SignExtendWord16ToInt32)       \
+  /* Word32 bin op */              \
+  V(Int32Add)                      \
+  V(Int32Sub)                      \
+  V(Int32Mul)                      \
+  V(Int32AddWithOverflow)          \
+  V(Int32SubWithOverflow)          \
+  V(Int32MulWithOverflow)          \
+  V(Int32MulHigh)                  \
+  V(Uint32MulHigh)                 \
+  V(Int32Div)                      \
+  V(Uint32Div)                     \
+  V(Int32Mod)                      \
+  V(Uint32Mod)                     \
+  V(Word32Ror)                     \
+  V(Word32And)                     \
+  V(Word32Or)                      \
+  V(Word32Xor)                     \
+  V(Word32Shl)                     \
+  V(Word32Shr)                     \
+  V(Word32Sar)
+
+bool ProduceWord32Result(Node* node) {
+#if !V8_TARGET_ARCH_S390X
+  return true;
+#else
+  switch (node->opcode()) {
+#define VISITOR(name) case IrOpcode::k##name:
+    RESULT_IS_WORD32_LIST(VISITOR)
+#undef VISITOR
+    return true;
+    // TODO(john.yan): consider the following case to be valid
+    // case IrOpcode::kWord32Equal:
+    // case IrOpcode::kInt32LessThan:
+    // case IrOpcode::kInt32LessThanOrEqual:
+    // case IrOpcode::kUint32LessThan:
+    // case IrOpcode::kUint32LessThanOrEqual:
+    // case IrOpcode::kUint32MulHigh:
+    //   // These 32-bit operations implicitly zero-extend to 64-bit on x64,
+    //   // so the zero-extension is a no-op.
+    //   return true;
+    // case IrOpcode::kProjection: {
+    //   Node* const value = node->InputAt(0);
+    //   switch (value->opcode()) {
+    //     case IrOpcode::kInt32AddWithOverflow:
+    //     case IrOpcode::kInt32SubWithOverflow:
+    //     case IrOpcode::kInt32MulWithOverflow:
+    //       return true;
+    //     default:
+    //       return false;
+    //   }
+    // }
+    case IrOpcode::kLoad: {
+      LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+      switch (load_rep.representation()) {
+        case MachineRepresentation::kWord32:
+          return true;
+        case MachineRepresentation::kWord8:
+          if (load_rep.IsSigned())
+            return false;
+          else
+            return true;
+        default:
+          return false;
+      }
+    }
+    default:
+      return false;
+  }
+#endif
+}
+
+static inline bool DoZeroExtForResult(Node* node) {
+#if V8_TARGET_ARCH_S390X
+  return ProduceWord32Result(node);
+#else
+  return false;
+#endif
+}
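A standalone illustration, not V8 code, of the convention these two predicates feed: on 64-bit S390 a 32-bit producer can leave the upper half of the register stale, so the visitors below append an extra immediate operand, g.TempImmediate(!canEliminateZeroExt), telling the code generator whether an explicit zero-extension is still required. ZeroExtend and ZeroExtFlag are hypothetical helper names.

#include <cstdint>

// What the explicit zero-extension buys: a clean 64-bit view of a 32-bit value.
inline uint64_t ZeroExtend(uint32_t low32) {
  return static_cast<uint64_t>(low32);
}

// Mirrors the extra operand g.TempImmediate(!canEliminateZeroExt) appended by
// the visitors below: 1 means "emit an explicit zero-extension", 0 means "the
// producer already yields a clean 32-bit result, skip it".
inline int ZeroExtFlag(bool can_eliminate_zero_ext) {
  return static_cast<int>(!can_eliminate_zero_ext);
}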
+
+// TODO(john.yan): Create VisitShift to match dst = src shift (R+I)
+#if 0
+void VisitShift() { }
+#endif
+
+#if V8_TARGET_ARCH_S390X
+void VisitTryTruncateDouble(InstructionSelector* selector, ArchOpcode opcode,
+                            Node* node) {
+  S390OperandGenerator g(selector);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  selector->Emit(opcode, output_count, outputs, 1, inputs);
+}
+#endif
+
+template <class CanCombineWithLoad>
+void GenerateRightOperands(InstructionSelector* selector, Node* node,
+                           Node* right, InstructionCode* opcode,
+                           OperandModes* operand_mode,
+                           InstructionOperand* inputs, size_t* input_count,
+                           CanCombineWithLoad canCombineWithLoad) {
+  S390OperandGenerator g(selector);
+
+  if ((*operand_mode & OperandMode::kAllowImmediate) &&
+      g.CanBeImmediate(right, *operand_mode)) {
+    inputs[(*input_count)++] = g.UseImmediate(right);
+    // Can only be RI or RRI
+    *operand_mode &= OperandMode::kAllowImmediate;
+  } else if (*operand_mode & OperandMode::kAllowMemoryOperand) {
+    NodeMatcher mright(right);
+    if (mright.IsLoad() && selector->CanCover(node, right) &&
+        canCombineWithLoad(SelectLoadOpcode(right))) {
+      AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+          right, inputs, input_count, OpcodeImmMode(*opcode));
+      *opcode |= AddressingModeField::encode(mode);
+      *operand_mode &= ~OperandMode::kAllowImmediate;
+      if (*operand_mode & OperandMode::kAllowRM)
+        *operand_mode &= ~OperandMode::kAllowDistinctOps;
+    } else if (*operand_mode & OperandMode::kAllowRM) {
+      DCHECK(!(*operand_mode & OperandMode::kAllowRRM));
+      inputs[(*input_count)++] = g.UseAnyExceptImmediate(right);
+      // Cannot be an immediate
+      *operand_mode &=
+          ~OperandMode::kAllowImmediate & ~OperandMode::kAllowDistinctOps;
+    } else if (*operand_mode & OperandMode::kAllowRRM) {
+      DCHECK(!(*operand_mode & OperandMode::kAllowRM));
+      inputs[(*input_count)++] = g.UseAnyExceptImmediate(right);
+      // Cannot be an immediate
+      *operand_mode &= ~OperandMode::kAllowImmediate;
+    } else {
+      UNREACHABLE();
+    }
+  } else {
+    inputs[(*input_count)++] = g.UseRegister(right);
+    // Can only be RR or RRR
+    *operand_mode &= OperandMode::kAllowRRR;
+  }
+}
+
+template <class CanCombineWithLoad>
+void GenerateBinOpOperands(InstructionSelector* selector, Node* node,
+                           Node* left, Node* right, InstructionCode* opcode,
+                           OperandModes* operand_mode,
+                           InstructionOperand* inputs, size_t* input_count,
+                           CanCombineWithLoad canCombineWithLoad) {
+  S390OperandGenerator g(selector);
+  // left is always a register
+  InstructionOperand const left_input = g.UseRegister(left);
+  inputs[(*input_count)++] = left_input;
+
+  if (left == right) {
+    inputs[(*input_count)++] = left_input;
+    // Can only be RR or RRR
+    *operand_mode &= OperandMode::kAllowRRR;
+  } else {
+    GenerateRightOperands(selector, node, right, opcode, operand_mode, inputs,
+                          input_count, canCombineWithLoad);
+  }
+}
+
+template <class CanCombineWithLoad>
+void VisitUnaryOp(InstructionSelector* selector, Node* node,
+                  InstructionCode opcode, OperandModes operand_mode,
+                  FlagsContinuation* cont,
+                  CanCombineWithLoad canCombineWithLoad);
+
+template <class CanCombineWithLoad>
+void VisitBinOp(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, OperandModes operand_mode,
+                FlagsContinuation* cont, CanCombineWithLoad canCombineWithLoad);
+
+// Generate the following variations:
+//   VisitWord32UnaryOp, VisitWord32BinOp,
+//   VisitWord64UnaryOp, VisitWord64BinOp,
+//   VisitFloat32UnaryOp, VisitFloat32BinOp,
+//   VisitFloat64UnaryOp, VisitFloat64BinOp
+#define VISIT_OP_LIST_32(V)                                            \
+  V(Word32, Unary, [](ArchOpcode opcode) {                             \
+    return opcode == kS390_LoadWordS32 || opcode == kS390_LoadWordU32; \
+  })                                                                   \
+  V(Word64, Unary,                                                     \
+    [](ArchOpcode opcode) { return opcode == kS390_LoadWord64; })      \
+  V(Float32, Unary,                                                    \
+    [](ArchOpcode opcode) { return opcode == kS390_LoadFloat32; })     \
+  V(Float64, Unary,                                                    \
+    [](ArchOpcode opcode) { return opcode == kS390_LoadDouble; })      \
+  V(Word32, Bin, [](ArchOpcode opcode) {                               \
+    return opcode == kS390_LoadWordS32 || opcode == kS390_LoadWordU32; \
+  })                                                                   \
+  V(Float32, Bin,                                                      \
+    [](ArchOpcode opcode) { return opcode == kS390_LoadFloat32; })     \
+  V(Float64, Bin, [](ArchOpcode opcode) { return opcode == kS390_LoadDouble; })
+
+#if V8_TARGET_ARCH_S390X
+#define VISIT_OP_LIST(V) \
+  VISIT_OP_LIST_32(V)    \
+  V(Word64, Bin, [](ArchOpcode opcode) { return opcode == kS390_LoadWord64; })
+#else
+#define VISIT_OP_LIST VISIT_OP_LIST_32
+#endif
+
+#define DECLARE_VISIT_HELPER_FUNCTIONS(type1, type2, canCombineWithLoad)  \
+  static inline void Visit##type1##type2##Op(                             \
+      InstructionSelector* selector, Node* node, InstructionCode opcode,  \
+      OperandModes operand_mode, FlagsContinuation* cont) {               \
+    Visit##type2##Op(selector, node, opcode, operand_mode, cont,          \
+                     canCombineWithLoad);                                 \
+  }                                                                       \
+  static inline void Visit##type1##type2##Op(                             \
+      InstructionSelector* selector, Node* node, InstructionCode opcode,  \
+      OperandModes operand_mode) {                                        \
+    FlagsContinuation cont;                                               \
+    Visit##type1##type2##Op(selector, node, opcode, operand_mode, &cont); \
+  }
+VISIT_OP_LIST(DECLARE_VISIT_HELPER_FUNCTIONS)
+#undef DECLARE_VISIT_HELPER_FUNCTIONS
+#undef VISIT_OP_LIST_32
+#undef VISIT_OP_LIST
+
+template <class CanCombineWithLoad>
+void VisitUnaryOp(InstructionSelector* selector, Node* node,
+                  InstructionCode opcode, OperandModes operand_mode,
+                  FlagsContinuation* cont,
+                  CanCombineWithLoad canCombineWithLoad) {
+  S390OperandGenerator g(selector);
+  InstructionOperand inputs[8];
+  size_t input_count = 0;
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  Node* input = node->InputAt(0);
+
+  GenerateRightOperands(selector, node, input, &opcode, &operand_mode, inputs,
+                        &input_count, canCombineWithLoad);
+
+  bool input_is_word32 = ProduceWord32Result(input);
+
+  bool doZeroExt = DoZeroExtForResult(node);
+  bool canEliminateZeroExt = input_is_word32;
+
+  if (doZeroExt) {
+    // Add zero-ext indication
+    inputs[input_count++] = g.TempImmediate(!canEliminateZeroExt);
+  }
+
+  if (!cont->IsDeoptimize()) {
+    // If we can deoptimize as a result of this operation, we need to make
+    // sure that the deopt inputs are not overwritten by its result. One way
+    // to achieve that is to declare the output register as same-as-first.
+    if (doZeroExt && canEliminateZeroExt) {
+      // We have to make sure the result and the left operand use the same
+      // register.
+      outputs[output_count++] = g.DefineSameAsFirst(node);
+    } else {
+      outputs[output_count++] = g.DefineAsRegister(node);
+    }
+  } else {
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_NE(0u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+template <class CanCombineWithLoad>
+void VisitBinOp(InstructionSelector* selector, Node* node,
+                InstructionCode opcode, OperandModes operand_mode,
+                FlagsContinuation* cont,
+                CanCombineWithLoad canCombineWithLoad) {
+  S390OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+  InstructionOperand inputs[8];
+  size_t input_count = 0;
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+
+  if (node->op()->HasProperty(Operator::kCommutative) &&
+      !g.CanBeImmediate(right, operand_mode) &&
+      (g.CanBeBetterLeftOperand(right))) {
+    std::swap(left, right);
+  }
+
+  GenerateBinOpOperands(selector, node, left, right, &opcode, &operand_mode,
+                        inputs, &input_count, canCombineWithLoad);
+
+  bool left_is_word32 = ProduceWord32Result(left);
+
+  bool doZeroExt = DoZeroExtForResult(node);
+  bool canEliminateZeroExt = left_is_word32;
+
+  if (doZeroExt) {
+    // Add zero-ext indication
+    inputs[input_count++] = g.TempImmediate(!canEliminateZeroExt);
+  }
+
+  if ((operand_mode & OperandMode::kAllowDistinctOps) &&
+      // If we can deoptimize as a result of the binop, we need to make sure
+      // that the deopt inputs are not overwritten by the binop result. One way
+      // to achieve that is to declare the output register as same-as-first.
+      !cont->IsDeoptimize()) {
+    if (doZeroExt && canEliminateZeroExt) {
+      // We have to make sure the result and the left operand use the same
+      // register.
+      outputs[output_count++] = g.DefineSameAsFirst(node);
+    } else {
+      outputs[output_count++] = g.DefineAsRegister(node);
+    }
+  } else {
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_NE(0u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+}  // namespace
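For orientation, a representative sketch of how the generated VisitWord32BinOp/VisitWord64BinOp helpers are used by per-node visitors (compare VisitWord64And and VisitInt32AbsWithOverflow below); VisitInt32AddSketch is an invented name, not a quote of the upstream visitor.

// Sketch only: a 32-bit add is handed to the shared helper together with the
// target opcode and the operand modes it may legally use.
void VisitInt32AddSketch(InstructionSelector* selector, Node* node) {
  VisitWord32BinOp(selector, node, kS390_Add32, AddOperandMode);
}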
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), r3));
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  S390OperandGenerator g(this);
+  InstructionCode opcode = SelectLoadOpcode(node);
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  opcode |= AddressingModeField::encode(mode);
+  if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    opcode |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+  Emit(opcode, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+static void VisitGeneralStore(
+    InstructionSelector* selector, Node* node, MachineRepresentation rep,
+    WriteBarrierKind write_barrier_kind = kNoWriteBarrier) {
+  S390OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* offset = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedOrCompressedPointer(rep));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[3];
+    size_t input_count = 0;
+    inputs[input_count++] = g.UseUniqueRegister(base);
+    // OutOfLineRecordWrite uses the offset in an 'AddP' instruction as well as
+    // for the store itself, so we must check compatibility with both.
+    if (g.CanBeImmediate(offset, OperandMode::kInt20Imm)) {
+      inputs[input_count++] = g.UseImmediate(offset);
+      addressing_mode = kMode_MRI;
+    } else {
+      inputs[input_count++] = g.UseUniqueRegister(offset);
+      addressing_mode = kMode_MRR;
+    }
+    inputs[input_count++] = g.UseUniqueRegister(value);
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    size_t const temp_count = arraysize(temps);
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
+  } else {
+    ArchOpcode opcode;
+    NodeMatcher m(value);
+    switch (rep) {
+      case MachineRepresentation::kFloat32:
+        opcode = kS390_StoreFloat32;
+        break;
+      case MachineRepresentation::kFloat64:
+        opcode = kS390_StoreDouble;
+        break;
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = kS390_StoreWord8;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = kS390_StoreWord16;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = kS390_StoreWord32;
+        if (m.IsWord32ReverseBytes()) {
+          opcode = kS390_StoreReverse32;
+          value = value->InputAt(0);
+        }
+        break;
+      case MachineRepresentation::kCompressedPointer:  // Fall through.
+      case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+        opcode = kS390_StoreCompressTagged;
+        break;
+#else
+        UNREACHABLE();
+#endif
+      case MachineRepresentation::kTaggedSigned:   // Fall through.
+      case MachineRepresentation::kTaggedPointer:  // Fall through.
+      case MachineRepresentation::kTagged:
+        opcode = kS390_StoreCompressTagged;
+        break;
+      case MachineRepresentation::kWord64:
+        opcode = kS390_StoreWord64;
+        if (m.IsWord64ReverseBytes()) {
+          opcode = kS390_StoreReverse64;
+          value = value->InputAt(0);
+        }
+        break;
+      case MachineRepresentation::kSimd128:
+        opcode = kS390_StoreSimd128;
+        if (m.IsSimd128ReverseBytes()) {
+          opcode = kS390_StoreReverseSimd128;
+          value = value->InputAt(0);
+        }
+        break;
+      case MachineRepresentation::kNone:
+        UNREACHABLE();
+    }
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    AddressingMode addressing_mode =
+        g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+    InstructionCode code =
+        opcode | AddressingModeField::encode(addressing_mode);
+    InstructionOperand value_operand = g.UseRegister(value);
+    inputs[input_count++] = value_operand;
+    selector->Emit(code, 0, static_cast<InstructionOperand*>(nullptr),
+                   input_count, inputs);
+  }
+}
+
+void InstructionSelector::VisitStore(Node* node) {
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+  MachineRepresentation rep = store_rep.representation();
+
+  if (FLAG_enable_unconditional_write_barriers &&
+      CanBeTaggedOrCompressedPointer(rep)) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  VisitGeneralStore(this, node, rep, write_barrier_kind);
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  // TODO(eholk)
+  UNIMPLEMENTED();
+}
+
+// The architecture supports unaligned access, so VisitLoad is used instead.
+void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
+
+// The architecture supports unaligned access, so VisitStore is used instead.
+void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  S390OperandGenerator g(this);
+
+  // No outputs.
+  InstructionOperand* const outputs = nullptr;
+  const int output_count = 0;
+
+  // Applying an offset to this stack check requires a temp register. Offsets
+  // are only applied to the first stack check. If applying an offset, we must
+  // ensure the input and temp registers do not alias, thus kUniqueRegister.
+  InstructionOperand temps[] = {g.TempRegister()};
+  const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
+  const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
+                                 ? OperandGenerator::kUniqueRegister
+                                 : OperandGenerator::kRegister;
+
+  Node* const value = node->InputAt(0);
+  InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
+  static constexpr int input_count = arraysize(inputs);
+
+  EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
+                       temp_count, temps, cont);
+}
+
+#if 0
+static inline bool IsContiguousMask32(uint32_t value, int* mb, int* me) {
+  int mask_width = base::bits::CountPopulation(value);
+  int mask_msb = base::bits::CountLeadingZeros32(value);
+  int mask_lsb = base::bits::CountTrailingZeros32(value);
+  if ((mask_width == 0) || (mask_msb + mask_width + mask_lsb != 32))
+    return false;
+  *mb = mask_lsb + mask_width - 1;
+  *me = mask_lsb;
+  return true;
+}
+#endif
+
+#if V8_TARGET_ARCH_S390X
+static inline bool IsContiguousMask64(uint64_t value, int* mb, int* me) {
+  int mask_width = base::bits::CountPopulation(value);
+  int mask_msb = base::bits::CountLeadingZeros64(value);
+  int mask_lsb = base::bits::CountTrailingZeros64(value);
+  if ((mask_width == 0) || (mask_msb + mask_width + mask_lsb != 64))
+    return false;
+  *mb = mask_lsb + mask_width - 1;
+  *me = mask_lsb;
+  return true;
+}
+#endif
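A self-contained restatement of the mask test above with a worked example; it is renamed ...Demo, uses GCC/Clang builtins instead of V8's base::bits helpers, and guards the zero case explicitly:

#include <cstdint>
#include <cstdio>

// Same idea as IsContiguousMask64 above: the mask is usable by the
// rotate-and-clear instructions only if its set bits form one unbroken run;
// mb/me are then the bit indices of the run's most/least significant bits.
static bool IsContiguousMask64Demo(uint64_t value, int* mb, int* me) {
  if (value == 0) return false;  // the builtins are undefined for 0
  int width = __builtin_popcountll(value);
  int msb_zeros = __builtin_clzll(value);
  int lsb_zeros = __builtin_ctzll(value);
  if (msb_zeros + width + lsb_zeros != 64) return false;
  *mb = lsb_zeros + width - 1;
  *me = lsb_zeros;
  return true;
}

int main() {
  int mb = 0, me = 0;
  // 0x0000000000FF0000 has bits 23..16 set: mb == 23, me == 16.
  if (IsContiguousMask64Demo(0x0000000000FF0000ull, &mb, &me)) {
    std::printf("mb=%d me=%d\n", mb, me);
  }
  // 0x0000000000FF00F0 has two runs of set bits, so VisitWord64And above falls
  // back to the generic kS390_And64 path for such a mask.
  return IsContiguousMask64Demo(0x0000000000FF00F0ull, &mb, &me) ? 1 : 0;
}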
+
+#if V8_TARGET_ARCH_S390X
+void InstructionSelector::VisitWord64And(Node* node) {
+  S390OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  int mb = 0;
+  int me = 0;
+  if (m.right().HasResolvedValue() &&
+      IsContiguousMask64(m.right().ResolvedValue(), &mb, &me)) {
+    int sh = 0;
+    Node* left = m.left().node();
+    if ((m.left().IsWord64Shr() || m.left().IsWord64Shl()) &&
+        CanCover(node, left)) {
+      Int64BinopMatcher mleft(m.left().node());
+      if (mleft.right().IsInRange(0, 63)) {
+        left = mleft.left().node();
+        sh = mleft.right().ResolvedValue();
+        if (m.left().IsWord64Shr()) {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (mb > 63 - sh) mb = 63 - sh;
+          sh = (64 - sh) & 0x3F;
+        } else {
+          // Adjust the mask such that it doesn't include any rotated bits.
+          if (me < sh) me = sh;
+        }
+      }
+    }
+    if (mb >= me) {
+      bool match = false;
+      ArchOpcode opcode;
+      int mask;
+      if (me == 0) {
+        match = true;
+        opcode = kS390_RotLeftAndClearLeft64;
+        mask = mb;
+      } else if (mb == 63) {
+        match = true;
+        opcode = kS390_RotLeftAndClearRight64;
+        mask = me;
+      } else if (sh && me <= sh && m.left().IsWord64Shl()) {
+        match = true;
+        opcode = kS390_RotLeftAndClear64;
+        mask = mb;
+      }
+      if (match && CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
+        Emit(opcode, g.DefineAsRegister(node), g.UseRegister(left),
+             g.TempImmediate(sh), g.TempImmediate(mask));
+        return;
+      }
+    }
+  }
+  VisitWord64BinOp(this, node, kS390_And64, And64OperandMode);
+}
+
+void InstructionSelector::VisitWord64Shl(Node* node) {
+  S390OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  // TODO(mbrandy): eliminate left sign extension if right >= 32
+  if (m.left().IsWord64And() && m.right().IsInRange(0, 63)) {
+    Int64BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask64(mleft.right().ResolvedValue() << sh, &mb, &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (me < sh) me = sh;
+      if (mb >= me) {
+        bool match = false;
+        ArchOpcode opcode;
+        int mask;
+        if (me == 0) {
+          match = true;
+          opcode = kS390_RotLeftAndClearLeft64;
+          mask = mb;
+        } else if (mb == 63) {
+          match = true;
+          opcode = kS390_RotLeftAndClearRight64;
+          mask = me;
+        } else if (sh && me <= sh) {
+          match = true;
+          opcode = kS390_RotLeftAndClear64;
+          mask = mb;
+        }
+        if (match && CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
+          Emit(opcode, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+               g.TempImmediate(mask));
+          return;
+        }
+      }
+    }
+  }
+  VisitWord64BinOp(this, node, kS390_ShiftLeft64, Shift64OperandMode);
+}
+
+void InstructionSelector::VisitWord64Shr(Node* node) {
+  S390OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().IsWord64And() && m.right().IsInRange(0, 63)) {
+    Int64BinopMatcher mleft(m.left().node());
+    int sh = m.right().ResolvedValue();
+    int mb;
+    int me;
+    if (mleft.right().HasResolvedValue() &&
+        IsContiguousMask64((uint64_t)(mleft.right().ResolvedValue()) >> sh, &mb,
+                           &me)) {
+      // Adjust the mask such that it doesn't include any rotated bits.
+      if (mb > 63 - sh) mb = 63 - sh;
+      sh = (64 - sh) & 0x3F;
+      if (mb >= me) {
+        bool match = false;
+        ArchOpcode opcode;
+        int mask;
+        if (me == 0) {
+          match = true;
+          opcode = kS390_RotLeftAndClearLeft64;
+          mask = mb;
+        } else if (mb == 63) {
+          match = true;
+          opcode = kS390_RotLeftAndClearRight64;
+          mask = me;
+        }
+        if (match) {
+          Emit(opcode, g.DefineAsRegister(node),
+               g.UseRegister(mleft.left().node()), g.TempImmediate(sh),
+               g.TempImmediate(mask));
+          return;
+        }
+      }
+    }
+  }
+  VisitWord64BinOp(this, node, kS390_ShiftRight64, Shift64OperandMode);
+}
+#endif
+
+static inline bool TryMatchSignExtInt16OrInt8FromWord32Sar(
+    InstructionSelector* selector, Node* node) {
+  S390OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  if (selector->CanCover(node, m.left().node()) && m.left().IsWord32Shl()) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().Is(16) && m.right().Is(16)) {
+      bool canEliminateZeroExt = ProduceWord32Result(mleft.left().node());
+      selector->Emit(kS390_SignExtendWord16ToInt32,
+                     canEliminateZeroExt ? g.DefineSameAsFirst(node)
+                                         : g.DefineAsRegister(node),
+                     g.UseRegister(mleft.left().node()),
+                     g.TempImmediate(!canEliminateZeroExt));
+      return true;
+    } else if (mleft.right().Is(24) && m.right().Is(24)) {
+      bool canEliminateZeroExt = ProduceWord32Result(mleft.left().node());
+      selector->Emit(kS390_SignExtendWord8ToInt32,
+                     canEliminateZeroExt ? g.DefineSameAsFirst(node)
+                                         : g.DefineAsRegister(node),
+                     g.UseRegister(mleft.left().node()),
+                     g.TempImmediate(!canEliminateZeroExt));
+      return true;
+    }
+  }
+  return false;
+}
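The matcher above recognizes the classic shift-pair idiom for sign extension. A standalone check of the equivalence (assumes two's-complement representation and an arithmetic right shift on signed values, which holds on all V8 targets; the function names are illustrative):

#include <cassert>
#include <cstdint>

// (x << 16) >> 16 sign-extends the low 16 bits of x, which is why the matcher
// folds the Word32Shl/Word32Sar pair into kS390_SignExtendWord16ToInt32
// (and the 24/24 variant into the 8-bit opcode).
static int32_t SextViaShifts(int32_t x) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) << 16) >> 16;
}
static int32_t SextDirect(int32_t x) { return static_cast<int16_t>(x); }

int main() {
  assert(SextViaShifts(0x00012345) == 0x2345);
  assert(SextViaShifts(0x0001F345) == SextDirect(0x0001F345));  // == -3259
  return 0;
}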
+
+#if !V8_TARGET_ARCH_S390X
+void VisitPairBinop(InstructionSelector* selector, InstructionCode opcode,
+                    InstructionCode opcode2, Node* node) {
+  S390OperandGenerator g(selector);
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    // We use UseUniqueRegister here to avoid register sharing with the output
+    // registers.
+    InstructionOperand inputs[] = {
+        g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    selector->Emit(opcode, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    selector->Emit(opcode2, g.DefineSameAsFirst(node),
+                   g.UseRegister(node->InputAt(0)),
+                   g.UseRegister(node->InputAt(2)), g.TempImmediate(0));
+  }
+}
+
+void InstructionSelector::VisitInt32PairAdd(Node* node) {
+  VisitPairBinop(this, kS390_AddPair, kS390_Add32, node);
+}
+
+void InstructionSelector::VisitInt32PairSub(Node* node) {
+  VisitPairBinop(this, kS390_SubPair, kS390_Sub32, node);
+}
+
+void InstructionSelector::VisitInt32PairMul(Node* node) {
+  S390OperandGenerator g(this);
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+  if (projection1) {
+    InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                   g.UseUniqueRegister(node->InputAt(1)),
+                                   g.UseUniqueRegister(node->InputAt(2)),
+                                   g.UseUniqueRegister(node->InputAt(3))};
+
+    InstructionOperand outputs[] = {
+        g.DefineAsRegister(node),
+        g.DefineAsRegister(NodeProperties::FindProjection(node, 1))};
+
+    Emit(kS390_MulPair, 2, outputs, 4, inputs);
+  } else {
+    // The high word of the result is not used, so we emit the standard 32-bit
+    // instruction.
+    Emit(kS390_Mul32, g.DefineSameAsFirst(node),
+         g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(2)),
+         g.TempImmediate(0));
+  }
+}
+
+namespace {
+// Shared routine for multiple shift operations.
+void VisitPairShift(InstructionSelector* selector, InstructionCode opcode,
+                    Node* node) {
+  S390OperandGenerator g(selector);
+  // We use g.UseUniqueRegister here to guarantee that there is
+  // no register aliasing of input registers with output registers.
+  Int32Matcher m(node->InputAt(2));
+  InstructionOperand shift_operand;
+  if (m.HasResolvedValue()) {
+    shift_operand = g.UseImmediate(m.node());
+  } else {
+    shift_operand = g.UseUniqueRegister(m.node());
+  }
+
+  InstructionOperand inputs[] = {g.UseUniqueRegister(node->InputAt(0)),
+                                 g.UseUniqueRegister(node->InputAt(1)),
+                                 shift_operand};
+
+  Node* projection1 = NodeProperties::FindProjection(node, 1);
+
+  InstructionOperand outputs[2];
+  InstructionOperand temps[1];
+  int32_t output_count = 0;
+  int32_t temp_count = 0;
+
+  outputs[output_count++] = g.DefineAsRegister(node);
+  if (projection1) {
+    outputs[output_count++] = g.DefineAsRegister(projection1);
+  } else {
+    temps[temp_count++] = g.TempRegister();
+  }
+
+  selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
+}
+}  // namespace
+
+void InstructionSelector::VisitWord32PairShl(Node* node) {
+  VisitPairShift(this, kS390_ShiftLeftPair, node);
+}
+
+void InstructionSelector::VisitWord32PairShr(Node* node) {
+  VisitPairShift(this, kS390_ShiftRightPair, node);
+}
+
+void InstructionSelector::VisitWord32PairSar(Node* node) {
+  VisitPairShift(this, kS390_ShiftRightArithPair, node);
+}
+#endif
+
+void InstructionSelector::VisitWord32Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64Rol(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
+
+#if V8_TARGET_ARCH_S390X
+void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
+#endif
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+#if V8_TARGET_ARCH_S390X
+void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); }
+#endif
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  VisitWord32UnaryOp(this, node, kS390_Abs32, OperandMode::kNone);
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  VisitWord64UnaryOp(this, node, kS390_Abs64, OperandMode::kNone);
+}
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kS390_LoadReverse64RR, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
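+// For a 32-bit byte reversal, fold a covered kWord32 load into a single
+// load-reverse instruction; otherwise reverse the value in a register.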
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  S390OperandGenerator g(this);
+  NodeMatcher input(node->InputAt(0));
+  if (CanCover(node, input.node()) && input.IsLoad()) {
+    LoadRepresentation load_rep = LoadRepresentationOf(input.node()->op());
+    if (load_rep.representation() == MachineRepresentation::kWord32) {
+      Node* base = input.node()->InputAt(0);
+      Node* offset = input.node()->InputAt(1);
+      Emit(kS390_LoadReverse32 | AddressingModeField::encode(kMode_MRR),
+           // TODO(john.yan): either base or offset can be an immediate.
+           g.DefineAsRegister(node), g.UseRegister(base),
+           g.UseRegister(offset));
+      return;
+    }
+  }
+  Emit(kS390_LoadReverse32RR, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  S390OperandGenerator g(this);
+  NodeMatcher input(node->InputAt(0));
+  if (CanCover(node, input.node()) && input.IsLoad()) {
+    LoadRepresentation load_rep = LoadRepresentationOf(input.node()->op());
+    if (load_rep.representation() == MachineRepresentation::kSimd128) {
+      Node* base = input.node()->InputAt(0);
+      Node* offset = input.node()->InputAt(1);
+      Emit(kS390_LoadReverseSimd128 | AddressingModeField::encode(kMode_MRR),
+           // TODO(miladfar): either base or offset can be an immediate.
+           g.DefineAsRegister(node), g.UseRegister(base),
+           g.UseRegister(offset));
+      return;
+    }
+  }
+  Emit(kS390_LoadReverseSimd128RR, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
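+// Try to match "0 - x" and select a negate instruction instead of a subtract,
+// taking the zero-extension requirement of the result into account.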
+template <class Matcher, ArchOpcode neg_opcode>
+static inline bool TryMatchNegFromSub(InstructionSelector* selector,
+                                      Node* node) {
+  S390OperandGenerator g(selector);
+  Matcher m(node);
+  static_assert(neg_opcode == kS390_Neg32 || neg_opcode == kS390_Neg64,
+                "Provided opcode is not a Neg opcode.");
+  if (m.left().Is(0)) {
+    Node* value = m.right().node();
+    bool doZeroExt = DoZeroExtForResult(node);
+    bool canEliminateZeroExt = ProduceWord32Result(value);
+    if (doZeroExt) {
+      selector->Emit(neg_opcode,
+                     canEliminateZeroExt ? g.DefineSameAsFirst(node)
+                                         : g.DefineAsRegister(node),
+                     g.UseRegister(value),
+                     g.TempImmediate(!canEliminateZeroExt));
+    } else {
+      selector->Emit(neg_opcode, g.DefineAsRegister(node),
+                     g.UseRegister(value));
+    }
+    return true;
+  }
+  return false;
+}
+
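+// Try to replace a multiplication by a power-of-two immediate with a left
+// shift by the corresponding number of bits.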
+template <class Matcher, ArchOpcode shift_op>
+bool TryMatchShiftFromMul(InstructionSelector* selector, Node* node) {
+  S390OperandGenerator g(selector);
+  Matcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+  if (g.CanBeImmediate(right, OperandMode::kInt32Imm) &&
+      base::bits::IsPowerOfTwo(g.GetImmediate(right))) {
+    int power = 63 - base::bits::CountLeadingZeros64(g.GetImmediate(right));
+    bool doZeroExt = DoZeroExtForResult(node);
+    bool canEliminateZeroExt = ProduceWord32Result(left);
+    InstructionOperand dst = (doZeroExt && !canEliminateZeroExt &&
+                              CpuFeatures::IsSupported(DISTINCT_OPS))
+                                 ? g.DefineAsRegister(node)
+                                 : g.DefineSameAsFirst(node);
+
+    if (doZeroExt) {
+      selector->Emit(shift_op, dst, g.UseRegister(left), g.UseImmediate(power),
+                     g.TempImmediate(!canEliminateZeroExt));
+    } else {
+      selector->Emit(shift_op, dst, g.UseRegister(left), g.UseImmediate(power));
+    }
+    return true;
+  }
+  return false;
+}
+
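+// If the overflow projection of the node is used, emit the binop with an
+// overflow flags continuation; otherwise let the caller handle it.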
+template <ArchOpcode opcode>
+static inline bool TryMatchInt32OpWithOverflow(InstructionSelector* selector,
+                                               Node* node, OperandModes mode) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    VisitWord32BinOp(selector, node, opcode, mode, &cont);
+    return true;
+  }
+  return false;
+}
+
+static inline bool TryMatchInt32AddWithOverflow(InstructionSelector* selector,
+                                                Node* node) {
+  return TryMatchInt32OpWithOverflow<kS390_Add32>(selector, node,
+                                                  AddOperandMode);
+}
+
+static inline bool TryMatchInt32SubWithOverflow(InstructionSelector* selector,
+                                                Node* node) {
+  return TryMatchInt32OpWithOverflow<kS390_Sub32>(selector, node,
+                                                  SubOperandMode);
+}
+
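+// For Int32MulWithOverflow: with MISC_INSTR_EXT2 the 32-bit multiply sets the
+// overflow flag directly, otherwise kS390_Mul32WithOverflow is emitted and
+// overflow is detected via a kNotEqual continuation.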
+static inline bool TryMatchInt32MulWithOverflow(InstructionSelector* selector,
+                                                Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    if (CpuFeatures::IsSupported(MISC_INSTR_EXT2)) {
+      TryMatchInt32OpWithOverflow<kS390_Mul32>(
+          selector, node, OperandMode::kAllowRRR | OperandMode::kAllowRM);
+    } else {
+      FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
+      VisitWord32BinOp(selector, node, kS390_Mul32WithOverflow,
+                       OperandMode::kInt32Imm | OperandMode::kAllowDistinctOps,
+                       &cont);
+    }
+    return true;
+  }
+  return TryMatchShiftFromMul<Int32BinopMatcher, kS390_ShiftLeft32>(selector,
+                                                                    node);
+}
+
+#if V8_TARGET_ARCH_S390X
+template <ArchOpcode opcode>
+static inline bool TryMatchInt64OpWithOverflow(InstructionSelector* selector,
+                                               Node* node, OperandModes mode) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    VisitWord64BinOp(selector, node, opcode, mode, &cont);
+    return true;
+  }
+  return false;
+}
+
+static inline bool TryMatchInt64AddWithOverflow(InstructionSelector* selector,
+                                                Node* node) {
+  return TryMatchInt64OpWithOverflow<kS390_Add64>(selector, node,
+                                                  AddOperandMode);
+}
+
+static inline bool TryMatchInt64SubWithOverflow(InstructionSelector* selector,
+                                                Node* node) {
+  return TryMatchInt64OpWithOverflow<kS390_Sub64>(selector, node,
+                                                  SubOperandMode);
+}
+#endif
+
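+// Match a Float64InsertLowWord32/Float64InsertHighWord32 pair that supplies
+// both halves of a double and combine it into a single kS390_DoubleConstruct.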
+static inline bool TryMatchDoubleConstructFromInsert(
+    InstructionSelector* selector, Node* node) {
+  S390OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Node* lo32 = nullptr;
+  Node* hi32 = nullptr;
+
+  if (node->opcode() == IrOpcode::kFloat64InsertLowWord32) {
+    lo32 = right;
+  } else if (node->opcode() == IrOpcode::kFloat64InsertHighWord32) {
+    hi32 = right;
+  } else {
+    return false;  // doesn't match
+  }
+
+  if (left->opcode() == IrOpcode::kFloat64InsertLowWord32) {
+    lo32 = left->InputAt(1);
+  } else if (left->opcode() == IrOpcode::kFloat64InsertHighWord32) {
+    hi32 = left->InputAt(1);
+  } else {
+    return false;  // doesn't match
+  }
+
+  if (!lo32 || !hi32) return false;  // doesn't match
+
+  selector->Emit(kS390_DoubleConstruct, g.DefineAsRegister(node),
+                 g.UseRegister(hi32), g.UseRegister(lo32));
+  return true;
+}
+
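+// In the op lists below, the last column is a "try extra match" callable that
+// the generated visitor runs first; "null" is the default hook that never
+// matches anything.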
+#define null ([]() { return false; })
+// TODO(john.yan): place kAllowRM where available
+#define FLOAT_UNARY_OP_LIST_32(V)                                              \
+  V(Float32, ChangeFloat32ToFloat64, kS390_Float32ToDouble,                    \
+    OperandMode::kAllowRM, null)                                               \
+  V(Float32, BitcastFloat32ToInt32, kS390_BitcastFloat32ToInt32,               \
+    OperandMode::kAllowRM, null)                                               \
+  V(Float64, TruncateFloat64ToFloat32, kS390_DoubleToFloat32,                  \
+    OperandMode::kNone, null)                                                  \
+  V(Float64, TruncateFloat64ToWord32, kArchTruncateDoubleToI,                  \
+    OperandMode::kNone, null)                                                  \
+  V(Float64, RoundFloat64ToInt32, kS390_DoubleToInt32, OperandMode::kNone,     \
+    null)                                                                      \
+  V(Float64, TruncateFloat64ToUint32, kS390_DoubleToUint32,                    \
+    OperandMode::kNone, null)                                                  \
+  V(Float64, ChangeFloat64ToInt32, kS390_DoubleToInt32, OperandMode::kNone,    \
+    null)                                                                      \
+  V(Float64, ChangeFloat64ToUint32, kS390_DoubleToUint32, OperandMode::kNone,  \
+    null)                                                                      \
+  V(Float64, Float64SilenceNaN, kS390_Float64SilenceNaN, OperandMode::kNone,   \
+    null)                                                                      \
+  V(Float32, Float32Abs, kS390_AbsFloat, OperandMode::kNone, null)             \
+  V(Float64, Float64Abs, kS390_AbsDouble, OperandMode::kNone, null)            \
+  V(Float32, Float32Sqrt, kS390_SqrtFloat, OperandMode::kNone, null)           \
+  V(Float64, Float64Sqrt, kS390_SqrtDouble, OperandMode::kNone, null)          \
+  V(Float32, Float32RoundDown, kS390_FloorFloat, OperandMode::kNone, null)     \
+  V(Float64, Float64RoundDown, kS390_FloorDouble, OperandMode::kNone, null)    \
+  V(Float32, Float32RoundUp, kS390_CeilFloat, OperandMode::kNone, null)        \
+  V(Float64, Float64RoundUp, kS390_CeilDouble, OperandMode::kNone, null)       \
+  V(Float32, Float32RoundTruncate, kS390_TruncateFloat, OperandMode::kNone,    \
+    null)                                                                      \
+  V(Float64, Float64RoundTruncate, kS390_TruncateDouble, OperandMode::kNone,   \
+    null)                                                                      \
+  V(Float64, Float64RoundTiesAway, kS390_RoundDouble, OperandMode::kNone,      \
+    null)                                                                      \
+  V(Float32, Float32Neg, kS390_NegFloat, OperandMode::kNone, null)             \
+  V(Float64, Float64Neg, kS390_NegDouble, OperandMode::kNone, null)            \
+  /* TODO(john.yan): can use kAllowRM */                                       \
+  V(Word32, Float64ExtractLowWord32, kS390_DoubleExtractLowWord32,             \
+    OperandMode::kNone, null)                                                  \
+  V(Word32, Float64ExtractHighWord32, kS390_DoubleExtractHighWord32,           \
+    OperandMode::kNone, null)
+
+#define FLOAT_BIN_OP_LIST(V)                                           \
+  V(Float32, Float32Add, kS390_AddFloat, OperandMode::kAllowRM, null)  \
+  V(Float64, Float64Add, kS390_AddDouble, OperandMode::kAllowRM, null) \
+  V(Float32, Float32Sub, kS390_SubFloat, OperandMode::kAllowRM, null)  \
+  V(Float64, Float64Sub, kS390_SubDouble, OperandMode::kAllowRM, null) \
+  V(Float32, Float32Mul, kS390_MulFloat, OperandMode::kAllowRM, null)  \
+  V(Float64, Float64Mul, kS390_MulDouble, OperandMode::kAllowRM, null) \
+  V(Float32, Float32Div, kS390_DivFloat, OperandMode::kAllowRM, null)  \
+  V(Float64, Float64Div, kS390_DivDouble, OperandMode::kAllowRM, null) \
+  V(Float32, Float32Max, kS390_MaxFloat, OperandMode::kNone, null)     \
+  V(Float64, Float64Max, kS390_MaxDouble, OperandMode::kNone, null)    \
+  V(Float32, Float32Min, kS390_MinFloat, OperandMode::kNone, null)     \
+  V(Float64, Float64Min, kS390_MinDouble, OperandMode::kNone, null)
+
+#define WORD32_UNARY_OP_LIST_32(V)                                           \
+  V(Word32, Word32Clz, kS390_Cntlz32, OperandMode::kNone, null)              \
+  V(Word32, Word32Popcnt, kS390_Popcnt32, OperandMode::kNone, null)          \
+  V(Word32, RoundInt32ToFloat32, kS390_Int32ToFloat32, OperandMode::kNone,   \
+    null)                                                                    \
+  V(Word32, RoundUint32ToFloat32, kS390_Uint32ToFloat32, OperandMode::kNone, \
+    null)                                                                    \
+  V(Word32, ChangeInt32ToFloat64, kS390_Int32ToDouble, OperandMode::kNone,   \
+    null)                                                                    \
+  V(Word32, ChangeUint32ToFloat64, kS390_Uint32ToDouble, OperandMode::kNone, \
+    null)                                                                    \
+  V(Word32, SignExtendWord8ToInt32, kS390_SignExtendWord8ToInt32,            \
+    OperandMode::kNone, null)                                                \
+  V(Word32, SignExtendWord16ToInt32, kS390_SignExtendWord16ToInt32,          \
+    OperandMode::kNone, null)                                                \
+  V(Word32, BitcastInt32ToFloat32, kS390_BitcastInt32ToFloat32,              \
+    OperandMode::kNone, null)
+
+#ifdef V8_TARGET_ARCH_S390X
+#define FLOAT_UNARY_OP_LIST(V)                                                \
+  FLOAT_UNARY_OP_LIST_32(V)                                                   \
+  V(Float64, ChangeFloat64ToUint64, kS390_DoubleToUint64, OperandMode::kNone, \
+    null)                                                                     \
+  V(Float64, ChangeFloat64ToInt64, kS390_DoubleToInt64, OperandMode::kNone,   \
+    null)                                                                     \
+  V(Float64, TruncateFloat64ToInt64, kS390_DoubleToInt64, OperandMode::kNone, \
+    null)                                                                     \
+  V(Float64, BitcastFloat64ToInt64, kS390_BitcastDoubleToInt64,               \
+    OperandMode::kNone, null)
+
+#define WORD32_UNARY_OP_LIST(V)                                             \
+  WORD32_UNARY_OP_LIST_32(V)                                                \
+  V(Word32, ChangeInt32ToInt64, kS390_SignExtendWord32ToInt64,              \
+    OperandMode::kNone, null)                                               \
+  V(Word32, SignExtendWord8ToInt64, kS390_SignExtendWord8ToInt64,           \
+    OperandMode::kNone, null)                                               \
+  V(Word32, SignExtendWord16ToInt64, kS390_SignExtendWord16ToInt64,         \
+    OperandMode::kNone, null)                                               \
+  V(Word32, SignExtendWord32ToInt64, kS390_SignExtendWord32ToInt64,         \
+    OperandMode::kNone, null)                                               \
+  V(Word32, ChangeUint32ToUint64, kS390_Uint32ToUint64, OperandMode::kNone, \
+    [&]() -> bool {                                                         \
+      if (ProduceWord32Result(node->InputAt(0))) {                          \
+        EmitIdentity(node);                                                 \
+        return true;                                                        \
+      }                                                                     \
+      return false;                                                         \
+    })
+
+#else
+#define FLOAT_UNARY_OP_LIST(V) FLOAT_UNARY_OP_LIST_32(V)
+#define WORD32_UNARY_OP_LIST(V) WORD32_UNARY_OP_LIST_32(V)
+#endif
+
+#define WORD32_BIN_OP_LIST(V)                                                  \
+  V(Word32, Int32Add, kS390_Add32, AddOperandMode, null)                       \
+  V(Word32, Int32Sub, kS390_Sub32, SubOperandMode, ([&]() {                    \
+      return TryMatchNegFromSub<Int32BinopMatcher, kS390_Neg32>(this, node);   \
+    }))                                                                        \
+  V(Word32, Int32Mul, kS390_Mul32, MulOperandMode, ([&]() {                    \
+      return TryMatchShiftFromMul<Int32BinopMatcher, kS390_ShiftLeft32>(this,  \
+                                                                        node); \
+    }))                                                                        \
+  V(Word32, Int32AddWithOverflow, kS390_Add32, AddOperandMode,                 \
+    ([&]() { return TryMatchInt32AddWithOverflow(this, node); }))              \
+  V(Word32, Int32SubWithOverflow, kS390_Sub32, SubOperandMode,                 \
+    ([&]() { return TryMatchInt32SubWithOverflow(this, node); }))              \
+  V(Word32, Int32MulWithOverflow, kS390_Mul32, MulOperandMode,                 \
+    ([&]() { return TryMatchInt32MulWithOverflow(this, node); }))              \
+  V(Word32, Int32MulHigh, kS390_MulHigh32,                                     \
+    OperandMode::kInt32Imm | OperandMode::kAllowDistinctOps, null)             \
+  V(Word32, Uint32MulHigh, kS390_MulHighU32,                                   \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word32, Int32Div, kS390_Div32,                                             \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word32, Uint32Div, kS390_DivU32,                                           \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word32, Int32Mod, kS390_Mod32,                                             \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word32, Uint32Mod, kS390_ModU32,                                           \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word32, Word32Ror, kS390_RotRight32,                                       \
+    OperandMode::kAllowRI | OperandMode::kAllowRRR | OperandMode::kAllowRRI |  \
+        OperandMode::kShift32Imm,                                              \
+    null)                                                                      \
+  V(Word32, Word32And, kS390_And32, And32OperandMode, null)                    \
+  V(Word32, Word32Or, kS390_Or32, Or32OperandMode, null)                       \
+  V(Word32, Word32Xor, kS390_Xor32, Xor32OperandMode, null)                    \
+  V(Word32, Word32Shl, kS390_ShiftLeft32, Shift32OperandMode, null)            \
+  V(Word32, Word32Shr, kS390_ShiftRight32, Shift32OperandMode, null)           \
+  V(Word32, Word32Sar, kS390_ShiftRightArith32, Shift32OperandMode,            \
+    [&]() { return TryMatchSignExtInt16OrInt8FromWord32Sar(this, node); })     \
+  V(Word32, Float64InsertLowWord32, kS390_DoubleInsertLowWord32,               \
+    OperandMode::kAllowRRR,                                                    \
+    [&]() -> bool { return TryMatchDoubleConstructFromInsert(this, node); })   \
+  V(Word32, Float64InsertHighWord32, kS390_DoubleInsertHighWord32,             \
+    OperandMode::kAllowRRR,                                                    \
+    [&]() -> bool { return TryMatchDoubleConstructFromInsert(this, node); })
+
+#define WORD64_UNARY_OP_LIST(V)                                              \
+  V(Word64, Word64Popcnt, kS390_Popcnt64, OperandMode::kNone, null)          \
+  V(Word64, Word64Clz, kS390_Cntlz64, OperandMode::kNone, null)              \
+  V(Word64, TruncateInt64ToInt32, kS390_Int64ToInt32, OperandMode::kNone,    \
+    null)                                                                    \
+  V(Word64, RoundInt64ToFloat32, kS390_Int64ToFloat32, OperandMode::kNone,   \
+    null)                                                                    \
+  V(Word64, RoundInt64ToFloat64, kS390_Int64ToDouble, OperandMode::kNone,    \
+    null)                                                                    \
+  V(Word64, ChangeInt64ToFloat64, kS390_Int64ToDouble, OperandMode::kNone,   \
+    null)                                                                    \
+  V(Word64, RoundUint64ToFloat32, kS390_Uint64ToFloat32, OperandMode::kNone, \
+    null)                                                                    \
+  V(Word64, RoundUint64ToFloat64, kS390_Uint64ToDouble, OperandMode::kNone,  \
+    null)                                                                    \
+  V(Word64, BitcastInt64ToFloat64, kS390_BitcastInt64ToDouble,               \
+    OperandMode::kNone, null)
+
+#define WORD64_BIN_OP_LIST(V)                                                  \
+  V(Word64, Int64Add, kS390_Add64, AddOperandMode, null)                       \
+  V(Word64, Int64Sub, kS390_Sub64, SubOperandMode, ([&]() {                    \
+      return TryMatchNegFromSub<Int64BinopMatcher, kS390_Neg64>(this, node);   \
+    }))                                                                        \
+  V(Word64, Int64AddWithOverflow, kS390_Add64, AddOperandMode,                 \
+    ([&]() { return TryMatchInt64AddWithOverflow(this, node); }))              \
+  V(Word64, Int64SubWithOverflow, kS390_Sub64, SubOperandMode,                 \
+    ([&]() { return TryMatchInt64SubWithOverflow(this, node); }))              \
+  V(Word64, Int64Mul, kS390_Mul64, MulOperandMode, ([&]() {                    \
+      return TryMatchShiftFromMul<Int64BinopMatcher, kS390_ShiftLeft64>(this,  \
+                                                                        node); \
+    }))                                                                        \
+  V(Word64, Int64Div, kS390_Div64,                                             \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word64, Uint64Div, kS390_DivU64,                                           \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word64, Int64Mod, kS390_Mod64,                                             \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word64, Uint64Mod, kS390_ModU64,                                           \
+    OperandMode::kAllowRRM | OperandMode::kAllowRRR, null)                     \
+  V(Word64, Word64Sar, kS390_ShiftRightArith64, Shift64OperandMode, null)      \
+  V(Word64, Word64Ror, kS390_RotRight64, Shift64OperandMode, null)             \
+  V(Word64, Word64Or, kS390_Or64, Or64OperandMode, null)                       \
+  V(Word64, Word64Xor, kS390_Xor64, Xor64OperandMode, null)
+
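+// Expand the op lists above into Visit* methods: the per-entry matcher is
+// tried first, and the generic UnaryOp/BinOp visitor is used only when it
+// returns false.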
+#define DECLARE_UNARY_OP(type, name, op, mode, try_extra) \
+  void InstructionSelector::Visit##name(Node* node) {     \
+    if (std::function<bool()>(try_extra)()) return;       \
+    Visit##type##UnaryOp(this, node, op, mode);           \
+  }
+
+#define DECLARE_BIN_OP(type, name, op, mode, try_extra) \
+  void InstructionSelector::Visit##name(Node* node) {   \
+    if (std::function<bool()>(try_extra)()) return;     \
+    Visit##type##BinOp(this, node, op, mode);           \
+  }
+
+WORD32_BIN_OP_LIST(DECLARE_BIN_OP)
+WORD32_UNARY_OP_LIST(DECLARE_UNARY_OP)
+FLOAT_UNARY_OP_LIST(DECLARE_UNARY_OP)
+FLOAT_BIN_OP_LIST(DECLARE_BIN_OP)
+
+#if V8_TARGET_ARCH_S390X
+WORD64_UNARY_OP_LIST(DECLARE_UNARY_OP)
+WORD64_BIN_OP_LIST(DECLARE_BIN_OP)
+#endif
+
+#undef DECLARE_BIN_OP
+#undef DECLARE_UNARY_OP
+#undef WORD64_BIN_OP_LIST
+#undef WORD64_UNARY_OP_LIST
+#undef WORD32_BIN_OP_LIST
+#undef WORD32_UNARY_OP_LIST
+#undef FLOAT_UNARY_OP_LIST
+#undef WORD32_UNARY_OP_LIST_32
+#undef FLOAT_BIN_OP_LIST
+#undef FLOAT_UNARY_OP_LIST_32
+#undef null
+
+#if V8_TARGET_ARCH_S390X
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  VisitTryTruncateDouble(this, kS390_Float32ToInt64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  VisitTryTruncateDouble(this, kS390_DoubleToInt64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  VisitTryTruncateDouble(this, kS390_Float32ToUint64, node);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  VisitTryTruncateDouble(this, kS390_DoubleToUint64, node);
+}
+
+#endif
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  DCHECK(SmiValuesAre31Bits());
+  DCHECK(COMPRESS_POINTERS_BOOL);
+  EmitIdentity(node);
+}
+
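+// Float64 modulus and the IEEE-754 unary/binary operations are lowered to
+// runtime calls, with operands and results in the fixed registers d1/d2.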
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kS390_ModDouble, g.DefineAsFixed(node, d1),
+       g.UseFixed(node->InputAt(0), d1), g.UseFixed(node->InputAt(1), d2))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  S390OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d1), g.UseFixed(node->InputAt(0), d1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  S390OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, d1), g.UseFixed(node->InputAt(0), d1),
+       g.UseFixed(node->InputAt(1), d2))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat32RoundTiesEven(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat64RoundTiesEven(Node* node) {
+  UNREACHABLE();
+}
+
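+// Returns true when the continuation's condition is unsigned, i.e. the
+// comparison has to be emitted as a logical (unsigned) compare.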
+static bool CompareLogical(FlagsContinuation* cont) {
+  switch (cont->condition()) {
+    case kUnsignedLessThan:
+    case kUnsignedGreaterThanOrEqual:
+    case kUnsignedLessThanOrEqual:
+    case kUnsignedGreaterThan:
+      return true;
+    default:
+      return false;
+  }
+  UNREACHABLE();
+}
+
+namespace {
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+void VisitLoadAndTest(InstructionSelector* selector, InstructionCode opcode,
+                      Node* node, Node* value, FlagsContinuation* cont,
+                      bool discard_output = false);
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont,
+                      OperandModes immediate_mode) {
+  S390OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  DCHECK(IrOpcode::IsComparisonOpcode(node->opcode()) ||
+         node->opcode() == IrOpcode::kInt32Sub ||
+         node->opcode() == IrOpcode::kInt64Sub);
+
+  InstructionOperand inputs[8];
+  InstructionOperand outputs[1];
+  size_t input_count = 0;
+  size_t output_count = 0;
+
+  // If one of the two inputs is an immediate, make sure it's on the right, or
+  // if one of the two inputs is a memory operand, make sure it's on the left.
+  int effect_level = selector->GetEffectLevel(node, cont);
+
+  if ((!g.CanBeImmediate(right, immediate_mode) &&
+       g.CanBeImmediate(left, immediate_mode)) ||
+      (!g.CanBeMemoryOperand(opcode, node, right, effect_level) &&
+       g.CanBeMemoryOperand(opcode, node, left, effect_level))) {
+    if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
+    std::swap(left, right);
+  }
+
+  // Check whether this is a comparison with zero.
+  if (g.CanBeImmediate(right, immediate_mode) && g.GetImmediate(right) == 0) {
+    DCHECK(opcode == kS390_Cmp32 || opcode == kS390_Cmp64);
+    ArchOpcode load_and_test = (opcode == kS390_Cmp32)
+                                   ? kS390_LoadAndTestWord32
+                                   : kS390_LoadAndTestWord64;
+    return VisitLoadAndTest(selector, load_and_test, node, left, cont, true);
+  }
+
+  inputs[input_count++] = g.UseRegister(left);
+  if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
+    // Generate a memory operand.
+    AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
+        right, inputs, &input_count, OpcodeImmMode(opcode));
+    opcode |= AddressingModeField::encode(addressing_mode);
+  } else if (g.CanBeImmediate(right, immediate_mode)) {
+    inputs[input_count++] = g.UseImmediate(right);
+  } else {
+    inputs[input_count++] = g.UseAnyExceptImmediate(right);
+  }
+
+  DCHECK(input_count <= 8 && output_count <= 1);
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+void VisitWord32Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  OperandModes mode =
+      (CompareLogical(cont) ? OperandMode::kUint32Imm : OperandMode::kInt32Imm);
+  VisitWordCompare(selector, node, kS390_Cmp32, cont, mode);
+}
+
+#if V8_TARGET_ARCH_S390X
+void VisitWord64Compare(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  OperandModes mode =
+      (CompareLogical(cont) ? OperandMode::kUint32Imm : OperandMode::kInt32Imm);
+  VisitWordCompare(selector, node, kS390_Cmp64, cont, mode);
+}
+#endif
+
+// Shared routine for multiple float32 compare operations.
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  VisitWordCompare(selector, node, kS390_CmpFloat, cont, OperandMode::kNone);
+}
+
+// Shared routine for multiple float64 compare operations.
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  VisitWordCompare(selector, node, kS390_CmpDouble, cont, OperandMode::kNone);
+}
+
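+// Emit a test-under-mask (kS390_Tst32/kS390_Tst64) for a Word32And/Word64And
+// whose result is only consumed by a comparison against zero.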
+void VisitTestUnderMask(InstructionSelector* selector, Node* node,
+                        FlagsContinuation* cont) {
+  DCHECK(node->opcode() == IrOpcode::kWord32And ||
+         node->opcode() == IrOpcode::kWord64And);
+  ArchOpcode opcode =
+      (node->opcode() == IrOpcode::kWord32And) ? kS390_Tst32 : kS390_Tst64;
+  S390OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (!g.CanBeImmediate(right, OperandMode::kUint32Imm) &&
+      g.CanBeImmediate(left, OperandMode::kUint32Imm)) {
+    std::swap(left, right);
+  }
+  VisitCompare(selector, opcode, g.UseRegister(left),
+               g.UseOperand(right, OperandMode::kUint32Imm), cont);
+}
+
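+// Emit a load-and-test that sets the condition code from 'value'. When the
+// value can be folded as a memory operand, the loaded result is also defined
+// as an output unless 'discard_output' is set.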
+void VisitLoadAndTest(InstructionSelector* selector, InstructionCode opcode,
+                      Node* node, Node* value, FlagsContinuation* cont,
+                      bool discard_output) {
+  static_assert(kS390_LoadAndTestFloat64 - kS390_LoadAndTestWord32 == 3,
+                "The LoadAndTest opcodes must be consecutive.");
+
+  // TODO(john.yan): Add support for Float32/Float64.
+  DCHECK(opcode >= kS390_LoadAndTestWord32 &&
+         opcode <= kS390_LoadAndTestWord64);
+
+  S390OperandGenerator g(selector);
+  InstructionOperand inputs[8];
+  InstructionOperand outputs[2];
+  size_t input_count = 0;
+  size_t output_count = 0;
+  bool use_value = false;
+
+  int effect_level = selector->GetEffectLevel(node, cont);
+
+  if (g.CanBeMemoryOperand(opcode, node, value, effect_level)) {
+    // Generate a memory operand.
+    AddressingMode addressing_mode =
+        g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
+    opcode |= AddressingModeField::encode(addressing_mode);
+  } else {
+    inputs[input_count++] = g.UseAnyExceptImmediate(value);
+    use_value = true;
+  }
+
+  if (!discard_output && !use_value) {
+    outputs[output_count++] = g.DefineAsRegister(value);
+  }
+
+  DCHECK(input_count <= 8 && output_count <= 2);
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+}  // namespace
+
+// Shared routine for word comparisons against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  FlagsCondition fc = cont->condition();
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal: {
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        Int32BinopMatcher m(value);
+        if (m.right().Is(0)) {
+          // Try to combine the branch with a comparison.
+          Node* const user = m.node();
+          Node* const value = m.left().node();
+          if (CanCover(user, value)) {
+            switch (value->opcode()) {
+              case IrOpcode::kInt32Sub:
+                return VisitWord32Compare(this, value, cont);
+              case IrOpcode::kWord32And:
+                return VisitTestUnderMask(this, value, cont);
+              default:
+                break;
+            }
+          }
+        }
+        return VisitWord32Compare(this, value, cont);
+      }
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord32Compare(this, value, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord32Compare(this, value, cont);
+#if V8_TARGET_ARCH_S390X
+      case IrOpcode::kWord64Equal: {
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        Int64BinopMatcher m(value);
+        if (m.right().Is(0)) {
+          // Try to combine the branch with a comparison.
+          Node* const user = m.node();
+          Node* const value = m.left().node();
+          if (CanCover(user, value)) {
+            switch (value->opcode()) {
+              case IrOpcode::kInt64Sub:
+                return VisitWord64Compare(this, value, cont);
+              case IrOpcode::kWord64And:
+                return VisitTestUnderMask(this, value, cont);
+              default:
+                break;
+            }
+          }
+        }
+        return VisitWord64Compare(this, value, cont);
+      }
+      case IrOpcode::kInt64LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kInt64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWord64Compare(this, value, cont);
+      case IrOpcode::kUint64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWord64Compare(this, value, cont);
+#endif
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of the
+          // actual value, or was already defined, which means it is scheduled
+          // *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord32BinOp(this, node, kS390_Add32, AddOperandMode,
+                                        cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord32BinOp(this, node, kS390_Sub32, SubOperandMode,
+                                        cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                if (CpuFeatures::IsSupported(MISC_INSTR_EXT2)) {
+                  cont->OverwriteAndNegateIfEqual(kOverflow);
+                  return VisitWord32BinOp(
+                      this, node, kS390_Mul32,
+                      OperandMode::kAllowRRR | OperandMode::kAllowRM, cont);
+                } else {
+                  cont->OverwriteAndNegateIfEqual(kNotEqual);
+                  return VisitWord32BinOp(
+                      this, node, kS390_Mul32WithOverflow,
+                      OperandMode::kInt32Imm | OperandMode::kAllowDistinctOps,
+                      cont);
+                }
+              case IrOpcode::kInt32AbsWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord32UnaryOp(this, node, kS390_Abs32,
+                                          OperandMode::kNone, cont);
+#if V8_TARGET_ARCH_S390X
+              case IrOpcode::kInt64AbsWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord64UnaryOp(this, node, kS390_Abs64,
+                                          OperandMode::kNone, cont);
+              case IrOpcode::kInt64AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord64BinOp(this, node, kS390_Add64, AddOperandMode,
+                                        cont);
+              case IrOpcode::kInt64SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitWord64BinOp(this, node, kS390_Sub64, SubOperandMode,
+                                        cont);
+#endif
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Sub:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord32Compare(this, value, cont);
+        break;
+      case IrOpcode::kWord32And:
+        return VisitTestUnderMask(this, value, cont);
+      case IrOpcode::kLoad: {
+        LoadRepresentation load_rep = LoadRepresentationOf(value->op());
+        switch (load_rep.representation()) {
+          case MachineRepresentation::kWord32:
+            return VisitLoadAndTest(this, kS390_LoadAndTestWord32, user, value,
+                                    cont);
+          default:
+            break;
+        }
+        break;
+      }
+      case IrOpcode::kInt32Add:
+        // can't handle overflow case.
+        break;
+      case IrOpcode::kWord32Or:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord32BinOp(this, value, kS390_Or32, Or32OperandMode,
+                                  cont);
+        break;
+      case IrOpcode::kWord32Xor:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord32BinOp(this, value, kS390_Xor32, Xor32OperandMode,
+                                  cont);
+        break;
+      case IrOpcode::kWord32Sar:
+      case IrOpcode::kWord32Shl:
+      case IrOpcode::kWord32Shr:
+      case IrOpcode::kWord32Ror:
+        // doesn't generate cc, so ignore.
+        break;
+#if V8_TARGET_ARCH_S390X
+      case IrOpcode::kInt64Sub:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord64Compare(this, value, cont);
+        break;
+      case IrOpcode::kWord64And:
+        return VisitTestUnderMask(this, value, cont);
+      case IrOpcode::kInt64Add:
+        // can't handle overflow case.
+        break;
+      case IrOpcode::kWord64Or:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord64BinOp(this, value, kS390_Or64, Or64OperandMode,
+                                  cont);
+        break;
+      case IrOpcode::kWord64Xor:
+        if (fc == kNotEqual || fc == kEqual)
+          return VisitWord64BinOp(this, value, kS390_Xor64, Xor64OperandMode,
+                                  cont);
+        break;
+      case IrOpcode::kWord64Sar:
+      case IrOpcode::kWord64Shl:
+      case IrOpcode::kWord64Shr:
+      case IrOpcode::kWord64Ror:
+        // doesn't generate cc, so ignore.
+        break;
+#endif
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Branch could not be combined with a compare, emit LoadAndTest
+  VisitLoadAndTest(this, kS390_LoadAndTestWord32, user, value, cont, true);
+}
+
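+// Lower Switch to either a jump table or a binary-search tree of conditional
+// jumps, based on a simple space/time cost comparison of the two.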
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  S390OperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
+    if (sw.case_count() > 0 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = value_operand;
+      if (sw.min_value()) {
+        index_operand = g.TempRegister();
+        Emit(kS390_Lay | AddressingModeField::encode(kMode_MRI), index_operand,
+             value_operand, g.TempImmediate(-sw.min_value()));
+      }
+#if V8_TARGET_ARCH_S390X
+      InstructionOperand index_operand_zero_ext = g.TempRegister();
+      Emit(kS390_Uint32ToUint64, index_operand_zero_ext, index_operand);
+      index_operand = index_operand_zero_ext;
+#endif
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitLoadAndTest(this, kS390_LoadAndTestWord32, m.node(),
+                            m.left().node(), &cont, true);
+  }
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord32Compare(this, node, &cont);
+}
+
+#if V8_TARGET_ARCH_S390X
+void InstructionSelector::VisitWord64Equal(Node* const node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int64BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    return VisitLoadAndTest(this, kS390_LoadAndTestWord64, m.node(),
+                            m.left().node(), &cont, true);
+  }
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWord64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWord64Compare(this, node, &cont);
+}
+#endif
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) {
+  UNIMPLEMENTED();
+}
+
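+// Prepare outgoing call arguments: C calls poke their arguments into the
+// existing frame, other calls claim stack space first. Simd128 and Float64
+// values may occupy more than one pointer-sized slot (see INPUT_SWITCH).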
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  S390OperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    int slot = kStackFrameExtraParamSlot;
+    for (PushParameter input : (*arguments)) {
+      if (input.node == nullptr) continue;
+      Emit(kS390_StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+           g.TempImmediate(slot));
+      ++slot;
+    }
+  } else {
+    // Push any stack arguments.
+    int num_slots = 0;
+    int slot = 0;
+
+#define INPUT_SWITCH(param)                            \
+  switch (input.location.GetType().representation()) { \
+    case MachineRepresentation::kSimd128:              \
+      param += kSimd128Size / kSystemPointerSize;      \
+      break;                                           \
+    case MachineRepresentation::kFloat64:              \
+      param += kDoubleSize / kSystemPointerSize;       \
+      break;                                           \
+    default:                                           \
+      param += 1;                                      \
+      break;                                           \
+  }
+    for (PushParameter input : *arguments) {
+      if (input.node == nullptr) continue;
+      INPUT_SWITCH(num_slots)
+    }
+    Emit(kS390_StackClaim, g.NoOutput(), g.TempImmediate(num_slots));
+    for (PushParameter input : *arguments) {
+      // Skip any alignment holes in pushed nodes.
+      if (input.node) {
+        Emit(kS390_StoreToStackSlot, g.NoOutput(), g.UseRegister(input.node),
+             g.TempImmediate(slot));
+        INPUT_SWITCH(slot)
+      }
+    }
+#undef INPUT_SWITCH
+    DCHECK_EQ(num_slots, slot);
+  }
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kArchNop, g.NoOutput());
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return false; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
+         load_rep.representation() == MachineRepresentation::kWord16 ||
+         load_rep.representation() == MachineRepresentation::kWord32);
+  USE(load_rep);
+  VisitLoad(node);
+}
+
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  VisitGeneralStore(this, node, rep);
+}
+
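+// Shared lowering for 32- and 64-bit atomic exchange: base, index and value
+// are placed in unique registers and addressed with the MRR mode.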
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  S390OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  AddressingMode addressing_mode = kMode_MRR;
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+  inputs[input_count++] = g.UseUniqueRegister(index);
+  inputs[input_count++] = g.UseUniqueRegister(value);
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(node);
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  ArchOpcode opcode;
+  MachineType type = AtomicOpType(node->op());
+  if (type == MachineType::Uint8()) {
+    opcode = kS390_Word64AtomicExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kS390_Word64AtomicExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kS390_Word64AtomicExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kS390_Word64AtomicExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
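+// Shared lowering for atomic compare-exchange: the result is defined in the
+// same register as the expected old value, as required by compare-and-swap.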
+void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node,
+                                ArchOpcode opcode) {
+  S390OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(old_value);
+  inputs[input_count++] = g.UseUniqueRegister(new_value);
+  inputs[input_count++] = g.UseUniqueRegister(base);
+
+  AddressingMode addressing_mode;
+  if (g.CanBeImmediate(index, OperandMode::kInt20Imm)) {
+    inputs[input_count++] = g.UseImmediate(index);
+    addressing_mode = kMode_MRI;
+  } else {
+    inputs[input_count++] = g.UseUniqueRegister(index);
+    addressing_mode = kMode_MRR;
+  }
+
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineSameAsFirst(node);
+
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, output_count, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Uint8()) {
+    opcode = kS390_Word64AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kS390_Word64AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kS390_Word64AtomicCompareExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kS390_Word64AtomicCompareExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
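+// Shared lowering for atomic read-modify-write binops (Add/Sub/And/Or/Xor),
+// using one temp register and an immediate or register index.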
+void VisitAtomicBinop(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  S390OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  inputs[input_count++] = g.UseUniqueRegister(base);
+
+  AddressingMode addressing_mode;
+  if (g.CanBeImmediate(index, OperandMode::kInt20Imm)) {
+    inputs[input_count++] = g.UseImmediate(index);
+    addressing_mode = kMode_MRI;
+  } else {
+    inputs[input_count++] = g.UseUniqueRegister(index);
+    addressing_mode = kMode_MRR;
+  }
+
+  inputs[input_count++] = g.UseUniqueRegister(value);
+
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  InstructionOperand temps[1];
+  size_t temp_count = 0;
+  temps[temp_count++] = g.TempRegister();
+
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, output_count, outputs, input_count, inputs, temp_count,
+                 temps);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord64AtomicBinaryOperation(
+    Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode word32_op,
+    ArchOpcode word64_op) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+
+  if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else if (type == MachineType::Uint64()) {
+    opcode = word64_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC64_BINOP(op)                                             \
+  void InstructionSelector::VisitWord64Atomic##op(Node* node) {              \
+    VisitWord64AtomicBinaryOperation(                                        \
+        node, kS390_Word64Atomic##op##Uint8, kS390_Word64Atomic##op##Uint16, \
+        kS390_Word64Atomic##op##Uint32, kS390_Word64Atomic##op##Uint64);     \
+  }
+VISIT_ATOMIC64_BINOP(Add)
+VISIT_ATOMIC64_BINOP(Sub)
+VISIT_ATOMIC64_BINOP(And)
+VISIT_ATOMIC64_BINOP(Or)
+VISIT_ATOMIC64_BINOP(Xor)
+#undef VISIT_ATOMIC64_BINOP
+
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  USE(load_rep);
+  VisitLoad(node);
+}
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  VisitGeneralStore(this, node, rep);
+}
+
+#define SIMD_TYPES(V) \
+  V(F64x2)            \
+  V(F32x4)            \
+  V(I64x2)            \
+  V(I32x4)            \
+  V(I16x8)            \
+  V(I8x16)
+
+#define SIMD_BINOP_LIST(V) \
+  V(F64x2Add)              \
+  V(F64x2Sub)              \
+  V(F64x2Mul)              \
+  V(F64x2Div)              \
+  V(F64x2Eq)               \
+  V(F64x2Ne)               \
+  V(F64x2Lt)               \
+  V(F64x2Le)               \
+  V(F64x2Min)              \
+  V(F64x2Max)              \
+  V(F32x4Add)              \
+  V(F32x4AddHoriz)         \
+  V(F32x4Sub)              \
+  V(F32x4Mul)              \
+  V(F32x4Eq)               \
+  V(F32x4Ne)               \
+  V(F32x4Lt)               \
+  V(F32x4Le)               \
+  V(F32x4Div)              \
+  V(F32x4Min)              \
+  V(F32x4Max)              \
+  V(I64x2Add)              \
+  V(I64x2Sub)              \
+  V(I64x2Mul)              \
+  V(I64x2Eq)               \
+  V(I32x4Add)              \
+  V(I32x4AddHoriz)         \
+  V(I32x4Sub)              \
+  V(I32x4Mul)              \
+  V(I32x4MinS)             \
+  V(I32x4MinU)             \
+  V(I32x4MaxS)             \
+  V(I32x4MaxU)             \
+  V(I32x4Eq)               \
+  V(I32x4Ne)               \
+  V(I32x4GtS)              \
+  V(I32x4GeS)              \
+  V(I32x4GtU)              \
+  V(I32x4GeU)              \
+  V(I32x4DotI16x8S)        \
+  V(I16x8Add)              \
+  V(I16x8AddHoriz)         \
+  V(I16x8Sub)              \
+  V(I16x8Mul)              \
+  V(I16x8MinS)             \
+  V(I16x8MinU)             \
+  V(I16x8MaxS)             \
+  V(I16x8MaxU)             \
+  V(I16x8Eq)               \
+  V(I16x8Ne)               \
+  V(I16x8GtS)              \
+  V(I16x8GeS)              \
+  V(I16x8GtU)              \
+  V(I16x8GeU)              \
+  V(I16x8SConvertI32x4)    \
+  V(I16x8UConvertI32x4)    \
+  V(I16x8AddSatS)          \
+  V(I16x8SubSatS)          \
+  V(I16x8AddSatU)          \
+  V(I16x8SubSatU)          \
+  V(I16x8RoundingAverageU) \
+  V(I8x16Add)              \
+  V(I8x16Sub)              \
+  V(I8x16Mul)              \
+  V(I8x16MinS)             \
+  V(I8x16MinU)             \
+  V(I8x16MaxS)             \
+  V(I8x16MaxU)             \
+  V(I8x16Eq)               \
+  V(I8x16Ne)               \
+  V(I8x16GtS)              \
+  V(I8x16GeS)              \
+  V(I8x16GtU)              \
+  V(I8x16GeU)              \
+  V(I8x16SConvertI16x8)    \
+  V(I8x16UConvertI16x8)    \
+  V(I8x16AddSatS)          \
+  V(I8x16SubSatS)          \
+  V(I8x16AddSatU)          \
+  V(I8x16SubSatU)          \
+  V(I8x16RoundingAverageU) \
+  V(S128And)               \
+  V(S128Or)                \
+  V(S128Xor)               \
+  V(S128AndNot)
+
+#define SIMD_UNOP_LIST(V)   \
+  V(F64x2Abs)               \
+  V(F64x2Neg)               \
+  V(F64x2Sqrt)              \
+  V(F64x2Ceil)              \
+  V(F64x2Floor)             \
+  V(F64x2Trunc)             \
+  V(F64x2NearestInt)        \
+  V(F32x4Abs)               \
+  V(F32x4Neg)               \
+  V(F32x4RecipApprox)       \
+  V(F32x4RecipSqrtApprox)   \
+  V(F32x4Sqrt)              \
+  V(F32x4Ceil)              \
+  V(F32x4Floor)             \
+  V(F32x4Trunc)             \
+  V(F32x4NearestInt)        \
+  V(I64x2Neg)               \
+  V(I16x8Abs)               \
+  V(I32x4Neg)               \
+  V(I32x4SConvertI16x8Low)  \
+  V(I32x4SConvertI16x8High) \
+  V(I32x4UConvertI16x8Low)  \
+  V(I32x4UConvertI16x8High) \
+  V(I32x4Abs)               \
+  V(I16x8Neg)               \
+  V(I16x8SConvertI8x16Low)  \
+  V(I16x8SConvertI8x16High) \
+  V(I16x8UConvertI8x16Low)  \
+  V(I16x8UConvertI8x16High) \
+  V(I8x16Neg)               \
+  V(I8x16Abs)               \
+  V(S128Not)
+
+#define SIMD_SHIFT_LIST(V) \
+  V(I64x2Shl)              \
+  V(I64x2ShrS)             \
+  V(I64x2ShrU)             \
+  V(I32x4Shl)              \
+  V(I32x4ShrS)             \
+  V(I32x4ShrU)             \
+  V(I16x8Shl)              \
+  V(I16x8ShrS)             \
+  V(I16x8ShrU)             \
+  V(I8x16Shl)              \
+  V(I8x16ShrS)             \
+  V(I8x16ShrU)
+
+#define SIMD_BOOL_LIST(V) \
+  V(V32x4AnyTrue)         \
+  V(V16x8AnyTrue)         \
+  V(V8x16AnyTrue)         \
+  V(V32x4AllTrue)         \
+  V(V16x8AllTrue)         \
+  V(V8x16AllTrue)
+
+#define SIMD_CONVERSION_LIST(V) \
+  V(I32x4SConvertF32x4)         \
+  V(I32x4UConvertF32x4)         \
+  V(F32x4SConvertI32x4)         \
+  V(F32x4UConvertI32x4)
+
+#define SIMD_VISIT_SPLAT(Type)                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) { \
+    S390OperandGenerator g(this);                            \
+    Emit(kS390_##Type##Splat, g.DefineAsRegister(node),      \
+         g.UseRegister(node->InputAt(0)));                   \
+  }
+SIMD_TYPES(SIMD_VISIT_SPLAT)
+#undef SIMD_VISIT_SPLAT
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign)                              \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
+    S390OperandGenerator g(this);                                        \
+    int32_t lane = OpParameter<int32_t>(node->op());                     \
+    Emit(kS390_##Type##ExtractLane##Sign, g.DefineAsRegister(node),      \
+         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));         \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, )
+SIMD_VISIT_EXTRACT_LANE(F32x4, )
+SIMD_VISIT_EXTRACT_LANE(I64x2, )
+SIMD_VISIT_EXTRACT_LANE(I32x4, )
+SIMD_VISIT_EXTRACT_LANE(I16x8, U)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+#define SIMD_VISIT_REPLACE_LANE(Type)                              \
+  void InstructionSelector::Visit##Type##ReplaceLane(Node* node) { \
+    S390OperandGenerator g(this);                                  \
+    int32_t lane = OpParameter<int32_t>(node->op());               \
+    Emit(kS390_##Type##ReplaceLane, g.DefineAsRegister(node),      \
+         g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),    \
+         g.UseRegister(node->InputAt(1)));                         \
+  }
+SIMD_TYPES(SIMD_VISIT_REPLACE_LANE)
+#undef SIMD_VISIT_REPLACE_LANE
+
+#define SIMD_VISIT_BINOP(Opcode)                                          \
+  void InstructionSelector::Visit##Opcode(Node* node) {                   \
+    S390OperandGenerator g(this);                                         \
+    InstructionOperand temps[] = {g.TempSimd128Register(),                \
+                                  g.TempSimd128Register()};               \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),                        \
+         g.UseUniqueRegister(node->InputAt(0)),                           \
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
+  }
+SIMD_BINOP_LIST(SIMD_VISIT_BINOP)
+#undef SIMD_VISIT_BINOP
+#undef SIMD_BINOP_LIST
+
+#define SIMD_VISIT_UNOP(Opcode)                                     \
+  void InstructionSelector::Visit##Opcode(Node* node) {             \
+    S390OperandGenerator g(this);                                   \
+    InstructionOperand temps[] = {g.TempSimd128Register()};         \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),                  \
+         g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_UNOP_LIST(SIMD_VISIT_UNOP)
+#undef SIMD_VISIT_UNOP
+#undef SIMD_UNOP_LIST
+
+#define SIMD_VISIT_SHIFT(Opcode)                        \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    S390OperandGenerator g(this);                       \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),      \
+         g.UseUniqueRegister(node->InputAt(0)),         \
+         g.UseUniqueRegister(node->InputAt(1)));        \
+  }
+SIMD_SHIFT_LIST(SIMD_VISIT_SHIFT)
+#undef SIMD_VISIT_SHIFT
+#undef SIMD_SHIFT_LIST
+
+#define SIMD_VISIT_BOOL(Opcode)                                           \
+  void InstructionSelector::Visit##Opcode(Node* node) {                   \
+    S390OperandGenerator g(this);                                         \
+    InstructionOperand temps[] = {g.TempRegister()};                      \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),                        \
+         g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_BOOL_LIST(SIMD_VISIT_BOOL)
+#undef SIMD_VISIT_BOOL
+#undef SIMD_BOOL_LIST
+
+#define SIMD_VISIT_CONVERSION(Opcode)                               \
+  void InstructionSelector::Visit##Opcode(Node* node) {             \
+    S390OperandGenerator g(this);                                   \
+    InstructionOperand temps[] = {g.TempSimd128Register()};         \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),                  \
+         g.UseRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_CONVERSION_LIST(SIMD_VISIT_CONVERSION)
+#undef SIMD_VISIT_CONVERSION
+#undef SIMD_CONVERSION_LIST
+
+#define SIMD_VISIT_QFMOP(Opcode)                        \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    S390OperandGenerator g(this);                       \
+    Emit(kS390_##Opcode, g.DefineSameAsFirst(node),     \
+         g.UseUniqueRegister(node->InputAt(0)),         \
+         g.UseUniqueRegister(node->InputAt(1)),         \
+         g.UseRegister(node->InputAt(2)));              \
+  }
+SIMD_VISIT_QFMOP(F64x2Qfma)
+SIMD_VISIT_QFMOP(F64x2Qfms)
+SIMD_VISIT_QFMOP(F32x4Qfma)
+SIMD_VISIT_QFMOP(F32x4Qfms)
+#undef SIMD_VISIT_QFMOP
+
+#define SIMD_VISIT_BITMASK(Opcode)                      \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    S390OperandGenerator g(this);                       \
+    Emit(kS390_##Opcode, g.DefineAsRegister(node),      \
+         g.UseUniqueRegister(node->InputAt(0)));        \
+  }
+SIMD_VISIT_BITMASK(I8x16BitMask)
+SIMD_VISIT_BITMASK(I16x8BitMask)
+SIMD_VISIT_BITMASK(I32x4BitMask)
+#undef SIMD_VISIT_BITMASK
+
+#define SIMD_VISIT_PMIN_MAX(Type)                                           \
+  void InstructionSelector::Visit##Type(Node* node) {                       \
+    S390OperandGenerator g(this);                                           \
+    Emit(kS390_##Type, g.DefineAsRegister(node),                            \
+         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
+  }
+SIMD_VISIT_PMIN_MAX(F64x2Pmin)
+SIMD_VISIT_PMIN_MAX(F32x4Pmin)
+SIMD_VISIT_PMIN_MAX(F64x2Pmax)
+SIMD_VISIT_PMIN_MAX(F32x4Pmax)
+#undef SIMD_VISIT_PMIN_MAX
+#undef SIMD_TYPES
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  uint8_t* shuffle_p = &shuffle[0];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+  S390OperandGenerator g(this);
+  Node* input0 = node->InputAt(0);
+  Node* input1 = node->InputAt(1);
+#ifdef V8_TARGET_BIG_ENDIAN
+  // Remap the shuffle indices to match IBM lane numbering.
+  int max_index = 15;
+  int total_lane_count = 2 * kSimd128Size;
+  uint8_t shuffle_remapped[kSimd128Size];
+  for (int i = 0; i < kSimd128Size; i++) {
+    uint8_t current_index = shuffle[i];
+    shuffle_remapped[i] = (current_index <= max_index
+                               ? max_index - current_index
+                               : total_lane_count - current_index + max_index);
+  }
+  shuffle_p = &shuffle_remapped[0];
+#endif
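+  // For example, with max_index = 15 and total_lane_count = 32 the remapping
+  // above sends index i <= 15 (first input) to 15 - i and index i > 15
+  // (second input) to 47 - i, so {0, 3, 16, 31} becomes {15, 12, 31, 16}:
+  // lane numbering is mirrored within each 16-byte input.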
+  Emit(kS390_I8x16Shuffle, g.DefineAsRegister(node),
+       g.UseUniqueRegister(input0), g.UseUniqueRegister(input1),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p + 4)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p + 8)),
+       g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(shuffle_p + 12)));
+}
+
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  S390OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kS390_I8x16Swizzle, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitS128Const(Node* node) {
+  S390OperandGenerator g(this);
+  uint32_t val[kSimd128Size / sizeof(uint32_t)];
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If all bytes are zeros or all ones, avoid emitting the generic constant.
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
+                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kS390_S128Zero, dst);
+  } else if (all_ones) {
+    Emit(kS390_S128AllOnes, dst);
+  } else {
+    // We have to use Pack4Lanes to reverse the bytes (lanes) on BE,
+    // which on LE amounts to a no-op.
+    Emit(kS390_S128Const, g.DefineAsRegister(node),
+         g.UseImmediate(
+             wasm::SimdShuffle::Pack4Lanes(reinterpret_cast<uint8_t*>(val))),
+         g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(
+             reinterpret_cast<uint8_t*>(val) + 4)),
+         g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(
+             reinterpret_cast<uint8_t*>(val) + 8)),
+         g.UseImmediate(wasm::SimdShuffle::Pack4Lanes(
+             reinterpret_cast<uint8_t*>(val) + 12)));
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kS390_S128Zero, g.DefineAsRegister(node));
+}
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  S390OperandGenerator g(this);
+  Emit(kS390_S128Select, g.DefineAsRegister(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       g.UseRegister(node->InputAt(2)));
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  S390OperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      Emit(kS390_Peek, g.DefineAsRegister(output.node),
+           g.UseImmediate(reverse_slot));
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  // We should never reach here, see http://crrev.com/c/2050811
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitTruncateFloat32ToInt32(Node* node) {
+  S390OperandGenerator g(this);
+
+  InstructionCode opcode = kS390_Float32ToInt32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitTruncateFloat32ToUint32(Node* node) {
+  S390OperandGenerator g(this);
+
+  InstructionCode opcode = kS390_Float32ToUint32;
+  TruncateKind kind = OpParameter<TruncateKind>(node->op());
+  if (kind == TruncateKind::kSetOverflowToMin) {
+    opcode |= MiscField::encode(true);
+  }
+
+  Emit(opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  return MachineOperatorBuilder::kFloat32RoundDown |
+         MachineOperatorBuilder::kFloat64RoundDown |
+         MachineOperatorBuilder::kFloat32RoundUp |
+         MachineOperatorBuilder::kFloat64RoundUp |
+         MachineOperatorBuilder::kFloat32RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTruncate |
+         MachineOperatorBuilder::kFloat64RoundTiesAway |
+         MachineOperatorBuilder::kWord32Popcnt |
+         MachineOperatorBuilder::kInt32AbsWithOverflow |
+         MachineOperatorBuilder::kInt64AbsWithOverflow |
+         MachineOperatorBuilder::kWord64Popcnt;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/s390/unwinding-info-writer-s390.cc b/src/compiler/backend/s390/unwinding-info-writer-s390.cc
new file mode 100644
index 0000000..ff3a439
--- /dev/null
+++ b/src/compiler/backend/s390/unwinding-info-writer-s390.cc
@@ -0,0 +1,103 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/s390/unwinding-info-writer-s390.h"
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+void UnwindingInfoWriter::BeginInstructionBlock(int pc_offset,
+                                                const InstructionBlock* block) {
+  if (!enabled()) return;
+
+  block_will_exit_ = false;
+
+  DCHECK_LT(block->rpo_number().ToInt(),
+            static_cast<int>(block_initial_states_.size()));
+  const BlockInitialState* initial_state =
+      block_initial_states_[block->rpo_number().ToInt()];
+  if (!initial_state) return;
+  if (initial_state->saved_lr_ != saved_lr_) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    if (initial_state->saved_lr_) {
+      eh_frame_writer_.RecordRegisterSavedToStack(r14, kSystemPointerSize);
+      eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+    } else {
+      eh_frame_writer_.RecordRegisterFollowsInitialRule(r14);
+    }
+    saved_lr_ = initial_state->saved_lr_;
+  }
+}
+
+void UnwindingInfoWriter::EndInstructionBlock(const InstructionBlock* block) {
+  if (!enabled() || block_will_exit_) return;
+
+  for (const RpoNumber& successor : block->successors()) {
+    int successor_index = successor.ToInt();
+    DCHECK_LT(successor_index, static_cast<int>(block_initial_states_.size()));
+    const BlockInitialState* existing_state =
+        block_initial_states_[successor_index];
+
+    // If we already had an entry for this block, check that it holds the same
+    // values we are trying to insert.
+    if (existing_state) {
+      DCHECK_EQ(existing_state->saved_lr_, saved_lr_);
+    } else {
+      block_initial_states_[successor_index] =
+          zone_->New<BlockInitialState>(saved_lr_);
+    }
+  }
+}
+
+void UnwindingInfoWriter::MarkFrameConstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // Regardless of the type of frame constructed, the relevant part of the
+  // layout is always the one in the diagram:
+  //
+  // |   ....   |         higher addresses
+  // +----------+               ^
+  // |    LR    |               |            |
+  // +----------+               |            |
+  // | saved FP |               |            |
+  // +----------+ <-- FP                     v
+  // |   ....   |                       stack growth
+  //
+  // The LR is pushed on the stack, and we can record this fact at the end of
+  // the construction, since the LR itself is not modified in the process.
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterSavedToStack(r14, kSystemPointerSize);
+  eh_frame_writer_.RecordRegisterSavedToStack(fp, 0);
+  saved_lr_ = true;
+}
+
+void UnwindingInfoWriter::MarkFrameDeconstructed(int at_pc) {
+  if (!enabled()) return;
+
+  // The LR is restored by the last operation in LeaveFrame().
+  eh_frame_writer_.AdvanceLocation(at_pc);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(r14);
+  saved_lr_ = false;
+}
+
+void UnwindingInfoWriter::MarkLinkRegisterOnTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(sp, 0);
+  eh_frame_writer_.RecordRegisterSavedToStack(r14, 0);
+}
+
+void UnwindingInfoWriter::MarkPopLinkRegisterFromTopOfStack(int pc_offset) {
+  if (!enabled()) return;
+
+  eh_frame_writer_.AdvanceLocation(pc_offset);
+  eh_frame_writer_.SetBaseAddressRegisterAndOffset(fp, 0);
+  eh_frame_writer_.RecordRegisterFollowsInitialRule(r14);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/s390/unwinding-info-writer-s390.h b/src/compiler/backend/s390/unwinding-info-writer-s390.h
new file mode 100644
index 0000000..2202c28
--- /dev/null
+++ b/src/compiler/backend/s390/unwinding-info-writer-s390.h
@@ -0,0 +1,73 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_S390_UNWINDING_INFO_WRITER_S390_H_
+#define V8_COMPILER_BACKEND_S390_UNWINDING_INFO_WRITER_S390_H_
+
+#include "src/diagnostics/eh-frame.h"
+#include "src/flags/flags.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone* zone)
+      : zone_(zone),
+        eh_frame_writer_(zone),
+        saved_lr_(false),
+        block_will_exit_(false),
+        block_initial_states_(zone) {
+    if (enabled()) eh_frame_writer_.Initialize();
+  }
+
+  void SetNumberOfInstructionBlocks(int number) {
+    if (enabled()) block_initial_states_.resize(number);
+  }
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock* block);
+  void EndInstructionBlock(const InstructionBlock* block);
+
+  void MarkLinkRegisterOnTopOfStack(int pc_offset);
+  void MarkPopLinkRegisterFromTopOfStack(int pc_offset);
+
+  void MarkFrameConstructed(int at_pc);
+  void MarkFrameDeconstructed(int at_pc);
+
+  void MarkBlockWillExit() { block_will_exit_ = true; }
+
+  void Finish(int code_size) {
+    if (enabled()) eh_frame_writer_.Finish(code_size);
+  }
+
+  EhFrameWriter* eh_frame_writer() {
+    return enabled() ? &eh_frame_writer_ : nullptr;
+  }
+
+ private:
+  bool enabled() const { return FLAG_perf_prof_unwinding_info; }
+
+  class BlockInitialState : public ZoneObject {
+   public:
+    explicit BlockInitialState(bool saved_lr) : saved_lr_(saved_lr) {}
+
+    bool saved_lr_;
+  };
+
+  Zone* zone_;
+  EhFrameWriter eh_frame_writer_;
+  bool saved_lr_;
+  bool block_will_exit_;
+
+  ZoneVector<const BlockInitialState*> block_initial_states_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_S390_UNWINDING_INFO_WRITER_S390_H_
diff --git a/src/compiler/backend/spill-placer.cc b/src/compiler/backend/spill-placer.cc
new file mode 100644
index 0000000..55c2b4f
--- /dev/null
+++ b/src/compiler/backend/spill-placer.cc
@@ -0,0 +1,484 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/spill-placer.h"
+
+#include "src/base/bits-iterator.h"
+#include "src/compiler/backend/register-allocator.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+SpillPlacer::SpillPlacer(LiveRangeFinder* finder,
+                         TopTierRegisterAllocationData* data, Zone* zone)
+    : finder_(finder), data_(data), zone_(zone) {}
+
+SpillPlacer::~SpillPlacer() {
+  if (assigned_indices_ > 0) {
+    CommitSpills();
+  }
+}
+
+void SpillPlacer::Add(TopLevelLiveRange* range) {
+  DCHECK(range->HasGeneralSpillRange());
+  InstructionOperand spill_operand = range->GetSpillRangeOperand();
+  range->FilterSpillMoves(data(), spill_operand);
+
+  InstructionSequence* code = data_->code();
+  InstructionBlock* top_start_block =
+      code->GetInstructionBlock(range->Start().ToInstructionIndex());
+  RpoNumber top_start_block_number = top_start_block->rpo_number();
+
+  // Check for several cases where spilling at the definition is best.
+  // - The value is already moved on-stack somehow so the list of insertion
+  //   locations for spilling at the definition is empty.
+  // - If the first LiveRange is spilled, then there's no sense in doing
+  //   anything other than spilling at the definition.
+  // - If the value is defined in a deferred block, then the logic to select
+  //   the earliest deferred block as the insertion point would cause
+  //   incorrect behavior, so the value must be spilled at the definition.
+  // - We haven't seen any indication of performance improvements from seeking
+  //   optimal spilling positions except on loop-top phi values, so spill
+  //   any value that isn't a loop-top phi at the definition to avoid
+  //   increasing the code size for no benefit.
+  if (range->GetSpillMoveInsertionLocations(data()) == nullptr ||
+      range->spilled() || top_start_block->IsDeferred() ||
+      (!FLAG_stress_turbo_late_spilling && !range->is_loop_phi())) {
+    range->CommitSpillMoves(data(), spill_operand);
+    return;
+  }
+
+  // Iterate through the range and mark every block that needs the value to be
+  // spilled.
+  for (const LiveRange* child = range; child != nullptr;
+       child = child->next()) {
+    if (child->spilled()) {
+      // Add every block that contains part of this live range.
+      for (UseInterval* interval = child->first_interval(); interval != nullptr;
+           interval = interval->next()) {
+        RpoNumber start_block =
+            code->GetInstructionBlock(interval->start().ToInstructionIndex())
+                ->rpo_number();
+        if (start_block == top_start_block_number) {
+          // Can't do late spilling if the first spill is within the
+          // definition block.
+          range->CommitSpillMoves(data(), spill_operand);
+          // Verify that we never added any data for this range to the table.
+          DCHECK(!IsLatestVreg(range->vreg()));
+          return;
+        }
+        LifetimePosition end = interval->end();
+        int end_instruction = end.ToInstructionIndex();
+        // The end position is exclusive, so an end position exactly on a block
+        // boundary indicates that the range applies only to the prior block.
+        if (data()->IsBlockBoundary(end)) {
+          --end_instruction;
+        }
+        RpoNumber end_block =
+            code->GetInstructionBlock(end_instruction)->rpo_number();
+        while (start_block <= end_block) {
+          SetSpillRequired(code->InstructionBlockAt(start_block), range->vreg(),
+                           top_start_block_number);
+          start_block = start_block.Next();
+        }
+      }
+    } else {
+      // Add every block that contains a use which requires the on-stack value.
+      for (const UsePosition* pos = child->first_pos(); pos != nullptr;
+           pos = pos->next()) {
+        if (pos->type() != UsePositionType::kRequiresSlot) continue;
+        InstructionBlock* block =
+            code->GetInstructionBlock(pos->pos().ToInstructionIndex());
+        RpoNumber block_number = block->rpo_number();
+        if (block_number == top_start_block_number) {
+          // Can't do late spilling if the first spill is within the
+          // definition block.
+          range->CommitSpillMoves(data(), spill_operand);
+          // Verify that we never added any data for this range to the table.
+          DCHECK(!IsLatestVreg(range->vreg()));
+          return;
+        }
+        SetSpillRequired(block, range->vreg(), top_start_block_number);
+      }
+    }
+  }
+
+  // If we haven't yet marked anything for this range, then it never needs to
+  // spill at all.
+  if (!IsLatestVreg(range->vreg())) {
+    range->SetLateSpillingSelected(true);
+    return;
+  }
+
+  SetDefinition(top_start_block_number, range->vreg());
+}
+
+class SpillPlacer::Entry {
+ public:
+  // Functions operating on single values (during setup):
+
+  void SetSpillRequiredSingleValue(int value_index) {
+    DCHECK_LT(value_index, kValueIndicesPerEntry);
+    uint64_t bit = uint64_t{1} << value_index;
+    SetSpillRequired(bit);
+  }
+  void SetDefinitionSingleValue(int value_index) {
+    DCHECK_LT(value_index, kValueIndicesPerEntry);
+    uint64_t bit = uint64_t{1} << value_index;
+    SetDefinition(bit);
+  }
+
+  // Functions operating on all values simultaneously, as bitfields:
+
+  uint64_t SpillRequired() const { return GetValuesInState<kSpillRequired>(); }
+  void SetSpillRequired(uint64_t mask) {
+    UpdateValuesToState<kSpillRequired>(mask);
+  }
+  uint64_t SpillRequiredInNonDeferredSuccessor() const {
+    return GetValuesInState<kSpillRequiredInNonDeferredSuccessor>();
+  }
+  void SetSpillRequiredInNonDeferredSuccessor(uint64_t mask) {
+    UpdateValuesToState<kSpillRequiredInNonDeferredSuccessor>(mask);
+  }
+  uint64_t SpillRequiredInDeferredSuccessor() const {
+    return GetValuesInState<kSpillRequiredInDeferredSuccessor>();
+  }
+  void SetSpillRequiredInDeferredSuccessor(uint64_t mask) {
+    UpdateValuesToState<kSpillRequiredInDeferredSuccessor>(mask);
+  }
+  uint64_t Definition() const { return GetValuesInState<kDefinition>(); }
+  void SetDefinition(uint64_t mask) { UpdateValuesToState<kDefinition>(mask); }
+
+ private:
+  // Possible states for every value, at every block.
+  enum State {
+    // This block is not (yet) known to require the on-stack value.
+    kUnmarked,
+
+    // The value must be on the stack in this block.
+    kSpillRequired,
+
+    // The value doesn't need to be on-stack in this block, but some
+    // non-deferred successor needs it.
+    kSpillRequiredInNonDeferredSuccessor,
+
+    // The value doesn't need to be on-stack in this block, but some
+    // deferred successor needs it.
+    kSpillRequiredInDeferredSuccessor,
+
+    // The value is defined in this block.
+    kDefinition,
+  };
+
+  template <State state>
+  uint64_t GetValuesInState() const {
+    STATIC_ASSERT(state < 8);
+    return ((state & 1) ? first_bit_ : ~first_bit_) &
+           ((state & 2) ? second_bit_ : ~second_bit_) &
+           ((state & 4) ? third_bit_ : ~third_bit_);
+  }
+
+  template <State state>
+  void UpdateValuesToState(uint64_t mask) {
+    STATIC_ASSERT(state < 8);
+    first_bit_ =
+        Entry::UpdateBitDataWithMask<(state & 1) != 0>(first_bit_, mask);
+    second_bit_ =
+        Entry::UpdateBitDataWithMask<(state & 2) != 0>(second_bit_, mask);
+    third_bit_ =
+        Entry::UpdateBitDataWithMask<(state & 4) != 0>(third_bit_, mask);
+  }
+
+  template <bool set_ones>
+  static uint64_t UpdateBitDataWithMask(uint64_t data, uint64_t mask) {
+    return set_ones ? data | mask : data & ~mask;
+  }
+
+  // Storage for the states of up to 64 live ranges.
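+  // Each value index owns one bit position across the three words below, so
+  // bits i of first_bit_, second_bit_, and third_bit_ together encode the
+  // 3-bit State of value i. For example, moving value 5 into kDefinition
+  // (== 4, binary 100) sets bit 5 of third_bit_ and clears it in the other
+  // two words, after which Definition() reports bit 5 as set.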
+  uint64_t first_bit_ = 0;
+  uint64_t second_bit_ = 0;
+  uint64_t third_bit_ = 0;
+};
+
+int SpillPlacer::GetOrCreateIndexForLatestVreg(int vreg) {
+  DCHECK_LE(assigned_indices_, kValueIndicesPerEntry);
+  // If this vreg isn't yet the last one in the list, then add it.
+  if (!IsLatestVreg(vreg)) {
+    if (vreg_numbers_ == nullptr) {
+      DCHECK_EQ(assigned_indices_, 0);
+      DCHECK_EQ(entries_, nullptr);
+      // We lazily allocate these arrays because many functions don't have any
+      // values that use SpillPlacer.
+      entries_ =
+          zone_->NewArray<Entry>(data()->code()->instruction_blocks().size());
+      for (size_t i = 0; i < data()->code()->instruction_blocks().size(); ++i) {
+        new (&entries_[i]) Entry();
+      }
+      vreg_numbers_ = zone_->NewArray<int>(kValueIndicesPerEntry);
+    }
+
+    if (assigned_indices_ == kValueIndicesPerEntry) {
+      // The table is full; commit the current set of values and clear it.
+      CommitSpills();
+      ClearData();
+    }
+
+    vreg_numbers_[assigned_indices_] = vreg;
+    ++assigned_indices_;
+  }
+  return assigned_indices_ - 1;
+}
+
+void SpillPlacer::CommitSpills() {
+  FirstBackwardPass();
+  ForwardPass();
+  SecondBackwardPass();
+}
+
+void SpillPlacer::ClearData() {
+  assigned_indices_ = 0;
+  for (int i = 0; i < data()->code()->InstructionBlockCount(); ++i) {
+    new (&entries_[i]) Entry();
+  }
+  first_block_ = RpoNumber::Invalid();
+  last_block_ = RpoNumber::Invalid();
+}
+
+void SpillPlacer::ExpandBoundsToInclude(RpoNumber block) {
+  if (!first_block_.IsValid()) {
+    DCHECK(!last_block_.IsValid());
+    first_block_ = block;
+    last_block_ = block;
+  } else {
+    if (first_block_ > block) {
+      first_block_ = block;
+    }
+    if (last_block_ < block) {
+      last_block_ = block;
+    }
+  }
+}
+
+void SpillPlacer::SetSpillRequired(InstructionBlock* block, int vreg,
+                                   RpoNumber top_start_block) {
+  // Spilling in loops is bad, so if the block is non-deferred and nested
+  // within a loop, and the definition is before that loop, then mark the loop
+  // top instead. Of course we must find the outermost such loop.
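+  // For example (hypothetical block numbering): if the requiring block sits
+  // in an inner loop with header B4 that is itself nested in a loop with
+  // header B2, and the definition block precedes B2, the walk below climbs
+  // through B4 to B2 and records the requirement on the outer header B2.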
+  if (!block->IsDeferred()) {
+    while (block->loop_header().IsValid() &&
+           block->loop_header() > top_start_block) {
+      block = data()->code()->InstructionBlockAt(block->loop_header());
+    }
+  }
+
+  int value_index = GetOrCreateIndexForLatestVreg(vreg);
+  entries_[block->rpo_number().ToSize()].SetSpillRequiredSingleValue(
+      value_index);
+  ExpandBoundsToInclude(block->rpo_number());
+}
+
+void SpillPlacer::SetDefinition(RpoNumber block, int vreg) {
+  int value_index = GetOrCreateIndexForLatestVreg(vreg);
+  entries_[block.ToSize()].SetDefinitionSingleValue(value_index);
+  ExpandBoundsToInclude(block);
+}
+
+void SpillPlacer::FirstBackwardPass() {
+  InstructionSequence* code = data()->code();
+
+  for (int i = last_block_.ToInt(); i >= first_block_.ToInt(); --i) {
+    RpoNumber block_id = RpoNumber::FromInt(i);
+    InstructionBlock* block = code->instruction_blocks()[i];
+
+    Entry& entry = entries_[i];
+
+    // State that will be accumulated from successors.
+    uint64_t spill_required_in_non_deferred_successor = 0;
+    uint64_t spill_required_in_deferred_successor = 0;
+
+    for (RpoNumber successor_id : block->successors()) {
+      // Ignore loop back-edges.
+      if (successor_id <= block_id) continue;
+
+      InstructionBlock* successor = code->InstructionBlockAt(successor_id);
+      const Entry& successor_entry = entries_[successor_id.ToSize()];
+      if (successor->IsDeferred()) {
+        spill_required_in_deferred_successor |= successor_entry.SpillRequired();
+      } else {
+        spill_required_in_non_deferred_successor |=
+            successor_entry.SpillRequired();
+      }
+      spill_required_in_deferred_successor |=
+          successor_entry.SpillRequiredInDeferredSuccessor();
+      spill_required_in_non_deferred_successor |=
+          successor_entry.SpillRequiredInNonDeferredSuccessor();
+    }
+
+    // Starting state of the current block.
+    uint64_t defs = entry.Definition();
+    uint64_t needs_spill = entry.SpillRequired();
+
+    // Info about successors doesn't get to override existing info about
+    // definitions and spills required by this block itself.
+    spill_required_in_deferred_successor &= ~(defs | needs_spill);
+    spill_required_in_non_deferred_successor &= ~(defs | needs_spill);
+
+    entry.SetSpillRequiredInDeferredSuccessor(
+        spill_required_in_deferred_successor);
+    entry.SetSpillRequiredInNonDeferredSuccessor(
+        spill_required_in_non_deferred_successor);
+  }
+}
+
+void SpillPlacer::ForwardPass() {
+  InstructionSequence* code = data()->code();
+  for (int i = first_block_.ToInt(); i <= last_block_.ToInt(); ++i) {
+    RpoNumber block_id = RpoNumber::FromInt(i);
+    InstructionBlock* block = code->instruction_blocks()[i];
+
+    // Deferred blocks don't need to participate in the forward pass, because
+    // their spills all get pulled forward to the earliest possible deferred
+    // block (where a non-deferred block jumps to a deferred block), and
+    // decisions about spill requirements for non-deferred blocks don't take
+    // deferred blocks into account.
+    if (block->IsDeferred()) continue;
+
+    Entry& entry = entries_[i];
+
+    // State that will be accumulated from predecessors.
+    uint64_t spill_required_in_non_deferred_predecessor = 0;
+    uint64_t spill_required_in_all_non_deferred_predecessors =
+        static_cast<uint64_t>(int64_t{-1});
+
+    for (RpoNumber predecessor_id : block->predecessors()) {
+      // Ignore loop back-edges.
+      if (predecessor_id >= block_id) continue;
+
+      InstructionBlock* predecessor = code->InstructionBlockAt(predecessor_id);
+      if (predecessor->IsDeferred()) continue;
+      const Entry& predecessor_entry = entries_[predecessor_id.ToSize()];
+      spill_required_in_non_deferred_predecessor |=
+          predecessor_entry.SpillRequired();
+      spill_required_in_all_non_deferred_predecessors &=
+          predecessor_entry.SpillRequired();
+    }
+
+    // Starting state of the current block.
+    uint64_t spill_required_in_non_deferred_successor =
+        entry.SpillRequiredInNonDeferredSuccessor();
+    uint64_t spill_required_in_any_successor =
+        spill_required_in_non_deferred_successor |
+        entry.SpillRequiredInDeferredSuccessor();
+
+    // If all of the predecessors agree that a spill is required, then a
+    // spill is required. Note that we don't set anything for values that
+    // currently have no markings in this block, to avoid pushing data too
+    // far down the graph and confusing the next backward pass.
+    entry.SetSpillRequired(spill_required_in_any_successor &
+                           spill_required_in_non_deferred_predecessor &
+                           spill_required_in_all_non_deferred_predecessors);
+
+    // If only some of the predecessors require a spill, but some successor
+    // of this block also requires a spill, then this merge point requires a
+    // spill. This ensures that no control-flow path through non-deferred
+    // blocks ever has to spill twice.
+    entry.SetSpillRequired(spill_required_in_non_deferred_successor &
+                           spill_required_in_non_deferred_predecessor);
+  }
+}
+
+void SpillPlacer::SecondBackwardPass() {
+  InstructionSequence* code = data()->code();
+  for (int i = last_block_.ToInt(); i >= first_block_.ToInt(); --i) {
+    RpoNumber block_id = RpoNumber::FromInt(i);
+    InstructionBlock* block = code->instruction_blocks()[i];
+
+    Entry& entry = entries_[i];
+
+    // State that will be accumulated from successors.
+    uint64_t spill_required_in_non_deferred_successor = 0;
+    uint64_t spill_required_in_deferred_successor = 0;
+    uint64_t spill_required_in_all_non_deferred_successors =
+        static_cast<uint64_t>(int64_t{-1});
+
+    for (RpoNumber successor_id : block->successors()) {
+      // Ignore loop back-edges.
+      if (successor_id <= block_id) continue;
+
+      InstructionBlock* successor = code->InstructionBlockAt(successor_id);
+      const Entry& successor_entry = entries_[successor_id.ToSize()];
+      if (successor->IsDeferred()) {
+        spill_required_in_deferred_successor |= successor_entry.SpillRequired();
+      } else {
+        spill_required_in_non_deferred_successor |=
+            successor_entry.SpillRequired();
+        spill_required_in_all_non_deferred_successors &=
+            successor_entry.SpillRequired();
+      }
+    }
+
+    // Starting state of the current block.
+    uint64_t defs = entry.Definition();
+
+    // If all of the successors of a definition need the value to be
+    // spilled, then the value should be spilled at the definition.
+    uint64_t spill_at_def = defs & spill_required_in_non_deferred_successor &
+                            spill_required_in_all_non_deferred_successors;
+    for (int index_to_spill : base::bits::IterateBits(spill_at_def)) {
+      int vreg_to_spill = vreg_numbers_[index_to_spill];
+      TopLevelLiveRange* top = data()->live_ranges()[vreg_to_spill];
+      top->CommitSpillMoves(data(), top->GetSpillRangeOperand());
+    }
+
+    if (block->IsDeferred()) {
+      DCHECK_EQ(defs, 0);
+      // Any deferred successor needing a spill is sufficient to make the
+      // current block need a spill.
+      entry.SetSpillRequired(spill_required_in_deferred_successor);
+    }
+
+    // Propagate data upward if there are non-deferred successors and they
+    // all need a spill, regardless of whether the current block is
+    // deferred.
+    entry.SetSpillRequired(~defs & spill_required_in_non_deferred_successor &
+                           spill_required_in_all_non_deferred_successors);
+
+    // Iterate the successors again to find out which ones require spills at
+    // their beginnings, and insert those spills.
+    for (RpoNumber successor_id : block->successors()) {
+      // Ignore loop back-edges.
+      if (successor_id <= block_id) continue;
+
+      InstructionBlock* successor = code->InstructionBlockAt(successor_id);
+      const Entry& successor_entry = entries_[successor_id.ToSize()];
+      for (int index_to_spill :
+           base::bits::IterateBits(successor_entry.SpillRequired() &
+                                   ~entry.SpillRequired() & ~spill_at_def)) {
+        CommitSpill(vreg_numbers_[index_to_spill], block, successor);
+      }
+    }
+  }
+}
+
+void SpillPlacer::CommitSpill(int vreg, InstructionBlock* predecessor,
+                              InstructionBlock* successor) {
+  TopLevelLiveRange* top = data()->live_ranges()[vreg];
+  LiveRangeBoundArray* array = finder_->ArrayFor(vreg);
+  LifetimePosition pred_end = LifetimePosition::InstructionFromInstructionIndex(
+      predecessor->last_instruction_index());
+  LiveRangeBound* bound = array->Find(pred_end);
+  InstructionOperand pred_op = bound->range_->GetAssignedOperand();
+  DCHECK(pred_op.IsAnyRegister());
+  DCHECK_EQ(successor->PredecessorCount(), 1);
+  data()->AddGapMove(successor->first_instruction_index(),
+                     Instruction::GapPosition::START, pred_op,
+                     top->GetSpillRangeOperand());
+  successor->mark_needs_frame();
+  top->SetLateSpillingSelected(true);
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/spill-placer.h b/src/compiler/backend/spill-placer.h
new file mode 100644
index 0000000..94a5358
--- /dev/null
+++ b/src/compiler/backend/spill-placer.h
@@ -0,0 +1,170 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_SPILL_PLACER_H_
+#define V8_COMPILER_BACKEND_SPILL_PLACER_H_
+
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+
+namespace compiler {
+
+class LiveRangeFinder;
+class TopLevelLiveRange;
+class TopTierRegisterAllocationData;
+
+// SpillPlacer is an implementation of an algorithm to find optimal spill
+// insertion positions, where optimal is defined as:
+//
+// 1. Spills needed by deferred code don't affect non-deferred code.
+// 2. No control-flow path spills the same value more than once in non-deferred
+//    blocks.
+// 3. Where possible based on #2, control-flow paths through non-deferred code
+//    that don't need the value to be on the stack don't execute any spills.
+// 4. The fewest number of spill instructions is written to meet these rules.
+// 5. Spill instructions are placed as early as possible.
+//
+// These rules are an attempt to make code paths that don't need to spill faster
+// while not increasing code size too much.
+//
+// Considering just one value at a time for now, the steps are:
+//
+// 1. If the value is defined in a deferred block, or needs its value to be on
+//    the stack during the definition block, emit a move right after the
+//    definition and exit.
+// 2. Build an array representing the state at each block, where the state can
+//    be any of the following:
+//    - unmarked (default/initial state)
+//    - definition
+//    - spill required
+//    - spill required in non-deferred successor
+//    - spill required in deferred successor
+// 3. Mark the block containing the definition.
+// 4. Mark as "spill required" all blocks that contain any part of a spilled
+//    LiveRange, or any use that requires the value to be on the stack.
+// 5. Walk the block list backward, setting the "spill required in successor"
+//    values where appropriate. If both deferred and non-deferred successors
+//    require a spill, then the result should be "spill required in non-deferred
+//    successor".
+// 6. Walk the block list forward, updating marked blocks to "spill required" if
+//    all of their predecessors agree that a spill is required. Furthermore, if
+//    a block is marked as "spill required in non-deferred successor" and any
+//    non-deferred predecessor is marked as "spill required", then the current
+//    block is updated to "spill required". We must mark these merge points as
+//    "spill required" to obey rule #2 above: if we didn't, then there would
+//    exist a control-flow path through two different spilled regions.
+// 7. Walk the block list backward again, updating blocks to "spill required" if
+//    all of their successors agree that a spill is required, or if the current
+//    block is deferred and any of its successors require spills. If only some
+//    successors of a non-deferred block require spills, then insert spill moves
+//    at the beginning of those successors. If we manage to smear the "spill
+//    required" value all the way to the definition block, then insert a spill
+//    move at the definition instead. (Spilling at the definition implies that
+//    we didn't emit any other spill moves, and there is a DCHECK mechanism to
+//    ensure that invariant.)
+//
+// Loop back-edges can be safely ignored in every step. Anything that the loop
+// header needs on-stack will be spilled either in the loop header itself or
+// sometime before entering the loop, so its back-edge predecessors don't need
+// to contain any data about the loop header.
+//
+// The operations described in those steps are simple Boolean logic, so we can
+// easily process a batch of values at the same time as an optimization.
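+//
+// For a concrete illustration, consider a hypothetical diamond: non-deferred
+// block B0 defines the value and branches to B1 and B2, which rejoin at B3,
+// and only B2 contains a use that needs the value on the stack. The passes
+// mark B2 "spill required" and B0 "spill required in non-deferred successor",
+// but since B1 never needs the spill, the second backward pass does not smear
+// the requirement up to the definition; instead a single spill move is
+// inserted at the start of B2, so the B0->B1->B3 path executes no spill. If
+// both B1 and B2 needed the on-stack value, the requirement would reach B0
+// and the value would simply be spilled once at its definition.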
+class SpillPlacer {
+ public:
+  SpillPlacer(LiveRangeFinder* finder, TopTierRegisterAllocationData* data,
+              Zone* zone);
+
+  ~SpillPlacer();
+
+  SpillPlacer(const SpillPlacer&) = delete;
+  SpillPlacer& operator=(const SpillPlacer&) = delete;
+
+  // Adds the given TopLevelLiveRange to the SpillPlacer's state. Will
+  // eventually commit spill moves for that range and mark the range to indicate
+  // whether its value is spilled at the definition or some later point, so that
+  // subsequent phases can know whether to assume the value is always on-stack.
+  // However, those steps may happen during a later call to Add or during the
+  // destructor.
+  void Add(TopLevelLiveRange* range);
+
+ private:
+  TopTierRegisterAllocationData* data() const { return data_; }
+
+  // While initializing data for a range, returns the index within each Entry
+  // where data about that range should be stored. May cause data about previous
+  // ranges to be committed to make room if the table is full.
+  int GetOrCreateIndexForLatestVreg(int vreg);
+
+  bool IsLatestVreg(int vreg) const {
+    return assigned_indices_ > 0 &&
+           vreg_numbers_[assigned_indices_ - 1] == vreg;
+  }
+
+  // Processes all of the ranges which have been added, inserts spill moves for
+  // them to the instruction sequence, and marks the ranges with whether they
+  // are spilled at the definition or later.
+  void CommitSpills();
+
+  void ClearData();
+
+  // Updates the iteration bounds first_block_ and last_block_ so that they
+  // include the new value.
+  void ExpandBoundsToInclude(RpoNumber block);
+
+  void SetSpillRequired(InstructionBlock* block, int vreg,
+                        RpoNumber top_start_block);
+
+  void SetDefinition(RpoNumber block, int vreg);
+
+  // The first backward pass is responsible for marking blocks which do not
+  // themselves need the value to be on the stack, but which do have successors
+  // requiring the value to be on the stack.
+  void FirstBackwardPass();
+
+  // The forward pass is responsible for selecting merge points that should
+  // require the value to be on the stack.
+  void ForwardPass();
+
+  // The second backward pass is responsible for propagating the spill
+  // requirements to the earliest block where all successors can agree a spill
+  // is required. It also emits the actual spill instructions.
+  void SecondBackwardPass();
+
+  void CommitSpill(int vreg, InstructionBlock* predecessor,
+                   InstructionBlock* successor);
+
+  // Each Entry represents the state for 64 values at a block, so that we can
+  // compute a batch of values in parallel.
+  class Entry;
+  static constexpr int kValueIndicesPerEntry = 64;
+
+  // Objects provided to the constructor, which all outlive this SpillPlacer.
+  LiveRangeFinder* finder_;
+  TopTierRegisterAllocationData* data_;
+  Zone* zone_;
+
+  // An array of one Entry per block, where blocks are in reverse post-order.
+  Entry* entries_ = nullptr;
+
+  // An array representing which TopLevelLiveRange is in each bit.
+  int* vreg_numbers_ = nullptr;
+
+  // The number of vreg_numbers_ that have been assigned.
+  int assigned_indices_ = 0;
+
+  // The first and last block that have any definitions or uses in the current
+  // batch of values. In large functions, tracking these bounds can help prevent
+  // additional work.
+  RpoNumber first_block_ = RpoNumber::Invalid();
+  RpoNumber last_block_ = RpoNumber::Invalid();
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_SPILL_PLACER_H_
diff --git a/src/compiler/backend/unwinding-info-writer.h b/src/compiler/backend/unwinding-info-writer.h
new file mode 100644
index 0000000..a288e21
--- /dev/null
+++ b/src/compiler/backend/unwinding-info-writer.h
@@ -0,0 +1,61 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_UNWINDING_INFO_WRITER_H_
+#define V8_COMPILER_BACKEND_UNWINDING_INFO_WRITER_H_
+
+#include "src/flags/flags.h"
+
+#if V8_TARGET_ARCH_ARM
+#include "src/compiler/backend/arm/unwinding-info-writer-arm.h"
+#elif V8_TARGET_ARCH_ARM64
+#include "src/compiler/backend/arm64/unwinding-info-writer-arm64.h"
+#elif V8_TARGET_ARCH_X64
+#include "src/compiler/backend/x64/unwinding-info-writer-x64.h"
+#elif V8_TARGET_ARCH_S390X
+#include "src/compiler/backend/s390/unwinding-info-writer-s390.h"
+#elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64
+#include "src/compiler/backend/ppc/unwinding-info-writer-ppc.h"
+#else
+
+// Placeholder for unsupported architectures.
+
+#include "src/base/logging.h"
+
+namespace v8 {
+namespace internal {
+
+class EhFrameWriter;
+class Zone;
+
+namespace compiler {
+
+class InstructionBlock;
+
+static_assert(!FLAG_perf_prof_unwinding_info,
+              "--perf-prof-unwinding-info should be statically disabled if not "
+              "supported");
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone*) {}
+
+  void SetNumberOfInstructionBlocks(int number) {}
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock*) {}
+
+  void EndInstructionBlock(const InstructionBlock*) {}
+
+  void Finish(int code_size) {}
+
+  EhFrameWriter* eh_frame_writer() { return nullptr; }
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif
+
+#endif  // V8_COMPILER_BACKEND_UNWINDING_INFO_WRITER_H_
diff --git a/src/compiler/backend/x64/code-generator-x64.cc b/src/compiler/backend/x64/code-generator-x64.cc
new file mode 100644
index 0000000..df1d6de
--- /dev/null
+++ b/src/compiler/backend/x64/code-generator-x64.cc
@@ -0,0 +1,4910 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <limits>
+
+#include "src/base/overflowing-math.h"
+#include "src/codegen/macro-assembler.h"
+#include "src/codegen/optimized-compilation-info.h"
+#include "src/codegen/x64/assembler-x64.h"
+#include "src/compiler/backend/code-generator-impl.h"
+#include "src/compiler/backend/code-generator.h"
+#include "src/compiler/backend/gap-resolver.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/osr.h"
+#include "src/heap/memory-chunk.h"
+#include "src/objects/smi.h"
+#include "src/wasm/wasm-code-manager.h"
+#include "src/wasm/wasm-objects.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+#define __ tasm()->
+
+// Adds X64 specific methods for decoding operands.
+class X64OperandConverter : public InstructionOperandConverter {
+ public:
+  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
+      : InstructionOperandConverter(gen, instr) {}
+
+  Immediate InputImmediate(size_t index) {
+    return ToImmediate(instr_->InputAt(index));
+  }
+
+  Operand InputOperand(size_t index, int extra = 0) {
+    return ToOperand(instr_->InputAt(index), extra);
+  }
+
+  Operand OutputOperand() { return ToOperand(instr_->Output()); }
+
+  Immediate ToImmediate(InstructionOperand* operand) {
+    Constant constant = ToConstant(operand);
+    if (constant.type() == Constant::kFloat64) {
+      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
+      return Immediate(0);
+    }
+    if (RelocInfo::IsWasmReference(constant.rmode())) {
+      return Immediate(constant.ToInt32(), constant.rmode());
+    }
+    return Immediate(constant.ToInt32());
+  }
+
+  Operand ToOperand(InstructionOperand* op, int extra = 0) {
+    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
+    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
+  }
+
+  Operand SlotToOperand(int slot_index, int extra = 0) {
+    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
+    return Operand(offset.from_stack_pointer() ? rsp : rbp,
+                   offset.offset() + extra);
+  }
+
+  static size_t NextOffset(size_t* offset) {
+    size_t i = *offset;
+    (*offset)++;
+    return i;
+  }
+
+  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
+    STATIC_ASSERT(0 == static_cast<int>(times_1));
+    STATIC_ASSERT(1 == static_cast<int>(times_2));
+    STATIC_ASSERT(2 == static_cast<int>(times_4));
+    STATIC_ASSERT(3 == static_cast<int>(times_8));
+    int scale = static_cast<int>(mode - one);
+    DCHECK(scale >= 0 && scale < 4);
+    return static_cast<ScaleFactor>(scale);
+  }
+
+  Operand MemoryOperand(size_t* offset) {
+    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
+    switch (mode) {
+      case kMode_MR: {
+        Register base = InputRegister(NextOffset(offset));
+        int32_t disp = 0;
+        return Operand(base, disp);
+      }
+      case kMode_MRI: {
+        Register base = InputRegister(NextOffset(offset));
+        int32_t disp = InputInt32(NextOffset(offset));
+        return Operand(base, disp);
+      }
+      case kMode_MR1:
+      case kMode_MR2:
+      case kMode_MR4:
+      case kMode_MR8: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
+        int32_t disp = 0;
+        return Operand(base, index, scale, disp);
+      }
+      case kMode_MR1I:
+      case kMode_MR2I:
+      case kMode_MR4I:
+      case kMode_MR8I: {
+        Register base = InputRegister(NextOffset(offset));
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
+        int32_t disp = InputInt32(NextOffset(offset));
+        return Operand(base, index, scale, disp);
+      }
+      case kMode_M1: {
+        Register base = InputRegister(NextOffset(offset));
+        int32_t disp = 0;
+        return Operand(base, disp);
+      }
+      case kMode_M2:
+        UNREACHABLE();  // Should use kMode_MR with a more compact encoding.
+        return Operand(no_reg, 0);
+      case kMode_M4:
+      case kMode_M8: {
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_M1, mode);
+        int32_t disp = 0;
+        return Operand(index, scale, disp);
+      }
+      case kMode_M1I:
+      case kMode_M2I:
+      case kMode_M4I:
+      case kMode_M8I: {
+        Register index = InputRegister(NextOffset(offset));
+        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
+        int32_t disp = InputInt32(NextOffset(offset));
+        return Operand(index, scale, disp);
+      }
+      case kMode_Root: {
+        Register base = kRootRegister;
+        int32_t disp = InputInt32(NextOffset(offset));
+        return Operand(base, disp);
+      }
+      case kMode_None:
+        UNREACHABLE();
+    }
+    UNREACHABLE();
+  }
+
+  Operand MemoryOperand(size_t first_input = 0) {
+    return MemoryOperand(&first_input);
+  }
+};
+
+namespace {
+
+bool HasAddressingMode(Instruction* instr) {
+  return instr->addressing_mode() != kMode_None;
+}
+
+bool HasImmediateInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsImmediate();
+}
+
+bool HasRegisterInput(Instruction* instr, size_t index) {
+  return instr->InputAt(index)->IsRegister();
+}
+
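+// Out-of-line code that produces a quiet NaN by computing 0.0 / 0.0 (the
+// register is zeroed with an xor and then divided by itself).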
+class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), result_(result) {}
+
+  void Generate() final {
+    __ Xorps(result_, result_);
+    __ Divss(result_, result_);
+  }
+
+ private:
+  XMMRegister const result_;
+};
+
+class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
+ public:
+  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
+      : OutOfLineCode(gen), result_(result) {}
+
+  void Generate() final {
+    __ Xorpd(result_, result_);
+    __ Divsd(result_, result_);
+  }
+
+ private:
+  XMMRegister const result_;
+};
+
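+// Slow path for kArchTruncateDoubleToI: spills the input double to the stack
+// and calls the DoubleToI builtin (or the corresponding wasm stub) when the
+// inline Cvttsd2siq conversion below overflows.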
+class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
+ public:
+  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
+                             XMMRegister input, StubCallMode stub_mode,
+                             UnwindingInfoWriter* unwinding_info_writer)
+      : OutOfLineCode(gen),
+        result_(result),
+        input_(input),
+        stub_mode_(stub_mode),
+        unwinding_info_writer_(unwinding_info_writer),
+        isolate_(gen->isolate()),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    __ AllocateStackSpace(kDoubleSize);
+    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                      kDoubleSize);
+    __ Movsd(MemOperand(rsp, 0), input_);
+    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
+    } else if (tasm()->options().inline_offheap_trampolines) {
+      // With embedded builtins we do not need the isolate here. This allows
+      // the call to be generated asynchronously.
+      __ CallBuiltin(Builtins::kDoubleToI);
+    } else {
+      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
+    }
+    __ movl(result_, MemOperand(rsp, 0));
+    __ addq(rsp, Immediate(kDoubleSize));
+    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                      -kDoubleSize);
+  }
+
+ private:
+  Register const result_;
+  XMMRegister const input_;
+  StubCallMode stub_mode_;
+  UnwindingInfoWriter* const unwinding_info_writer_;
+  Isolate* isolate_;
+  Zone* zone_;
+};
+
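+// Out-of-line part of the write barrier: skips Smi values and values on pages
+// that are not of interest to the write barrier, then passes the address of
+// the updated slot to the record write stub (or the ephemeron key barrier).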
+class OutOfLineRecordWrite final : public OutOfLineCode {
+ public:
+  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
+                       Register value, Register scratch0, Register scratch1,
+                       RecordWriteMode mode, StubCallMode stub_mode)
+      : OutOfLineCode(gen),
+        object_(object),
+        operand_(operand),
+        value_(value),
+        scratch0_(scratch0),
+        scratch1_(scratch1),
+        mode_(mode),
+        stub_mode_(stub_mode),
+        zone_(gen->zone()) {}
+
+  void Generate() final {
+    if (mode_ > RecordWriteMode::kValueIsPointer) {
+      __ JumpIfSmi(value_, exit());
+    }
+    if (COMPRESS_POINTERS_BOOL) {
+      __ DecompressTaggedPointer(value_, value_);
+    }
+    __ CheckPageFlag(value_, scratch0_,
+                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
+                     exit());
+    __ leaq(scratch1_, operand_);
+
+    RememberedSetAction const remembered_set_action =
+        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
+                                             : OMIT_REMEMBERED_SET;
+    SaveFPRegsMode const save_fp_mode =
+        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
+
+    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
+      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
+    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode, wasm::WasmCode::kRecordWrite);
+    } else {
+      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
+                             save_fp_mode);
+    }
+  }
+
+ private:
+  Register const object_;
+  Operand const operand_;
+  Register const value_;
+  Register const scratch0_;
+  Register const scratch1_;
+  RecordWriteMode const mode_;
+  StubCallMode const stub_mode_;
+  Zone* zone_;
+};
+
+class WasmOutOfLineTrap : public OutOfLineCode {
+ public:
+  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
+      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
+
+  void Generate() override {
+    X64OperandConverter i(gen_, instr_);
+    TrapId trap_id =
+        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
+    GenerateWithTrapId(trap_id);
+  }
+
+ protected:
+  CodeGenerator* gen_;
+
+  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
+
+ private:
+  void GenerateCallToTrap(TrapId trap_id) {
+    if (!gen_->wasm_runtime_exception_support()) {
+      // We cannot test calls to the runtime in cctest/test-run-wasm.
+      // Therefore we emit a call to C here instead of a call to the runtime.
+      __ PrepareCallCFunction(0);
+      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
+                       0);
+      __ LeaveFrame(StackFrame::WASM);
+      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
+      size_t pop_size =
+          call_descriptor->StackParameterCount() * kSystemPointerSize;
+      // Use rcx as a scratch register; we return immediately anyway.
+      __ Ret(static_cast<int>(pop_size), rcx);
+    } else {
+      gen_->AssembleSourcePosition(instr_);
+      // A direct call to a wasm runtime stub defined in this module.
+      // Just encode the stub index. This will be patched when the code
+      // is added to the native module and copied into wasm code space.
+      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
+      ReferenceMap* reference_map =
+          gen_->zone()->New<ReferenceMap>(gen_->zone());
+      gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+    }
+  }
+
+  Instruction* instr_;
+};
+
+class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
+ public:
+  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
+      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
+
+  void Generate() final {
+    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
+    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
+  }
+
+ private:
+  int pc_;
+};
+
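+// For memory accesses whose bounds check is delegated to the trap handler
+// (kMemoryAccessProtected), registers an out-of-line landing pad that raises
+// the wasm out-of-bounds trap for the faulting pc.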
+void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
+                         InstructionCode opcode, Instruction* instr,
+                         int pc) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessProtected) {
+    zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
+  }
+}
+
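+// For poisoned loads, masks the loaded value with kSpeculationPoisonRegister,
+// which holds all ones on the correct execution path and all zeros when
+// speculatively executing the wrong PC (see
+// GenerateSpeculationPoisonFromCodeStartRegister below).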
+void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
+                                   InstructionCode opcode, Instruction* instr,
+                                   X64OperandConverter const& i) {
+  const MemoryAccessMode access_mode =
+      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+  if (access_mode == kMemoryAccessPoisoned) {
+    Register value = i.OutputRegister();
+    codegen->tasm()->andq(value, kSpeculationPoisonRegister);
+  }
+}
+
+}  // namespace
+
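+// The ASSEMBLE_* macros below expand to the per-operand-kind dispatch shared
+// by many arch opcodes: they select the register, immediate, stack-slot or
+// memory form of an instruction based on what the instruction selector chose.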
+#define ASSEMBLE_UNOP(asm_instr)         \
+  do {                                   \
+    if (instr->Output()->IsRegister()) { \
+      __ asm_instr(i.OutputRegister());  \
+    } else {                             \
+      __ asm_instr(i.OutputOperand());   \
+    }                                    \
+  } while (false)
+
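+// For example, kX64Add32 below uses ASSEMBLE_BINOP(addl), which emits
+// addl reg, reg / addl reg, imm / addl reg, <mem> / addl <stack slot>, imm
+// depending on the kinds of the two inputs.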
+#define ASSEMBLE_BINOP(asm_instr)                                \
+  do {                                                           \
+    if (HasAddressingMode(instr)) {                              \
+      size_t index = 1;                                          \
+      Operand right = i.MemoryOperand(&index);                   \
+      __ asm_instr(i.InputRegister(0), right);                   \
+    } else {                                                     \
+      if (HasImmediateInput(instr, 1)) {                         \
+        if (HasRegisterInput(instr, 0)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+        } else {                                                 \
+          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
+        }                                                        \
+      } else {                                                   \
+        if (HasRegisterInput(instr, 1)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
+        } else {                                                 \
+          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
+        }                                                        \
+      }                                                          \
+    }                                                            \
+  } while (false)
+
+#define ASSEMBLE_COMPARE(asm_instr)                              \
+  do {                                                           \
+    if (HasAddressingMode(instr)) {                              \
+      size_t index = 0;                                          \
+      Operand left = i.MemoryOperand(&index);                    \
+      if (HasImmediateInput(instr, index)) {                     \
+        __ asm_instr(left, i.InputImmediate(index));             \
+      } else {                                                   \
+        __ asm_instr(left, i.InputRegister(index));              \
+      }                                                          \
+    } else {                                                     \
+      if (HasImmediateInput(instr, 1)) {                         \
+        if (HasRegisterInput(instr, 0)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
+        } else {                                                 \
+          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
+        }                                                        \
+      } else {                                                   \
+        if (HasRegisterInput(instr, 1)) {                        \
+          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
+        } else {                                                 \
+          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
+        }                                                        \
+      }                                                          \
+    }                                                            \
+  } while (false)
+
+#define ASSEMBLE_MULT(asm_instr)                              \
+  do {                                                        \
+    if (HasImmediateInput(instr, 1)) {                        \
+      if (HasRegisterInput(instr, 0)) {                       \
+        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
+                     i.InputImmediate(1));                    \
+      } else {                                                \
+        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
+                     i.InputImmediate(1));                    \
+      }                                                       \
+    } else {                                                  \
+      if (HasRegisterInput(instr, 1)) {                       \
+        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
+      } else {                                                \
+        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
+      }                                                       \
+    }                                                         \
+  } while (false)
+
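+// The width argument (5 for 32-bit, 6 for 64-bit shifts) selects InputInt5 /
+// InputInt6, which keep only the low 5 or 6 bits of an immediate shift count;
+// variable shift counts use the cl-based (asm_instr##_cl) forms.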
+#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
+  do {                                                                     \
+    if (HasImmediateInput(instr, 1)) {                                     \
+      if (instr->Output()->IsRegister()) {                                 \
+        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
+      } else {                                                             \
+        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
+      }                                                                    \
+    } else {                                                               \
+      if (instr->Output()->IsRegister()) {                                 \
+        __ asm_instr##_cl(i.OutputRegister());                             \
+      } else {                                                             \
+        __ asm_instr##_cl(i.OutputOperand());                              \
+      }                                                                    \
+    }                                                                      \
+  } while (false)
+
+#define ASSEMBLE_MOVX(asm_instr)                            \
+  do {                                                      \
+    if (HasAddressingMode(instr)) {                         \
+      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
+    } else if (HasRegisterInput(instr, 0)) {                \
+      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
+    } else {                                                \
+      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
+    }                                                       \
+  } while (false)
+
+#define ASSEMBLE_SSE_BINOP(asm_instr)                                     \
+  do {                                                                    \
+    if (HasAddressingMode(instr)) {                                       \
+      size_t index = 1;                                                   \
+      Operand right = i.MemoryOperand(&index);                            \
+      __ asm_instr(i.InputDoubleRegister(0), right);                      \
+    } else {                                                              \
+      if (instr->InputAt(1)->IsFPRegister()) {                            \
+        __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
+      } else {                                                            \
+        __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
+      }                                                                   \
+    }                                                                     \
+  } while (false)
+
+#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
+  do {                                                                  \
+    if (instr->InputAt(0)->IsFPRegister()) {                            \
+      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
+    } else {                                                            \
+      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
+    }                                                                   \
+  } while (false)
+
+#define ASSEMBLE_AVX_BINOP(asm_instr)                                          \
+  do {                                                                         \
+    CpuFeatureScope avx_scope(tasm(), AVX);                                    \
+    if (HasAddressingMode(instr)) {                                            \
+      size_t index = 1;                                                        \
+      Operand right = i.MemoryOperand(&index);                                 \
+      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
+    } else {                                                                   \
+      if (instr->InputAt(1)->IsFPRegister()) {                                 \
+        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
+                     i.InputDoubleRegister(1));                                \
+      } else {                                                                 \
+        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
+                     i.InputOperand(1));                                       \
+      }                                                                        \
+    }                                                                          \
+  } while (false)
+
+#define ASSEMBLE_IEEE754_BINOP(name)                                     \
+  do {                                                                   \
+    __ PrepareCallCFunction(2);                                          \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
+  } while (false)
+
+#define ASSEMBLE_IEEE754_UNOP(name)                                      \
+  do {                                                                   \
+    __ PrepareCallCFunction(1);                                          \
+    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
+  } while (false)
+
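+// The atomic binop macros implement a compare-and-swap retry loop: load the
+// current value into rax, compute the new value in a temporary register, then
+// lock cmpxchg it back and retry if the memory location changed in between.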
+#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
+  do {                                                          \
+    Label binop;                                                \
+    __ bind(&binop);                                            \
+    __ mov_inst(rax, i.MemoryOperand(1));                       \
+    __ movl(i.TempRegister(0), rax);                            \
+    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
+    __ lock();                                                  \
+    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
+    __ j(not_equal, &binop);                                    \
+  } while (false)
+
+#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
+  do {                                                            \
+    Label binop;                                                  \
+    __ bind(&binop);                                              \
+    __ mov_inst(rax, i.MemoryOperand(1));                         \
+    __ movq(i.TempRegister(0), rax);                              \
+    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
+    __ lock();                                                    \
+    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
+    __ j(not_equal, &binop);                                      \
+  } while (false)
+
+// Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
+// dst and first src will be the same. For AVX we don't restrict it that way, so
+// we will omit unnecessary moves.
+#define ASSEMBLE_SIMD_BINOP(opcode)                                      \
+  do {                                                                   \
+    if (CpuFeatures::IsSupported(AVX)) {                                 \
+      CpuFeatureScope avx_scope(tasm(), AVX);                            \
+      __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
+                   i.InputSimd128Register(1));                           \
+    } else {                                                             \
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
+      __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1));   \
+    }                                                                    \
+  } while (false)
+
+#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
+  do {                                                       \
+    if (instr->InputAt(index)->IsSimd128Register()) {        \
+      __ opcode(dst_operand, i.InputSimd128Register(index)); \
+    } else {                                                 \
+      __ opcode(dst_operand, i.InputOperand(index));         \
+    }                                                        \
+  } while (false)
+
+#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
+  do {                                                            \
+    if (instr->InputAt(index)->IsSimd128Register()) {             \
+      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
+    } else {                                                      \
+      __ opcode(dst_operand, i.InputOperand(index), imm);         \
+    }                                                             \
+  } while (false)
+
+#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
+  do {                                                   \
+    XMMRegister dst = i.OutputSimd128Register();         \
+    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
+    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
+    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
+  } while (false)
+
+#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                              \
+  do {                                                                      \
+    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));        \
+    if (instr->InputAt(1)->IsSimd128Register()) {                           \
+      __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
+    } else {                                                                \
+      __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);         \
+    }                                                                       \
+  } while (false)
+
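+// Compares the input against zero lane-wise and then uses ptest to set the
+// result register to 1 only if none of the lanes was zero.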
+#define ASSEMBLE_SIMD_ALL_TRUE(opcode)          \
+  do {                                          \
+    Register dst = i.OutputRegister();          \
+    XMMRegister tmp = i.TempSimd128Register(0); \
+    __ xorq(dst, dst);                          \
+    __ Pxor(tmp, tmp);                          \
+    __ opcode(tmp, i.InputSimd128Register(0));  \
+    __ Ptest(tmp, tmp);                         \
+    __ setcc(equal, dst);                       \
+  } while (false)
+
+// This macro will directly emit the opcode if the shift is an immediate - the
+// shift value will be taken modulo 2^width. Otherwise, it will emit code to
+// perform the modulus operation.
+#define ASSEMBLE_SIMD_SHIFT(opcode, width)                 \
+  do {                                                     \
+    XMMRegister dst = i.OutputSimd128Register();           \
+    if (HasImmediateInput(instr, 1)) {                     \
+      if (CpuFeatures::IsSupported(AVX)) {                 \
+        CpuFeatureScope avx_scope(tasm(), AVX);            \
+        __ v##opcode(dst, i.InputSimd128Register(0),       \
+                     byte{i.InputInt##width(1)});          \
+      } else {                                             \
+        DCHECK_EQ(dst, i.InputSimd128Register(0));         \
+        __ opcode(dst, byte{i.InputInt##width(1)});        \
+      }                                                    \
+    } else {                                               \
+      XMMRegister tmp = i.TempSimd128Register(0);          \
+      Register tmp_shift = i.TempRegister(1);              \
+      constexpr int mask = (1 << width) - 1;               \
+      __ movq(tmp_shift, i.InputRegister(1));              \
+      __ andq(tmp_shift, Immediate(mask));                 \
+      __ Movq(tmp, tmp_shift);                             \
+      if (CpuFeatures::IsSupported(AVX)) {                 \
+        CpuFeatureScope avx_scope(tasm(), AVX);            \
+        __ v##opcode(dst, i.InputSimd128Register(0), tmp); \
+      } else {                                             \
+        DCHECK_EQ(dst, i.InputSimd128Register(0));         \
+        __ opcode(dst, tmp);                               \
+      }                                                    \
+    }                                                      \
+  } while (false)
+
+#define ASSEMBLE_PINSR(ASM_INSTR)                                     \
+  do {                                                                \
+    EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
+    XMMRegister dst = i.OutputSimd128Register();                      \
+    XMMRegister src = i.InputSimd128Register(0);                      \
+    uint8_t laneidx = i.InputUint8(1);                                \
+    if (HasAddressingMode(instr)) {                                   \
+      __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx);            \
+      break;                                                          \
+    }                                                                 \
+    if (instr->InputAt(2)->IsFPRegister()) {                          \
+      __ Movq(kScratchRegister, i.InputDoubleRegister(2));            \
+      __ ASM_INSTR(dst, src, kScratchRegister, laneidx);              \
+    } else if (instr->InputAt(2)->IsRegister()) {                     \
+      __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx);            \
+    } else {                                                          \
+      __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx);             \
+    }                                                                 \
+  } while (false)
+
+void CodeGenerator::AssembleDeconstructFrame() {
+  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
+  __ movq(rsp, rbp);
+  __ popq(rbp);
+}
+
+void CodeGenerator::AssemblePrepareTailCall() {
+  if (frame_access_state()->has_frame()) {
+    __ movq(rbp, MemOperand(rbp, 0));
+  }
+  frame_access_state()->SetFrameAccessToSP();
+}
+
+void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
+                                                     Register scratch1,
+                                                     Register scratch2,
+                                                     Register scratch3) {
+  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
+  Label done;
+
+  // Check if current frame is an arguments adaptor frame.
+  __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
+          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
+  __ j(not_equal, &done, Label::kNear);
+
+  // Load arguments count from current arguments adaptor frame (note, it
+  // does not include receiver).
+  Register caller_args_count_reg = scratch1;
+  __ SmiUntag(caller_args_count_reg,
+              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
+
+  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
+  __ bind(&done);
+}
+
+namespace {
+
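+// Adjusts rsp so that the first unused slot above the stack pointer matches
+// what the tail call expects, allocating or releasing stack space as needed
+// and keeping the frame access state's SP delta in sync.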
+void AdjustStackPointerForTailCall(Instruction* instr,
+                                   TurboAssembler* assembler, Linkage* linkage,
+                                   OptimizedCompilationInfo* info,
+                                   FrameAccessState* state,
+                                   int new_slot_above_sp,
+                                   bool allow_shrinkage = true) {
+  int stack_slot_delta;
+  if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
+    // For this special tail-call mode, the callee has the same arguments and
+    // linkage as the caller, and arguments adaptor frames must be preserved.
+    // Thus we simply have to reset the stack pointer register to its original
+    // value before frame construction.
+    // See also: AssembleConstructFrame.
+    DCHECK(!info->is_osr());
+    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
+    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
+    DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
+    stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
+                        kReturnAddressStackSlotCount) *
+                       -1;
+    DCHECK_LE(stack_slot_delta, 0);
+  } else {
+    int current_sp_offset = state->GetSPToFPSlotCount() +
+                            StandardFrameConstants::kFixedSlotCountAboveFp;
+    stack_slot_delta = new_slot_above_sp - current_sp_offset;
+  }
+
+  if (stack_slot_delta > 0) {
+    assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
+    state->IncreaseSPDelta(stack_slot_delta);
+  } else if (allow_shrinkage && stack_slot_delta < 0) {
+    assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
+    state->IncreaseSPDelta(stack_slot_delta);
+  }
+}
+
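+// Materializes a 128-bit SIMD constant, given as four 32-bit words, into an
+// XMM register.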
+void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
+                                  XMMRegister reg) {
+  assembler->Move(reg, make_uint64(imms[3], imms[2]),
+                  make_uint64(imms[1], imms[0]));
+}
+
+}  // namespace
+
+void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
+                                              int first_unused_stack_slot) {
+  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
+  ZoneVector<MoveOperands*> pushes(zone());
+  GetPushCompatibleMoves(instr, flags, &pushes);
+
+  if (!pushes.empty() &&
+      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
+       first_unused_stack_slot)) {
+    DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
+    X64OperandConverter g(this, instr);
+    for (auto move : pushes) {
+      LocationOperand destination_location(
+          LocationOperand::cast(move->destination()));
+      InstructionOperand source(move->source());
+      AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                    frame_access_state(),
+                                    destination_location.index());
+      if (source.IsStackSlot()) {
+        LocationOperand source_location(LocationOperand::cast(source));
+        __ Push(g.SlotToOperand(source_location.index()));
+      } else if (source.IsRegister()) {
+        LocationOperand source_location(LocationOperand::cast(source));
+        __ Push(source_location.GetRegister());
+      } else if (source.IsImmediate()) {
+        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
+      } else {
+        // Pushes of non-scalar data types are not supported.
+        UNIMPLEMENTED();
+      }
+      frame_access_state()->IncreaseSPDelta(1);
+      move->Eliminate();
+    }
+  }
+  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                frame_access_state(), first_unused_stack_slot,
+                                false);
+}
+
+void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
+                                             int first_unused_stack_slot) {
+  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
+                                frame_access_state(), first_unused_stack_slot);
+}
+
+// Check that {kJavaScriptCallCodeStartRegister} is correct.
+void CodeGenerator::AssembleCodeStartRegisterCheck() {
+  __ ComputeCodeStartAddress(rbx);
+  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
+  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
+}
+
+// Check if the code object is marked for deoptimization. If it is, then it
+// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
+// to:
+//    1. read from memory the word that contains that bit, which can be found
+//       in the flags in the referenced {CodeDataContainer} object;
+//    2. test kMarkedForDeoptimizationBit in those flags; and
+//    3. if it is not zero then jump to the builtin.
+void CodeGenerator::BailoutIfDeoptimized() {
+  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
+  __ LoadTaggedPointerField(rbx,
+                            Operand(kJavaScriptCallCodeStartRegister, offset));
+  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
+           Immediate(1 << Code::kMarkedForDeoptimizationBit));
+  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
+          RelocInfo::CODE_TARGET, not_zero);
+}
+
+void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
+  // Set a mask which has all bits set in the normal case, but has all
+  // bits cleared if we are speculatively executing the wrong PC.
+  __ ComputeCodeStartAddress(rbx);
+  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
+  __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
+  __ movq(rbx, Immediate(-1));
+  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
+}
+
+void CodeGenerator::AssembleRegisterArgumentPoisoning() {
+  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
+  __ andq(kContextRegister, kSpeculationPoisonRegister);
+  __ andq(rsp, kSpeculationPoisonRegister);
+}
+
+// Assembles an instruction after register allocation, producing machine code.
+CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
+    Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  InstructionCode opcode = instr->opcode();
+  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
+  switch (arch_opcode) {
+    case kArchCallCodeObject: {
+      if (HasImmediateInput(instr, 0)) {
+        Handle<Code> code = i.InputCode(0);
+        __ Call(code, RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ LoadCodeObjectEntry(reg, reg);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineCall(reg);
+        } else {
+          __ call(reg);
+        }
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallBuiltinPointer: {
+      DCHECK(!HasImmediateInput(instr, 0));
+      Register builtin_index = i.InputRegister(0);
+      __ CallBuiltinByIndex(builtin_index);
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchCallWasmFunction: {
+      if (HasImmediateInput(instr, 0)) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+          __ near_call(wasm_code, constant.rmode());
+        } else {
+          if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+            __ RetpolineCall(wasm_code, constant.rmode());
+          } else {
+            __ Call(wasm_code, constant.rmode());
+          }
+        }
+      } else {
+        Register reg = i.InputRegister(0);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineCall(reg);
+        } else {
+          __ call(reg);
+        }
+      }
+      RecordCallPosition(instr);
+      frame_access_state()->ClearSPDelta();
+      break;
+    }
+    case kArchTailCallCodeObjectFromJSFunction:
+      if (!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
+        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
+                                         i.TempRegister(0), i.TempRegister(1),
+                                         i.TempRegister(2));
+      }
+      V8_FALLTHROUGH;
+    case kArchTailCallCodeObject: {
+      if (HasImmediateInput(instr, 0)) {
+        Handle<Code> code = i.InputCode(0);
+        __ Jump(code, RelocInfo::CODE_TARGET);
+      } else {
+        Register reg = i.InputRegister(0);
+        DCHECK_IMPLIES(
+            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+            reg == kJavaScriptCallCodeStartRegister);
+        __ LoadCodeObjectEntry(reg, reg);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineJump(reg);
+        } else {
+          __ jmp(reg);
+        }
+      }
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallWasm: {
+      if (HasImmediateInput(instr, 0)) {
+        Constant constant = i.ToConstant(instr->InputAt(0));
+        Address wasm_code = static_cast<Address>(constant.ToInt64());
+        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
+          __ near_jmp(wasm_code, constant.rmode());
+        } else {
+          __ Move(kScratchRegister, wasm_code, constant.rmode());
+          __ jmp(kScratchRegister);
+        }
+      } else {
+        Register reg = i.InputRegister(0);
+        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+          __ RetpolineJump(reg);
+        } else {
+          __ jmp(reg);
+        }
+      }
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchTailCallAddress: {
+      CHECK(!HasImmediateInput(instr, 0));
+      Register reg = i.InputRegister(0);
+      DCHECK_IMPLIES(
+          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
+          reg == kJavaScriptCallCodeStartRegister);
+      if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
+        __ RetpolineJump(reg);
+      } else {
+        __ jmp(reg);
+      }
+      unwinding_info_writer_.MarkBlockWillExit();
+      frame_access_state()->ClearSPDelta();
+      frame_access_state()->SetFrameAccessToDefault();
+      break;
+    }
+    case kArchCallJSFunction: {
+      Register func = i.InputRegister(0);
+      if (FLAG_debug_code) {
+        // Check the function's context matches the context argument.
+        __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
+        __ Assert(equal, AbortReason::kWrongFunctionContext);
+      }
+      static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
+      __ LoadTaggedPointerField(rcx,
+                                FieldOperand(func, JSFunction::kCodeOffset));
+      __ CallCodeObject(rcx);
+      frame_access_state()->ClearSPDelta();
+      RecordCallPosition(instr);
+      break;
+    }
+    case kArchPrepareCallCFunction: {
+      // Frame alignment requires using FP-relative frame addressing.
+      frame_access_state()->SetFrameAccessToFP();
+      int const num_parameters = MiscField::decode(instr->opcode());
+      __ PrepareCallCFunction(num_parameters);
+      break;
+    }
+    case kArchSaveCallerRegisters: {
+      fp_mode_ =
+          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // kReturnRegister0 should have been saved before entering the stub.
+      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
+      DCHECK(IsAligned(bytes, kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      DCHECK(!caller_registers_saved_);
+      caller_registers_saved_ = true;
+      break;
+    }
+    case kArchRestoreCallerRegisters: {
+      DCHECK(fp_mode_ ==
+             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
+      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
+      // Don't overwrite the returned value.
+      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
+      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
+      DCHECK_EQ(0, frame_access_state()->sp_delta());
+      DCHECK(caller_registers_saved_);
+      caller_registers_saved_ = false;
+      break;
+    }
+    case kArchPrepareTailCall:
+      AssemblePrepareTailCall();
+      break;
+    case kArchCallCFunction: {
+      int const num_parameters = MiscField::decode(instr->opcode());
+      Label return_location;
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        // Put the return address in a stack slot.
+        __ leaq(kScratchRegister, Operand(&return_location, 0));
+        __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
+                kScratchRegister);
+      }
+      if (HasImmediateInput(instr, 0)) {
+        ExternalReference ref = i.InputExternalReference(0);
+        __ CallCFunction(ref, num_parameters);
+      } else {
+        Register func = i.InputRegister(0);
+        __ CallCFunction(func, num_parameters);
+      }
+      __ bind(&return_location);
+      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
+        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
+      }
+      frame_access_state()->SetFrameAccessToDefault();
+      // Ideally, we should decrement the SP delta to match the change of the
+      // stack pointer in CallCFunction. However, for certain architectures
+      // (e.g. ARM), there may be a stricter alignment requirement, causing the
+      // old SP to be saved on the stack. In those cases, we cannot calculate
+      // the SP delta statically.
+      frame_access_state()->ClearSPDelta();
+      if (caller_registers_saved_) {
+        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
+        // Here, we assume the sequence to be:
+        //   kArchSaveCallerRegisters;
+        //   kArchCallCFunction;
+        //   kArchRestoreCallerRegisters;
+        int bytes =
+            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
+        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
+      }
+      // TODO(tebbi): Do we need an lfence here?
+      break;
+    }
+    case kArchJmp:
+      AssembleArchJump(i.InputRpo(0));
+      break;
+    case kArchBinarySearchSwitch:
+      AssembleArchBinarySearchSwitch(instr);
+      break;
+    case kArchTableSwitch:
+      AssembleArchTableSwitch(instr);
+      break;
+    case kArchComment:
+      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
+      break;
+    case kArchAbortCSAAssert:
+      DCHECK(i.InputRegister(0) == rdx);
+      {
+        // We don't actually want to generate a pile of code for this, so just
+        // claim there is a stack frame, without generating one.
+        FrameScope scope(tasm(), StackFrame::NONE);
+        __ Call(
+            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
+            RelocInfo::CODE_TARGET);
+      }
+      __ int3();
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchDebugBreak:
+      __ DebugBreak();
+      break;
+    case kArchThrowTerminator:
+      unwinding_info_writer_.MarkBlockWillExit();
+      break;
+    case kArchNop:
+      // don't emit code for nops.
+      break;
+    case kArchDeoptimize: {
+      DeoptimizationExit* exit =
+          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
+      __ jmp(exit->label());
+      break;
+    }
+    case kArchRet:
+      AssembleReturn(instr->InputAt(0));
+      break;
+    case kArchFramePointer:
+      __ movq(i.OutputRegister(), rbp);
+      break;
+    case kArchParentFramePointer:
+      if (frame_access_state()->has_frame()) {
+        __ movq(i.OutputRegister(), Operand(rbp, 0));
+      } else {
+        __ movq(i.OutputRegister(), rbp);
+      }
+      break;
+    case kArchStackPointerGreaterThan: {
+      // Potentially apply an offset to the current stack pointer before the
+      // comparison to consider the size difference of an optimized frame versus
+      // the contained unoptimized frames.
+
+      Register lhs_register = rsp;
+      uint32_t offset;
+
+      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
+        lhs_register = kScratchRegister;
+        __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
+      }
+
+      constexpr size_t kValueIndex = 0;
+      if (HasAddressingMode(instr)) {
+        __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
+      } else {
+        __ cmpq(lhs_register, i.InputRegister(kValueIndex));
+      }
+      break;
+    }
+    case kArchStackCheckOffset:
+      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
+      break;
+    case kArchTruncateDoubleToI: {
+      auto result = i.OutputRegister();
+      auto input = i.InputDoubleRegister(0);
+      auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
+          this, result, input, DetermineStubCallMode(),
+          &unwinding_info_writer_);
+      // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. Using
+      // Cvttsd2siq requires the movl below to clear the upper 32 bits of the
+      // result (undoing any sign extension from the 64-bit conversion).
+      __ Cvttsd2siq(result, input);
+      __ cmpq(result, Immediate(1));
+      __ j(overflow, ool->entry());
+      __ bind(ool->exit());
+      __ movl(result, result);
+      break;
+    }
+    case kArchStoreWithWriteBarrier: {
+      RecordWriteMode mode =
+          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
+      Register object = i.InputRegister(0);
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      Register value = i.InputRegister(index);
+      Register scratch0 = i.TempRegister(0);
+      Register scratch1 = i.TempRegister(1);
+      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
+                                                   scratch0, scratch1, mode,
+                                                   DetermineStubCallMode());
+      __ StoreTaggedField(operand, value);
+      __ CheckPageFlag(object, scratch0,
+                       MemoryChunk::kPointersFromHereAreInterestingMask,
+                       not_zero, ool->entry());
+      __ bind(ool->exit());
+      break;
+    }
+    case kArchWordPoisonOnSpeculation:
+      DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
+      __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
+      break;
+    case kX64MFence:
+      __ mfence();
+      break;
+    case kX64LFence:
+      __ lfence();
+      break;
+    case kArchStackSlot: {
+      FrameOffset offset =
+          frame_access_state()->GetFrameOffset(i.InputInt32(0));
+      Register base = offset.from_stack_pointer() ? rsp : rbp;
+      __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
+      break;
+    }
+    case kIeee754Float64Acos:
+      ASSEMBLE_IEEE754_UNOP(acos);
+      break;
+    case kIeee754Float64Acosh:
+      ASSEMBLE_IEEE754_UNOP(acosh);
+      break;
+    case kIeee754Float64Asin:
+      ASSEMBLE_IEEE754_UNOP(asin);
+      break;
+    case kIeee754Float64Asinh:
+      ASSEMBLE_IEEE754_UNOP(asinh);
+      break;
+    case kIeee754Float64Atan:
+      ASSEMBLE_IEEE754_UNOP(atan);
+      break;
+    case kIeee754Float64Atanh:
+      ASSEMBLE_IEEE754_UNOP(atanh);
+      break;
+    case kIeee754Float64Atan2:
+      ASSEMBLE_IEEE754_BINOP(atan2);
+      break;
+    case kIeee754Float64Cbrt:
+      ASSEMBLE_IEEE754_UNOP(cbrt);
+      break;
+    case kIeee754Float64Cos:
+      ASSEMBLE_IEEE754_UNOP(cos);
+      break;
+    case kIeee754Float64Cosh:
+      ASSEMBLE_IEEE754_UNOP(cosh);
+      break;
+    case kIeee754Float64Exp:
+      ASSEMBLE_IEEE754_UNOP(exp);
+      break;
+    case kIeee754Float64Expm1:
+      ASSEMBLE_IEEE754_UNOP(expm1);
+      break;
+    case kIeee754Float64Log:
+      ASSEMBLE_IEEE754_UNOP(log);
+      break;
+    case kIeee754Float64Log1p:
+      ASSEMBLE_IEEE754_UNOP(log1p);
+      break;
+    case kIeee754Float64Log2:
+      ASSEMBLE_IEEE754_UNOP(log2);
+      break;
+    case kIeee754Float64Log10:
+      ASSEMBLE_IEEE754_UNOP(log10);
+      break;
+    case kIeee754Float64Pow:
+      ASSEMBLE_IEEE754_BINOP(pow);
+      break;
+    case kIeee754Float64Sin:
+      ASSEMBLE_IEEE754_UNOP(sin);
+      break;
+    case kIeee754Float64Sinh:
+      ASSEMBLE_IEEE754_UNOP(sinh);
+      break;
+    case kIeee754Float64Tan:
+      ASSEMBLE_IEEE754_UNOP(tan);
+      break;
+    case kIeee754Float64Tanh:
+      ASSEMBLE_IEEE754_UNOP(tanh);
+      break;
+    case kX64Add32:
+      ASSEMBLE_BINOP(addl);
+      break;
+    case kX64Add:
+      ASSEMBLE_BINOP(addq);
+      break;
+    case kX64Sub32:
+      ASSEMBLE_BINOP(subl);
+      break;
+    case kX64Sub:
+      ASSEMBLE_BINOP(subq);
+      break;
+    case kX64And32:
+      ASSEMBLE_BINOP(andl);
+      break;
+    case kX64And:
+      ASSEMBLE_BINOP(andq);
+      break;
+    case kX64Cmp8:
+      ASSEMBLE_COMPARE(cmpb);
+      break;
+    case kX64Cmp16:
+      ASSEMBLE_COMPARE(cmpw);
+      break;
+    case kX64Cmp32:
+      ASSEMBLE_COMPARE(cmpl);
+      break;
+    case kX64Cmp:
+      ASSEMBLE_COMPARE(cmpq);
+      break;
+    case kX64Test8:
+      ASSEMBLE_COMPARE(testb);
+      break;
+    case kX64Test16:
+      ASSEMBLE_COMPARE(testw);
+      break;
+    case kX64Test32:
+      ASSEMBLE_COMPARE(testl);
+      break;
+    case kX64Test:
+      ASSEMBLE_COMPARE(testq);
+      break;
+    case kX64Imul32:
+      ASSEMBLE_MULT(imull);
+      break;
+    case kX64Imul:
+      ASSEMBLE_MULT(imulq);
+      break;
+    case kX64ImulHigh32:
+      if (HasRegisterInput(instr, 1)) {
+        __ imull(i.InputRegister(1));
+      } else {
+        __ imull(i.InputOperand(1));
+      }
+      break;
+    case kX64UmulHigh32:
+      if (HasRegisterInput(instr, 1)) {
+        __ mull(i.InputRegister(1));
+      } else {
+        __ mull(i.InputOperand(1));
+      }
+      break;
+    case kX64Idiv32:
+      __ cdq();
+      __ idivl(i.InputRegister(1));
+      break;
+    case kX64Idiv:
+      __ cqo();
+      __ idivq(i.InputRegister(1));
+      break;
+    case kX64Udiv32:
+      __ xorl(rdx, rdx);
+      __ divl(i.InputRegister(1));
+      break;
+    case kX64Udiv:
+      __ xorq(rdx, rdx);
+      __ divq(i.InputRegister(1));
+      break;
+    case kX64Not:
+      ASSEMBLE_UNOP(notq);
+      break;
+    case kX64Not32:
+      ASSEMBLE_UNOP(notl);
+      break;
+    case kX64Neg:
+      ASSEMBLE_UNOP(negq);
+      break;
+    case kX64Neg32:
+      ASSEMBLE_UNOP(negl);
+      break;
+    case kX64Or32:
+      ASSEMBLE_BINOP(orl);
+      break;
+    case kX64Or:
+      ASSEMBLE_BINOP(orq);
+      break;
+    case kX64Xor32:
+      ASSEMBLE_BINOP(xorl);
+      break;
+    case kX64Xor:
+      ASSEMBLE_BINOP(xorq);
+      break;
+    case kX64Shl32:
+      ASSEMBLE_SHIFT(shll, 5);
+      break;
+    case kX64Shl:
+      ASSEMBLE_SHIFT(shlq, 6);
+      break;
+    case kX64Shr32:
+      ASSEMBLE_SHIFT(shrl, 5);
+      break;
+    case kX64Shr:
+      ASSEMBLE_SHIFT(shrq, 6);
+      break;
+    case kX64Sar32:
+      ASSEMBLE_SHIFT(sarl, 5);
+      break;
+    case kX64Sar:
+      ASSEMBLE_SHIFT(sarq, 6);
+      break;
+    case kX64Rol32:
+      ASSEMBLE_SHIFT(roll, 5);
+      break;
+    case kX64Rol:
+      ASSEMBLE_SHIFT(rolq, 6);
+      break;
+    case kX64Ror32:
+      ASSEMBLE_SHIFT(rorl, 5);
+      break;
+    case kX64Ror:
+      ASSEMBLE_SHIFT(rorq, 6);
+      break;
+    case kX64Lzcnt:
+      if (HasRegisterInput(instr, 0)) {
+        __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Lzcnt32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Tzcnt:
+      if (HasRegisterInput(instr, 0)) {
+        __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Tzcnt32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Popcnt:
+      if (HasRegisterInput(instr, 0)) {
+        __ Popcntq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Popcntq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Popcnt32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Popcntl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ Popcntl(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Bswap:
+      __ bswapq(i.OutputRegister());
+      break;
+    case kX64Bswap32:
+      __ bswapl(i.OutputRegister());
+      break;
+    case kSSEFloat32Cmp:
+      ASSEMBLE_SSE_BINOP(Ucomiss);
+      break;
+    case kSSEFloat32Add:
+      ASSEMBLE_SSE_BINOP(addss);
+      break;
+    case kSSEFloat32Sub:
+      ASSEMBLE_SSE_BINOP(subss);
+      break;
+    case kSSEFloat32Mul:
+      ASSEMBLE_SSE_BINOP(mulss);
+      break;
+    case kSSEFloat32Div:
+      ASSEMBLE_SSE_BINOP(divss);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kSSEFloat32Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ Pcmpeqd(tmp, tmp);
+      __ Psrlq(tmp, 33);
+      __ Andps(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ Pcmpeqd(tmp, tmp);
+      __ Psllq(tmp, 31);
+      __ Xorps(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat32Sqrt:
+      ASSEMBLE_SSE_UNOP(sqrtss);
+      break;
+    case kSSEFloat32ToFloat64:
+      ASSEMBLE_SSE_UNOP(Cvtss2sd);
+      break;
+    case kSSEFloat32Round: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kSSEFloat32ToInt32:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEFloat32ToUint32: {
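+      // The uint32 range fits in a signed 64-bit truncation, so convert with
+      // Cvttss2siq and use the low 32 bits of the result.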
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    }
+    case kSSEFloat64Cmp:
+      ASSEMBLE_SSE_BINOP(Ucomisd);
+      break;
+    case kSSEFloat64Add:
+      ASSEMBLE_SSE_BINOP(addsd);
+      break;
+    case kSSEFloat64Sub:
+      ASSEMBLE_SSE_BINOP(subsd);
+      break;
+    case kSSEFloat64Mul:
+      ASSEMBLE_SSE_BINOP(mulsd);
+      break;
+    case kSSEFloat64Div:
+      ASSEMBLE_SSE_BINOP(divsd);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kSSEFloat64Mod: {
+      __ AllocateStackSpace(kDoubleSize);
+      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                       kDoubleSize);
+      // Move values to st(0) and st(1).
+      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
+      __ fld_d(Operand(rsp, 0));
+      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
+      __ fld_d(Operand(rsp, 0));
+      // Loop while fprem isn't done.
+      Label mod_loop;
+      __ bind(&mod_loop);
+      // This instruction traps on all kinds of inputs, but we are assuming
+      // the floating point control word is set to ignore them all.
+      __ fprem();
+      // The following 2 instructions implicitly use rax.
+      __ fnstsw_ax();
+      if (CpuFeatures::IsSupported(SAHF)) {
+        CpuFeatureScope sahf_scope(tasm(), SAHF);
+        __ sahf();
+      } else {
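+        // Without SAHF, copy the x87 status byte (including C2, the
+        // "reduction incomplete" bit) into RFLAGS via pushq/popfq so the
+        // parity-flag check below still works.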
+        __ shrl(rax, Immediate(8));
+        __ andl(rax, Immediate(0xFF));
+        __ pushq(rax);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+        __ popfq();
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         -kSystemPointerSize);
+      }
+      __ j(parity_even, &mod_loop);
+      // Move output to stack and clean up.
+      __ fstp(1);
+      __ fstp_d(Operand(rsp, 0));
+      __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
+      __ addq(rsp, Immediate(kDoubleSize));
+      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                       -kDoubleSize);
+      break;
+    }
+    case kSSEFloat32Max: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(above, &done_compare, Label::kNear);
+      __ j(below, &compare_swap, Label::kNear);
+      __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
+      __ testl(kScratchRegister, Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat32Min: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(below, &done_compare, Label::kNear);
+      __ j(above, &compare_swap, Label::kNear);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
+      } else {
+        __ Movss(kScratchDoubleReg, i.InputOperand(1));
+        __ Movmskps(kScratchRegister, kScratchDoubleReg);
+      }
+      __ testl(kScratchRegister, Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat64Max: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(above, &done_compare, Label::kNear);
+      __ j(below, &compare_swap, Label::kNear);
+      __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
+      __ testl(kScratchRegister, Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kSSEFloat64Min: {
+      Label compare_swap, done_compare;
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      auto ool =
+          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
+      __ j(parity_even, ool->entry());
+      __ j(below, &done_compare, Label::kNear);
+      __ j(above, &compare_swap, Label::kNear);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
+      } else {
+        __ Movsd(kScratchDoubleReg, i.InputOperand(1));
+        __ Movmskpd(kScratchRegister, kScratchDoubleReg);
+      }
+      __ testl(kScratchRegister, Immediate(1));
+      __ j(zero, &done_compare, Label::kNear);
+      __ bind(&compare_swap);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      __ bind(&done_compare);
+      __ bind(ool->exit());
+      break;
+    }
+    case kX64F64x2Abs:
+    case kSSEFloat64Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ Pcmpeqd(tmp, tmp);
+      __ Psrlq(tmp, 1);
+      __ Andpd(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kX64F64x2Neg:
+    case kSSEFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ Pcmpeqd(tmp, tmp);
+      __ Psllq(tmp, 63);
+      __ Xorpd(i.OutputDoubleRegister(), tmp);
+      break;
+    }
+    case kSSEFloat64Sqrt:
+      ASSEMBLE_SSE_UNOP(Sqrtsd);
+      break;
+    case kSSEFloat64Round: {
+      CpuFeatureScope sse_scope(tasm(), SSE4_1);
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
+      break;
+    }
+    case kSSEFloat64ToFloat32:
+      ASSEMBLE_SSE_UNOP(Cvtsd2ss);
+      break;
+    case kSSEFloat64ToInt32:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEFloat64ToUint32: {
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      if (MiscField::decode(instr->opcode())) {
+        __ AssertZeroExtended(i.OutputRegister());
+      }
+      break;
+    }
+    case kSSEFloat32ToInt64:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
+      }
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+        Label done;
+        Label fail;
+        __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
+        if (instr->InputAt(0)->IsFPRegister()) {
+          __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
+        } else {
+          __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
+        }
+        // If the input is NaN, then the conversion fails.
+        __ j(parity_even, &fail, Label::kNear);
+        // If the input is INT64_MIN, then the conversion succeeds.
+        __ j(equal, &done, Label::kNear);
+        __ cmpq(i.OutputRegister(0), Immediate(1));
+        // If the conversion results in INT64_MIN, but the input was not
+        // INT64_MIN, then the conversion fails.
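+        // cmpq computes result - 1, which only overflows for INT64_MIN, so
+        // the overflow flag distinguishes saturation from a true INT64_MIN.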
+        __ j(no_overflow, &done, Label::kNear);
+        __ bind(&fail);
+        __ Set(i.OutputRegister(1), 0);
+        __ bind(&done);
+      }
+      break;
+    case kSSEFloat64ToInt64:
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
+      } else {
+        __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
+      }
+      if (instr->OutputCount() > 1) {
+        __ Set(i.OutputRegister(1), 1);
+        Label done;
+        Label fail;
+        __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
+        if (instr->InputAt(0)->IsFPRegister()) {
+          __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
+        } else {
+          __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
+        }
+        // If the input is NaN, then the conversion fails.
+        __ j(parity_even, &fail, Label::kNear);
+        // If the input is INT64_MIN, then the conversion succeeds.
+        __ j(equal, &done, Label::kNear);
+        __ cmpq(i.OutputRegister(0), Immediate(1));
+        // If the conversion results in INT64_MIN, but the input was not
+        // INT64_MIN, then the conversion fails.
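+        // Same overflow-flag trick as in the float32 case above.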
+        __ j(no_overflow, &done, Label::kNear);
+        __ bind(&fail);
+        __ Set(i.OutputRegister(1), 0);
+        __ bind(&done);
+      }
+      break;
+    case kSSEFloat32ToUint64: {
+      Label fail;
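+      // OutputRegister(1), when present, is a success flag: cleared up front
+      // and set to 1 only if the conversion does not bail out to |fail|.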
+      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
+      } else {
+        __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
+      }
+      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
+      __ bind(&fail);
+      break;
+    }
+    case kSSEFloat64ToUint64: {
+      Label fail;
+      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
+      } else {
+        __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
+      }
+      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
+      __ bind(&fail);
+      break;
+    }
+    case kSSEInt32ToFloat64:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEInt32ToFloat32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEInt64ToFloat32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEInt64ToFloat64:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEUint64ToFloat32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEUint64ToFloat64:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEUint32ToFloat64:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEUint32ToFloat32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kSSEFloat64ExtractLowWord32:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kSSEFloat64ExtractHighWord32:
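+      // Read the upper 32 bits of the double, either directly from the stack
+      // slot or via pextrd lane 1.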
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
+      } else {
+        __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
+      }
+      break;
+    case kSSEFloat64InsertLowWord32:
+      if (HasRegisterInput(instr, 1)) {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
+      } else {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
+      }
+      break;
+    case kSSEFloat64InsertHighWord32:
+      if (HasRegisterInput(instr, 1)) {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
+      } else {
+        __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
+      }
+      break;
+    case kSSEFloat64LoadLowWord32:
+      if (HasRegisterInput(instr, 0)) {
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kAVXFloat32Cmp: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      break;
+    }
+    case kAVXFloat32Add:
+      ASSEMBLE_AVX_BINOP(vaddss);
+      break;
+    case kAVXFloat32Sub:
+      ASSEMBLE_AVX_BINOP(vsubss);
+      break;
+    case kAVXFloat32Mul:
+      ASSEMBLE_AVX_BINOP(vmulss);
+      break;
+    case kAVXFloat32Div:
+      ASSEMBLE_AVX_BINOP(vdivss);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulss depending on the result.
+      __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kAVXFloat64Cmp: {
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      if (instr->InputAt(1)->IsFPRegister()) {
+        __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
+      } else {
+        __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
+      }
+      break;
+    }
+    case kAVXFloat64Add:
+      ASSEMBLE_AVX_BINOP(vaddsd);
+      break;
+    case kAVXFloat64Sub:
+      ASSEMBLE_AVX_BINOP(vsubsd);
+      break;
+    case kAVXFloat64Mul:
+      ASSEMBLE_AVX_BINOP(vmulsd);
+      break;
+    case kAVXFloat64Div:
+      ASSEMBLE_AVX_BINOP(vdivsd);
+      // Don't delete this mov. It may improve performance on some CPUs,
+      // when there is a (v)mulsd depending on the result.
+      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+      break;
+    case kAVXFloat32Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ vpcmpeqd(tmp, tmp, tmp);
+      __ vpsrlq(tmp, tmp, 33);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
+      } else {
+        __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      }
+      break;
+    }
+    case kAVXFloat32Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ vpcmpeqd(tmp, tmp, tmp);
+      __ vpsllq(tmp, tmp, 31);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
+      } else {
+        __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      }
+      break;
+    }
+    case kAVXFloat64Abs: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ vpcmpeqd(tmp, tmp, tmp);
+      __ vpsrlq(tmp, tmp, 1);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
+      } else {
+        __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      }
+      break;
+    }
+    case kAVXFloat64Neg: {
+      // TODO(bmeurer): Use RIP relative 128-bit constants.
+      CpuFeatureScope avx_scope(tasm(), AVX);
+      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
+      __ vpcmpeqd(tmp, tmp, tmp);
+      __ vpsllq(tmp, tmp, 63);
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
+      } else {
+        __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
+      }
+      break;
+    }
+    case kSSEFloat64SilenceNaN:
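+      // Subtracting +0.0 leaves ordinary values unchanged but quiets a
+      // signaling NaN.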
+      __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
+      __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
+      break;
+    case kX64Movsxbl:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movsxbl);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movzxbl:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movzxbl);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movsxbq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movsxbq);
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movzxbq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movzxbq);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movb: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      if (HasImmediateInput(instr, index)) {
+        __ movb(operand, Immediate(i.InputInt8(index)));
+      } else {
+        __ movb(operand, i.InputRegister(index));
+      }
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    }
+    case kX64Movsxwl:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movsxwl);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movzxwl:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movzxwl);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movsxwq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movsxwq);
+      break;
+    case kX64Movzxwq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movzxwq);
+      __ AssertZeroExtended(i.OutputRegister());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movw: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      if (HasImmediateInput(instr, index)) {
+        __ movw(operand, Immediate(i.InputInt16(index)));
+      } else {
+        __ movw(operand, i.InputRegister(index));
+      }
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    }
+    case kX64Movl:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (instr->HasOutput()) {
+        if (HasAddressingMode(instr)) {
+          __ movl(i.OutputRegister(), i.MemoryOperand());
+        } else {
+          if (HasRegisterInput(instr, 0)) {
+            __ movl(i.OutputRegister(), i.InputRegister(0));
+          } else {
+            __ movl(i.OutputRegister(), i.InputOperand(0));
+          }
+        }
+        __ AssertZeroExtended(i.OutputRegister());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        if (HasImmediateInput(instr, index)) {
+          __ movl(operand, i.InputImmediate(index));
+        } else {
+          __ movl(operand, i.InputRegister(index));
+        }
+      }
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movsxlq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      ASSEMBLE_MOVX(movsxlq);
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64MovqDecompressTaggedSigned: {
+      CHECK(instr->HasOutput());
+      __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    }
+    case kX64MovqDecompressTaggedPointer: {
+      CHECK(instr->HasOutput());
+      __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    }
+    case kX64MovqDecompressAnyTagged: {
+      CHECK(instr->HasOutput());
+      __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    }
+    case kX64MovqCompressTagged: {
+      CHECK(!instr->HasOutput());
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      if (HasImmediateInput(instr, index)) {
+        __ StoreTaggedField(operand, i.InputImmediate(index));
+      } else {
+        __ StoreTaggedField(operand, i.InputRegister(index));
+      }
+      break;
+    }
+    case kX64Movq:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (instr->HasOutput()) {
+        __ movq(i.OutputRegister(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        if (HasImmediateInput(instr, index)) {
+          __ movq(operand, i.InputImmediate(index));
+        } else {
+          __ movq(operand, i.InputRegister(index));
+        }
+      }
+      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
+      break;
+    case kX64Movss:
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (instr->HasOutput()) {
+        __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ Movss(operand, i.InputDoubleRegister(index));
+      }
+      break;
+    case kX64Movsd: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (instr->HasOutput()) {
+        const MemoryAccessMode access_mode =
+            static_cast<MemoryAccessMode>(MiscField::decode(opcode));
+        if (access_mode == kMemoryAccessPoisoned) {
+          // If we have to poison the loaded value, we load into a general
+          // purpose register first, mask it with the poison, and move the
+          // value from the general purpose register into the double register.
+          __ movq(kScratchRegister, i.MemoryOperand());
+          __ andq(kScratchRegister, kSpeculationPoisonRegister);
+          __ Movq(i.OutputDoubleRegister(), kScratchRegister);
+        } else {
+          __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
+        }
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ Movsd(operand, i.InputDoubleRegister(index));
+      }
+      break;
+    }
+    case kX64Movdqu: {
+      CpuFeatureScope sse_scope(tasm(), SSSE3);
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (instr->HasOutput()) {
+        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
+      } else {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ Movdqu(operand, i.InputSimd128Register(index));
+      }
+      break;
+    }
+    case kX64BitcastFI:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ movl(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kX64BitcastDL:
+      if (instr->InputAt(0)->IsFPStackSlot()) {
+        __ movq(i.OutputRegister(), i.InputOperand(0));
+      } else {
+        __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
+      }
+      break;
+    case kX64BitcastIF:
+      if (HasRegisterInput(instr, 0)) {
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64BitcastLD:
+      if (HasRegisterInput(instr, 0)) {
+        __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
+      } else {
+        __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
+      }
+      break;
+    case kX64Lea32: {
+      AddressingMode mode = AddressingModeField::decode(instr->opcode());
+      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
+      // and addressing mode just happen to work out. The "addl"/"subl" forms
+      // in these cases are faster based on measurements.
+      if (i.InputRegister(0) == i.OutputRegister()) {
+        if (mode == kMode_MRI) {
+          int32_t constant_summand = i.InputInt32(1);
+          DCHECK_NE(0, constant_summand);
+          if (constant_summand > 0) {
+            __ addl(i.OutputRegister(), Immediate(constant_summand));
+          } else {
+            __ subl(i.OutputRegister(),
+                    Immediate(base::NegateWithWraparound(constant_summand)));
+          }
+        } else if (mode == kMode_MR1) {
+          if (i.InputRegister(1) == i.OutputRegister()) {
+            __ shll(i.OutputRegister(), Immediate(1));
+          } else {
+            __ addl(i.OutputRegister(), i.InputRegister(1));
+          }
+        } else if (mode == kMode_M2) {
+          __ shll(i.OutputRegister(), Immediate(1));
+        } else if (mode == kMode_M4) {
+          __ shll(i.OutputRegister(), Immediate(2));
+        } else if (mode == kMode_M8) {
+          __ shll(i.OutputRegister(), Immediate(3));
+        } else {
+          __ leal(i.OutputRegister(), i.MemoryOperand());
+        }
+      } else if (mode == kMode_MR1 &&
+                 i.InputRegister(1) == i.OutputRegister()) {
+        __ addl(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ leal(i.OutputRegister(), i.MemoryOperand());
+      }
+      __ AssertZeroExtended(i.OutputRegister());
+      break;
+    }
+    case kX64Lea: {
+      AddressingMode mode = AddressingModeField::decode(instr->opcode());
+      // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
+      // and addressing mode just happen to work out. The "addq"/"subq" forms
+      // in these cases are faster based on measurements.
+      if (i.InputRegister(0) == i.OutputRegister()) {
+        if (mode == kMode_MRI) {
+          int32_t constant_summand = i.InputInt32(1);
+          if (constant_summand > 0) {
+            __ addq(i.OutputRegister(), Immediate(constant_summand));
+          } else if (constant_summand < 0) {
+            __ subq(i.OutputRegister(), Immediate(-constant_summand));
+          }
+        } else if (mode == kMode_MR1) {
+          if (i.InputRegister(1) == i.OutputRegister()) {
+            __ shlq(i.OutputRegister(), Immediate(1));
+          } else {
+            __ addq(i.OutputRegister(), i.InputRegister(1));
+          }
+        } else if (mode == kMode_M2) {
+          __ shlq(i.OutputRegister(), Immediate(1));
+        } else if (mode == kMode_M4) {
+          __ shlq(i.OutputRegister(), Immediate(2));
+        } else if (mode == kMode_M8) {
+          __ shlq(i.OutputRegister(), Immediate(3));
+        } else {
+          __ leaq(i.OutputRegister(), i.MemoryOperand());
+        }
+      } else if (mode == kMode_MR1 &&
+                 i.InputRegister(1) == i.OutputRegister()) {
+        __ addq(i.OutputRegister(), i.InputRegister(0));
+      } else {
+        __ leaq(i.OutputRegister(), i.MemoryOperand());
+      }
+      break;
+    }
+    case kX64Dec32:
+      __ decl(i.OutputRegister());
+      break;
+    case kX64Inc32:
+      __ incl(i.OutputRegister());
+      break;
+    case kX64Push:
+      if (HasAddressingMode(instr)) {
+        size_t index = 0;
+        Operand operand = i.MemoryOperand(&index);
+        __ pushq(operand);
+        frame_access_state()->IncreaseSPDelta(1);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+      } else if (HasImmediateInput(instr, 0)) {
+        __ pushq(i.InputImmediate(0));
+        frame_access_state()->IncreaseSPDelta(1);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+      } else if (HasRegisterInput(instr, 0)) {
+        __ pushq(i.InputRegister(0));
+        frame_access_state()->IncreaseSPDelta(1);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+      } else if (instr->InputAt(0)->IsFloatRegister() ||
+                 instr->InputAt(0)->IsDoubleRegister()) {
+        // TODO(titzer): use another machine instruction?
+        __ AllocateStackSpace(kDoubleSize);
+        frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kDoubleSize);
+        __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
+      } else if (instr->InputAt(0)->IsSimd128Register()) {
+        // TODO(titzer): use another machine instruction?
+        __ AllocateStackSpace(kSimd128Size);
+        frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                              kSystemPointerSize);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSimd128Size);
+        __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
+      } else if (instr->InputAt(0)->IsStackSlot() ||
+                 instr->InputAt(0)->IsFloatStackSlot() ||
+                 instr->InputAt(0)->IsDoubleStackSlot()) {
+        __ pushq(i.InputOperand(0));
+        frame_access_state()->IncreaseSPDelta(1);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+      } else {
+        DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
+        __ Movups(kScratchDoubleReg, i.InputOperand(0));
+        // TODO(titzer): use another machine instruction?
+        __ AllocateStackSpace(kSimd128Size);
+        frame_access_state()->IncreaseSPDelta(kSimd128Size /
+                                              kSystemPointerSize);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSimd128Size);
+        __ Movups(Operand(rsp, 0), kScratchDoubleReg);
+      }
+      break;
+    case kX64Poke: {
+      int slot = MiscField::decode(instr->opcode());
+      if (HasImmediateInput(instr, 0)) {
+        __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
+      } else if (instr->InputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Movsd(Operand(rsp, slot * kSystemPointerSize),
+                   i.InputDoubleRegister(0));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
+          __ Movss(Operand(rsp, slot * kSystemPointerSize),
+                   i.InputFloatRegister(0));
+        }
+      } else {
+        __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
+      }
+      break;
+    }
+    case kX64Peek: {
+      int reverse_slot = i.InputInt32(0);
+      int offset =
+          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
+      if (instr->OutputAt(0)->IsFPRegister()) {
+        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
+        if (op->representation() == MachineRepresentation::kFloat64) {
+          __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
+        } else if (op->representation() == MachineRepresentation::kFloat32) {
+          __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
+        } else {
+          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
+          __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
+        }
+      } else {
+        __ movq(i.OutputRegister(), Operand(rbp, offset));
+      }
+      break;
+    }
+    case kX64F64x2Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (instr->InputAt(0)->IsFPRegister()) {
+        __ Movddup(dst, i.InputDoubleRegister(0));
+      } else {
+        __ Movddup(dst, i.InputOperand(0));
+      }
+      break;
+    }
+    case kX64F64x2ExtractLane: {
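+      // Pextrq can only write a general-purpose register, so go through
+      // kScratchRegister and then Movq into the XMM output.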
+      __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
+      __ Movq(i.OutputDoubleRegister(), kScratchRegister);
+      break;
+    }
+    case kX64F64x2Sqrt: {
+      __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64F64x2Add: {
+      ASSEMBLE_SIMD_BINOP(addpd);
+      break;
+    }
+    case kX64F64x2Sub: {
+      ASSEMBLE_SIMD_BINOP(subpd);
+      break;
+    }
+    case kX64F64x2Mul: {
+      ASSEMBLE_SIMD_BINOP(mulpd);
+      break;
+    }
+    case kX64F64x2Div: {
+      ASSEMBLE_SIMD_BINOP(divpd);
+      break;
+    }
+    case kX64F64x2Min: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The minpd instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minpd in both orders, merge the results, and adjust.
+      __ Movapd(kScratchDoubleReg, src1);
+      __ Minpd(kScratchDoubleReg, dst);
+      __ Minpd(dst, src1);
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ Orpd(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
+      __ Orpd(kScratchDoubleReg, dst);
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64F64x2Max: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The maxpd instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform maxpd in both orders, merge the results, and adjust.
+      __ Movapd(kScratchDoubleReg, src1);
+      __ Maxpd(kScratchDoubleReg, dst);
+      __ Maxpd(dst, src1);
+      // Find discrepancies.
+      __ Xorpd(dst, kScratchDoubleReg);
+      // Propagate NaNs, which may be non-canonical.
+      __ Orpd(kScratchDoubleReg, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ Subpd(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
+      __ Psrlq(dst, 13);
+      __ Andnpd(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64F64x2Eq: {
+      ASSEMBLE_SIMD_BINOP(cmpeqpd);
+      break;
+    }
+    case kX64F64x2Ne: {
+      ASSEMBLE_SIMD_BINOP(cmpneqpd);
+      break;
+    }
+    case kX64F64x2Lt: {
+      ASSEMBLE_SIMD_BINOP(cmpltpd);
+      break;
+    }
+    case kX64F64x2Le: {
+      ASSEMBLE_SIMD_BINOP(cmplepd);
+      break;
+    }
+    case kX64F64x2Qfma: {
+      if (CpuFeatures::IsSupported(FMA3)) {
+        CpuFeatureScope fma3_scope(tasm(), FMA3);
+        __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                       i.InputSimd128Register(2));
+      } else {
+        XMMRegister tmp = i.TempSimd128Register(0);
+        __ Movapd(tmp, i.InputSimd128Register(2));
+        __ Mulpd(tmp, i.InputSimd128Register(1));
+        __ Addpd(i.OutputSimd128Register(), tmp);
+      }
+      break;
+    }
+    case kX64F64x2Qfms: {
+      if (CpuFeatures::IsSupported(FMA3)) {
+        CpuFeatureScope fma3_scope(tasm(), FMA3);
+        __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                        i.InputSimd128Register(2));
+      } else {
+        XMMRegister tmp = i.TempSimd128Register(0);
+        __ Movapd(tmp, i.InputSimd128Register(2));
+        __ Mulpd(tmp, i.InputSimd128Register(1));
+        __ Subpd(i.OutputSimd128Register(), tmp);
+      }
+      break;
+    }
+    case kX64F32x4Splat: {
+      __ Shufps(i.OutputSimd128Register(), i.InputDoubleRegister(0), 0);
+      break;
+    }
+    case kX64F32x4ExtractLane: {
+      if (CpuFeatures::IsSupported(AVX)) {
+        CpuFeatureScope avx_scope(tasm(), AVX);
+        XMMRegister src = i.InputSimd128Register(0);
+        // Use vshufps to move the requested lane into the low slot; junk is
+        // left in the 3 high lanes.
+        __ vshufps(i.OutputDoubleRegister(), src, src, i.InputInt8(1));
+      } else {
+        __ extractps(kScratchRegister, i.InputSimd128Register(0),
+                     i.InputUint8(1));
+        __ movd(i.OutputDoubleRegister(), kScratchRegister);
+      }
+      break;
+    }
+    case kX64F32x4ReplaceLane: {
+      // The insertps instruction uses imm8[5:4] to indicate the lane
+      // that needs to be replaced.
+      byte select = i.InputInt8(1) << 4 & 0x30;
+      if (instr->InputAt(2)->IsFPRegister()) {
+        __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
+                    select);
+      } else {
+        __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
+      }
+      break;
+    }
+    case kX64F32x4SConvertI32x4: {
+      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64F32x4UConvertI32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
+      __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55});  // get lo 16 bits
+      __ Psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
+      __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
+      __ Psrld(dst, byte{1});            // divide by 2 to get in unsigned range
+      __ Cvtdq2ps(dst, dst);             // convert hi exactly
+      __ Addps(dst, dst);                // double hi, exactly
+      __ Addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
+      break;
+    }
+    case kX64F32x4Abs: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psrld(kScratchDoubleReg, byte{1});
+        __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
+      } else {
+        __ Pcmpeqd(dst, dst);
+        __ Psrld(dst, byte{1});
+        __ Andps(dst, i.InputSimd128Register(0));
+      }
+      break;
+    }
+    case kX64F32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Pslld(kScratchDoubleReg, byte{31});
+        __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
+      } else {
+        __ Pcmpeqd(dst, dst);
+        __ Pslld(dst, byte{31});
+        __ Xorps(dst, i.InputSimd128Register(0));
+      }
+      break;
+    }
+    case kX64F32x4Sqrt: {
+      __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64F32x4RecipApprox: {
+      __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64F32x4RecipSqrtApprox: {
+      __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64F32x4Add: {
+      ASSEMBLE_SIMD_BINOP(addps);
+      break;
+    }
+    case kX64F32x4AddHoriz: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64F32x4Sub: {
+      ASSEMBLE_SIMD_BINOP(subps);
+      break;
+    }
+    case kX64F32x4Mul: {
+      ASSEMBLE_SIMD_BINOP(mulps);
+      break;
+    }
+    case kX64F32x4Div: {
+      ASSEMBLE_SIMD_BINOP(divps);
+      break;
+    }
+    case kX64F32x4Min: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The minps instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform minps in both orders, merge the results, and adjust.
+      __ Movaps(kScratchDoubleReg, src1);
+      __ Minps(kScratchDoubleReg, dst);
+      __ Minps(dst, src1);
+      // Propagate -0's and NaNs, which may be non-canonical.
+      __ Orps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by quieting and clearing the payload.
+      __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
+      __ Orps(kScratchDoubleReg, dst);
+      __ Psrld(dst, byte{10});
+      __ Andnps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64F32x4Max: {
+      XMMRegister src1 = i.InputSimd128Register(1),
+                  dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The maxps instruction doesn't propagate NaNs and +0's in its first
+      // operand. Perform maxps in both orders, merge the results, and adjust.
+      __ Movaps(kScratchDoubleReg, src1);
+      __ Maxps(kScratchDoubleReg, dst);
+      __ Maxps(dst, src1);
+      // Find discrepancies.
+      __ Xorps(dst, kScratchDoubleReg);
+      // Propagate NaNs, which may be non-canonical.
+      __ Orps(kScratchDoubleReg, dst);
+      // Propagate sign discrepancy and (subtle) quiet NaNs.
+      __ Subps(kScratchDoubleReg, dst);
+      // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
+      __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
+      __ Psrld(dst, byte{10});
+      __ Andnps(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64F32x4Eq: {
+      ASSEMBLE_SIMD_BINOP(cmpeqps);
+      break;
+    }
+    case kX64F32x4Ne: {
+      ASSEMBLE_SIMD_BINOP(cmpneqps);
+      break;
+    }
+    case kX64F32x4Lt: {
+      ASSEMBLE_SIMD_BINOP(cmpltps);
+      break;
+    }
+    case kX64F32x4Le: {
+      ASSEMBLE_SIMD_BINOP(cmpleps);
+      break;
+    }
+    case kX64F32x4Qfma: {
+      if (CpuFeatures::IsSupported(FMA3)) {
+        CpuFeatureScope fma3_scope(tasm(), FMA3);
+        __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                       i.InputSimd128Register(2));
+      } else {
+        XMMRegister tmp = i.TempSimd128Register(0);
+        __ Movaps(tmp, i.InputSimd128Register(2));
+        __ Mulps(tmp, i.InputSimd128Register(1));
+        __ Addps(i.OutputSimd128Register(), tmp);
+      }
+      break;
+    }
+    case kX64F32x4Qfms: {
+      if (CpuFeatures::IsSupported(FMA3)) {
+        CpuFeatureScope fma3_scope(tasm(), FMA3);
+        __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
+                        i.InputSimd128Register(2));
+      } else {
+        XMMRegister tmp = i.TempSimd128Register(0);
+        __ Movaps(tmp, i.InputSimd128Register(2));
+        __ Mulps(tmp, i.InputSimd128Register(1));
+        __ Subps(i.OutputSimd128Register(), tmp);
+      }
+      break;
+    }
+    case kX64F32x4Pmin: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Minps(dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kX64F32x4Pmax: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Maxps(dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kX64F32x4Round: {
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
+      break;
+    }
+    case kX64F64x2Round: {
+      RoundingMode const mode =
+          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
+      __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
+      break;
+    }
+    case kX64F64x2Pmin: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Minpd(dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kX64F64x2Pmax: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Maxpd(dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kX64I64x2Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (HasRegisterInput(instr, 0)) {
+        __ Movq(dst, i.InputRegister(0));
+      } else {
+        __ Movq(dst, i.InputOperand(0));
+      }
+      __ Movddup(dst, dst);
+      break;
+    }
+    case kX64I64x2ExtractLane: {
+      __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
+      break;
+    }
+    case kX64I64x2Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Movapd(kScratchDoubleReg, src);
+        src = kScratchDoubleReg;
+      }
+      __ Pxor(dst, dst);
+      __ Psubq(dst, src);
+      break;
+    }
+    case kX64I64x2BitMask: {
+      __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I64x2Shl: {
+      // Take shift value modulo 2^6.
+      ASSEMBLE_SIMD_SHIFT(psllq, 6);
+      break;
+    }
+    case kX64I64x2ShrS: {
+      // TODO(zhin): there is vpsraq, but it requires AVX512.
+      // Perform the arithmetic shift on each quadword one at a time.
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      Register tmp = i.ToRegister(instr->TempAt(0));
+      // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
+
+      // lower quadword
+      __ Pextrq(tmp, src, int8_t{0x0});
+      __ sarq_cl(tmp);
+      __ Pinsrq(dst, tmp, uint8_t{0x0});
+
+      // upper quadword
+      __ Pextrq(tmp, src, int8_t{0x1});
+      __ sarq_cl(tmp);
+      __ Pinsrq(dst, tmp, uint8_t{0x1});
+      break;
+    }
+    case kX64I64x2Add: {
+      ASSEMBLE_SIMD_BINOP(paddq);
+      break;
+    }
+    case kX64I64x2Sub: {
+      ASSEMBLE_SIMD_BINOP(psubq);
+      break;
+    }
+    case kX64I64x2Mul: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister left = i.InputSimd128Register(0);
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp1 = i.TempSimd128Register(0);
+      XMMRegister tmp2 = i.TempSimd128Register(1);
+
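+      // 64-bit multiply per lane from 32x32->64 pmuludq: the cross products
+      // (hi*lo and lo*hi) are summed and shifted up by 32, then added to
+      // lo*lo; the hi*hi term lies above bit 63 and is dropped.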
+      __ Movaps(tmp1, left);
+      __ Movaps(tmp2, right);
+
+      // Multiply high dword of each qword of left with right.
+      __ Psrlq(tmp1, 32);
+      __ Pmuludq(tmp1, right);
+
+      // Multiply high dword of each qword of right with left.
+      __ Psrlq(tmp2, 32);
+      __ Pmuludq(tmp2, left);
+
+      __ Paddq(tmp2, tmp1);
+      __ Psllq(tmp2, 32);
+
+      __ Pmuludq(left, right);
+      __ Paddq(left, tmp2);  // left == dst
+      break;
+    }
+    case kX64I64x2Eq: {
+      ASSEMBLE_SIMD_BINOP(pcmpeqq);
+      break;
+    }
+    case kX64I64x2ShrU: {
+      // Take shift value modulo 2^6.
+      ASSEMBLE_SIMD_SHIFT(psrlq, 6);
+      break;
+    }
+    case kX64I32x4Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (HasRegisterInput(instr, 0)) {
+        __ Movd(dst, i.InputRegister(0));
+      } else {
+        __ Movd(dst, i.InputOperand(0));
+      }
+      __ Pshufd(dst, dst, uint8_t{0x0});
+      break;
+    }
+    case kX64I32x4ExtractLane: {
+      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
+      break;
+    }
+    case kX64I32x4SConvertF32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister tmp = i.TempSimd128Register(0);
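+      // Signed saturating conversion: NaN lanes become zero, and lanes that
+      // overflow cvttps2dq to 0x80000000 while the input was positive are
+      // flipped to 0x7FFFFFFF below.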
+      // NAN->0
+      __ Movaps(tmp, dst);
+      __ Cmpeqps(tmp, tmp);
+      __ Pand(dst, tmp);
+      // Set top bit if >= 0 (but not -0.0!)
+      __ Pxor(tmp, dst);
+      // Convert
+      __ Cvttps2dq(dst, dst);
+      // Set top bit if >=0 is now < 0
+      __ Pand(tmp, dst);
+      __ Psrad(tmp, byte{31});
+      // Set positive overflow lanes to 0x7FFFFFFF
+      __ Pxor(dst, tmp);
+      break;
+    }
+    case kX64I32x4SConvertI16x8Low: {
+      __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I32x4SConvertI16x8High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
+      __ Pmovsxwd(dst, dst);
+      break;
+    }
+    case kX64I32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignd(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubd(dst, src);
+      }
+      break;
+    }
+    case kX64I32x4Shl: {
+      // Take shift value modulo 2^5.
+      ASSEMBLE_SIMD_SHIFT(pslld, 5);
+      break;
+    }
+    case kX64I32x4ShrS: {
+      // Take shift value modulo 2^5.
+      ASSEMBLE_SIMD_SHIFT(psrad, 5);
+      break;
+    }
+    case kX64I32x4Add: {
+      ASSEMBLE_SIMD_BINOP(paddd);
+      break;
+    }
+    case kX64I32x4AddHoriz: {
+      ASSEMBLE_SIMD_BINOP(phaddd);
+      break;
+    }
+    case kX64I32x4Sub: {
+      ASSEMBLE_SIMD_BINOP(psubd);
+      break;
+    }
+    case kX64I32x4Mul: {
+      ASSEMBLE_SIMD_BINOP(pmulld);
+      break;
+    }
+    case kX64I32x4MinS: {
+      ASSEMBLE_SIMD_BINOP(pminsd);
+      break;
+    }
+    case kX64I32x4MaxS: {
+      ASSEMBLE_SIMD_BINOP(pmaxsd);
+      break;
+    }
+    case kX64I32x4Eq: {
+      ASSEMBLE_SIMD_BINOP(pcmpeqd);
+      break;
+    }
+    case kX64I32x4Ne: {
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Pcmpeqd(tmp, tmp);
+      __ Pxor(i.OutputSimd128Register(), tmp);
+      break;
+    }
+    case kX64I32x4GtS: {
+      ASSEMBLE_SIMD_BINOP(pcmpgtd);
+      break;
+    }
+    case kX64I32x4GeS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
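+      // Signed a >= b via min: min(a, b) == b exactly when a >= b.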
+      __ Pminsd(dst, src);
+      __ Pcmpeqd(dst, src);
+      break;
+    }
+    case kX64I32x4UConvertF32x4: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister tmp = i.TempSimd128Register(0);
+      XMMRegister tmp2 = i.TempSimd128Register(1);
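+      // Unsigned saturating conversion: NaN and negative lanes clamp to zero;
+      // lanes >= 2^31 overflow cvttps2dq to 0x80000000 and are fixed up by
+      // adding the separately converted (src - 2^31).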
+      // NAN->0, negative->0
+      __ Pxor(tmp2, tmp2);
+      __ Maxps(dst, tmp2);
+      // tmp2: float representation of max_signed
+      __ Pcmpeqd(tmp2, tmp2);
+      __ Psrld(tmp2, uint8_t{1});  // 0x7fffffff
+      __ Cvtdq2ps(tmp2, tmp2);     // 0x4f000000
+      // tmp: convert (src-max_signed).
+      // Positive overflow lanes -> 0x7FFFFFFF
+      // Negative lanes -> 0
+      __ Movaps(tmp, dst);
+      __ Subps(tmp, tmp2);
+      __ Cmpleps(tmp2, tmp);
+      __ Cvttps2dq(tmp, tmp);
+      __ Pxor(tmp, tmp2);
+      __ Pxor(tmp2, tmp2);
+      __ Pmaxsd(tmp, tmp2);
+      // convert. Overflow lanes above max_signed will be 0x80000000
+      __ Cvttps2dq(dst, dst);
+      // Add (src-max_signed) for overflow lanes.
+      __ Paddd(dst, tmp);
+      break;
+    }
+    case kX64I32x4UConvertI16x8Low: {
+      __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I32x4UConvertI16x8High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
+      __ Pmovzxwd(dst, dst);
+      break;
+    }
+    case kX64I32x4ShrU: {
+      // Take shift value modulo 2^5.
+      ASSEMBLE_SIMD_SHIFT(psrld, 5);
+      break;
+    }
+    case kX64I32x4MinU: {
+      ASSEMBLE_SIMD_BINOP(pminud);
+      break;
+    }
+    case kX64I32x4MaxU: {
+      ASSEMBLE_SIMD_BINOP(pmaxud);
+      break;
+    }
+    case kX64I32x4GtU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
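+      // Unsigned a > b via max: max(a, b) == b exactly when a <= b, so the
+      // pcmpeqd result is inverted with an all-ones xor below.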
+      __ Pmaxud(dst, src);
+      __ Pcmpeqd(dst, src);
+      __ Pcmpeqd(tmp, tmp);
+      __ Pxor(dst, tmp);
+      break;
+    }
+    case kX64I32x4GeU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
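+      // Unsigned a >= b via min: min(a, b) == b exactly when a >= b.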
+      __ Pminud(dst, src);
+      __ Pcmpeqd(dst, src);
+      break;
+    }
+    case kX64I32x4Abs: {
+      __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I32x4BitMask: {
+      __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I32x4DotI16x8S: {
+      ASSEMBLE_SIMD_BINOP(pmaddwd);
+      break;
+    }
+    case kX64S128Const: {
+      // Emit code only for generic constants; the all-zeros and all-ones
+      // cases are handled separately by the instruction selector.
+      XMMRegister dst = i.OutputSimd128Register();
+      uint32_t imm[4] = {};
+      for (int j = 0; j < 4; j++) {
+        imm[j] = i.InputUint32(j);
+      }
+      SetupSimdImmediateInRegister(tasm(), imm, dst);
+      break;
+    }
+    case kX64S128Zero: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pxor(dst, dst);
+      break;
+    }
+    case kX64S128AllOnes: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pcmpeqd(dst, dst);
+      break;
+    }
+    case kX64I16x8Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (HasRegisterInput(instr, 0)) {
+        __ Movd(dst, i.InputRegister(0));
+      } else {
+        __ Movd(dst, i.InputOperand(0));
+      }
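+      // Broadcast word 0: pshuflw duplicates it across the low four words,
+      // pshufd then replicates that dword across the whole register.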
+      __ Pshuflw(dst, dst, uint8_t{0x0});
+      __ Pshufd(dst, dst, uint8_t{0x0});
+      break;
+    }
+    case kX64I16x8ExtractLaneS: {
+      Register dst = i.OutputRegister();
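+      // Pextrw zero-extends the lane; sign-extend it for the signed variant.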
+      __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
+      __ movsxwl(dst, dst);
+      break;
+    }
+    case kX64I16x8SConvertI8x16Low: {
+      __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I16x8SConvertI8x16High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
+      __ Pmovsxbw(dst, dst);
+      break;
+    }
+    case kX64I16x8Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
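+        // All-ones is -1 in every lane, so psignw negates each lane of dst.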
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignw(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubw(dst, src);
+      }
+      break;
+    }
+    case kX64I16x8Shl: {
+      // Take shift value modulo 2^4.
+      ASSEMBLE_SIMD_SHIFT(psllw, 4);
+      break;
+    }
+    case kX64I16x8ShrS: {
+      // Take shift value modulo 2^4.
+      ASSEMBLE_SIMD_SHIFT(psraw, 4);
+      break;
+    }
+    case kX64I16x8SConvertI32x4: {
+      ASSEMBLE_SIMD_BINOP(packssdw);
+      break;
+    }
+    case kX64I16x8Add: {
+      ASSEMBLE_SIMD_BINOP(paddw);
+      break;
+    }
+    case kX64I16x8AddSatS: {
+      ASSEMBLE_SIMD_BINOP(paddsw);
+      break;
+    }
+    case kX64I16x8AddHoriz: {
+      ASSEMBLE_SIMD_BINOP(phaddw);
+      break;
+    }
+    case kX64I16x8Sub: {
+      ASSEMBLE_SIMD_BINOP(psubw);
+      break;
+    }
+    case kX64I16x8SubSatS: {
+      ASSEMBLE_SIMD_BINOP(psubsw);
+      break;
+    }
+    case kX64I16x8Mul: {
+      ASSEMBLE_SIMD_BINOP(pmullw);
+      break;
+    }
+    case kX64I16x8MinS: {
+      ASSEMBLE_SIMD_BINOP(pminsw);
+      break;
+    }
+    case kX64I16x8MaxS: {
+      ASSEMBLE_SIMD_BINOP(pmaxsw);
+      break;
+    }
+    case kX64I16x8Eq: {
+      ASSEMBLE_SIMD_BINOP(pcmpeqw);
+      break;
+    }
+    case kX64I16x8Ne: {
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Pcmpeqw(tmp, tmp);
+      __ Pxor(i.OutputSimd128Register(), tmp);
+      break;
+    }
+    case kX64I16x8GtS: {
+      ASSEMBLE_SIMD_BINOP(pcmpgtw);
+      break;
+    }
+    case kX64I16x8GeS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      __ Pminsw(dst, src);
+      __ Pcmpeqw(dst, src);
+      break;
+    }
+    case kX64I16x8UConvertI8x16Low: {
+      __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I16x8UConvertI8x16High: {
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
+      __ Pmovzxbw(dst, dst);
+      break;
+    }
+    case kX64I16x8ShrU: {
+      // Take shift value modulo 2^4.
+      ASSEMBLE_SIMD_SHIFT(psrlw, 4);
+      break;
+    }
+    case kX64I16x8UConvertI32x4: {
+      ASSEMBLE_SIMD_BINOP(packusdw);
+      break;
+    }
+    case kX64I16x8AddSatU: {
+      ASSEMBLE_SIMD_BINOP(paddusw);
+      break;
+    }
+    case kX64I16x8SubSatU: {
+      ASSEMBLE_SIMD_BINOP(psubusw);
+      break;
+    }
+    case kX64I16x8MinU: {
+      ASSEMBLE_SIMD_BINOP(pminuw);
+      break;
+    }
+    case kX64I16x8MaxU: {
+      ASSEMBLE_SIMD_BINOP(pmaxuw);
+      break;
+    }
+    case kX64I16x8GtU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Pmaxuw(dst, src);
+      __ Pcmpeqw(dst, src);
+      __ Pcmpeqw(tmp, tmp);
+      __ Pxor(dst, tmp);
+      break;
+    }
+    case kX64I16x8GeU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      __ Pminuw(dst, src);
+      __ Pcmpeqw(dst, src);
+      break;
+    }
+    case kX64I16x8RoundingAverageU: {
+      ASSEMBLE_SIMD_BINOP(pavgw);
+      break;
+    }
+    case kX64I16x8Abs: {
+      __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I16x8BitMask: {
+      Register dst = i.OutputRegister();
+      XMMRegister tmp = i.TempSimd128Register(0);
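+      // packsswb saturates the source words into the upper 8 bytes of tmp,
+      // preserving their sign bits; pmovmskb then yields a 16-bit byte sign
+      // mask whose upper half is the result, so shift the lower half out.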
+      __ Packsswb(tmp, i.InputSimd128Register(0));
+      __ Pmovmskb(dst, tmp);
+      __ shrq(dst, Immediate(8));
+      break;
+    }
+    case kX64I8x16Splat: {
+      XMMRegister dst = i.OutputSimd128Register();
+      if (HasRegisterInput(instr, 0)) {
+        __ Movd(dst, i.InputRegister(0));
+      } else {
+        __ Movd(dst, i.InputOperand(0));
+      }
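+      // pshufb with an all-zero shuffle mask broadcasts byte 0 to every lane.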
+      __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pshufb(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64Pextrb: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      if (HasAddressingMode(instr)) {
+        Operand operand = i.MemoryOperand(&index);
+        __ Pextrb(operand, i.InputSimd128Register(index),
+                  i.InputUint8(index + 1));
+      } else {
+        __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputUint8(1));
+      }
+      break;
+    }
+    case kX64Pextrw: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      if (HasAddressingMode(instr)) {
+        Operand operand = i.MemoryOperand(&index);
+        __ Pextrw(operand, i.InputSimd128Register(index),
+                  i.InputUint8(index + 1));
+      } else {
+        __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
+                  i.InputUint8(1));
+      }
+      break;
+    }
+    case kX64I8x16ExtractLaneS: {
+      Register dst = i.OutputRegister();
+      __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
+      __ movsxbl(dst, dst);
+      break;
+    }
+    case kX64Pinsrb: {
+      ASSEMBLE_PINSR(Pinsrb);
+      break;
+    }
+    case kX64Pinsrw: {
+      ASSEMBLE_PINSR(Pinsrw);
+      break;
+    }
+    case kX64Pinsrd: {
+      ASSEMBLE_PINSR(Pinsrd);
+      break;
+    }
+    case kX64Pinsrq: {
+      ASSEMBLE_PINSR(Pinsrq);
+      break;
+    }
+    case kX64I8x16SConvertI16x8: {
+      ASSEMBLE_SIMD_BINOP(packsswb);
+      break;
+    }
+    case kX64I8x16Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ Psignb(dst, kScratchDoubleReg);
+      } else {
+        __ Pxor(dst, dst);
+        __ Psubb(dst, src);
+      }
+      break;
+    }
+    case kX64I8x16Shl: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // Temp registers for shift mask and additional moves to XMM registers.
+      Register tmp = i.ToRegister(instr->TempAt(0));
+      XMMRegister tmp_simd = i.TempSimd128Register(1);
+      if (HasImmediateInput(instr, 1)) {
+        // Perform 16-bit shift, then mask away low bits.
+        uint8_t shift = i.InputInt3(1);
+        __ Psllw(dst, byte{shift});
+
+        uint8_t bmask = static_cast<uint8_t>(0xff << shift);
+        uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+        __ movl(tmp, Immediate(mask));
+        __ Movd(tmp_simd, tmp);
+        __ Pshufd(tmp_simd, tmp_simd, uint8_t{0});
+        __ Pand(dst, tmp_simd);
+      } else {
+        // Mask off the unwanted bits before word-shifting.
+        __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
+        // Take shift value modulo 8.
+        __ movq(tmp, i.InputRegister(1));
+        __ andq(tmp, Immediate(7));
+        __ addq(tmp, Immediate(8));
+        __ Movq(tmp_simd, tmp);
+        __ Psrlw(kScratchDoubleReg, tmp_simd);
+        __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
+        __ Pand(dst, kScratchDoubleReg);
+        // TODO(zhin): subq here to avoid asking for another temporary
+        // register; examine codegen for the other i8x16 shifts, which use
+        // fewer instructions.
+        __ subq(tmp, Immediate(8));
+        __ Movq(tmp_simd, tmp);
+        __ Psllw(dst, tmp_simd);
+      }
+      break;
+    }
+    case kX64I8x16ShrS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (HasImmediateInput(instr, 1)) {
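+        // Widen the bytes to words (each byte ends up in the high half of its
+        // word), shift arithmetically by shift + 8 so the low filler byte is
+        // shifted out, then repack with signed saturation.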
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        uint8_t shift = i.InputInt3(1) + 8;
+        __ Psraw(kScratchDoubleReg, shift);
+        __ Psraw(dst, shift);
+        __ Packsswb(dst, kScratchDoubleReg);
+      } else {
+        // Temp registers for shift mask and additional moves to XMM registers.
+        Register tmp = i.ToRegister(instr->TempAt(0));
+        XMMRegister tmp_simd = i.TempSimd128Register(1);
+        // Unpack the bytes into words, do arithmetic shifts, and repack.
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        // Prepare shift value
+        __ movq(tmp, i.InputRegister(1));
+        // Take shift value modulo 8.
+        __ andq(tmp, Immediate(7));
+        __ addq(tmp, Immediate(8));
+        __ Movq(tmp_simd, tmp);
+        __ Psraw(kScratchDoubleReg, tmp_simd);
+        __ Psraw(dst, tmp_simd);
+        __ Packsswb(dst, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kX64I8x16Add: {
+      ASSEMBLE_SIMD_BINOP(paddb);
+      break;
+    }
+    case kX64I8x16AddSatS: {
+      ASSEMBLE_SIMD_BINOP(paddsb);
+      break;
+    }
+    case kX64I8x16Sub: {
+      ASSEMBLE_SIMD_BINOP(psubb);
+      break;
+    }
+    case kX64I8x16SubSatS: {
+      ASSEMBLE_SIMD_BINOP(psubsb);
+      break;
+    }
+    case kX64I8x16Mul: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      XMMRegister right = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
+      // I16x8 view of I8x16
+      // left = AAaa AAaa ... AAaa AAaa
+      // right= BBbb BBbb ... BBbb BBbb
+      // t = 00AA 00AA ... 00AA 00AA
+      // s = 00BB 00BB ... 00BB 00BB
+      __ Movaps(tmp, dst);
+      __ Movaps(kScratchDoubleReg, right);
+      __ Psrlw(tmp, byte{8});
+      __ Psrlw(kScratchDoubleReg, byte{8});
+      // dst = left * 256
+      __ Psllw(dst, byte{8});
+      // t = I16x8Mul(t, s)
+      //    => __PP __PP ...  __PP  __PP
+      __ Pmullw(tmp, kScratchDoubleReg);
+      // dst = I16x8Mul(left * 256, right)
+      //    => pp__ pp__ ...  pp__  pp__
+      __ Pmullw(dst, right);
+      // t = I16x8Shl(t, 8)
+      //    => PP00 PP00 ...  PP00  PP00
+      __ Psllw(tmp, byte{8});
+      // dst = I16x8Shr(dst, 8)
+      //    => 00pp 00pp ...  00pp  00pp
+      __ Psrlw(dst, byte{8});
+      // dst = I16x8Or(dst, t)
+      //    => PPpp PPpp ...  PPpp  PPpp
+      __ Por(dst, tmp);
+      break;
+    }
+    case kX64I8x16MinS: {
+      ASSEMBLE_SIMD_BINOP(pminsb);
+      break;
+    }
+    case kX64I8x16MaxS: {
+      ASSEMBLE_SIMD_BINOP(pmaxsb);
+      break;
+    }
+    case kX64I8x16Eq: {
+      ASSEMBLE_SIMD_BINOP(pcmpeqb);
+      break;
+    }
+    case kX64I8x16Ne: {
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ Pcmpeqb(tmp, tmp);
+      __ Pxor(i.OutputSimd128Register(), tmp);
+      break;
+    }
+    case kX64I8x16GtS: {
+      ASSEMBLE_SIMD_BINOP(pcmpgtb);
+      break;
+    }
+    case kX64I8x16GeS: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      __ Pminsb(dst, src);
+      __ Pcmpeqb(dst, src);
+      break;
+    }
+    case kX64I8x16UConvertI16x8: {
+      ASSEMBLE_SIMD_BINOP(packuswb);
+      break;
+    }
+    case kX64I8x16ShrU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      // Unpack the bytes into words, do logical shifts, and repack.
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // Temp registers for shift mask and additional moves to XMM registers.
+      Register tmp = i.ToRegister(instr->TempAt(0));
+      XMMRegister tmp_simd = i.TempSimd128Register(1);
+      if (HasImmediateInput(instr, 1)) {
+        // Perform 16-bit shift, then mask away high bits.
+        uint8_t shift = i.InputInt3(1);
+        __ Psrlw(dst, byte{shift});
+
+        uint8_t bmask = 0xff >> shift;
+        uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
+        __ movl(tmp, Immediate(mask));
+        __ Movd(tmp_simd, tmp);
+        __ Pshufd(tmp_simd, tmp_simd, byte{0});
+        __ Pand(dst, tmp_simd);
+      } else {
+        __ Punpckhbw(kScratchDoubleReg, dst);
+        __ Punpcklbw(dst, dst);
+        // Prepare shift value
+        __ movq(tmp, i.InputRegister(1));
+        // Take shift value modulo 8.
+        __ andq(tmp, Immediate(7));
+        __ addq(tmp, Immediate(8));
+        __ Movq(tmp_simd, tmp);
+        __ Psrlw(kScratchDoubleReg, tmp_simd);
+        __ Psrlw(dst, tmp_simd);
+        __ Packuswb(dst, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kX64I8x16AddSatU: {
+      ASSEMBLE_SIMD_BINOP(paddusb);
+      break;
+    }
+    case kX64I8x16SubSatU: {
+      ASSEMBLE_SIMD_BINOP(psubusb);
+      break;
+    }
+    case kX64I8x16MinU: {
+      ASSEMBLE_SIMD_BINOP(pminub);
+      break;
+    }
+    case kX64I8x16MaxU: {
+      ASSEMBLE_SIMD_BINOP(pmaxub);
+      break;
+    }
+    case kX64I8x16GtU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      XMMRegister tmp = i.TempSimd128Register(0);
+      __ Pmaxub(dst, src);
+      __ Pcmpeqb(dst, src);
+      __ Pcmpeqb(tmp, tmp);
+      __ Pxor(dst, tmp);
+      break;
+    }
+    case kX64I8x16GeU: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(1);
+      __ Pminub(dst, src);
+      __ Pcmpeqb(dst, src);
+      break;
+    }
+    case kX64I8x16RoundingAverageU: {
+      ASSEMBLE_SIMD_BINOP(pavgb);
+      break;
+    }
+    case kX64I8x16Abs: {
+      __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I8x16BitMask: {
+      __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
+      break;
+    }
+    case kX64I8x16SignSelect: {
+      __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kX64I16x8SignSelect: {
+      if (CpuFeatures::IsSupported(AVX)) {
+        CpuFeatureScope avx_scope(tasm(), AVX);
+        __ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
+        __ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                     i.InputSimd128Register(1), kScratchDoubleReg);
+      } else {
+        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+        XMMRegister mask = i.InputSimd128Register(2);
+        DCHECK_EQ(xmm0, mask);
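+        // pblendvb tests only the top bit of each byte, so widen each word's
+        // sign bit to a full-lane mask via (0 > x) first.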
+        __ movapd(kScratchDoubleReg, mask);
+        __ pxor(mask, mask);
+        __ pcmpgtw(mask, kScratchDoubleReg);
+        __ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
+        // Restore mask.
+        __ movapd(mask, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kX64I32x4SignSelect: {
+      __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kX64I64x2SignSelect: {
+      __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), i.InputSimd128Register(2));
+      break;
+    }
+    case kX64S128And: {
+      ASSEMBLE_SIMD_BINOP(pand);
+      break;
+    }
+    case kX64S128Or: {
+      ASSEMBLE_SIMD_BINOP(por);
+      break;
+    }
+    case kX64S128Xor: {
+      ASSEMBLE_SIMD_BINOP(pxor);
+      break;
+    }
+    case kX64S128Not: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ Movaps(kScratchDoubleReg, dst);
+        __ Pcmpeqd(dst, dst);
+        __ Pxor(dst, kScratchDoubleReg);
+      } else {
+        __ Pcmpeqd(dst, dst);
+        __ Pxor(dst, src);
+      }
+
+      break;
+    }
+    case kX64S128Select: {
+      // Mask used here is stored in dst.
+      XMMRegister dst = i.OutputSimd128Register();
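+      // Computes (mask & (src1 ^ src2)) ^ src2: bits set in the mask come
+      // from src1, the remaining bits from src2.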
+      __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+      __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+      __ Andps(dst, kScratchDoubleReg);
+      __ Xorps(dst, i.InputSimd128Register(2));
+      break;
+    }
+    case kX64S128AndNot: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      // The inputs have been inverted by the instruction selector, so we can
+      // call andnps here without any modifications.
+      __ Andnps(dst, i.InputSimd128Register(1));
+      break;
+    }
+    case kX64I8x16Swizzle: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister mask = i.TempSimd128Register(0);
+
+      // Out-of-range indices should return 0. Adding 112 maps any index > 15
+      // to a value with the top bit set (>= 128), so pshufb will zero that
+      // lane.
+      __ Move(mask, uint32_t{0x70707070});
+      __ Pshufd(mask, mask, uint8_t{0x0});
+      __ Paddusb(mask, i.InputSimd128Register(1));
+      __ Pshufb(dst, mask);
+      break;
+    }
+    case kX64I8x16Shuffle: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister tmp_simd = i.TempSimd128Register(0);
+      if (instr->InputCount() == 5) {  // only one input operand
+        uint32_t mask[4] = {};
+        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+        for (int j = 4; j > 0; j--) {
+          mask[j - 1] = i.InputUint32(j);
+        }
+
+        SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
+        __ Pshufb(dst, tmp_simd);
+      } else {  // two input operands
+        DCHECK_EQ(6, instr->InputCount());
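+        // Shuffle each operand separately: indices that select the other
+        // operand are replaced with 0x80, which makes pshufb zero that lane,
+        // and the two partial results are OR'ed together.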
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
+        uint32_t mask1[4] = {};
+        for (int j = 5; j > 1; j--) {
+          uint32_t lanes = i.InputUint32(j);
+          for (int k = 0; k < 32; k += 8) {
+            uint8_t lane = lanes >> k;
+            mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
+          }
+        }
+        SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
+        __ Pshufb(kScratchDoubleReg, tmp_simd);
+        uint32_t mask2[4] = {};
+        if (instr->InputAt(1)->IsSimd128Register()) {
+          XMMRegister src1 = i.InputSimd128Register(1);
+          if (src1 != dst) __ movups(dst, src1);
+        } else {
+          __ Movups(dst, i.InputOperand(1));
+        }
+        for (int j = 5; j > 1; j--) {
+          uint32_t lanes = i.InputUint32(j);
+          for (int k = 0; k < 32; k += 8) {
+            uint8_t lane = lanes >> k;
+            mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
+          }
+        }
+        SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
+        __ Pshufb(dst, tmp_simd);
+        __ Por(dst, kScratchDoubleReg);
+      }
+      break;
+    }
+    case kX64S128Load8Splat: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pinsrb(dst, dst, i.MemoryOperand(), 0);
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
+      __ Pshufb(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64S128Load16Splat: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      XMMRegister dst = i.OutputSimd128Register();
+      __ Pinsrw(dst, dst, i.MemoryOperand(), 0);
+      __ Pshuflw(dst, dst, uint8_t{0});
+      __ Punpcklqdq(dst, dst);
+      break;
+    }
+    case kX64S128Load32Splat: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      if (CpuFeatures::IsSupported(AVX)) {
+        CpuFeatureScope avx_scope(tasm(), AVX);
+        __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
+      } else {
+        __ movss(i.OutputSimd128Register(), i.MemoryOperand());
+        __ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
+                  byte{0});
+      }
+      break;
+    }
+    case kX64S128Load64Splat: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load8x8S: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load8x8U: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load16x4S: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load16x4U: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load32x2S: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Load32x2U: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
+      break;
+    }
+    case kX64S128Store32Lane: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      uint8_t lane = i.InputUint8(index + 1);
+      if (lane == 0) {
+        __ Movss(operand, i.InputSimd128Register(index));
+      } else {
+        DCHECK_GE(3, lane);
+        __ Extractps(operand, i.InputSimd128Register(index), lane);
+      }
+      break;
+    }
+    case kX64S128Store64Lane: {
+      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
+      size_t index = 0;
+      Operand operand = i.MemoryOperand(&index);
+      uint8_t lane = i.InputUint8(index + 1);
+      if (lane == 0) {
+        __ Movlps(operand, i.InputSimd128Register(index));
+      } else {
+        DCHECK_EQ(1, lane);
+        __ Movhps(operand, i.InputSimd128Register(index));
+      }
+      break;
+    }
+    case kX64S32x4Swizzle: {
+      DCHECK_EQ(2, instr->InputCount());
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
+                              i.InputUint8(1));
+      break;
+    }
+    case kX64S32x4Shuffle: {
+      DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
+      uint8_t shuffle = i.InputUint8(2);
+      DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
+      ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
+      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
+      break;
+    }
+    case kX64S16x8Blend: {
+      ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
+      break;
+    }
+    case kX64S16x8HalfShuffle1: {
+      XMMRegister dst = i.OutputSimd128Register();
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
+      __ Pshufhw(dst, dst, i.InputUint8(2));
+      break;
+    }
+    case kX64S16x8HalfShuffle2: {
+      XMMRegister dst = i.OutputSimd128Register();
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
+      __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
+      ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
+      __ Pshufhw(dst, dst, i.InputUint8(3));
+      __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
+      break;
+    }
+    case kX64S8x16Alignr: {
+      ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
+      break;
+    }
+    case kX64S16x8Dup: {
+      XMMRegister dst = i.OutputSimd128Register();
+      uint8_t lane = i.InputInt8(1) & 0x7;
+      uint8_t lane4 = lane & 0x3;
+      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      if (lane < 4) {
+        ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
+        __ Pshufd(dst, dst, uint8_t{0});
+      } else {
+        ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
+        __ Pshufd(dst, dst, uint8_t{0xaa});
+      }
+      break;
+    }
+    case kX64S8x16Dup: {
+      XMMRegister dst = i.OutputSimd128Register();
+      uint8_t lane = i.InputInt8(1) & 0xf;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (lane < 8) {
+        __ Punpcklbw(dst, dst);
+      } else {
+        __ Punpckhbw(dst, dst);
+      }
+      lane &= 0x7;
+      uint8_t lane4 = lane & 0x3;
+      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
+      if (lane < 4) {
+        __ Pshuflw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, uint8_t{0});
+      } else {
+        __ Pshufhw(dst, dst, half_dup);
+        __ Pshufd(dst, dst, uint8_t{0xaa});
+      }
+      break;
+    }
+    case kX64S64x2UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
+      break;
+    case kX64S32x4UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
+      break;
+    case kX64S16x8UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
+      break;
+    case kX64S8x16UnpackHigh:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
+      break;
+    case kX64S64x2UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
+      break;
+    case kX64S32x4UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
+      break;
+    case kX64S16x8UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
+      break;
+    case kX64S8x16UnpackLow:
+      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
+      break;
+    case kX64S16x8UnzipHigh: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrld(kScratchDoubleReg, byte{16});
+        src2 = kScratchDoubleReg;
+      }
+      __ Psrld(dst, byte{16});
+      __ Packusdw(dst, src2);
+      break;
+    }
+    case kX64S16x8UnzipLow: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
+      if (instr->InputCount() == 2) {
+        ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
+        src2 = kScratchDoubleReg;
+      }
+      __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
+      __ Packusdw(dst, src2);
+      break;
+    }
+    case kX64S8x16UnzipHigh: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrlw(kScratchDoubleReg, byte{8});
+        src2 = kScratchDoubleReg;
+      }
+      __ Psrlw(dst, byte{8});
+      __ Packuswb(dst, src2);
+      break;
+    }
+    case kX64S8x16UnzipLow: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src2 = dst;
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (instr->InputCount() == 2) {
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psllw(kScratchDoubleReg, byte{8});
+        __ Psrlw(kScratchDoubleReg, byte{8});
+        src2 = kScratchDoubleReg;
+      }
+      __ Psllw(dst, byte{8});
+      __ Psrlw(dst, byte{8});
+      __ Packuswb(dst, src2);
+      break;
+    }
+    case kX64S8x16TransposeLow: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Psllw(dst, byte{8});
+      if (instr->InputCount() == 1) {
+        __ Movups(kScratchDoubleReg, dst);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psllw(kScratchDoubleReg, byte{8});
+      }
+      __ Psrlw(dst, byte{8});
+      __ Por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64S8x16TransposeHigh: {
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      __ Psrlw(dst, byte{8});
+      if (instr->InputCount() == 1) {
+        __ Movups(kScratchDoubleReg, dst);
+      } else {
+        DCHECK_EQ(2, instr->InputCount());
+        ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
+        __ Psrlw(kScratchDoubleReg, byte{8});
+      }
+      __ Psllw(kScratchDoubleReg, byte{8});
+      __ Por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64S8x8Reverse:
+    case kX64S8x4Reverse:
+    case kX64S8x2Reverse: {
+      DCHECK_EQ(1, instr->InputCount());
+      XMMRegister dst = i.OutputSimd128Register();
+      DCHECK_EQ(dst, i.InputSimd128Register(0));
+      if (arch_opcode != kX64S8x2Reverse) {
+        // First shuffle words into position.
+        uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
+        __ Pshuflw(dst, dst, shuffle_mask);
+        __ Pshufhw(dst, dst, shuffle_mask);
+      }
+      __ Movaps(kScratchDoubleReg, dst);
+      __ Psrlw(kScratchDoubleReg, byte{8});
+      __ Psllw(dst, byte{8});
+      __ Por(dst, kScratchDoubleReg);
+      break;
+    }
+    case kX64V32x4AnyTrue:
+    case kX64V16x8AnyTrue:
+    case kX64V8x16AnyTrue: {
+      Register dst = i.OutputRegister();
+      XMMRegister src = i.InputSimd128Register(0);
+
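+      // ptest sets ZF iff src is all zeros, so setcc(not_equal) materializes
+      // 1 exactly when any bit is set.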
+      __ xorq(dst, dst);
+      __ Ptest(src, src);
+      __ setcc(not_equal, dst);
+      break;
+    }
+    // The AllTrue cases must be split by lane shape because the comparison
+    // instruction used matters: e.g. given 0xff00, pcmpeqb returns 0x0011
+    // while pcmpeqw returns 0x0000, so ptest sets ZF to 0 and 1 respectively.
+    case kX64V32x4AllTrue: {
+      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
+      break;
+    }
+    case kX64V16x8AllTrue: {
+      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
+      break;
+    }
+    case kX64V8x16AllTrue: {
+      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
+      break;
+    }
+    case kWord32AtomicExchangeInt8: {
+      __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+      __ movsxbl(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeUint8: {
+      __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzxbl(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeInt16: {
+      __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+      __ movsxwl(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeUint16: {
+      __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzxwl(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kWord32AtomicExchangeWord32: {
+      __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
+      break;
+    }
+    case kWord32AtomicCompareExchangeInt8: {
+      __ lock();
+      __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+      __ movsxbl(rax, rax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeUint8: {
+      __ lock();
+      __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzxbl(rax, rax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeInt16: {
+      __ lock();
+      __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+      __ movsxwl(rax, rax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeUint16: {
+      __ lock();
+      __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzxwl(rax, rax);
+      break;
+    }
+    case kWord32AtomicCompareExchangeWord32: {
+      __ lock();
+      __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
+      break;
+    }
+#define ATOMIC_BINOP_CASE(op, inst)              \
+  case kWord32Atomic##op##Int8:                  \
+    ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
+    __ movsxbl(rax, rax);                        \
+    break;                                       \
+  case kWord32Atomic##op##Uint8:                 \
+    ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
+    __ movzxbl(rax, rax);                        \
+    break;                                       \
+  case kWord32Atomic##op##Int16:                 \
+    ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
+    __ movsxwl(rax, rax);                        \
+    break;                                       \
+  case kWord32Atomic##op##Uint16:                \
+    ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
+    __ movzxwl(rax, rax);                        \
+    break;                                       \
+  case kWord32Atomic##op##Word32:                \
+    ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
+    break;
+      ATOMIC_BINOP_CASE(Add, addl)
+      ATOMIC_BINOP_CASE(Sub, subl)
+      ATOMIC_BINOP_CASE(And, andl)
+      ATOMIC_BINOP_CASE(Or, orl)
+      ATOMIC_BINOP_CASE(Xor, xorl)
+#undef ATOMIC_BINOP_CASE
+    case kX64Word64AtomicExchangeUint8: {
+      __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzxbq(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kX64Word64AtomicExchangeUint16: {
+      __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
+      __ movzxwq(i.InputRegister(0), i.InputRegister(0));
+      break;
+    }
+    case kX64Word64AtomicExchangeUint32: {
+      __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
+      break;
+    }
+    case kX64Word64AtomicExchangeUint64: {
+      __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
+      break;
+    }
+    case kX64Word64AtomicCompareExchangeUint8: {
+      __ lock();
+      __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzxbq(rax, rax);
+      break;
+    }
+    case kX64Word64AtomicCompareExchangeUint16: {
+      __ lock();
+      __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
+      __ movzxwq(rax, rax);
+      break;
+    }
+    case kX64Word64AtomicCompareExchangeUint32: {
+      __ lock();
+      __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
+      // Zero-extend the 32 bit value to 64 bit.
+      __ movl(rax, rax);
+      break;
+    }
+    case kX64Word64AtomicCompareExchangeUint64: {
+      __ lock();
+      __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
+      break;
+    }
+#define ATOMIC64_BINOP_CASE(op, inst)              \
+  case kX64Word64Atomic##op##Uint8:                \
+    ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
+    __ movzxbq(rax, rax);                          \
+    break;                                         \
+  case kX64Word64Atomic##op##Uint16:               \
+    ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
+    __ movzxwq(rax, rax);                          \
+    break;                                         \
+  case kX64Word64Atomic##op##Uint32:               \
+    ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
+    break;                                         \
+  case kX64Word64Atomic##op##Uint64:               \
+    ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
+    break;
+      ATOMIC64_BINOP_CASE(Add, addq)
+      ATOMIC64_BINOP_CASE(Sub, subq)
+      ATOMIC64_BINOP_CASE(And, andq)
+      ATOMIC64_BINOP_CASE(Or, orq)
+      ATOMIC64_BINOP_CASE(Xor, xorq)
+#undef ATOMIC64_BINOP_CASE
+    case kWord32AtomicLoadInt8:
+    case kWord32AtomicLoadUint8:
+    case kWord32AtomicLoadInt16:
+    case kWord32AtomicLoadUint16:
+    case kWord32AtomicLoadWord32:
+    case kWord32AtomicStoreWord8:
+    case kWord32AtomicStoreWord16:
+    case kWord32AtomicStoreWord32:
+    case kX64Word64AtomicLoadUint8:
+    case kX64Word64AtomicLoadUint16:
+    case kX64Word64AtomicLoadUint32:
+    case kX64Word64AtomicLoadUint64:
+    case kX64Word64AtomicStoreWord8:
+    case kX64Word64AtomicStoreWord16:
+    case kX64Word64AtomicStoreWord32:
+    case kX64Word64AtomicStoreWord64:
+      UNREACHABLE();  // Won't be generated by instruction selector.
+      break;
+  }
+  return kSuccess;
+}  // NOLINT(readability/fn_size)
+
+#undef ASSEMBLE_UNOP
+#undef ASSEMBLE_BINOP
+#undef ASSEMBLE_COMPARE
+#undef ASSEMBLE_MULT
+#undef ASSEMBLE_SHIFT
+#undef ASSEMBLE_MOVX
+#undef ASSEMBLE_SSE_BINOP
+#undef ASSEMBLE_SSE_UNOP
+#undef ASSEMBLE_AVX_BINOP
+#undef ASSEMBLE_IEEE754_BINOP
+#undef ASSEMBLE_IEEE754_UNOP
+#undef ASSEMBLE_ATOMIC_BINOP
+#undef ASSEMBLE_ATOMIC64_BINOP
+#undef ASSEMBLE_SIMD_INSTR
+#undef ASSEMBLE_SIMD_IMM_INSTR
+#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
+#undef ASSEMBLE_SIMD_IMM_SHUFFLE
+#undef ASSEMBLE_SIMD_ALL_TRUE
+#undef ASSEMBLE_SIMD_SHIFT
+
+namespace {
+
+Condition FlagsConditionToCondition(FlagsCondition condition) {
+  switch (condition) {
+    case kUnorderedEqual:
+    case kEqual:
+      return equal;
+    case kUnorderedNotEqual:
+    case kNotEqual:
+      return not_equal;
+    case kSignedLessThan:
+      return less;
+    case kSignedGreaterThanOrEqual:
+      return greater_equal;
+    case kSignedLessThanOrEqual:
+      return less_equal;
+    case kSignedGreaterThan:
+      return greater;
+    case kUnsignedLessThan:
+      return below;
+    case kUnsignedGreaterThanOrEqual:
+      return above_equal;
+    case kUnsignedLessThanOrEqual:
+      return below_equal;
+    case kUnsignedGreaterThan:
+      return above;
+    case kOverflow:
+      return overflow;
+    case kNotOverflow:
+      return no_overflow;
+    default:
+      break;
+  }
+  UNREACHABLE();
+}
+
+}  // namespace
+
+// Assembles branches after this instruction.
+void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
+  Label::Distance flabel_distance =
+      branch->fallthru ? Label::kNear : Label::kFar;
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
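+  // After a floating-point compare, the parity flag is set iff either operand
+  // was NaN, so dispatch the unordered cases on parity first.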
+  if (branch->condition == kUnorderedEqual) {
+    __ j(parity_even, flabel, flabel_distance);
+  } else if (branch->condition == kUnorderedNotEqual) {
+    __ j(parity_even, tlabel);
+  }
+  __ j(FlagsConditionToCondition(branch->condition), tlabel);
+
+  if (!branch->fallthru) __ jmp(flabel, flabel_distance);
+}
+
+void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
+                                            Instruction* instr) {
+  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
+  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
+    return;
+  }
+
+  condition = NegateFlagsCondition(condition);
+  __ movl(kScratchRegister, Immediate(0));
+  __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
+           kScratchRegister);
+}
+
+void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
+                                            BranchInfo* branch) {
+  Label::Distance flabel_distance =
+      branch->fallthru ? Label::kNear : Label::kFar;
+  Label* tlabel = branch->true_label;
+  Label* flabel = branch->false_label;
+  Label nodeopt;
+  if (branch->condition == kUnorderedEqual) {
+    __ j(parity_even, flabel, flabel_distance);
+  } else if (branch->condition == kUnorderedNotEqual) {
+    __ j(parity_even, tlabel);
+  }
+  __ j(FlagsConditionToCondition(branch->condition), tlabel);
+
+  if (FLAG_deopt_every_n_times > 0) {
+    ExternalReference counter =
+        ExternalReference::stress_deopt_count(isolate());
+
+    __ pushfq();
+    __ pushq(rax);
+    __ load_rax(counter);
+    __ decl(rax);
+    __ j(not_zero, &nodeopt, Label::kNear);
+
+    __ Set(rax, FLAG_deopt_every_n_times);
+    __ store_rax(counter);
+    __ popq(rax);
+    __ popfq();
+    __ jmp(tlabel);
+
+    __ bind(&nodeopt);
+    __ store_rax(counter);
+    __ popq(rax);
+    __ popfq();
+  }
+
+  if (!branch->fallthru) {
+    __ jmp(flabel, flabel_distance);
+  }
+}
+
+void CodeGenerator::AssembleArchJump(RpoNumber target) {
+  if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
+}
+
+void CodeGenerator::AssembleArchTrap(Instruction* instr,
+                                     FlagsCondition condition) {
+  auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
+  Label* tlabel = ool->entry();
+  Label end;
+  if (condition == kUnorderedEqual) {
+    __ j(parity_even, &end, Label::kNear);
+  } else if (condition == kUnorderedNotEqual) {
+    __ j(parity_even, tlabel);
+  }
+  __ j(FlagsConditionToCondition(condition), tlabel);
+  __ bind(&end);
+}
+
+// Assembles boolean materializations after this instruction.
+void CodeGenerator::AssembleArchBoolean(Instruction* instr,
+                                        FlagsCondition condition) {
+  X64OperandConverter i(this, instr);
+  Label done;
+
+  // Materialize a full 64-bit 1 or 0 value. The result register is always the
+  // last output of the instruction.
+  Label check;
+  DCHECK_NE(0u, instr->OutputCount());
+  Register reg = i.OutputRegister(instr->OutputCount() - 1);
+  if (condition == kUnorderedEqual) {
+    __ j(parity_odd, &check, Label::kNear);
+    __ movl(reg, Immediate(0));
+    __ jmp(&done, Label::kNear);
+  } else if (condition == kUnorderedNotEqual) {
+    __ j(parity_odd, &check, Label::kNear);
+    __ movl(reg, Immediate(1));
+    __ jmp(&done, Label::kNear);
+  }
+  __ bind(&check);
+  __ setcc(FlagsConditionToCondition(condition), reg);
+  __ movzxbl(reg, reg);
+  __ bind(&done);
+}
+
+void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  std::vector<std::pair<int32_t, Label*>> cases;
+  for (size_t index = 2; index < instr->InputCount(); index += 2) {
+    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
+  }
+  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
+                                      cases.data() + cases.size());
+}
+
+void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
+  X64OperandConverter i(this, instr);
+  Register input = i.InputRegister(0);
+  int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
+  Label** cases = zone()->NewArray<Label*>(case_count);
+  for (int32_t index = 0; index < case_count; ++index) {
+    cases[index] = GetLabel(i.InputRpo(index + 2));
+  }
+  Label* const table = AddJumpTable(cases, case_count);
+  __ cmpl(input, Immediate(case_count));
+  __ j(above_equal, GetLabel(i.InputRpo(1)));
+  __ leaq(kScratchRegister, Operand(table));
+  __ jmp(Operand(kScratchRegister, input, times_8, 0));
+}
+
+namespace {
+
+static const int kQuadWordSize = 16;
+
+}  // namespace
+
+void CodeGenerator::FinishFrame(Frame* frame) {
+  CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
+
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {  // Save callee-saved XMM registers.
+    frame->AlignSavedCalleeRegisterSlots();
+    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+    frame->AllocateSavedCalleeRegisterSlots(
+        saves_fp_count * (kQuadWordSize / kSystemPointerSize));
+  }
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {  // Save callee-saved registers.
+    int count = 0;
+    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+      if (((1 << i) & saves)) {
+        ++count;
+      }
+    }
+    frame->AllocateSavedCalleeRegisterSlots(count);
+  }
+}
+
+void CodeGenerator::AssembleConstructFrame() {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+  if (frame_access_state()->has_frame()) {
+    int pc_base = __ pc_offset();
+
+    if (call_descriptor->IsCFunctionCall()) {
+      __ pushq(rbp);
+      __ movq(rbp, rsp);
+      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
+        __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
+        // Reserve stack space for saving the c_entry_fp later.
+        __ AllocateStackSpace(kSystemPointerSize);
+      }
+    } else if (call_descriptor->IsJSFunctionCall()) {
+      __ Prologue();
+    } else {
+      __ StubPrologue(info()->GetOutputStackFrameType());
+      if (call_descriptor->IsWasmFunctionCall()) {
+        __ pushq(kWasmInstanceRegister);
+      } else if (call_descriptor->IsWasmImportWrapper() ||
+                 call_descriptor->IsWasmCapiFunction()) {
+        // Wasm import wrappers are passed a tuple in the place of the instance.
+        // Unpack the tuple into the instance and the target callable.
+        // This must be done here in the codegen because it cannot be expressed
+        // properly in the graph.
+        __ LoadTaggedPointerField(
+            kJSFunctionRegister,
+            FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
+        __ LoadTaggedPointerField(
+            kWasmInstanceRegister,
+            FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
+        __ pushq(kWasmInstanceRegister);
+        if (call_descriptor->IsWasmCapiFunction()) {
+          // Reserve space for saving the PC later.
+          __ AllocateStackSpace(kSystemPointerSize);
+        }
+      }
+    }
+
+    unwinding_info_writer_.MarkFrameConstructed(pc_base);
+  }
+  int required_slots =
+      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
+
+  if (info()->is_osr()) {
+    // TurboFan OSR-compiled functions cannot be entered directly.
+    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
+
+    // Unoptimized code jumps directly to this entrypoint while the unoptimized
+    // frame is still on the stack. Optimized code uses OSR values directly from
+    // the unoptimized frame. Thus, all that needs to be done is to allocate the
+    // remaining stack slots.
+    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
+    osr_pc_offset_ = __ pc_offset();
+    required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
+    ResetSpeculationPoison();
+  }
+
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+
+  if (required_slots > 0) {
+    DCHECK(frame_access_state()->has_frame());
+    if (info()->IsWasm() && required_slots > 128) {
+      // For WebAssembly functions with big frames we have to do the stack
+      // overflow check before we construct the frame. Otherwise we may not
+      // have enough space on the stack to call the runtime for the stack
+      // overflow.
+      Label done;
+
+      // If the frame is bigger than the stack, we throw the stack overflow
+      // exception unconditionally. Thereby we can avoid the integer overflow
+      // check in the condition code.
+      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
+        __ movq(kScratchRegister,
+                FieldOperand(kWasmInstanceRegister,
+                             WasmInstanceObject::kRealStackLimitAddressOffset));
+        __ movq(kScratchRegister, Operand(kScratchRegister, 0));
+        __ addq(kScratchRegister,
+                Immediate(required_slots * kSystemPointerSize));
+        __ cmpq(rsp, kScratchRegister);
+        __ j(above_equal, &done, Label::kNear);
+      }
+
+      __ near_call(wasm::WasmCode::kWasmStackOverflow,
+                   RelocInfo::WASM_STUB_CALL);
+      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
+      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
+      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
+      __ bind(&done);
+    }
+
+    // Skip callee-saved and return slots, which are created below.
+    required_slots -= base::bits::CountPopulation(saves);
+    required_slots -= base::bits::CountPopulation(saves_fp) *
+                      (kQuadWordSize / kSystemPointerSize);
+    required_slots -= frame()->GetReturnSlotCount();
+    if (required_slots > 0) {
+      __ AllocateStackSpace(required_slots * kSystemPointerSize);
+    }
+  }
+
+  if (saves_fp != 0) {  // Save callee-saved XMM registers.
+    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+    const int stack_size = saves_fp_count * kQuadWordSize;
+    // Adjust the stack pointer.
+    __ AllocateStackSpace(stack_size);
+    // Store the registers on the stack.
+    int slot_idx = 0;
+    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
+      if (!((1 << i) & saves_fp)) continue;
+      __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
+                XMMRegister::from_code(i));
+      slot_idx++;
+    }
+  }
+
+  if (saves != 0) {  // Save callee-saved registers.
+    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
+      if (!((1 << i) & saves)) continue;
+      __ pushq(Register::from_code(i));
+    }
+  }
+
+  // Allocate return slots (located after callee-saved).
+  if (frame()->GetReturnSlotCount() > 0) {
+    __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
+  }
+}
+
+void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
+  auto call_descriptor = linkage()->GetIncomingDescriptor();
+
+  // Restore registers.
+  const RegList saves = call_descriptor->CalleeSavedRegisters();
+  if (saves != 0) {
+    const int returns = frame()->GetReturnSlotCount();
+    if (returns != 0) {
+      __ addq(rsp, Immediate(returns * kSystemPointerSize));
+    }
+    for (int i = 0; i < Register::kNumRegisters; i++) {
+      if (!((1 << i) & saves)) continue;
+      __ popq(Register::from_code(i));
+    }
+  }
+  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
+  if (saves_fp != 0) {
+    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
+    const int stack_size = saves_fp_count * kQuadWordSize;
+    // Load the registers from the stack.
+    int slot_idx = 0;
+    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
+      if (!((1 << i) & saves_fp)) continue;
+      __ Movdqu(XMMRegister::from_code(i),
+                Operand(rsp, kQuadWordSize * slot_idx));
+      slot_idx++;
+    }
+    // Adjust the stack pointer.
+    __ addq(rsp, Immediate(stack_size));
+  }
+
+  unwinding_info_writer_.MarkBlockWillExit();
+
+  // We might need rcx and r10 for scratch.
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
+  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & r10.bit());
+  X64OperandConverter g(this, nullptr);
+  int parameter_count =
+      static_cast<int>(call_descriptor->StackParameterCount());
+
+  // {additional_pop_count} is only greater than zero if {parameter_count} == 0.
+  // Check RawMachineAssembler::PopAndReturn.
+  if (parameter_count != 0) {
+    if (additional_pop_count->IsImmediate()) {
+      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
+    } else if (__ emit_debug_code()) {
+      __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
+      __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
+    }
+  }
+
+  Register argc_reg = rcx;
+#ifdef V8_NO_ARGUMENTS_ADAPTOR
+  // Functions with JS linkage have at least one parameter (the receiver).
+  // If {parameter_count} == 0, it means it is a builtin with
+  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
+  // itself.
+  const bool drop_jsargs = frame_access_state()->has_frame() &&
+                           call_descriptor->IsJSFunctionCall() &&
+                           parameter_count != 0;
+#else
+  const bool drop_jsargs = false;
+#endif
+  if (call_descriptor->IsCFunctionCall()) {
+    AssembleDeconstructFrame();
+  } else if (frame_access_state()->has_frame()) {
+    if (additional_pop_count->IsImmediate() &&
+        g.ToConstant(additional_pop_count).ToInt32() == 0) {
+      // Canonicalize JSFunction return sites for now.
+      if (return_label_.is_bound()) {
+        __ jmp(&return_label_);
+        return;
+      } else {
+        __ bind(&return_label_);
+      }
+    }
+    if (drop_jsargs) {
+      // Get the actual argument count.
+      __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
+    }
+    AssembleDeconstructFrame();
+  }
+
+  if (drop_jsargs) {
+    // We must pop all arguments from the stack (including the receiver). This
+    // number of arguments is given by max(1 + argc_reg, parameter_count).
+    int parameter_count_without_receiver =
+        parameter_count - 1;  // Exclude the receiver to simplify the
+                              // computation. We'll account for it at the end.
+    Label mismatch_return;
+    Register scratch_reg = r10;
+    DCHECK_NE(argc_reg, scratch_reg);
+    __ cmpq(argc_reg, Immediate(parameter_count_without_receiver));
+    __ j(greater, &mismatch_return, Label::kNear);
+    __ Ret(parameter_count * kSystemPointerSize, scratch_reg);
+    __ bind(&mismatch_return);
+    __ PopReturnAddressTo(scratch_reg);
+    __ leaq(rsp, Operand(rsp, argc_reg, times_system_pointer_size,
+                         kSystemPointerSize));  // Also pop the receiver.
+    // We use a return instead of a jump for better return address prediction.
+    __ PushReturnAddressFrom(scratch_reg);
+    __ Ret();
+  } else if (additional_pop_count->IsImmediate()) {
+    Register scratch_reg = r10;
+    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
+    size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
+    CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
+    __ Ret(static_cast<int>(pop_size), scratch_reg);
+  } else {
+    Register pop_reg = g.ToRegister(additional_pop_count);
+    Register scratch_reg = pop_reg == r10 ? rcx : r10;
+    int pop_size = static_cast<int>(parameter_count * kSystemPointerSize);
+    __ PopReturnAddressTo(scratch_reg);
+    __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
+                         static_cast<int>(pop_size)));
+    __ PushReturnAddressFrom(scratch_reg);
+    __ Ret();
+  }
+}
+
+void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
+
+void CodeGenerator::PrepareForDeoptimizationExits(
+    ZoneDeque<DeoptimizationExit*>* exits) {}
+
+void CodeGenerator::IncrementStackAccessCounter(
+    InstructionOperand* source, InstructionOperand* destination) {
+  DCHECK(FLAG_trace_turbo_stack_accesses);
+  if (!info()->IsOptimizing() && !info()->IsWasm()) return;
+  DCHECK_NOT_NULL(debug_name_);
+  auto IncrementCounter = [&](ExternalReference counter) {
+    __ incl(__ ExternalReferenceAsOperand(counter));
+  };
+  if (source->IsAnyStackSlot()) {
+    IncrementCounter(
+        ExternalReference::address_of_load_from_stack_count(debug_name_));
+  }
+  if (destination->IsAnyStackSlot()) {
+    IncrementCounter(
+        ExternalReference::address_of_store_to_stack_count(debug_name_));
+  }
+}
+
+void CodeGenerator::AssembleMove(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  X64OperandConverter g(this, nullptr);
+  // Helper function to write the given constant to the dst register.
+  auto MoveConstantToRegister = [&](Register dst, Constant src) {
+    switch (src.type()) {
+      case Constant::kInt32: {
+        if (RelocInfo::IsWasmReference(src.rmode())) {
+          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
+        } else {
+          int32_t value = src.ToInt32();
+          if (value == 0) {
+            __ xorl(dst, dst);
+          } else {
+            __ movl(dst, Immediate(value));
+          }
+        }
+        break;
+      }
+      case Constant::kInt64:
+        if (RelocInfo::IsWasmReference(src.rmode())) {
+          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
+        } else {
+          __ Set(dst, src.ToInt64());
+        }
+        break;
+      case Constant::kFloat32:
+        __ MoveNumber(dst, src.ToFloat32());
+        break;
+      case Constant::kFloat64:
+        __ MoveNumber(dst, src.ToFloat64().value());
+        break;
+      case Constant::kExternalReference:
+        __ Move(dst, src.ToExternalReference());
+        break;
+      case Constant::kHeapObject: {
+        Handle<HeapObject> src_object = src.ToHeapObject();
+        RootIndex index;
+        if (IsMaterializableFromRoot(src_object, &index)) {
+          __ LoadRoot(dst, index);
+        } else {
+          __ Move(dst, src_object);
+        }
+        break;
+      }
+      case Constant::kCompressedHeapObject: {
+        Handle<HeapObject> src_object = src.ToHeapObject();
+        RootIndex index;
+        if (IsMaterializableFromRoot(src_object, &index)) {
+          __ LoadRoot(dst, index);
+        } else {
+          __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
+        }
+        break;
+      }
+      case Constant::kDelayedStringConstant: {
+        const StringConstantBase* src_constant = src.ToDelayedStringConstant();
+        __ MoveStringConstant(dst, src_constant);
+        break;
+      }
+      case Constant::kRpoNumber:
+        UNREACHABLE();  // TODO(dcarney): load of labels on x64.
+        break;
+    }
+  };
+  // Helper function to write the given constant to the stack.
+  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
+    if (!RelocInfo::IsWasmReference(src.rmode())) {
+      switch (src.type()) {
+        case Constant::kInt32:
+          __ movq(dst, Immediate(src.ToInt32()));
+          return;
+        case Constant::kInt64:
+          __ Set(dst, src.ToInt64());
+          return;
+        default:
+          break;
+      }
+    }
+    MoveConstantToRegister(kScratchRegister, src);
+    __ movq(dst, kScratchRegister);
+  };
+
+  if (FLAG_trace_turbo_stack_accesses) {
+    IncrementStackAccessCounter(source, destination);
+  }
+
+  // Dispatch on the source and destination operand kinds.
+  switch (MoveType::InferMove(source, destination)) {
+    case MoveType::kRegisterToRegister:
+      if (source->IsRegister()) {
+        __ movq(g.ToRegister(destination), g.ToRegister(source));
+      } else {
+        DCHECK(source->IsFPRegister());
+        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
+      }
+      return;
+    case MoveType::kRegisterToStack: {
+      Operand dst = g.ToOperand(destination);
+      if (source->IsRegister()) {
+        __ movq(dst, g.ToRegister(source));
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep != MachineRepresentation::kSimd128) {
+          __ Movsd(dst, src);
+        } else {
+          __ Movups(dst, src);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToRegister: {
+      Operand src = g.ToOperand(source);
+      if (source->IsStackSlot()) {
+        __ movq(g.ToRegister(destination), src);
+      } else {
+        DCHECK(source->IsFPStackSlot());
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep != MachineRepresentation::kSimd128) {
+          __ Movsd(dst, src);
+        } else {
+          __ Movups(dst, src);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      Operand src = g.ToOperand(source);
+      Operand dst = g.ToOperand(destination);
+      if (source->IsStackSlot()) {
+        // Spill on demand to use a temporary register for memory-to-memory
+        // moves.
+        __ movq(kScratchRegister, src);
+        __ movq(dst, kScratchRegister);
+      } else {
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep != MachineRepresentation::kSimd128) {
+          __ Movsd(kScratchDoubleReg, src);
+          __ Movsd(dst, kScratchDoubleReg);
+        } else {
+          DCHECK(source->IsSimd128StackSlot());
+          __ Movups(kScratchDoubleReg, src);
+          __ Movups(dst, kScratchDoubleReg);
+        }
+      }
+      return;
+    }
+    case MoveType::kConstantToRegister: {
+      Constant src = g.ToConstant(source);
+      if (destination->IsRegister()) {
+        MoveConstantToRegister(g.ToRegister(destination), src);
+      } else {
+        DCHECK(destination->IsFPRegister());
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        if (src.type() == Constant::kFloat32) {
+          // TODO(turbofan): Can we do better here?
+          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
+        } else {
+          DCHECK_EQ(src.type(), Constant::kFloat64);
+          __ Move(dst, src.ToFloat64().AsUint64());
+        }
+      }
+      return;
+    }
+    case MoveType::kConstantToStack: {
+      Constant src = g.ToConstant(source);
+      Operand dst = g.ToOperand(destination);
+      if (destination->IsStackSlot()) {
+        MoveConstantToSlot(dst, src);
+      } else {
+        DCHECK(destination->IsFPStackSlot());
+        if (src.type() == Constant::kFloat32) {
+          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
+        } else {
+          DCHECK_EQ(src.type(), Constant::kFloat64);
+          __ movq(kScratchRegister, src.ToFloat64().AsUint64());
+          __ movq(dst, kScratchRegister);
+        }
+      }
+      return;
+    }
+  }
+  UNREACHABLE();
+}
+
+void CodeGenerator::AssembleSwap(InstructionOperand* source,
+                                 InstructionOperand* destination) {
+  if (FLAG_trace_turbo_stack_accesses) {
+    IncrementStackAccessCounter(source, destination);
+    IncrementStackAccessCounter(destination, source);
+  }
+
+  X64OperandConverter g(this, nullptr);
+  // Dispatch on the source and destination operand kinds.  Not all
+  // combinations are possible.
+  switch (MoveType::InferSwap(source, destination)) {
+    case MoveType::kRegisterToRegister: {
+      if (source->IsRegister()) {
+        Register src = g.ToRegister(source);
+        Register dst = g.ToRegister(destination);
+        __ movq(kScratchRegister, src);
+        __ movq(src, dst);
+        __ movq(dst, kScratchRegister);
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        XMMRegister dst = g.ToDoubleRegister(destination);
+        __ Movapd(kScratchDoubleReg, src);
+        __ Movapd(src, dst);
+        __ Movapd(dst, kScratchDoubleReg);
+      }
+      return;
+    }
+    case MoveType::kRegisterToStack: {
+      if (source->IsRegister()) {
+        Register src = g.ToRegister(source);
+        Operand dst = g.ToOperand(destination);
+        __ movq(kScratchRegister, src);
+        __ movq(src, dst);
+        __ movq(dst, kScratchRegister);
+      } else {
+        DCHECK(source->IsFPRegister());
+        XMMRegister src = g.ToDoubleRegister(source);
+        Operand dst = g.ToOperand(destination);
+        MachineRepresentation rep =
+            LocationOperand::cast(source)->representation();
+        if (rep != MachineRepresentation::kSimd128) {
+          __ Movsd(kScratchDoubleReg, src);
+          __ Movsd(src, dst);
+          __ Movsd(dst, kScratchDoubleReg);
+        } else {
+          __ Movups(kScratchDoubleReg, src);
+          __ Movups(src, dst);
+          __ Movups(dst, kScratchDoubleReg);
+        }
+      }
+      return;
+    }
+    case MoveType::kStackToStack: {
+      Operand src = g.ToOperand(source);
+      Operand dst = g.ToOperand(destination);
+      MachineRepresentation rep =
+          LocationOperand::cast(source)->representation();
+      if (rep != MachineRepresentation::kSimd128) {
+        Register tmp = kScratchRegister;
+        __ movq(tmp, dst);
+        __ pushq(src);  // Then use stack to copy src to destination.
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+        __ popq(dst);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         -kSystemPointerSize);
+        __ movq(src, tmp);
+      } else {
+        // Without AVX, misaligned reads and writes will trap. Move using the
+        // stack, in two parts.
+        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
+        __ pushq(src);  // Then use stack to copy src to destination.
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+        __ popq(dst);
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         -kSystemPointerSize);
+        __ pushq(g.ToOperand(source, kSystemPointerSize));
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         kSystemPointerSize);
+        __ popq(g.ToOperand(destination, kSystemPointerSize));
+        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
+                                                         -kSystemPointerSize);
+        __ movups(src, kScratchDoubleReg);
+      }
+      return;
+    }
+    default:
+      UNREACHABLE();
+  }
+}
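+// Sketch of the non-AVX 128-bit stack-to-stack swap above, assuming
+// kSystemPointerSize == 8: dst is first saved in kScratchDoubleReg, the two
+// 8-byte halves of src are then copied into dst with push/pop pairs (offset 0
+// and offset 8), and finally the saved 16 bytes are written back to src with
+// movups, completing the swap.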
+
+void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
+  for (size_t index = 0; index < target_count; ++index) {
+    __ dq(targets[index]);
+  }
+}
+
+#undef __
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/x64/instruction-codes-x64.h b/src/compiler/backend/x64/instruction-codes-x64.h
new file mode 100644
index 0000000..f1958e8
--- /dev/null
+++ b/src/compiler/backend/x64/instruction-codes-x64.h
@@ -0,0 +1,442 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_X64_INSTRUCTION_CODES_X64_H_
+#define V8_COMPILER_BACKEND_X64_INSTRUCTION_CODES_X64_H_
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// X64-specific opcodes that specify which assembly sequence to emit.
+// Most opcodes specify a single instruction.
+#define TARGET_ARCH_OPCODE_LIST(V)        \
+  V(X64Add)                               \
+  V(X64Add32)                             \
+  V(X64And)                               \
+  V(X64And32)                             \
+  V(X64Cmp)                               \
+  V(X64Cmp32)                             \
+  V(X64Cmp16)                             \
+  V(X64Cmp8)                              \
+  V(X64Test)                              \
+  V(X64Test32)                            \
+  V(X64Test16)                            \
+  V(X64Test8)                             \
+  V(X64Or)                                \
+  V(X64Or32)                              \
+  V(X64Xor)                               \
+  V(X64Xor32)                             \
+  V(X64Sub)                               \
+  V(X64Sub32)                             \
+  V(X64Imul)                              \
+  V(X64Imul32)                            \
+  V(X64ImulHigh32)                        \
+  V(X64UmulHigh32)                        \
+  V(X64Idiv)                              \
+  V(X64Idiv32)                            \
+  V(X64Udiv)                              \
+  V(X64Udiv32)                            \
+  V(X64Not)                               \
+  V(X64Not32)                             \
+  V(X64Neg)                               \
+  V(X64Neg32)                             \
+  V(X64Shl)                               \
+  V(X64Shl32)                             \
+  V(X64Shr)                               \
+  V(X64Shr32)                             \
+  V(X64Sar)                               \
+  V(X64Sar32)                             \
+  V(X64Rol)                               \
+  V(X64Rol32)                             \
+  V(X64Ror)                               \
+  V(X64Ror32)                             \
+  V(X64Lzcnt)                             \
+  V(X64Lzcnt32)                           \
+  V(X64Tzcnt)                             \
+  V(X64Tzcnt32)                           \
+  V(X64Popcnt)                            \
+  V(X64Popcnt32)                          \
+  V(X64Bswap)                             \
+  V(X64Bswap32)                           \
+  V(X64MFence)                            \
+  V(X64LFence)                            \
+  V(SSEFloat32Cmp)                        \
+  V(SSEFloat32Add)                        \
+  V(SSEFloat32Sub)                        \
+  V(SSEFloat32Mul)                        \
+  V(SSEFloat32Div)                        \
+  V(SSEFloat32Abs)                        \
+  V(SSEFloat32Neg)                        \
+  V(SSEFloat32Sqrt)                       \
+  V(SSEFloat32ToFloat64)                  \
+  V(SSEFloat32ToInt32)                    \
+  V(SSEFloat32ToUint32)                   \
+  V(SSEFloat32Round)                      \
+  V(SSEFloat64Cmp)                        \
+  V(SSEFloat64Add)                        \
+  V(SSEFloat64Sub)                        \
+  V(SSEFloat64Mul)                        \
+  V(SSEFloat64Div)                        \
+  V(SSEFloat64Mod)                        \
+  V(SSEFloat64Abs)                        \
+  V(SSEFloat64Neg)                        \
+  V(SSEFloat64Sqrt)                       \
+  V(SSEFloat64Round)                      \
+  V(SSEFloat32Max)                        \
+  V(SSEFloat64Max)                        \
+  V(SSEFloat32Min)                        \
+  V(SSEFloat64Min)                        \
+  V(SSEFloat64ToFloat32)                  \
+  V(SSEFloat64ToInt32)                    \
+  V(SSEFloat64ToUint32)                   \
+  V(SSEFloat32ToInt64)                    \
+  V(SSEFloat64ToInt64)                    \
+  V(SSEFloat32ToUint64)                   \
+  V(SSEFloat64ToUint64)                   \
+  V(SSEInt32ToFloat64)                    \
+  V(SSEInt32ToFloat32)                    \
+  V(SSEInt64ToFloat32)                    \
+  V(SSEInt64ToFloat64)                    \
+  V(SSEUint64ToFloat32)                   \
+  V(SSEUint64ToFloat64)                   \
+  V(SSEUint32ToFloat64)                   \
+  V(SSEUint32ToFloat32)                   \
+  V(SSEFloat64ExtractLowWord32)           \
+  V(SSEFloat64ExtractHighWord32)          \
+  V(SSEFloat64InsertLowWord32)            \
+  V(SSEFloat64InsertHighWord32)           \
+  V(SSEFloat64LoadLowWord32)              \
+  V(SSEFloat64SilenceNaN)                 \
+  V(AVXFloat32Cmp)                        \
+  V(AVXFloat32Add)                        \
+  V(AVXFloat32Sub)                        \
+  V(AVXFloat32Mul)                        \
+  V(AVXFloat32Div)                        \
+  V(AVXFloat64Cmp)                        \
+  V(AVXFloat64Add)                        \
+  V(AVXFloat64Sub)                        \
+  V(AVXFloat64Mul)                        \
+  V(AVXFloat64Div)                        \
+  V(AVXFloat64Abs)                        \
+  V(AVXFloat64Neg)                        \
+  V(AVXFloat32Abs)                        \
+  V(AVXFloat32Neg)                        \
+  V(X64Movsxbl)                           \
+  V(X64Movzxbl)                           \
+  V(X64Movsxbq)                           \
+  V(X64Movzxbq)                           \
+  V(X64Movb)                              \
+  V(X64Movsxwl)                           \
+  V(X64Movzxwl)                           \
+  V(X64Movsxwq)                           \
+  V(X64Movzxwq)                           \
+  V(X64Movw)                              \
+  V(X64Movl)                              \
+  V(X64Movsxlq)                           \
+  V(X64MovqDecompressTaggedSigned)        \
+  V(X64MovqDecompressTaggedPointer)       \
+  V(X64MovqDecompressAnyTagged)           \
+  V(X64MovqCompressTagged)                \
+  V(X64Movq)                              \
+  V(X64Movsd)                             \
+  V(X64Movss)                             \
+  V(X64Movdqu)                            \
+  V(X64BitcastFI)                         \
+  V(X64BitcastDL)                         \
+  V(X64BitcastIF)                         \
+  V(X64BitcastLD)                         \
+  V(X64Lea32)                             \
+  V(X64Lea)                               \
+  V(X64Dec32)                             \
+  V(X64Inc32)                             \
+  V(X64Push)                              \
+  V(X64Poke)                              \
+  V(X64Peek)                              \
+  V(X64F64x2Splat)                        \
+  V(X64F64x2ExtractLane)                  \
+  V(X64F64x2Abs)                          \
+  V(X64F64x2Neg)                          \
+  V(X64F64x2Sqrt)                         \
+  V(X64F64x2Add)                          \
+  V(X64F64x2Sub)                          \
+  V(X64F64x2Mul)                          \
+  V(X64F64x2Div)                          \
+  V(X64F64x2Min)                          \
+  V(X64F64x2Max)                          \
+  V(X64F64x2Eq)                           \
+  V(X64F64x2Ne)                           \
+  V(X64F64x2Lt)                           \
+  V(X64F64x2Le)                           \
+  V(X64F64x2Qfma)                         \
+  V(X64F64x2Qfms)                         \
+  V(X64F64x2Pmin)                         \
+  V(X64F64x2Pmax)                         \
+  V(X64F64x2Round)                        \
+  V(X64F32x4Splat)                        \
+  V(X64F32x4ExtractLane)                  \
+  V(X64F32x4ReplaceLane)                  \
+  V(X64F32x4SConvertI32x4)                \
+  V(X64F32x4UConvertI32x4)                \
+  V(X64F32x4Abs)                          \
+  V(X64F32x4Neg)                          \
+  V(X64F32x4Sqrt)                         \
+  V(X64F32x4RecipApprox)                  \
+  V(X64F32x4RecipSqrtApprox)              \
+  V(X64F32x4Add)                          \
+  V(X64F32x4AddHoriz)                     \
+  V(X64F32x4Sub)                          \
+  V(X64F32x4Mul)                          \
+  V(X64F32x4Div)                          \
+  V(X64F32x4Min)                          \
+  V(X64F32x4Max)                          \
+  V(X64F32x4Eq)                           \
+  V(X64F32x4Ne)                           \
+  V(X64F32x4Lt)                           \
+  V(X64F32x4Le)                           \
+  V(X64F32x4Qfma)                         \
+  V(X64F32x4Qfms)                         \
+  V(X64F32x4Pmin)                         \
+  V(X64F32x4Pmax)                         \
+  V(X64F32x4Round)                        \
+  V(X64I64x2Splat)                        \
+  V(X64I64x2ExtractLane)                  \
+  V(X64I64x2Neg)                          \
+  V(X64I64x2BitMask)                      \
+  V(X64I64x2Shl)                          \
+  V(X64I64x2ShrS)                         \
+  V(X64I64x2Add)                          \
+  V(X64I64x2Sub)                          \
+  V(X64I64x2Mul)                          \
+  V(X64I64x2Eq)                           \
+  V(X64I64x2ShrU)                         \
+  V(X64I64x2SignSelect)                   \
+  V(X64I32x4Splat)                        \
+  V(X64I32x4ExtractLane)                  \
+  V(X64I32x4SConvertF32x4)                \
+  V(X64I32x4SConvertI16x8Low)             \
+  V(X64I32x4SConvertI16x8High)            \
+  V(X64I32x4Neg)                          \
+  V(X64I32x4Shl)                          \
+  V(X64I32x4ShrS)                         \
+  V(X64I32x4Add)                          \
+  V(X64I32x4AddHoriz)                     \
+  V(X64I32x4Sub)                          \
+  V(X64I32x4Mul)                          \
+  V(X64I32x4MinS)                         \
+  V(X64I32x4MaxS)                         \
+  V(X64I32x4Eq)                           \
+  V(X64I32x4Ne)                           \
+  V(X64I32x4GtS)                          \
+  V(X64I32x4GeS)                          \
+  V(X64I32x4UConvertF32x4)                \
+  V(X64I32x4UConvertI16x8Low)             \
+  V(X64I32x4UConvertI16x8High)            \
+  V(X64I32x4ShrU)                         \
+  V(X64I32x4MinU)                         \
+  V(X64I32x4MaxU)                         \
+  V(X64I32x4GtU)                          \
+  V(X64I32x4GeU)                          \
+  V(X64I32x4Abs)                          \
+  V(X64I32x4BitMask)                      \
+  V(X64I32x4DotI16x8S)                    \
+  V(X64I32x4SignSelect)                   \
+  V(X64I16x8Splat)                        \
+  V(X64I16x8ExtractLaneS)                 \
+  V(X64I16x8SConvertI8x16Low)             \
+  V(X64I16x8SConvertI8x16High)            \
+  V(X64I16x8Neg)                          \
+  V(X64I16x8Shl)                          \
+  V(X64I16x8ShrS)                         \
+  V(X64I16x8SConvertI32x4)                \
+  V(X64I16x8Add)                          \
+  V(X64I16x8AddSatS)                      \
+  V(X64I16x8AddHoriz)                     \
+  V(X64I16x8Sub)                          \
+  V(X64I16x8SubSatS)                      \
+  V(X64I16x8Mul)                          \
+  V(X64I16x8MinS)                         \
+  V(X64I16x8MaxS)                         \
+  V(X64I16x8Eq)                           \
+  V(X64I16x8Ne)                           \
+  V(X64I16x8GtS)                          \
+  V(X64I16x8GeS)                          \
+  V(X64I16x8UConvertI8x16Low)             \
+  V(X64I16x8UConvertI8x16High)            \
+  V(X64I16x8ShrU)                         \
+  V(X64I16x8UConvertI32x4)                \
+  V(X64I16x8AddSatU)                      \
+  V(X64I16x8SubSatU)                      \
+  V(X64I16x8MinU)                         \
+  V(X64I16x8MaxU)                         \
+  V(X64I16x8GtU)                          \
+  V(X64I16x8GeU)                          \
+  V(X64I16x8RoundingAverageU)             \
+  V(X64I16x8Abs)                          \
+  V(X64I16x8BitMask)                      \
+  V(X64I16x8SignSelect)                   \
+  V(X64I8x16Splat)                        \
+  V(X64I8x16ExtractLaneS)                 \
+  V(X64Pinsrb)                            \
+  V(X64Pinsrw)                            \
+  V(X64Pinsrd)                            \
+  V(X64Pinsrq)                            \
+  V(X64Pextrb)                            \
+  V(X64Pextrw)                            \
+  V(X64I8x16SConvertI16x8)                \
+  V(X64I8x16Neg)                          \
+  V(X64I8x16Shl)                          \
+  V(X64I8x16ShrS)                         \
+  V(X64I8x16Add)                          \
+  V(X64I8x16AddSatS)                      \
+  V(X64I8x16Sub)                          \
+  V(X64I8x16SubSatS)                      \
+  V(X64I8x16Mul)                          \
+  V(X64I8x16MinS)                         \
+  V(X64I8x16MaxS)                         \
+  V(X64I8x16Eq)                           \
+  V(X64I8x16Ne)                           \
+  V(X64I8x16GtS)                          \
+  V(X64I8x16GeS)                          \
+  V(X64I8x16UConvertI16x8)                \
+  V(X64I8x16AddSatU)                      \
+  V(X64I8x16SubSatU)                      \
+  V(X64I8x16ShrU)                         \
+  V(X64I8x16MinU)                         \
+  V(X64I8x16MaxU)                         \
+  V(X64I8x16GtU)                          \
+  V(X64I8x16GeU)                          \
+  V(X64I8x16RoundingAverageU)             \
+  V(X64I8x16Abs)                          \
+  V(X64I8x16BitMask)                      \
+  V(X64I8x16SignSelect)                   \
+  V(X64S128Const)                         \
+  V(X64S128Zero)                          \
+  V(X64S128AllOnes)                       \
+  V(X64S128Not)                           \
+  V(X64S128And)                           \
+  V(X64S128Or)                            \
+  V(X64S128Xor)                           \
+  V(X64S128Select)                        \
+  V(X64S128AndNot)                        \
+  V(X64I8x16Swizzle)                      \
+  V(X64I8x16Shuffle)                      \
+  V(X64S128Load8Splat)                    \
+  V(X64S128Load16Splat)                   \
+  V(X64S128Load32Splat)                   \
+  V(X64S128Load64Splat)                   \
+  V(X64S128Load8x8S)                      \
+  V(X64S128Load8x8U)                      \
+  V(X64S128Load16x4S)                     \
+  V(X64S128Load16x4U)                     \
+  V(X64S128Load32x2S)                     \
+  V(X64S128Load32x2U)                     \
+  V(X64S128Store32Lane)                   \
+  V(X64S128Store64Lane)                   \
+  V(X64S32x4Swizzle)                      \
+  V(X64S32x4Shuffle)                      \
+  V(X64S16x8Blend)                        \
+  V(X64S16x8HalfShuffle1)                 \
+  V(X64S16x8HalfShuffle2)                 \
+  V(X64S8x16Alignr)                       \
+  V(X64S16x8Dup)                          \
+  V(X64S8x16Dup)                          \
+  V(X64S16x8UnzipHigh)                    \
+  V(X64S16x8UnzipLow)                     \
+  V(X64S8x16UnzipHigh)                    \
+  V(X64S8x16UnzipLow)                     \
+  V(X64S64x2UnpackHigh)                   \
+  V(X64S32x4UnpackHigh)                   \
+  V(X64S16x8UnpackHigh)                   \
+  V(X64S8x16UnpackHigh)                   \
+  V(X64S64x2UnpackLow)                    \
+  V(X64S32x4UnpackLow)                    \
+  V(X64S16x8UnpackLow)                    \
+  V(X64S8x16UnpackLow)                    \
+  V(X64S8x16TransposeLow)                 \
+  V(X64S8x16TransposeHigh)                \
+  V(X64S8x8Reverse)                       \
+  V(X64S8x4Reverse)                       \
+  V(X64S8x2Reverse)                       \
+  V(X64V32x4AnyTrue)                      \
+  V(X64V32x4AllTrue)                      \
+  V(X64V16x8AnyTrue)                      \
+  V(X64V16x8AllTrue)                      \
+  V(X64V8x16AnyTrue)                      \
+  V(X64V8x16AllTrue)                      \
+  V(X64Word64AtomicLoadUint8)             \
+  V(X64Word64AtomicLoadUint16)            \
+  V(X64Word64AtomicLoadUint32)            \
+  V(X64Word64AtomicLoadUint64)            \
+  V(X64Word64AtomicStoreWord8)            \
+  V(X64Word64AtomicStoreWord16)           \
+  V(X64Word64AtomicStoreWord32)           \
+  V(X64Word64AtomicStoreWord64)           \
+  V(X64Word64AtomicAddUint8)              \
+  V(X64Word64AtomicAddUint16)             \
+  V(X64Word64AtomicAddUint32)             \
+  V(X64Word64AtomicAddUint64)             \
+  V(X64Word64AtomicSubUint8)              \
+  V(X64Word64AtomicSubUint16)             \
+  V(X64Word64AtomicSubUint32)             \
+  V(X64Word64AtomicSubUint64)             \
+  V(X64Word64AtomicAndUint8)              \
+  V(X64Word64AtomicAndUint16)             \
+  V(X64Word64AtomicAndUint32)             \
+  V(X64Word64AtomicAndUint64)             \
+  V(X64Word64AtomicOrUint8)               \
+  V(X64Word64AtomicOrUint16)              \
+  V(X64Word64AtomicOrUint32)              \
+  V(X64Word64AtomicOrUint64)              \
+  V(X64Word64AtomicXorUint8)              \
+  V(X64Word64AtomicXorUint16)             \
+  V(X64Word64AtomicXorUint32)             \
+  V(X64Word64AtomicXorUint64)             \
+  V(X64Word64AtomicExchangeUint8)         \
+  V(X64Word64AtomicExchangeUint16)        \
+  V(X64Word64AtomicExchangeUint32)        \
+  V(X64Word64AtomicExchangeUint64)        \
+  V(X64Word64AtomicCompareExchangeUint8)  \
+  V(X64Word64AtomicCompareExchangeUint16) \
+  V(X64Word64AtomicCompareExchangeUint32) \
+  V(X64Word64AtomicCompareExchangeUint64)
+
+// Addressing modes represent the "shape" of inputs to an instruction.
+// Many instructions support multiple addressing modes. Addressing modes
+// are encoded into the InstructionCode of the instruction and tell the
+// code generator after register allocation which assembler method to call.
+//
+// We use the following local notation for addressing modes:
+//
+// M = memory operand
+// R = base register
+// N = index register * N for N in {1, 2, 4, 8}
+// I = immediate displacement (32-bit signed integer)
+
+#define TARGET_ADDRESSING_MODE_LIST(V) \
+  V(MR)   /* [%r1            ] */      \
+  V(MRI)  /* [%r1         + K] */      \
+  V(MR1)  /* [%r1 + %r2*1    ] */      \
+  V(MR2)  /* [%r1 + %r2*2    ] */      \
+  V(MR4)  /* [%r1 + %r2*4    ] */      \
+  V(MR8)  /* [%r1 + %r2*8    ] */      \
+  V(MR1I) /* [%r1 + %r2*1 + K] */      \
+  V(MR2I) /* [%r1 + %r2*2 + K] */      \
+  V(MR4I) /* [%r1 + %r2*4 + K] */      \
+  V(MR8I) /* [%r1 + %r2*8 + K] */      \
+  V(M1)   /* [      %r2*1    ] */      \
+  V(M2)   /* [      %r2*2    ] */      \
+  V(M4)   /* [      %r2*4    ] */      \
+  V(M8)   /* [      %r2*8    ] */      \
+  V(M1I)  /* [      %r2*1 + K] */      \
+  V(M2I)  /* [      %r2*2 + K] */      \
+  V(M4I)  /* [      %r2*4 + K] */      \
+  V(M8I)  /* [      %r2*8 + K] */      \
+  V(Root) /* [%root       + K] */
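+// For illustration, under the notation above: an access such as
+// [%rbx + %rcx*4 + 16] is selected with mode MR4I and inputs
+// (base %rbx, index %rcx, displacement 16), while a plain register-indirect
+// access [%rax] uses mode MR.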
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_X64_INSTRUCTION_CODES_X64_H_
diff --git a/src/compiler/backend/x64/instruction-scheduler-x64.cc b/src/compiler/backend/x64/instruction-scheduler-x64.cc
new file mode 100644
index 0000000..2af0877
--- /dev/null
+++ b/src/compiler/backend/x64/instruction-scheduler-x64.cc
@@ -0,0 +1,511 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+bool InstructionScheduler::SchedulerSupported() { return true; }
+
+int InstructionScheduler::GetTargetInstructionFlags(
+    const Instruction* instr) const {
+  switch (instr->arch_opcode()) {
+    case kX64Add:
+    case kX64Add32:
+    case kX64And:
+    case kX64And32:
+    case kX64Cmp:
+    case kX64Cmp32:
+    case kX64Cmp16:
+    case kX64Cmp8:
+    case kX64Test:
+    case kX64Test32:
+    case kX64Test16:
+    case kX64Test8:
+    case kX64Or:
+    case kX64Or32:
+    case kX64Xor:
+    case kX64Xor32:
+    case kX64Sub:
+    case kX64Sub32:
+    case kX64Imul:
+    case kX64Imul32:
+    case kX64ImulHigh32:
+    case kX64UmulHigh32:
+    case kX64Not:
+    case kX64Not32:
+    case kX64Neg:
+    case kX64Neg32:
+    case kX64Shl:
+    case kX64Shl32:
+    case kX64Shr:
+    case kX64Shr32:
+    case kX64Sar:
+    case kX64Sar32:
+    case kX64Rol:
+    case kX64Rol32:
+    case kX64Ror:
+    case kX64Ror32:
+    case kX64Lzcnt:
+    case kX64Lzcnt32:
+    case kX64Tzcnt:
+    case kX64Tzcnt32:
+    case kX64Popcnt:
+    case kX64Popcnt32:
+    case kX64Bswap:
+    case kX64Bswap32:
+    case kSSEFloat32Cmp:
+    case kSSEFloat32Add:
+    case kSSEFloat32Sub:
+    case kSSEFloat32Mul:
+    case kSSEFloat32Div:
+    case kSSEFloat32Abs:
+    case kSSEFloat32Neg:
+    case kSSEFloat32Sqrt:
+    case kSSEFloat32Round:
+    case kSSEFloat32ToFloat64:
+    case kSSEFloat64Cmp:
+    case kSSEFloat64Add:
+    case kSSEFloat64Sub:
+    case kSSEFloat64Mul:
+    case kSSEFloat64Div:
+    case kSSEFloat64Mod:
+    case kSSEFloat64Abs:
+    case kSSEFloat64Neg:
+    case kSSEFloat64Sqrt:
+    case kSSEFloat64Round:
+    case kSSEFloat32Max:
+    case kSSEFloat64Max:
+    case kSSEFloat32Min:
+    case kSSEFloat64Min:
+    case kSSEFloat64ToFloat32:
+    case kSSEFloat32ToInt32:
+    case kSSEFloat32ToUint32:
+    case kSSEFloat64ToInt32:
+    case kSSEFloat64ToUint32:
+    case kSSEFloat64ToInt64:
+    case kSSEFloat32ToInt64:
+    case kSSEFloat64ToUint64:
+    case kSSEFloat32ToUint64:
+    case kSSEInt32ToFloat64:
+    case kSSEInt32ToFloat32:
+    case kSSEInt64ToFloat32:
+    case kSSEInt64ToFloat64:
+    case kSSEUint64ToFloat32:
+    case kSSEUint64ToFloat64:
+    case kSSEUint32ToFloat64:
+    case kSSEUint32ToFloat32:
+    case kSSEFloat64ExtractLowWord32:
+    case kSSEFloat64ExtractHighWord32:
+    case kSSEFloat64InsertLowWord32:
+    case kSSEFloat64InsertHighWord32:
+    case kSSEFloat64LoadLowWord32:
+    case kSSEFloat64SilenceNaN:
+    case kAVXFloat32Cmp:
+    case kAVXFloat32Add:
+    case kAVXFloat32Sub:
+    case kAVXFloat32Mul:
+    case kAVXFloat32Div:
+    case kAVXFloat64Cmp:
+    case kAVXFloat64Add:
+    case kAVXFloat64Sub:
+    case kAVXFloat64Mul:
+    case kAVXFloat64Div:
+    case kAVXFloat64Abs:
+    case kAVXFloat64Neg:
+    case kAVXFloat32Abs:
+    case kAVXFloat32Neg:
+    case kX64BitcastFI:
+    case kX64BitcastDL:
+    case kX64BitcastIF:
+    case kX64BitcastLD:
+    case kX64Lea32:
+    case kX64Lea:
+    case kX64Dec32:
+    case kX64Inc32:
+    case kX64Pinsrb:
+    case kX64Pinsrw:
+    case kX64Pinsrd:
+    case kX64Pinsrq:
+    case kX64F64x2Splat:
+    case kX64F64x2ExtractLane:
+    case kX64F64x2Abs:
+    case kX64F64x2Neg:
+    case kX64F64x2Sqrt:
+    case kX64F64x2Add:
+    case kX64F64x2Sub:
+    case kX64F64x2Mul:
+    case kX64F64x2Div:
+    case kX64F64x2Min:
+    case kX64F64x2Max:
+    case kX64F64x2Eq:
+    case kX64F64x2Ne:
+    case kX64F64x2Lt:
+    case kX64F64x2Le:
+    case kX64F64x2Qfma:
+    case kX64F64x2Qfms:
+    case kX64F64x2Pmin:
+    case kX64F64x2Pmax:
+    case kX64F64x2Round:
+    case kX64F32x4Splat:
+    case kX64F32x4ExtractLane:
+    case kX64F32x4ReplaceLane:
+    case kX64F32x4SConvertI32x4:
+    case kX64F32x4UConvertI32x4:
+    case kX64F32x4RecipApprox:
+    case kX64F32x4RecipSqrtApprox:
+    case kX64F32x4Abs:
+    case kX64F32x4Neg:
+    case kX64F32x4Sqrt:
+    case kX64F32x4Add:
+    case kX64F32x4AddHoriz:
+    case kX64F32x4Sub:
+    case kX64F32x4Mul:
+    case kX64F32x4Div:
+    case kX64F32x4Min:
+    case kX64F32x4Max:
+    case kX64F32x4Eq:
+    case kX64F32x4Ne:
+    case kX64F32x4Lt:
+    case kX64F32x4Le:
+    case kX64F32x4Qfma:
+    case kX64F32x4Qfms:
+    case kX64F32x4Pmin:
+    case kX64F32x4Pmax:
+    case kX64F32x4Round:
+    case kX64I64x2Splat:
+    case kX64I64x2ExtractLane:
+    case kX64I64x2Neg:
+    case kX64I64x2BitMask:
+    case kX64I64x2Shl:
+    case kX64I64x2ShrS:
+    case kX64I64x2Add:
+    case kX64I64x2Sub:
+    case kX64I64x2Mul:
+    case kX64I64x2Eq:
+    case kX64I64x2ShrU:
+    case kX64I64x2SignSelect:
+    case kX64I32x4Splat:
+    case kX64I32x4ExtractLane:
+    case kX64I32x4SConvertF32x4:
+    case kX64I32x4SConvertI16x8Low:
+    case kX64I32x4SConvertI16x8High:
+    case kX64I32x4Neg:
+    case kX64I32x4Shl:
+    case kX64I32x4ShrS:
+    case kX64I32x4Add:
+    case kX64I32x4AddHoriz:
+    case kX64I32x4Sub:
+    case kX64I32x4Mul:
+    case kX64I32x4MinS:
+    case kX64I32x4MaxS:
+    case kX64I32x4Eq:
+    case kX64I32x4Ne:
+    case kX64I32x4GtS:
+    case kX64I32x4GeS:
+    case kX64I32x4UConvertF32x4:
+    case kX64I32x4UConvertI16x8Low:
+    case kX64I32x4UConvertI16x8High:
+    case kX64I32x4ShrU:
+    case kX64I32x4MinU:
+    case kX64I32x4MaxU:
+    case kX64I32x4GtU:
+    case kX64I32x4GeU:
+    case kX64I32x4Abs:
+    case kX64I32x4BitMask:
+    case kX64I32x4DotI16x8S:
+    case kX64I32x4SignSelect:
+    case kX64I16x8Splat:
+    case kX64I16x8ExtractLaneS:
+    case kX64I16x8SConvertI8x16Low:
+    case kX64I16x8SConvertI8x16High:
+    case kX64I16x8Neg:
+    case kX64I16x8Shl:
+    case kX64I16x8ShrS:
+    case kX64I16x8SConvertI32x4:
+    case kX64I16x8Add:
+    case kX64I16x8AddSatS:
+    case kX64I16x8AddHoriz:
+    case kX64I16x8Sub:
+    case kX64I16x8SubSatS:
+    case kX64I16x8Mul:
+    case kX64I16x8MinS:
+    case kX64I16x8MaxS:
+    case kX64I16x8Eq:
+    case kX64I16x8Ne:
+    case kX64I16x8GtS:
+    case kX64I16x8GeS:
+    case kX64I16x8UConvertI8x16Low:
+    case kX64I16x8UConvertI8x16High:
+    case kX64I16x8UConvertI32x4:
+    case kX64I16x8ShrU:
+    case kX64I16x8AddSatU:
+    case kX64I16x8SubSatU:
+    case kX64I16x8MinU:
+    case kX64I16x8MaxU:
+    case kX64I16x8GtU:
+    case kX64I16x8GeU:
+    case kX64I16x8RoundingAverageU:
+    case kX64I16x8Abs:
+    case kX64I16x8BitMask:
+    case kX64I16x8SignSelect:
+    case kX64I8x16Splat:
+    case kX64I8x16ExtractLaneS:
+    case kX64I8x16SConvertI16x8:
+    case kX64I8x16Neg:
+    case kX64I8x16Shl:
+    case kX64I8x16ShrS:
+    case kX64I8x16Add:
+    case kX64I8x16AddSatS:
+    case kX64I8x16Sub:
+    case kX64I8x16SubSatS:
+    case kX64I8x16Mul:
+    case kX64I8x16MinS:
+    case kX64I8x16MaxS:
+    case kX64I8x16Eq:
+    case kX64I8x16Ne:
+    case kX64I8x16GtS:
+    case kX64I8x16GeS:
+    case kX64I8x16UConvertI16x8:
+    case kX64I8x16AddSatU:
+    case kX64I8x16SubSatU:
+    case kX64I8x16ShrU:
+    case kX64I8x16MinU:
+    case kX64I8x16MaxU:
+    case kX64I8x16GtU:
+    case kX64I8x16GeU:
+    case kX64I8x16RoundingAverageU:
+    case kX64I8x16Abs:
+    case kX64I8x16BitMask:
+    case kX64I8x16SignSelect:
+    case kX64S128And:
+    case kX64S128Or:
+    case kX64S128Xor:
+    case kX64S128Not:
+    case kX64S128Select:
+    case kX64S128Const:
+    case kX64S128Zero:
+    case kX64S128AllOnes:
+    case kX64S128AndNot:
+    case kX64V32x4AnyTrue:
+    case kX64V32x4AllTrue:
+    case kX64V16x8AnyTrue:
+    case kX64V16x8AllTrue:
+    case kX64I8x16Swizzle:
+    case kX64I8x16Shuffle:
+    case kX64S32x4Swizzle:
+    case kX64S32x4Shuffle:
+    case kX64S16x8Blend:
+    case kX64S16x8HalfShuffle1:
+    case kX64S16x8HalfShuffle2:
+    case kX64S8x16Alignr:
+    case kX64S16x8Dup:
+    case kX64S8x16Dup:
+    case kX64S16x8UnzipHigh:
+    case kX64S16x8UnzipLow:
+    case kX64S8x16UnzipHigh:
+    case kX64S8x16UnzipLow:
+    case kX64S64x2UnpackHigh:
+    case kX64S32x4UnpackHigh:
+    case kX64S16x8UnpackHigh:
+    case kX64S8x16UnpackHigh:
+    case kX64S64x2UnpackLow:
+    case kX64S32x4UnpackLow:
+    case kX64S16x8UnpackLow:
+    case kX64S8x16UnpackLow:
+    case kX64S8x16TransposeLow:
+    case kX64S8x16TransposeHigh:
+    case kX64S8x8Reverse:
+    case kX64S8x4Reverse:
+    case kX64S8x2Reverse:
+    case kX64V8x16AnyTrue:
+    case kX64V8x16AllTrue:
+      return (instr->addressing_mode() == kMode_None)
+                 ? kNoOpcodeFlags
+                 : kIsLoadOperation | kHasSideEffect;
+
+    case kX64Idiv:
+    case kX64Idiv32:
+    case kX64Udiv:
+    case kX64Udiv32:
+      return (instr->addressing_mode() == kMode_None)
+                 ? kMayNeedDeoptOrTrapCheck
+                 : kMayNeedDeoptOrTrapCheck | kIsLoadOperation | kHasSideEffect;
+
+    case kX64Movsxbl:
+    case kX64Movzxbl:
+    case kX64Movsxbq:
+    case kX64Movzxbq:
+    case kX64Movsxwl:
+    case kX64Movzxwl:
+    case kX64Movsxwq:
+    case kX64Movzxwq:
+    case kX64Movsxlq:
+      DCHECK_LE(1, instr->InputCount());
+      return instr->InputAt(0)->IsRegister() ? kNoOpcodeFlags
+                                             : kIsLoadOperation;
+
+    case kX64Movb:
+    case kX64Movw:
+    case kX64S128Store32Lane:
+    case kX64S128Store64Lane:
+      return kHasSideEffect;
+
+    case kX64Pextrb:
+    case kX64Pextrw:
+    case kX64Movl:
+      if (instr->HasOutput()) {
+        DCHECK_LE(1, instr->InputCount());
+        return instr->InputAt(0)->IsRegister() ? kNoOpcodeFlags
+                                               : kIsLoadOperation;
+      } else {
+        return kHasSideEffect;
+      }
+
+    case kX64MovqDecompressTaggedSigned:
+    case kX64MovqDecompressTaggedPointer:
+    case kX64MovqDecompressAnyTagged:
+    case kX64MovqCompressTagged:
+    case kX64Movq:
+    case kX64Movsd:
+    case kX64Movss:
+    case kX64Movdqu:
+    case kX64S128Load8Splat:
+    case kX64S128Load16Splat:
+    case kX64S128Load32Splat:
+    case kX64S128Load64Splat:
+    case kX64S128Load8x8S:
+    case kX64S128Load8x8U:
+    case kX64S128Load16x4S:
+    case kX64S128Load16x4U:
+    case kX64S128Load32x2S:
+    case kX64S128Load32x2U:
+      return instr->HasOutput() ? kIsLoadOperation : kHasSideEffect;
+
+    case kX64Peek:
+      return kIsLoadOperation;
+
+    case kX64Push:
+    case kX64Poke:
+      return kHasSideEffect;
+
+    case kX64MFence:
+    case kX64LFence:
+      return kHasSideEffect;
+
+    case kX64Word64AtomicLoadUint8:
+    case kX64Word64AtomicLoadUint16:
+    case kX64Word64AtomicLoadUint32:
+    case kX64Word64AtomicLoadUint64:
+      return kIsLoadOperation;
+
+    case kX64Word64AtomicStoreWord8:
+    case kX64Word64AtomicStoreWord16:
+    case kX64Word64AtomicStoreWord32:
+    case kX64Word64AtomicStoreWord64:
+    case kX64Word64AtomicAddUint8:
+    case kX64Word64AtomicAddUint16:
+    case kX64Word64AtomicAddUint32:
+    case kX64Word64AtomicAddUint64:
+    case kX64Word64AtomicSubUint8:
+    case kX64Word64AtomicSubUint16:
+    case kX64Word64AtomicSubUint32:
+    case kX64Word64AtomicSubUint64:
+    case kX64Word64AtomicAndUint8:
+    case kX64Word64AtomicAndUint16:
+    case kX64Word64AtomicAndUint32:
+    case kX64Word64AtomicAndUint64:
+    case kX64Word64AtomicOrUint8:
+    case kX64Word64AtomicOrUint16:
+    case kX64Word64AtomicOrUint32:
+    case kX64Word64AtomicOrUint64:
+    case kX64Word64AtomicXorUint8:
+    case kX64Word64AtomicXorUint16:
+    case kX64Word64AtomicXorUint32:
+    case kX64Word64AtomicXorUint64:
+    case kX64Word64AtomicExchangeUint8:
+    case kX64Word64AtomicExchangeUint16:
+    case kX64Word64AtomicExchangeUint32:
+    case kX64Word64AtomicExchangeUint64:
+    case kX64Word64AtomicCompareExchangeUint8:
+    case kX64Word64AtomicCompareExchangeUint16:
+    case kX64Word64AtomicCompareExchangeUint32:
+    case kX64Word64AtomicCompareExchangeUint64:
+      return kHasSideEffect;
+
+#define CASE(Name) case k##Name:
+      COMMON_ARCH_OPCODE_LIST(CASE)
+#undef CASE
+      // Already covered in architecture independent code.
+      UNREACHABLE();
+  }
+
+  UNREACHABLE();
+}
+
+int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
+  // Basic latency modeling for x64 instructions. The latencies have been
+  // determined empirically.
+  switch (instr->arch_opcode()) {
+    case kSSEFloat64Mul:
+      return 5;
+    case kX64Imul:
+    case kX64Imul32:
+    case kX64ImulHigh32:
+    case kX64UmulHigh32:
+    case kSSEFloat32Cmp:
+    case kSSEFloat32Add:
+    case kSSEFloat32Sub:
+    case kSSEFloat32Abs:
+    case kSSEFloat32Neg:
+    case kSSEFloat64Cmp:
+    case kSSEFloat64Add:
+    case kSSEFloat64Sub:
+    case kSSEFloat64Max:
+    case kSSEFloat64Min:
+    case kSSEFloat64Abs:
+    case kSSEFloat64Neg:
+      return 3;
+    case kSSEFloat32Mul:
+    case kSSEFloat32ToFloat64:
+    case kSSEFloat64ToFloat32:
+    case kSSEFloat32Round:
+    case kSSEFloat64Round:
+    case kSSEFloat32ToInt32:
+    case kSSEFloat32ToUint32:
+    case kSSEFloat64ToInt32:
+    case kSSEFloat64ToUint32:
+      return 4;
+    case kX64Idiv:
+      return 49;
+    case kX64Idiv32:
+      return 35;
+    case kX64Udiv:
+      return 38;
+    case kX64Udiv32:
+      return 26;
+    case kSSEFloat32Div:
+    case kSSEFloat64Div:
+    case kSSEFloat32Sqrt:
+    case kSSEFloat64Sqrt:
+      return 13;
+    case kSSEFloat32ToInt64:
+    case kSSEFloat64ToInt64:
+    case kSSEFloat32ToUint64:
+    case kSSEFloat64ToUint64:
+      return 10;
+    case kSSEFloat64Mod:
+      return 50;
+    case kArchTruncateDoubleToI:
+      return 6;
+    default:
+      return 1;
+  }
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/x64/instruction-selector-x64.cc b/src/compiler/backend/x64/instruction-selector-x64.cc
new file mode 100644
index 0000000..7a8a2b4
--- /dev/null
+++ b/src/compiler/backend/x64/instruction-selector-x64.cc
@@ -0,0 +1,3642 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+
+#include "src/base/iterator.h"
+#include "src/base/logging.h"
+#include "src/base/overflowing-math.h"
+#include "src/codegen/machine-type.h"
+#include "src/compiler/backend/instruction-selector-impl.h"
+#include "src/compiler/machine-operator.h"
+#include "src/compiler/node-matchers.h"
+#include "src/compiler/node-properties.h"
+#include "src/roots/roots-inl.h"
+#include "src/wasm/simd-shuffle.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+// Adds X64-specific methods for generating operands.
+class X64OperandGenerator final : public OperandGenerator {
+ public:
+  explicit X64OperandGenerator(InstructionSelector* selector)
+      : OperandGenerator(selector) {}
+
+  bool CanBeImmediate(Node* node) {
+    switch (node->opcode()) {
+      case IrOpcode::kInt32Constant:
+      case IrOpcode::kRelocatableInt32Constant: {
+        const int32_t value = OpParameter<int32_t>(node->op());
+        // int32_t min will overflow if displacement mode is
+        // kNegativeDisplacement.
+        return value != std::numeric_limits<int32_t>::min();
+      }
+      case IrOpcode::kInt64Constant: {
+        const int64_t value = OpParameter<int64_t>(node->op());
+        return std::numeric_limits<int32_t>::min() < value &&
+               value <= std::numeric_limits<int32_t>::max();
+      }
+      case IrOpcode::kNumberConstant: {
+        const double value = OpParameter<double>(node->op());
+        return bit_cast<int64_t>(value) == 0;
+      }
+      default:
+        return false;
+    }
+  }
+
+  int32_t GetImmediateIntegerValue(Node* node) {
+    DCHECK(CanBeImmediate(node));
+    if (node->opcode() == IrOpcode::kInt32Constant) {
+      return OpParameter<int32_t>(node->op());
+    }
+    DCHECK_EQ(IrOpcode::kInt64Constant, node->opcode());
+    return static_cast<int32_t>(OpParameter<int64_t>(node->op()));
+  }
+
+  bool CanBeMemoryOperand(InstructionCode opcode, Node* node, Node* input,
+                          int effect_level) {
+    if (input->opcode() != IrOpcode::kLoad ||
+        !selector()->CanCover(node, input)) {
+      return false;
+    }
+    if (effect_level != selector()->GetEffectLevel(input)) {
+      return false;
+    }
+    MachineRepresentation rep =
+        LoadRepresentationOf(input->op()).representation();
+    switch (opcode) {
+      case kX64And:
+      case kX64Or:
+      case kX64Xor:
+      case kX64Add:
+      case kX64Sub:
+      case kX64Push:
+      case kX64Cmp:
+      case kX64Test:
+        // When pointer compression is enabled, 64-bit memory operands can't
+        // be used for tagged values.
+        return rep == MachineRepresentation::kWord64 ||
+               (!COMPRESS_POINTERS_BOOL && IsAnyTagged(rep));
+      case kX64And32:
+      case kX64Or32:
+      case kX64Xor32:
+      case kX64Add32:
+      case kX64Sub32:
+      case kX64Cmp32:
+      case kX64Test32:
+        // When pointer compression is enabled, 32-bit memory operands can be
+        // used for tagged values.
+        return rep == MachineRepresentation::kWord32 ||
+               (COMPRESS_POINTERS_BOOL &&
+                (IsAnyTagged(rep) || IsAnyCompressed(rep)));
+      case kAVXFloat64Add:
+      case kAVXFloat64Sub:
+      case kAVXFloat64Mul:
+        DCHECK_EQ(MachineRepresentation::kFloat64, rep);
+        return true;
+      case kAVXFloat32Add:
+      case kAVXFloat32Sub:
+      case kAVXFloat32Mul:
+        DCHECK_EQ(MachineRepresentation::kFloat32, rep);
+        return true;
+      case kX64Cmp16:
+      case kX64Test16:
+        return rep == MachineRepresentation::kWord16;
+      case kX64Cmp8:
+      case kX64Test8:
+        return rep == MachineRepresentation::kWord8;
+      default:
+        break;
+    }
+    return false;
+  }
+
+  AddressingMode GenerateMemoryOperandInputs(Node* index, int scale_exponent,
+                                             Node* base, Node* displacement,
+                                             DisplacementMode displacement_mode,
+                                             InstructionOperand inputs[],
+                                             size_t* input_count) {
+    AddressingMode mode = kMode_MRI;
+    if (base != nullptr && (index != nullptr || displacement != nullptr)) {
+      if (base->opcode() == IrOpcode::kInt32Constant &&
+          OpParameter<int32_t>(base->op()) == 0) {
+        base = nullptr;
+      } else if (base->opcode() == IrOpcode::kInt64Constant &&
+                 OpParameter<int64_t>(base->op()) == 0) {
+        base = nullptr;
+      }
+    }
+    if (base != nullptr) {
+      inputs[(*input_count)++] = UseRegister(base);
+      if (index != nullptr) {
+        DCHECK(scale_exponent >= 0 && scale_exponent <= 3);
+        inputs[(*input_count)++] = UseRegister(index);
+        if (displacement != nullptr) {
+          inputs[(*input_count)++] = displacement_mode == kNegativeDisplacement
+                                         ? UseNegatedImmediate(displacement)
+                                         : UseImmediate(displacement);
+          static const AddressingMode kMRnI_modes[] = {kMode_MR1I, kMode_MR2I,
+                                                       kMode_MR4I, kMode_MR8I};
+          mode = kMRnI_modes[scale_exponent];
+        } else {
+          static const AddressingMode kMRn_modes[] = {kMode_MR1, kMode_MR2,
+                                                      kMode_MR4, kMode_MR8};
+          mode = kMRn_modes[scale_exponent];
+        }
+      } else {
+        if (displacement == nullptr) {
+          mode = kMode_MR;
+        } else {
+          inputs[(*input_count)++] = displacement_mode == kNegativeDisplacement
+                                         ? UseNegatedImmediate(displacement)
+                                         : UseImmediate(displacement);
+          mode = kMode_MRI;
+        }
+      }
+    } else {
+      DCHECK(scale_exponent >= 0 && scale_exponent <= 3);
+      if (displacement != nullptr) {
+        if (index == nullptr) {
+          inputs[(*input_count)++] = UseRegister(displacement);
+          mode = kMode_MR;
+        } else {
+          inputs[(*input_count)++] = UseRegister(index);
+          inputs[(*input_count)++] = displacement_mode == kNegativeDisplacement
+                                         ? UseNegatedImmediate(displacement)
+                                         : UseImmediate(displacement);
+          static const AddressingMode kMnI_modes[] = {kMode_MRI, kMode_M2I,
+                                                      kMode_M4I, kMode_M8I};
+          mode = kMnI_modes[scale_exponent];
+        }
+      } else {
+        inputs[(*input_count)++] = UseRegister(index);
+        static const AddressingMode kMn_modes[] = {kMode_MR, kMode_MR1,
+                                                   kMode_M4, kMode_M8};
+        mode = kMn_modes[scale_exponent];
+        if (mode == kMode_MR1) {
+          // [%r1 + %r1*1] has a smaller encoding than [%r1*2+0]
+          inputs[(*input_count)++] = UseRegister(index);
+        }
+      }
+    }
+    return mode;
+  }
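+  // Illustration of the kMode_MR1 special case above: with no base, no
+  // displacement and scale_exponent == 1 (i.e. index * 2), the index register
+  // is emitted twice so the address encodes as [%rcx + %rcx*1] instead of
+  // [%rcx*2 + 0], avoiding the 32-bit displacement that an index-only SIB
+  // encoding would require.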
+
+  AddressingMode GetEffectiveAddressMemoryOperand(Node* operand,
+                                                  InstructionOperand inputs[],
+                                                  size_t* input_count) {
+    {
+      LoadMatcher<ExternalReferenceMatcher> m(operand);
+      if (m.index().HasResolvedValue() && m.object().HasResolvedValue() &&
+          selector()->CanAddressRelativeToRootsRegister(
+              m.object().ResolvedValue())) {
+        ptrdiff_t const delta =
+            m.index().ResolvedValue() +
+            TurboAssemblerBase::RootRegisterOffsetForExternalReference(
+                selector()->isolate(), m.object().ResolvedValue());
+        if (is_int32(delta)) {
+          inputs[(*input_count)++] = TempImmediate(static_cast<int32_t>(delta));
+          return kMode_Root;
+        }
+      }
+    }
+    BaseWithIndexAndDisplacement64Matcher m(operand, AddressOption::kAllowAll);
+    DCHECK(m.matches());
+    if (m.displacement() == nullptr || CanBeImmediate(m.displacement())) {
+      return GenerateMemoryOperandInputs(
+          m.index(), m.scale(), m.base(), m.displacement(),
+          m.displacement_mode(), inputs, input_count);
+    } else if (m.base() == nullptr &&
+               m.displacement_mode() == kPositiveDisplacement) {
+      // The displacement cannot be an immediate, but we can use the
+      // displacement as base instead and still benefit from addressing
+      // modes for the scale.
+      return GenerateMemoryOperandInputs(m.index(), m.scale(), m.displacement(),
+                                         nullptr, m.displacement_mode(), inputs,
+                                         input_count);
+    } else {
+      inputs[(*input_count)++] = UseRegister(operand->InputAt(0));
+      inputs[(*input_count)++] = UseRegister(operand->InputAt(1));
+      return kMode_MR1;
+    }
+  }
+
+  InstructionOperand GetEffectiveIndexOperand(Node* index,
+                                              AddressingMode* mode) {
+    if (CanBeImmediate(index)) {
+      *mode = kMode_MRI;
+      return UseImmediate(index);
+    } else {
+      *mode = kMode_MR1;
+      return UseUniqueRegister(index);
+    }
+  }
+
+  bool CanBeBetterLeftOperand(Node* node) const {
+    return !selector()->IsLive(node);
+  }
+};
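+// Examples of the immediate rules above (illustrative only): an Int32Constant
+// equal to std::numeric_limits<int32_t>::min() is rejected because negating it
+// for kNegativeDisplacement would overflow; an Int64Constant qualifies only
+// when it lies in (INT32_MIN, INT32_MAX]; and a NumberConstant qualifies only
+// for +0.0, the sole double whose bit pattern is zero.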
+
+namespace {
+ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
+  ArchOpcode opcode;
+  switch (load_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      opcode = kX64Movss;
+      break;
+    case MachineRepresentation::kFloat64:
+      opcode = kX64Movsd;
+      break;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      opcode = load_rep.IsSigned() ? kX64Movsxbl : kX64Movzxbl;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = load_rep.IsSigned() ? kX64Movsxwl : kX64Movzxwl;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kX64Movl;
+      break;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+      opcode = kX64Movl;
+      break;
+#else
+      UNREACHABLE();
+#endif
+#ifdef V8_COMPRESS_POINTERS
+    case MachineRepresentation::kTaggedSigned:
+      opcode = kX64MovqDecompressTaggedSigned;
+      break;
+    case MachineRepresentation::kTaggedPointer:
+      opcode = kX64MovqDecompressTaggedPointer;
+      break;
+    case MachineRepresentation::kTagged:
+      opcode = kX64MovqDecompressAnyTagged;
+      break;
+#else
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:         // Fall through.
+#endif
+    case MachineRepresentation::kWord64:
+      opcode = kX64Movq;
+      break;
+    case MachineRepresentation::kSimd128:  // Fall through.
+      opcode = kX64Movdqu;
+      break;
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  return opcode;
+}
+
+ArchOpcode GetStoreOpcode(StoreRepresentation store_rep) {
+  switch (store_rep.representation()) {
+    case MachineRepresentation::kFloat32:
+      return kX64Movss;
+    case MachineRepresentation::kFloat64:
+      return kX64Movsd;
+    case MachineRepresentation::kBit:  // Fall through.
+    case MachineRepresentation::kWord8:
+      return kX64Movb;
+    case MachineRepresentation::kWord16:
+      return kX64Movw;
+    case MachineRepresentation::kWord32:
+      return kX64Movl;
+    case MachineRepresentation::kCompressedPointer:  // Fall through.
+    case MachineRepresentation::kCompressed:
+#ifdef V8_COMPRESS_POINTERS
+      return kX64MovqCompressTagged;
+#else
+      UNREACHABLE();
+#endif
+    case MachineRepresentation::kTaggedSigned:   // Fall through.
+    case MachineRepresentation::kTaggedPointer:  // Fall through.
+    case MachineRepresentation::kTagged:
+      return kX64MovqCompressTagged;
+    case MachineRepresentation::kWord64:
+      return kX64Movq;
+    case MachineRepresentation::kSimd128:  // Fall through.
+      return kX64Movdqu;
+    case MachineRepresentation::kNone:
+      UNREACHABLE();
+  }
+  UNREACHABLE();
+}
+
+}  // namespace
+
+void InstructionSelector::VisitStackSlot(Node* node) {
+  StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
+  int slot = frame_->AllocateSpillSlot(rep.size());
+  OperandGenerator g(this);
+
+  Emit(kArchStackSlot, g.DefineAsRegister(node),
+       sequence()->AddImmediate(Constant(slot)), 0, nullptr);
+}
+
+void InstructionSelector::VisitAbortCSAAssert(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kArchAbortCSAAssert, g.NoOutput(), g.UseFixed(node->InputAt(0), rdx));
+}
+
+void InstructionSelector::VisitLoadLane(Node* node) {
+  LoadLaneParameters params = LoadLaneParametersOf(node->op());
+  InstructionCode opcode = kArchNop;
+  if (params.rep == MachineType::Int8()) {
+    opcode = kX64Pinsrb;
+  } else if (params.rep == MachineType::Int16()) {
+    opcode = kX64Pinsrw;
+  } else if (params.rep == MachineType::Int32()) {
+    opcode = kX64Pinsrd;
+  } else if (params.rep == MachineType::Int64()) {
+    opcode = kX64Pinsrq;
+  } else {
+    UNREACHABLE();
+  }
+
+  X64OperandGenerator g(this);
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  // Input 0 is value node, 1 is lane idx, and GetEffectiveAddressMemoryOperand
+  // uses up to 3 inputs. This ordering is consistent with other operations that
+  // use the same opcode.
+  InstructionOperand inputs[5];
+  size_t input_count = 0;
+
+  inputs[input_count++] = g.UseRegister(node->InputAt(2));
+  inputs[input_count++] = g.UseImmediate(params.laneidx);
+
+  AddressingMode mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  opcode |= AddressingModeField::encode(mode);
+
+  DCHECK_GE(5, input_count);
+
+  // x64 supports unaligned loads.
+  DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
+  if (params.kind == MemoryAccessKind::kProtected) {
+    opcode |= MiscField::encode(kMemoryAccessProtected);
+  }
+  Emit(opcode, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitLoadTransform(Node* node) {
+  LoadTransformParameters params = LoadTransformParametersOf(node->op());
+  ArchOpcode opcode;
+  switch (params.transformation) {
+    case LoadTransformation::kS128Load8Splat:
+      opcode = kX64S128Load8Splat;
+      break;
+    case LoadTransformation::kS128Load16Splat:
+      opcode = kX64S128Load16Splat;
+      break;
+    case LoadTransformation::kS128Load32Splat:
+      opcode = kX64S128Load32Splat;
+      break;
+    case LoadTransformation::kS128Load64Splat:
+      opcode = kX64S128Load64Splat;
+      break;
+    case LoadTransformation::kS128Load8x8S:
+      opcode = kX64S128Load8x8S;
+      break;
+    case LoadTransformation::kS128Load8x8U:
+      opcode = kX64S128Load8x8U;
+      break;
+    case LoadTransformation::kS128Load16x4S:
+      opcode = kX64S128Load16x4S;
+      break;
+    case LoadTransformation::kS128Load16x4U:
+      opcode = kX64S128Load16x4U;
+      break;
+    case LoadTransformation::kS128Load32x2S:
+      opcode = kX64S128Load32x2S;
+      break;
+    case LoadTransformation::kS128Load32x2U:
+      opcode = kX64S128Load32x2U;
+      break;
+    case LoadTransformation::kS128Load32Zero:
+      opcode = kX64Movss;
+      break;
+    case LoadTransformation::kS128Load64Zero:
+      opcode = kX64Movsd;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  // x64 supports unaligned loads.
+  DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
+  InstructionCode code = opcode;
+  if (params.kind == MemoryAccessKind::kProtected) {
+    code |= MiscField::encode(kMemoryAccessProtected);
+  }
+  VisitLoad(node, node, code);
+}
+
+void InstructionSelector::VisitLoad(Node* node, Node* value,
+                                    InstructionCode opcode) {
+  X64OperandGenerator g(this);
+  InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+  InstructionOperand inputs[3];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
+  InstructionCode code = opcode | AddressingModeField::encode(mode);
+  if (node->opcode() == IrOpcode::kProtectedLoad) {
+    code |= MiscField::encode(kMemoryAccessProtected);
+  } else if (node->opcode() == IrOpcode::kPoisonedLoad) {
+    CHECK_NE(poisoning_level_, PoisoningMitigationLevel::kDontPoison);
+    code |= MiscField::encode(kMemoryAccessPoisoned);
+  }
+  Emit(code, 1, outputs, input_count, inputs);
+}
+
+void InstructionSelector::VisitLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  VisitLoad(node, node, GetLoadOpcode(load_rep));
+}
+
+void InstructionSelector::VisitPoisonedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitProtectedLoad(Node* node) { VisitLoad(node); }
+
+void InstructionSelector::VisitStore(Node* node) {
+  X64OperandGenerator g(this);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+  WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
+
+  if (FLAG_enable_unconditional_write_barriers &&
+      CanBeTaggedOrCompressedPointer(store_rep.representation())) {
+    write_barrier_kind = kFullWriteBarrier;
+  }
+
+  if (write_barrier_kind != kNoWriteBarrier &&
+      V8_LIKELY(!FLAG_disable_write_barriers)) {
+    DCHECK(CanBeTaggedOrCompressedPointer(store_rep.representation()));
+    AddressingMode addressing_mode;
+    InstructionOperand inputs[] = {
+        g.UseUniqueRegister(base),
+        g.GetEffectiveIndexOperand(index, &addressing_mode),
+        g.UseUniqueRegister(value)};
+    RecordWriteMode record_write_mode =
+        WriteBarrierKindToRecordWriteMode(write_barrier_kind);
+    InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
+    InstructionCode code = kArchStoreWithWriteBarrier;
+    code |= AddressingModeField::encode(addressing_mode);
+    code |= MiscField::encode(static_cast<int>(record_write_mode));
+    Emit(code, 0, nullptr, arraysize(inputs), inputs, arraysize(temps), temps);
+  } else {
+    ArchOpcode opcode = GetStoreOpcode(store_rep);
+    InstructionOperand inputs[4];
+    size_t input_count = 0;
+    AddressingMode addressing_mode =
+        g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+    InstructionCode code =
+        opcode | AddressingModeField::encode(addressing_mode);
+    if ((ElementSizeLog2Of(store_rep.representation()) <
+         kSystemPointerSizeLog2) &&
+        value->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+      value = value->InputAt(0);
+    }
+    InstructionOperand value_operand =
+        g.CanBeImmediate(value) ? g.UseImmediate(value) : g.UseRegister(value);
+    inputs[input_count++] = value_operand;
+    Emit(code, 0, static_cast<InstructionOperand*>(nullptr), input_count,
+         inputs);
+  }
+}
+
+void InstructionSelector::VisitProtectedStore(Node* node) {
+  X64OperandGenerator g(this);
+  Node* value = node->InputAt(2);
+
+  StoreRepresentation store_rep = StoreRepresentationOf(node->op());
+
+  ArchOpcode opcode = GetStoreOpcode(store_rep);
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  AddressingMode addressing_mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode) |
+                         MiscField::encode(kMemoryAccessProtected);
+  InstructionOperand value_operand =
+      g.CanBeImmediate(value) ? g.UseImmediate(value) : g.UseRegister(value);
+  inputs[input_count++] = value_operand;
+  Emit(code, 0, static_cast<InstructionOperand*>(nullptr), input_count, inputs);
+}
+
+// The architecture supports unaligned access, so VisitLoad is used instead.
+void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
+
+// The architecture supports unaligned access, so VisitStore is used instead.
+void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitStoreLane(Node* node) {
+  X64OperandGenerator g(this);
+
+  StoreLaneParameters params = StoreLaneParametersOf(node->op());
+  InstructionCode opcode = kArchNop;
+  if (params.rep == MachineRepresentation::kWord8) {
+    opcode = kX64Pextrb;
+  } else if (params.rep == MachineRepresentation::kWord16) {
+    opcode = kX64Pextrw;
+  } else if (params.rep == MachineRepresentation::kWord32) {
+    opcode = kX64S128Store32Lane;
+  } else if (params.rep == MachineRepresentation::kWord64) {
+    opcode = kX64S128Store64Lane;
+  } else {
+    UNREACHABLE();
+  }
+
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  AddressingMode addressing_mode =
+      g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
+  opcode |= AddressingModeField::encode(addressing_mode);
+
+  if (params.kind == MemoryAccessKind::kProtected) {
+    opcode |= MiscField::encode(kMemoryAccessProtected);
+  }
+
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(2));
+  inputs[input_count++] = value_operand;
+  inputs[input_count++] = g.UseImmediate(params.laneidx);
+  DCHECK_GE(4, input_count);
+  Emit(opcode, 0, nullptr, input_count, inputs);
+}
+
+// Shared routine for multiple binary operations.
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode, FlagsContinuation* cont) {
+  X64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+  InstructionOperand inputs[8];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  // TODO(turbofan): match complex addressing modes.
+  if (left == right) {
+    // If both inputs refer to the same operand, enforce allocating a register
+    // for both of them to ensure that we don't end up generating code like
+    // this:
+    //
+    //   mov rax, [rbp-0x10]
+    //   add rax, [rbp-0x10]
+    //   jo label
+    InstructionOperand const input = g.UseRegister(left);
+    inputs[input_count++] = input;
+    inputs[input_count++] = input;
+  } else if (g.CanBeImmediate(right)) {
+    inputs[input_count++] = g.UseRegister(left);
+    inputs[input_count++] = g.UseImmediate(right);
+  } else {
+    int effect_level = selector->GetEffectLevel(node, cont);
+    if (node->op()->HasProperty(Operator::kCommutative) &&
+        g.CanBeBetterLeftOperand(right) &&
+        (!g.CanBeBetterLeftOperand(left) ||
+         !g.CanBeMemoryOperand(opcode, node, right, effect_level))) {
+      std::swap(left, right);
+    }
+    if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
+      inputs[input_count++] = g.UseRegister(left);
+      AddressingMode addressing_mode =
+          g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
+      opcode |= AddressingModeField::encode(addressing_mode);
+    } else {
+      inputs[input_count++] = g.UseRegister(left);
+      inputs[input_count++] = g.Use(right);
+    }
+  }
+
+  if (cont->IsBranch()) {
+    inputs[input_count++] = g.Label(cont->true_block());
+    inputs[input_count++] = g.Label(cont->false_block());
+  }
+
+  outputs[output_count++] = g.DefineSameAsFirst(node);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_EQ(1u, output_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+  DCHECK_GE(arraysize(outputs), output_count);
+
+  selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
+                                 inputs, cont);
+}
+
+// Shared routine for multiple binary operations.
+static void VisitBinop(InstructionSelector* selector, Node* node,
+                       InstructionCode opcode) {
+  FlagsContinuation cont;
+  VisitBinop(selector, node, opcode, &cont);
+}
+
+void InstructionSelector::VisitWord32And(Node* node) {
+  X64OperandGenerator g(this);
+  Uint32BinopMatcher m(node);
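+  // Masking with 0xFF or 0xFFFF zero-extends the low byte/word, so emit a
+  // movzxbl/movzxwl directly instead of an andl.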
+  if (m.right().Is(0xFF)) {
+    Emit(kX64Movzxbl, g.DefineAsRegister(node), g.Use(m.left().node()));
+  } else if (m.right().Is(0xFFFF)) {
+    Emit(kX64Movzxwl, g.DefineAsRegister(node), g.Use(m.left().node()));
+  } else {
+    VisitBinop(this, node, kX64And32);
+  }
+}
+
+void InstructionSelector::VisitWord64And(Node* node) {
+  VisitBinop(this, node, kX64And);
+}
+
+void InstructionSelector::VisitWord32Or(Node* node) {
+  VisitBinop(this, node, kX64Or32);
+}
+
+void InstructionSelector::VisitWord64Or(Node* node) {
+  VisitBinop(this, node, kX64Or);
+}
+
+void InstructionSelector::VisitWord32Xor(Node* node) {
+  X64OperandGenerator g(this);
+  Uint32BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    Emit(kX64Not32, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()));
+  } else {
+    VisitBinop(this, node, kX64Xor32);
+  }
+}
+
+void InstructionSelector::VisitWord64Xor(Node* node) {
+  X64OperandGenerator g(this);
+  Uint64BinopMatcher m(node);
+  if (m.right().Is(-1)) {
+    Emit(kX64Not, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()));
+  } else {
+    VisitBinop(this, node, kX64Xor);
+  }
+}
+
+void InstructionSelector::VisitStackPointerGreaterThan(
+    Node* node, FlagsContinuation* cont) {
+  StackCheckKind kind = StackCheckKindOf(node->op());
+  InstructionCode opcode =
+      kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
+
+  int effect_level = GetEffectLevel(node, cont);
+
+  X64OperandGenerator g(this);
+  Node* const value = node->InputAt(0);
+  if (g.CanBeMemoryOperand(kX64Cmp, node, value, effect_level)) {
+    DCHECK_EQ(IrOpcode::kLoad, value->opcode());
+
+    // GetEffectiveAddressMemoryOperand can create at most 3 inputs.
+    static constexpr int kMaxInputCount = 3;
+
+    size_t input_count = 0;
+    InstructionOperand inputs[kMaxInputCount];
+    AddressingMode addressing_mode =
+        g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
+    opcode |= AddressingModeField::encode(addressing_mode);
+    DCHECK_LE(input_count, kMaxInputCount);
+
+    EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
+  } else {
+    EmitWithContinuation(opcode, g.UseRegister(value), cont);
+  }
+}
+
+namespace {
+
+bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector,
+                                          Node* node, Node* load) {
+  if (load->opcode() == IrOpcode::kLoad && selector->CanCover(node, load)) {
+    LoadRepresentation load_rep = LoadRepresentationOf(load->op());
+    MachineRepresentation rep = load_rep.representation();
+    InstructionCode opcode;
+    switch (rep) {
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = load_rep.IsSigned() ? kX64Movsxbl : kX64Movzxbl;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = load_rep.IsSigned() ? kX64Movsxwl : kX64Movzxwl;
+        break;
+      case MachineRepresentation::kWord32:
+      case MachineRepresentation::kWord64:
+      case MachineRepresentation::kTaggedSigned:
+      case MachineRepresentation::kTagged:
+      case MachineRepresentation::kCompressed:
+        opcode = kX64Movl;
+        break;
+      default:
+        UNREACHABLE();
+        return false;
+    }
+    X64OperandGenerator g(selector);
+    InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+    size_t input_count = 0;
+    InstructionOperand inputs[3];
+    AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+        node->InputAt(0), inputs, &input_count);
+    opcode |= AddressingModeField::encode(mode);
+    selector->Emit(opcode, 1, outputs, input_count, inputs);
+    return true;
+  }
+  return false;
+}
+
+// Shared routine for multiple 32-bit shift operations.
+// TODO(bmeurer): Merge this with VisitWord64Shift using template magic?
+void VisitWord32Shift(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+
+  if (left->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+    left = left->InputAt(0);
+  }
+
+  if (g.CanBeImmediate(right)) {
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseImmediate(right));
+  } else {
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseFixed(right, rcx));
+  }
+}
+
+// Shared routine for multiple 64-bit shift operations.
+// TODO(bmeurer): Merge this with VisitWord32Shift using template magic?
+void VisitWord64Shift(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Int64BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+
+  if (g.CanBeImmediate(right)) {
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseImmediate(right));
+  } else {
+    if (m.right().IsWord64And()) {
+      Int64BinopMatcher mright(right);
+      if (mright.right().Is(0x3F)) {
+        right = mright.left().node();
+      }
+    }
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.UseFixed(right, rcx));
+  }
+}
+
+// Shared routine for multiple shift operations with continuation.
+template <typename BinopMatcher, int Bits>
+bool TryVisitWordShift(InstructionSelector* selector, Node* node,
+                       ArchOpcode opcode, FlagsContinuation* cont) {
+  X64OperandGenerator g(selector);
+  BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+
+  // Only immediate shift counts can be combined with the continuation here,
+  // and a shift count of 0 (mod the operand width) does not affect the flags.
+  if (!g.CanBeImmediate(right) ||
+      (g.GetImmediateIntegerValue(right) & (Bits - 1)) == 0) {
+    return false;
+  }
+  InstructionOperand output = g.DefineSameAsFirst(node);
+  InstructionOperand inputs[2];
+  inputs[0] = g.UseRegister(left);
+  inputs[1] = g.UseImmediate(right);
+  selector->EmitWithContinuation(opcode, 1, &output, 2, inputs, cont);
+  return true;
+}
+
+void EmitLea(InstructionSelector* selector, InstructionCode opcode,
+             Node* result, Node* index, int scale, Node* base,
+             Node* displacement, DisplacementMode displacement_mode) {
+  X64OperandGenerator g(selector);
+
+  InstructionOperand inputs[4];
+  size_t input_count = 0;
+  AddressingMode mode =
+      g.GenerateMemoryOperandInputs(index, scale, base, displacement,
+                                    displacement_mode, inputs, &input_count);
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+
+  InstructionOperand outputs[1];
+  outputs[0] = g.DefineAsRegister(result);
+
+  opcode = AddressingModeField::encode(mode) | opcode;
+
+  selector->Emit(opcode, 1, outputs, input_count, inputs);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord32Shl(Node* node) {
+  Int32ScaleMatcher m(node, true);
+  if (m.matches()) {
+    Node* index = node->InputAt(0);
+    Node* base = m.power_of_two_plus_one() ? index : nullptr;
+    EmitLea(this, kX64Lea32, node, index, m.scale(), base, nullptr,
+            kPositiveDisplacement);
+    return;
+  }
+  VisitWord32Shift(this, node, kX64Shl32);
+}
+
+void InstructionSelector::VisitWord64Shl(Node* node) {
+  X64OperandGenerator g(this);
+  Int64ScaleMatcher m(node, true);
+  if (m.matches()) {
+    Node* index = node->InputAt(0);
+    Node* base = m.power_of_two_plus_one() ? index : nullptr;
+    EmitLea(this, kX64Lea, node, index, m.scale(), base, nullptr,
+            kPositiveDisplacement);
+    return;
+  } else {
+    Int64BinopMatcher m(node);
+    if ((m.left().IsChangeInt32ToInt64() ||
+         m.left().IsChangeUint32ToUint64()) &&
+        m.right().IsInRange(32, 63)) {
+      // There's no need to sign/zero-extend to 64-bit if we shift out the upper
+      // 32 bits anyway.
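+      // For example, for ChangeInt32ToInt64(x) << 40 only bits 0..23 of x
+      // reach the result, so the shift can consume the untruncated 32-bit
+      // input directly.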
+      Emit(kX64Shl, g.DefineSameAsFirst(node),
+           g.UseRegister(m.left().node()->InputAt(0)),
+           g.UseImmediate(m.right().node()));
+      return;
+    }
+  }
+  VisitWord64Shift(this, node, kX64Shl);
+}
+
+void InstructionSelector::VisitWord32Shr(Node* node) {
+  VisitWord32Shift(this, node, kX64Shr32);
+}
+
+namespace {
+
+inline AddressingMode AddDisplacementToAddressingMode(AddressingMode mode) {
+  switch (mode) {
+    case kMode_MR:
+      return kMode_MRI;
+    case kMode_MR1:
+      return kMode_MR1I;
+    case kMode_MR2:
+      return kMode_MR2I;
+    case kMode_MR4:
+      return kMode_MR4I;
+    case kMode_MR8:
+      return kMode_MR8I;
+    case kMode_M1:
+      return kMode_M1I;
+    case kMode_M2:
+      return kMode_M2I;
+    case kMode_M4:
+      return kMode_M4I;
+    case kMode_M8:
+      return kMode_M8I;
+    case kMode_None:
+    case kMode_MRI:
+    case kMode_MR1I:
+    case kMode_MR2I:
+    case kMode_MR4I:
+    case kMode_MR8I:
+    case kMode_M1I:
+    case kMode_M2I:
+    case kMode_M4I:
+    case kMode_M8I:
+    case kMode_Root:
+      UNREACHABLE();
+  }
+  UNREACHABLE();
+}
+
+bool TryMatchLoadWord64AndShiftRight(InstructionSelector* selector, Node* node,
+                                     InstructionCode opcode) {
+  DCHECK(IrOpcode::kWord64Sar == node->opcode() ||
+         IrOpcode::kWord64Shr == node->opcode());
+  X64OperandGenerator g(selector);
+  Int64BinopMatcher m(node);
+  if (selector->CanCover(m.node(), m.left().node()) && m.left().IsLoad() &&
+      m.right().Is(32)) {
+    DCHECK_EQ(selector->GetEffectLevel(node),
+              selector->GetEffectLevel(m.left().node()));
+    // Just load and sign-extend the interesting 4 bytes instead. This happens,
+    // for example, when we're loading and untagging SMIs.
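+    // Concretely, a 64-bit load at [base+disp] followed by a shift right by
+    // 32 becomes a 4-byte load at [base+disp+4], the high half on
+    // little-endian x64.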
+    BaseWithIndexAndDisplacement64Matcher mleft(m.left().node(),
+                                                AddressOption::kAllowAll);
+    if (mleft.matches() && (mleft.displacement() == nullptr ||
+                            g.CanBeImmediate(mleft.displacement()))) {
+      size_t input_count = 0;
+      InstructionOperand inputs[3];
+      AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+          m.left().node(), inputs, &input_count);
+      if (mleft.displacement() == nullptr) {
+        // Make sure that the addressing mode indicates the presence of an
+        // immediate displacement. It seems that we never use M1 and M2, but we
+        // handle them here anyway.
+        mode = AddDisplacementToAddressingMode(mode);
+        inputs[input_count++] = ImmediateOperand(ImmediateOperand::INLINE, 4);
+      } else {
+        // In the case that the base address was zero, the displacement will be
+        // in a register and replacing it with an immediate is not allowed. This
+        // usually only happens in dead code anyway.
+        if (!inputs[input_count - 1].IsImmediate()) return false;
+        int32_t displacement = g.GetImmediateIntegerValue(mleft.displacement());
+        inputs[input_count - 1] =
+            ImmediateOperand(ImmediateOperand::INLINE, displacement + 4);
+      }
+      InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+      InstructionCode code = opcode | AddressingModeField::encode(mode);
+      selector->Emit(code, 1, outputs, input_count, inputs);
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitWord64Shr(Node* node) {
+  if (TryMatchLoadWord64AndShiftRight(this, node, kX64Movl)) return;
+  VisitWord64Shift(this, node, kX64Shr);
+}
+
+void InstructionSelector::VisitWord32Sar(Node* node) {
+  X64OperandGenerator g(this);
+  Int32BinopMatcher m(node);
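+  // (x << 16) >> 16 and (x << 24) >> 24 sign-extend the low half-word/byte,
+  // so they are matched to movsxwl/movsxbl below.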
+  if (CanCover(m.node(), m.left().node()) && m.left().IsWord32Shl()) {
+    Int32BinopMatcher mleft(m.left().node());
+    if (mleft.right().Is(16) && m.right().Is(16)) {
+      Emit(kX64Movsxwl, g.DefineAsRegister(node), g.Use(mleft.left().node()));
+      return;
+    } else if (mleft.right().Is(24) && m.right().Is(24)) {
+      Emit(kX64Movsxbl, g.DefineAsRegister(node), g.Use(mleft.left().node()));
+      return;
+    }
+  }
+  VisitWord32Shift(this, node, kX64Sar32);
+}
+
+void InstructionSelector::VisitWord64Sar(Node* node) {
+  if (TryMatchLoadWord64AndShiftRight(this, node, kX64Movsxlq)) return;
+  VisitWord64Shift(this, node, kX64Sar);
+}
+
+void InstructionSelector::VisitWord32Rol(Node* node) {
+  VisitWord32Shift(this, node, kX64Rol32);
+}
+
+void InstructionSelector::VisitWord64Rol(Node* node) {
+  VisitWord64Shift(this, node, kX64Rol);
+}
+
+void InstructionSelector::VisitWord32Ror(Node* node) {
+  VisitWord32Shift(this, node, kX64Ror32);
+}
+
+void InstructionSelector::VisitWord64Ror(Node* node) {
+  VisitWord64Shift(this, node, kX64Ror);
+}
+
+void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); }
+
+void InstructionSelector::VisitWord64ReverseBytes(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64Bswap32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt32Add(Node* node) {
+  X64OperandGenerator g(this);
+
+  // No need to truncate the values before Int32Add.
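+  // Only the low 32 bits of the inputs affect a 32-bit add, so an explicit
+  // TruncateInt64ToInt32 on either input is redundant and is stripped here.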
+  DCHECK_EQ(node->InputCount(), 2);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (left->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+    node->ReplaceInput(0, left->InputAt(0));
+  }
+  if (right->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+    node->ReplaceInput(1, right->InputAt(0));
+  }
+
+  // Try to match the Add to a leal pattern
+  BaseWithIndexAndDisplacement32Matcher m(node);
+  if (m.matches() &&
+      (m.displacement() == nullptr || g.CanBeImmediate(m.displacement()))) {
+    EmitLea(this, kX64Lea32, node, m.index(), m.scale(), m.base(),
+            m.displacement(), m.displacement_mode());
+    return;
+  }
+
+  // No leal pattern match, use addl
+  VisitBinop(this, node, kX64Add32);
+}
+
+void InstructionSelector::VisitInt64Add(Node* node) {
+  X64OperandGenerator g(this);
+
+  // Try to match the Add to a leaq pattern
+  BaseWithIndexAndDisplacement64Matcher m(node);
+  if (m.matches() &&
+      (m.displacement() == nullptr || g.CanBeImmediate(m.displacement()))) {
+    EmitLea(this, kX64Lea, node, m.index(), m.scale(), m.base(),
+            m.displacement(), m.displacement_mode());
+    return;
+  }
+
+  // No leaq pattern match, use addq
+  VisitBinop(this, node, kX64Add);
+}
+
+void InstructionSelector::VisitInt64AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kX64Add, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kX64Add, &cont);
+}
+
+void InstructionSelector::VisitInt32Sub(Node* node) {
+  X64OperandGenerator g(this);
+  DCHECK_EQ(node->InputCount(), 2);
+  Node* input1 = node->InputAt(0);
+  Node* input2 = node->InputAt(1);
+  if (input1->opcode() == IrOpcode::kTruncateInt64ToInt32 &&
+      g.CanBeImmediate(input2)) {
+    int32_t imm = g.GetImmediateIntegerValue(input2);
+    InstructionOperand int64_input = g.UseRegister(input1->InputAt(0));
+    if (imm == 0) {
+      // Emit "movl" for subtraction of 0.
+      Emit(kX64Movl, g.DefineAsRegister(node), int64_input);
+    } else {
+      // Omit truncation and turn subtractions of constant values into immediate
+      // "leal" instructions by negating the value.
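+      // For example, x - 5 is emitted as a leal with displacement -5; unlike
+      // subl, leal can write a destination register different from its input
+      // and does not modify the flags.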
+      Emit(kX64Lea32 | AddressingModeField::encode(kMode_MRI),
+           g.DefineAsRegister(node), int64_input,
+           g.TempImmediate(base::NegateWithWraparound(imm)));
+    }
+    return;
+  }
+
+  Int32BinopMatcher m(node);
+  if (m.left().Is(0)) {
+    Emit(kX64Neg32, g.DefineSameAsFirst(node), g.UseRegister(m.right().node()));
+  } else if (m.right().Is(0)) {
+    // {EmitIdentity} reuses the virtual register of the first input
+    // for the output. This is exactly what we want here.
+    EmitIdentity(node);
+  } else if (m.right().HasResolvedValue() &&
+             g.CanBeImmediate(m.right().node())) {
+    // Turn subtractions of constant values into immediate "leal" instructions
+    // by negating the value.
+    Emit(
+        kX64Lea32 | AddressingModeField::encode(kMode_MRI),
+        g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+        g.TempImmediate(base::NegateWithWraparound(m.right().ResolvedValue())));
+  } else {
+    VisitBinop(this, node, kX64Sub32);
+  }
+}
+
+void InstructionSelector::VisitInt64Sub(Node* node) {
+  X64OperandGenerator g(this);
+  Int64BinopMatcher m(node);
+  if (m.left().Is(0)) {
+    Emit(kX64Neg, g.DefineSameAsFirst(node), g.UseRegister(m.right().node()));
+  } else {
+    if (m.right().HasResolvedValue() && g.CanBeImmediate(m.right().node())) {
+      // Turn subtractions of constant values into immediate "leaq" instructions
+      // by negating the value.
+      Emit(kX64Lea | AddressingModeField::encode(kMode_MRI),
+           g.DefineAsRegister(node), g.UseRegister(m.left().node()),
+           g.TempImmediate(-static_cast<int32_t>(m.right().ResolvedValue())));
+      return;
+    }
+    VisitBinop(this, node, kX64Sub);
+  }
+}
+
+void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kX64Sub, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kX64Sub, &cont);
+}
+
+namespace {
+
+void VisitMul(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Int32BinopMatcher m(node);
+  Node* left = m.left().node();
+  Node* right = m.right().node();
+  if (g.CanBeImmediate(right)) {
+    selector->Emit(opcode, g.DefineAsRegister(node), g.Use(left),
+                   g.UseImmediate(right));
+  } else {
+    if (g.CanBeBetterLeftOperand(right)) {
+      std::swap(left, right);
+    }
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
+                   g.Use(right));
+  }
+}
+
+void VisitMulHigh(InstructionSelector* selector, Node* node,
+                  ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  if (selector->IsLive(left) && !selector->IsLive(right)) {
+    std::swap(left, right);
+  }
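+  // The one-operand mul/imul reads one factor from rax and leaves the high
+  // half of the product in rdx, hence the fixed register constraints below
+  // (rax is also clobbered, so it is listed as a temp).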
+  InstructionOperand temps[] = {g.TempRegister(rax)};
+  // TODO(turbofan): We use UseUniqueRegister here to improve register
+  // allocation.
+  selector->Emit(opcode, g.DefineAsFixed(node, rdx), g.UseFixed(left, rax),
+                 g.UseUniqueRegister(right), arraysize(temps), temps);
+}
+
+void VisitDiv(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister(rdx)};
+  selector->Emit(
+      opcode, g.DefineAsFixed(node, rax), g.UseFixed(node->InputAt(0), rax),
+      g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempRegister(rax)};
+  selector->Emit(
+      opcode, g.DefineAsFixed(node, rdx), g.UseFixed(node->InputAt(0), rax),
+      g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+}  // namespace
+
+void InstructionSelector::VisitInt32Mul(Node* node) {
+  Int32ScaleMatcher m(node, true);
+  if (m.matches()) {
+    Node* index = node->InputAt(0);
+    Node* base = m.power_of_two_plus_one() ? index : nullptr;
+    EmitLea(this, kX64Lea32, node, index, m.scale(), base, nullptr,
+            kPositiveDisplacement);
+    return;
+  }
+  VisitMul(this, node, kX64Imul32);
+}
+
+void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
+  // TODO(mvstanton): Use Int32ScaleMatcher somehow.
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kX64Imul32, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kX64Imul32, &cont);
+}
+
+void InstructionSelector::VisitInt64Mul(Node* node) {
+  VisitMul(this, node, kX64Imul);
+}
+
+void InstructionSelector::VisitInt32MulHigh(Node* node) {
+  VisitMulHigh(this, node, kX64ImulHigh32);
+}
+
+void InstructionSelector::VisitInt32Div(Node* node) {
+  VisitDiv(this, node, kX64Idiv32);
+}
+
+void InstructionSelector::VisitInt64Div(Node* node) {
+  VisitDiv(this, node, kX64Idiv);
+}
+
+void InstructionSelector::VisitUint32Div(Node* node) {
+  VisitDiv(this, node, kX64Udiv32);
+}
+
+void InstructionSelector::VisitUint64Div(Node* node) {
+  VisitDiv(this, node, kX64Udiv);
+}
+
+void InstructionSelector::VisitInt32Mod(Node* node) {
+  VisitMod(this, node, kX64Idiv32);
+}
+
+void InstructionSelector::VisitInt64Mod(Node* node) {
+  VisitMod(this, node, kX64Idiv);
+}
+
+void InstructionSelector::VisitUint32Mod(Node* node) {
+  VisitMod(this, node, kX64Udiv32);
+}
+
+void InstructionSelector::VisitUint64Mod(Node* node) {
+  VisitMod(this, node, kX64Udiv);
+}
+
+void InstructionSelector::VisitUint32MulHigh(Node* node) {
+  VisitMulHigh(this, node, kX64UmulHigh32);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToInt64(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kSSEFloat32ToInt64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToInt64(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kSSEFloat64ToInt64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat32ToUint64(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kSSEFloat32ToUint64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitTryTruncateFloat64ToUint64(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand inputs[] = {g.UseRegister(node->InputAt(0))};
+  InstructionOperand outputs[2];
+  size_t output_count = 0;
+  outputs[output_count++] = g.DefineAsRegister(node);
+
+  Node* success_output = NodeProperties::FindProjection(node, 1);
+  if (success_output) {
+    outputs[output_count++] = g.DefineAsRegister(success_output);
+  }
+
+  Emit(kSSEFloat64ToUint64, output_count, outputs, 1, inputs);
+}
+
+void InstructionSelector::VisitBitcastWord32ToWord64(Node* node) {
+  DCHECK(SmiValuesAre31Bits());
+  DCHECK(COMPRESS_POINTERS_BOOL);
+  EmitIdentity(node);
+}
+
+void InstructionSelector::VisitChangeInt32ToInt64(Node* node) {
+  DCHECK_EQ(node->InputCount(), 1);
+  Node* input = node->InputAt(0);
+  if (input->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+    node->ReplaceInput(0, input->InputAt(0));
+  }
+
+  X64OperandGenerator g(this);
+  Node* const value = node->InputAt(0);
+  if (value->opcode() == IrOpcode::kLoad && CanCover(node, value)) {
+    LoadRepresentation load_rep = LoadRepresentationOf(value->op());
+    MachineRepresentation rep = load_rep.representation();
+    InstructionCode opcode;
+    switch (rep) {
+      case MachineRepresentation::kBit:  // Fall through.
+      case MachineRepresentation::kWord8:
+        opcode = load_rep.IsSigned() ? kX64Movsxbq : kX64Movzxbq;
+        break;
+      case MachineRepresentation::kWord16:
+        opcode = load_rep.IsSigned() ? kX64Movsxwq : kX64Movzxwq;
+        break;
+      case MachineRepresentation::kWord32:
+        opcode = load_rep.IsSigned() ? kX64Movsxlq : kX64Movl;
+        break;
+      default:
+        UNREACHABLE();
+    }
+    InstructionOperand outputs[] = {g.DefineAsRegister(node)};
+    size_t input_count = 0;
+    InstructionOperand inputs[3];
+    AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+        node->InputAt(0), inputs, &input_count);
+    opcode |= AddressingModeField::encode(mode);
+    Emit(opcode, 1, outputs, input_count, inputs);
+  } else {
+    Emit(kX64Movsxlq, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
+  }
+}
+
+bool InstructionSelector::ZeroExtendsWord32ToWord64NoPhis(Node* node) {
+  X64OperandGenerator g(this);
+  DCHECK_NE(node->opcode(), IrOpcode::kPhi);
+  switch (node->opcode()) {
+    case IrOpcode::kWord32And:
+    case IrOpcode::kWord32Or:
+    case IrOpcode::kWord32Xor:
+    case IrOpcode::kWord32Shl:
+    case IrOpcode::kWord32Shr:
+    case IrOpcode::kWord32Sar:
+    case IrOpcode::kWord32Rol:
+    case IrOpcode::kWord32Ror:
+    case IrOpcode::kWord32Equal:
+    case IrOpcode::kInt32Add:
+    case IrOpcode::kInt32Sub:
+    case IrOpcode::kInt32Mul:
+    case IrOpcode::kInt32MulHigh:
+    case IrOpcode::kInt32Div:
+    case IrOpcode::kInt32LessThan:
+    case IrOpcode::kInt32LessThanOrEqual:
+    case IrOpcode::kInt32Mod:
+    case IrOpcode::kUint32Div:
+    case IrOpcode::kUint32LessThan:
+    case IrOpcode::kUint32LessThanOrEqual:
+    case IrOpcode::kUint32Mod:
+    case IrOpcode::kUint32MulHigh:
+    case IrOpcode::kTruncateInt64ToInt32:
+      // These 32-bit operations implicitly zero-extend to 64-bit on x64, so the
+      // zero-extension is a no-op.
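+      // (Writing a 32-bit register, e.g. via addl or movl, clears bits 63..32
+      // of the full 64-bit register.)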
+      return true;
+    case IrOpcode::kProjection: {
+      Node* const value = node->InputAt(0);
+      switch (value->opcode()) {
+        case IrOpcode::kInt32AddWithOverflow:
+        case IrOpcode::kInt32SubWithOverflow:
+        case IrOpcode::kInt32MulWithOverflow:
+          return true;
+        default:
+          return false;
+      }
+    }
+    case IrOpcode::kLoad:
+    case IrOpcode::kProtectedLoad:
+    case IrOpcode::kPoisonedLoad: {
+      // The movzxbl/movsxbl/movzxwl/movsxwl/movl operations implicitly
+      // zero-extend to 64-bit on x64, so the zero-extension is a no-op.
+      LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+      switch (load_rep.representation()) {
+        case MachineRepresentation::kWord8:
+        case MachineRepresentation::kWord16:
+        case MachineRepresentation::kWord32:
+          return true;
+        default:
+          return false;
+      }
+    }
+    case IrOpcode::kInt32Constant:
+    case IrOpcode::kInt64Constant:
+      // Constants are loaded with movl or movq, or xorl for zero; see
+      // CodeGenerator::AssembleMove. So any non-negative constant that fits
+      // in a 32-bit signed integer is zero-extended to 64 bits.
+      if (g.CanBeImmediate(node)) {
+        return g.GetImmediateIntegerValue(node) >= 0;
+      }
+      return false;
+    default:
+      return false;
+  }
+}
+
+void InstructionSelector::VisitChangeUint32ToUint64(Node* node) {
+  X64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  if (ZeroExtendsWord32ToWord64(value)) {
+    // These 32-bit operations implicitly zero-extend to 64-bit on x64, so the
+    // zero-extension is a no-op.
+    return EmitIdentity(node);
+  }
+  Emit(kX64Movl, g.DefineAsRegister(node), g.Use(value));
+}
+
+namespace {
+
+void VisitRO(InstructionSelector* selector, Node* node,
+             InstructionCode opcode) {
+  X64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
+}
+
+void VisitRR(InstructionSelector* selector, Node* node,
+             InstructionCode opcode) {
+  X64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineAsRegister(node),
+                 g.UseRegister(node->InputAt(0)));
+}
+
+void VisitRRO(InstructionSelector* selector, Node* node,
+              InstructionCode opcode) {
+  X64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)));
+}
+
+void VisitFloatBinop(InstructionSelector* selector, Node* node,
+                     InstructionCode avx_opcode, InstructionCode sse_opcode) {
+  X64OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  InstructionOperand inputs[8];
+  size_t input_count = 0;
+  InstructionOperand outputs[1];
+  size_t output_count = 0;
+
+  if (left == right) {
+    // If both inputs refer to the same operand, enforce allocating a register
+    // for both of them to ensure that we don't end up generating code like
+    // this:
+    //
+    //   movss xmm0, [rbp-0x10]
+    //   addss xmm0, [rbp-0x10]
+    InstructionOperand const input = g.UseRegister(left);
+    inputs[input_count++] = input;
+    inputs[input_count++] = input;
+  } else {
+    int effect_level = selector->GetEffectLevel(node);
+    if (node->op()->HasProperty(Operator::kCommutative) &&
+        (g.CanBeBetterLeftOperand(right) ||
+         g.CanBeMemoryOperand(avx_opcode, node, left, effect_level)) &&
+        (!g.CanBeBetterLeftOperand(left) ||
+         !g.CanBeMemoryOperand(avx_opcode, node, right, effect_level))) {
+      std::swap(left, right);
+    }
+    if (g.CanBeMemoryOperand(avx_opcode, node, right, effect_level)) {
+      inputs[input_count++] = g.UseRegister(left);
+      AddressingMode addressing_mode =
+          g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
+      avx_opcode |= AddressingModeField::encode(addressing_mode);
+      sse_opcode |= AddressingModeField::encode(addressing_mode);
+    } else {
+      inputs[input_count++] = g.UseRegister(left);
+      inputs[input_count++] = g.Use(right);
+    }
+  }
+
+  DCHECK_NE(0u, input_count);
+  DCHECK_GE(arraysize(inputs), input_count);
+
+  if (selector->IsSupported(AVX)) {
+    outputs[output_count++] = g.DefineAsRegister(node);
+    DCHECK_EQ(1u, output_count);
+    DCHECK_GE(arraysize(outputs), output_count);
+    selector->Emit(avx_opcode, output_count, outputs, input_count, inputs);
+  } else {
+    outputs[output_count++] = g.DefineSameAsFirst(node);
+    DCHECK_EQ(1u, output_count);
+    DCHECK_GE(arraysize(outputs), output_count);
+    selector->Emit(sse_opcode, output_count, outputs, input_count, inputs);
+  }
+}
+
+void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
+                    ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+  X64OperandGenerator g(selector);
+  InstructionOperand temps[] = {g.TempDoubleRegister()};
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
+                   arraysize(temps), temps);
+  } else {
+    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
+                   arraysize(temps), temps);
+  }
+}
+
+}  // namespace
+
+#define RO_OP_LIST(V)                                                    \
+  V(Word64Clz, kX64Lzcnt)                                                \
+  V(Word32Clz, kX64Lzcnt32)                                              \
+  V(Word64Ctz, kX64Tzcnt)                                                \
+  V(Word32Ctz, kX64Tzcnt32)                                              \
+  V(Word64Popcnt, kX64Popcnt)                                            \
+  V(Word32Popcnt, kX64Popcnt32)                                          \
+  V(Float64Sqrt, kSSEFloat64Sqrt)                                        \
+  V(Float32Sqrt, kSSEFloat32Sqrt)                                        \
+  V(ChangeFloat64ToInt32, kSSEFloat64ToInt32)                            \
+  V(ChangeFloat64ToInt64, kSSEFloat64ToInt64)                            \
+  V(ChangeFloat64ToUint32, kSSEFloat64ToUint32 | MiscField::encode(1))   \
+  V(TruncateFloat64ToInt64, kSSEFloat64ToInt64)                          \
+  V(TruncateFloat64ToUint32, kSSEFloat64ToUint32 | MiscField::encode(0)) \
+  V(ChangeFloat64ToUint64, kSSEFloat64ToUint64)                          \
+  V(TruncateFloat64ToFloat32, kSSEFloat64ToFloat32)                      \
+  V(ChangeFloat32ToFloat64, kSSEFloat32ToFloat64)                        \
+  V(TruncateFloat32ToInt32, kSSEFloat32ToInt32)                          \
+  V(TruncateFloat32ToUint32, kSSEFloat32ToUint32)                        \
+  V(ChangeInt32ToFloat64, kSSEInt32ToFloat64)                            \
+  V(ChangeInt64ToFloat64, kSSEInt64ToFloat64)                            \
+  V(ChangeUint32ToFloat64, kSSEUint32ToFloat64)                          \
+  V(RoundFloat64ToInt32, kSSEFloat64ToInt32)                             \
+  V(RoundInt32ToFloat32, kSSEInt32ToFloat32)                             \
+  V(RoundInt64ToFloat32, kSSEInt64ToFloat32)                             \
+  V(RoundUint64ToFloat32, kSSEUint64ToFloat32)                           \
+  V(RoundInt64ToFloat64, kSSEInt64ToFloat64)                             \
+  V(RoundUint64ToFloat64, kSSEUint64ToFloat64)                           \
+  V(RoundUint32ToFloat32, kSSEUint32ToFloat32)                           \
+  V(BitcastFloat32ToInt32, kX64BitcastFI)                                \
+  V(BitcastFloat64ToInt64, kX64BitcastDL)                                \
+  V(BitcastInt32ToFloat32, kX64BitcastIF)                                \
+  V(BitcastInt64ToFloat64, kX64BitcastLD)                                \
+  V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32)                \
+  V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32)              \
+  V(SignExtendWord8ToInt32, kX64Movsxbl)                                 \
+  V(SignExtendWord16ToInt32, kX64Movsxwl)                                \
+  V(SignExtendWord8ToInt64, kX64Movsxbq)                                 \
+  V(SignExtendWord16ToInt64, kX64Movsxwq)                                \
+  V(SignExtendWord32ToInt64, kX64Movsxlq)
+
+#define RR_OP_LIST(V)                                                         \
+  V(Float32RoundDown, kSSEFloat32Round | MiscField::encode(kRoundDown))       \
+  V(Float64RoundDown, kSSEFloat64Round | MiscField::encode(kRoundDown))       \
+  V(Float32RoundUp, kSSEFloat32Round | MiscField::encode(kRoundUp))           \
+  V(Float64RoundUp, kSSEFloat64Round | MiscField::encode(kRoundUp))           \
+  V(Float32RoundTruncate, kSSEFloat32Round | MiscField::encode(kRoundToZero)) \
+  V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
+  V(Float32RoundTiesEven,                                                     \
+    kSSEFloat32Round | MiscField::encode(kRoundToNearest))                    \
+  V(Float64RoundTiesEven,                                                     \
+    kSSEFloat64Round | MiscField::encode(kRoundToNearest))                    \
+  V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp))                  \
+  V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown))               \
+  V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero))             \
+  V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest))     \
+  V(F64x2Ceil, kX64F64x2Round | MiscField::encode(kRoundUp))                  \
+  V(F64x2Floor, kX64F64x2Round | MiscField::encode(kRoundDown))               \
+  V(F64x2Trunc, kX64F64x2Round | MiscField::encode(kRoundToZero))             \
+  V(F64x2NearestInt, kX64F64x2Round | MiscField::encode(kRoundToNearest))
+
+#define RO_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRO(this, node, opcode);                      \
+  }
+RO_OP_LIST(RO_VISITOR)
+#undef RO_VISITOR
+#undef RO_OP_LIST
+
+#define RR_VISITOR(Name, opcode)                      \
+  void InstructionSelector::Visit##Name(Node* node) { \
+    VisitRR(this, node, opcode);                      \
+  }
+RR_OP_LIST(RR_VISITOR)
+#undef RR_VISITOR
+#undef RR_OP_LIST
+
+void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) {
+  VisitRR(this, node, kArchTruncateDoubleToI);
+}
+
+void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
+  // We rely on the fact that TruncateInt64ToInt32 zero extends the
+  // value (see ZeroExtendsWord32ToWord64). So all code paths here
+  // have to satisfy that condition.
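+  // Both the movl-based paths and the shrq-by-32 path below leave bits 63..32
+  // of the result register zero, so that invariant holds.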
+  X64OperandGenerator g(this);
+  Node* value = node->InputAt(0);
+  if (CanCover(node, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord64Sar:
+      case IrOpcode::kWord64Shr: {
+        Int64BinopMatcher m(value);
+        if (m.right().Is(32)) {
+          if (CanCoverTransitively(node, value, value->InputAt(0)) &&
+              TryMatchLoadWord64AndShiftRight(this, value, kX64Movl)) {
+            return EmitIdentity(node);
+          }
+          Emit(kX64Shr, g.DefineSameAsFirst(node),
+               g.UseRegister(m.left().node()), g.TempImmediate(32));
+          return;
+        }
+        break;
+      }
+      case IrOpcode::kLoad: {
+        if (TryMergeTruncateInt64ToInt32IntoLoad(this, node, value)) {
+          return;
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  Emit(kX64Movl, g.DefineAsRegister(node), g.Use(value));
+}
+
+void InstructionSelector::VisitFloat32Add(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat32Add, kSSEFloat32Add);
+}
+
+void InstructionSelector::VisitFloat32Sub(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat32Sub, kSSEFloat32Sub);
+}
+
+void InstructionSelector::VisitFloat32Mul(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat32Mul, kSSEFloat32Mul);
+}
+
+void InstructionSelector::VisitFloat32Div(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat32Div, kSSEFloat32Div);
+}
+
+void InstructionSelector::VisitFloat32Abs(Node* node) {
+  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Abs, kSSEFloat32Abs);
+}
+
+void InstructionSelector::VisitFloat32Max(Node* node) {
+  VisitRRO(this, node, kSSEFloat32Max);
+}
+
+void InstructionSelector::VisitFloat32Min(Node* node) {
+  VisitRRO(this, node, kSSEFloat32Min);
+}
+
+void InstructionSelector::VisitFloat64Add(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat64Add, kSSEFloat64Add);
+}
+
+void InstructionSelector::VisitFloat64Sub(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat64Sub, kSSEFloat64Sub);
+}
+
+void InstructionSelector::VisitFloat64Mul(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat64Mul, kSSEFloat64Mul);
+}
+
+void InstructionSelector::VisitFloat64Div(Node* node) {
+  VisitFloatBinop(this, node, kAVXFloat64Div, kSSEFloat64Div);
+}
+
+void InstructionSelector::VisitFloat64Mod(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister(rax)};
+  Emit(kSSEFloat64Mod, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), 1,
+       temps);
+}
+
+void InstructionSelector::VisitFloat64Max(Node* node) {
+  VisitRRO(this, node, kSSEFloat64Max);
+}
+
+void InstructionSelector::VisitFloat64Min(Node* node) {
+  VisitRRO(this, node, kSSEFloat64Min);
+}
+
+void InstructionSelector::VisitFloat64Abs(Node* node) {
+  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Abs, kSSEFloat64Abs);
+}
+
+void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitFloat32Neg(Node* node) {
+  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Neg, kSSEFloat32Neg);
+}
+
+void InstructionSelector::VisitFloat64Neg(Node* node) {
+  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Neg, kSSEFloat64Neg);
+}
+
+void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
+                                                   InstructionCode opcode) {
+  X64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, xmm0), g.UseFixed(node->InputAt(0), xmm0),
+       g.UseFixed(node->InputAt(1), xmm1))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
+                                                  InstructionCode opcode) {
+  X64OperandGenerator g(this);
+  Emit(opcode, g.DefineAsFixed(node, xmm0), g.UseFixed(node->InputAt(0), xmm0))
+      ->MarkAsCall();
+}
+
+void InstructionSelector::EmitPrepareArguments(
+    ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
+    Node* node) {
+  X64OperandGenerator g(this);
+
+  // Prepare for C function call.
+  if (call_descriptor->IsCFunctionCall()) {
+    Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
+                                         call_descriptor->ParameterCount())),
+         0, nullptr, 0, nullptr);
+
+    // Poke any stack arguments.
+    for (size_t n = 0; n < arguments->size(); ++n) {
+      PushParameter input = (*arguments)[n];
+      if (input.node) {
+        int slot = static_cast<int>(n);
+        InstructionOperand value = g.CanBeImmediate(input.node)
+                                       ? g.UseImmediate(input.node)
+                                       : g.UseRegister(input.node);
+        Emit(kX64Poke | MiscField::encode(slot), g.NoOutput(), value);
+      }
+    }
+  } else {
+    // Push any stack arguments.
+    int effect_level = GetEffectLevel(node);
+    for (PushParameter input : base::Reversed(*arguments)) {
+      // Skip any alignment holes in pushed nodes. We may have one in case of a
+      // Simd128 stack argument.
+      if (input.node == nullptr) continue;
+      if (g.CanBeImmediate(input.node)) {
+        Emit(kX64Push, g.NoOutput(), g.UseImmediate(input.node));
+      } else if (IsSupported(ATOM) ||
+                 sequence()->IsFP(GetVirtualRegister(input.node))) {
+        // TODO(titzer): X64Push cannot handle stack->stack double moves
+        // because there is no way to encode fixed double slots.
+        Emit(kX64Push, g.NoOutput(), g.UseRegister(input.node));
+      } else if (g.CanBeMemoryOperand(kX64Push, node, input.node,
+                                      effect_level)) {
+        InstructionOperand outputs[1];
+        InstructionOperand inputs[4];
+        size_t input_count = 0;
+        InstructionCode opcode = kX64Push;
+        AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
+            input.node, inputs, &input_count);
+        opcode |= AddressingModeField::encode(mode);
+        Emit(opcode, 0, outputs, input_count, inputs);
+      } else {
+        Emit(kX64Push, g.NoOutput(), g.UseAny(input.node));
+      }
+    }
+  }
+}
+
+void InstructionSelector::EmitPrepareResults(
+    ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
+    Node* node) {
+  X64OperandGenerator g(this);
+
+  int reverse_slot = 1;
+  for (PushParameter output : *results) {
+    if (!output.location.IsCallerFrameSlot()) continue;
+    // Skip any alignment holes in nodes.
+    if (output.node != nullptr) {
+      DCHECK(!call_descriptor->IsCFunctionCall());
+      if (output.location.GetType() == MachineType::Float32()) {
+        MarkAsFloat32(output.node);
+      } else if (output.location.GetType() == MachineType::Float64()) {
+        MarkAsFloat64(output.node);
+      } else if (output.location.GetType() == MachineType::Simd128()) {
+        MarkAsSimd128(output.node);
+      }
+      InstructionOperand result = g.DefineAsRegister(output.node);
+      InstructionOperand slot = g.UseImmediate(reverse_slot);
+      Emit(kX64Peek, 1, &result, 1, &slot);
+    }
+    reverse_slot += output.location.GetSizeInPointers();
+  }
+}
+
+bool InstructionSelector::IsTailCallAddressImmediate() { return true; }
+
+int InstructionSelector::GetTempsCountForTailCallFromJSFunction() { return 3; }
+
+namespace {
+
+void VisitCompareWithMemoryOperand(InstructionSelector* selector,
+                                   InstructionCode opcode, Node* left,
+                                   InstructionOperand right,
+                                   FlagsContinuation* cont) {
+  DCHECK_EQ(IrOpcode::kLoad, left->opcode());
+  X64OperandGenerator g(selector);
+  size_t input_count = 0;
+  InstructionOperand inputs[4];
+  AddressingMode addressing_mode =
+      g.GetEffectiveAddressMemoryOperand(left, inputs, &input_count);
+  opcode |= AddressingModeField::encode(addressing_mode);
+  inputs[input_count++] = right;
+
+  selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
+}
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  InstructionOperand left, InstructionOperand right,
+                  FlagsContinuation* cont) {
+  selector->EmitWithContinuation(opcode, left, right, cont);
+}
+
+// Shared routine for multiple compare operations.
+void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
+                  Node* left, Node* right, FlagsContinuation* cont,
+                  bool commutative) {
+  X64OperandGenerator g(selector);
+  if (commutative && g.CanBeBetterLeftOperand(right)) {
+    std::swap(left, right);
+  }
+  VisitCompare(selector, opcode, g.UseRegister(left), g.Use(right), cont);
+}
+
+MachineType MachineTypeForNarrow(Node* node, Node* hint_node) {
+  if (hint_node->opcode() == IrOpcode::kLoad) {
+    MachineType hint = LoadRepresentationOf(hint_node->op());
+    if (node->opcode() == IrOpcode::kInt32Constant ||
+        node->opcode() == IrOpcode::kInt64Constant) {
+      int64_t constant = node->opcode() == IrOpcode::kInt32Constant
+                             ? OpParameter<int32_t>(node->op())
+                             : OpParameter<int64_t>(node->op());
+      if (hint == MachineType::Int8()) {
+        if (constant >= std::numeric_limits<int8_t>::min() &&
+            constant <= std::numeric_limits<int8_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Uint8()) {
+        if (constant >= std::numeric_limits<uint8_t>::min() &&
+            constant <= std::numeric_limits<uint8_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Int16()) {
+        if (constant >= std::numeric_limits<int16_t>::min() &&
+            constant <= std::numeric_limits<int16_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Uint16()) {
+        if (constant >= std::numeric_limits<uint16_t>::min() &&
+            constant <= std::numeric_limits<uint16_t>::max()) {
+          return hint;
+        }
+      } else if (hint == MachineType::Int32()) {
+        return hint;
+      } else if (hint == MachineType::Uint32()) {
+        if (constant >= 0) return hint;
+      }
+    }
+  }
+  return node->opcode() == IrOpcode::kLoad ? LoadRepresentationOf(node->op())
+                                           : MachineType::None();
+}
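+// Example: for a Load[Uint8] compared against Int32Constant(200), both
+// operands are classified as Uint8 above, which lets TryNarrowOpcodeSize
+// below turn a 32-bit cmp into an 8-bit one (switching the continuation to
+// an unsigned condition where necessary).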
+
+// Tries to match the size of the given opcode to that of the operands, if
+// possible.
+InstructionCode TryNarrowOpcodeSize(InstructionCode opcode, Node* left,
+                                    Node* right, FlagsContinuation* cont) {
+  // TODO(epertoso): we can probably get some size information out of phi
+  // nodes.
+  // If the load representations don't match, both operands will be
+  // zero/sign-extended to 32 bits.
+  MachineType left_type = MachineTypeForNarrow(left, right);
+  MachineType right_type = MachineTypeForNarrow(right, left);
+  if (left_type == right_type) {
+    switch (left_type.representation()) {
+      case MachineRepresentation::kBit:
+      case MachineRepresentation::kWord8: {
+        if (opcode == kX64Test32) return kX64Test8;
+        if (opcode == kX64Cmp32) {
+          if (left_type.semantic() == MachineSemantic::kUint32) {
+            cont->OverwriteUnsignedIfSigned();
+          } else {
+            CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
+          }
+          return kX64Cmp8;
+        }
+        break;
+      }
+      case MachineRepresentation::kWord16:
+        if (opcode == kX64Test32) return kX64Test16;
+        if (opcode == kX64Cmp32) {
+          if (left_type.semantic() == MachineSemantic::kUint32) {
+            cont->OverwriteUnsignedIfSigned();
+          } else {
+            CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
+          }
+          return kX64Cmp16;
+        }
+        break;
+#ifdef V8_COMPRESS_POINTERS
+      case MachineRepresentation::kTaggedSigned:
+      case MachineRepresentation::kTaggedPointer:
+      case MachineRepresentation::kTagged:
+        // When pointer compression is enabled, the lower 32 bits uniquely
+        // identify the tagged value.
+        if (opcode == kX64Cmp) return kX64Cmp32;
+        break;
+#endif
+      default:
+        break;
+    }
+  }
+  return opcode;
+}
+
+// Shared routine for multiple word compare operations.
+void VisitWordCompare(InstructionSelector* selector, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont) {
+  X64OperandGenerator g(selector);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+
+  // The 32-bit comparisons automatically truncate Word64 values to the
+  // Word32 range, so there is no need to do that explicitly.
+  if (opcode == kX64Cmp32 || opcode == kX64Test32) {
+    if (left->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+      left = left->InputAt(0);
+    }
+
+    if (right->opcode() == IrOpcode::kTruncateInt64ToInt32) {
+      right = right->InputAt(0);
+    }
+  }
+
+  opcode = TryNarrowOpcodeSize(opcode, left, right, cont);
+
+  // If one of the two inputs is an immediate, make sure it's on the right, or
+  // if one of the two inputs is a memory operand, make sure it's on the left.
+  int effect_level = selector->GetEffectLevel(node, cont);
+
+  if ((!g.CanBeImmediate(right) && g.CanBeImmediate(left)) ||
+      (g.CanBeMemoryOperand(opcode, node, right, effect_level) &&
+       !g.CanBeMemoryOperand(opcode, node, left, effect_level))) {
+    if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
+    std::swap(left, right);
+  }
+
+  // Match immediates on right side of comparison.
+  if (g.CanBeImmediate(right)) {
+    if (g.CanBeMemoryOperand(opcode, node, left, effect_level)) {
+      return VisitCompareWithMemoryOperand(selector, opcode, left,
+                                           g.UseImmediate(right), cont);
+    }
+    return VisitCompare(selector, opcode, g.Use(left), g.UseImmediate(right),
+                        cont);
+  }
+
+  // Match memory operands on left side of comparison.
+  if (g.CanBeMemoryOperand(opcode, node, left, effect_level)) {
+    return VisitCompareWithMemoryOperand(selector, opcode, left,
+                                         g.UseRegister(right), cont);
+  }
+
+  return VisitCompare(selector, opcode, left, right, cont,
+                      node->op()->HasProperty(Operator::kCommutative));
+}
+
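+// If the right-hand side is a known root constant and the roots register is
+// available, compare directly against the root's slot off kRootRegister
+// (kMode_Root) instead of materializing the handle in a register.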
+void VisitWord64EqualImpl(InstructionSelector* selector, Node* node,
+                          FlagsContinuation* cont) {
+  if (selector->CanUseRootsRegister()) {
+    X64OperandGenerator g(selector);
+    const RootsTable& roots_table = selector->isolate()->roots_table();
+    RootIndex root_index;
+    HeapObjectBinopMatcher m(node);
+    if (m.right().HasResolvedValue() &&
+        roots_table.IsRootHandle(m.right().ResolvedValue(), &root_index)) {
+      InstructionCode opcode =
+          kX64Cmp | AddressingModeField::encode(kMode_Root);
+      return VisitCompare(
+          selector, opcode,
+          g.TempImmediate(
+              TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
+          g.UseRegister(m.left().node()), cont);
+    }
+  }
+  VisitWordCompare(selector, node, kX64Cmp, cont);
+}
+
+void VisitWord32EqualImpl(InstructionSelector* selector, Node* node,
+                          FlagsContinuation* cont) {
+  if (COMPRESS_POINTERS_BOOL && selector->CanUseRootsRegister()) {
+    X64OperandGenerator g(selector);
+    const RootsTable& roots_table = selector->isolate()->roots_table();
+    RootIndex root_index;
+    Node* left = nullptr;
+    Handle<HeapObject> right;
+    // HeapConstants and CompressedHeapConstants can be treated the same when
+    // using them as an input to a 32-bit comparison. Check whether either is
+    // present.
+    {
+      CompressedHeapObjectBinopMatcher m(node);
+      if (m.right().HasResolvedValue()) {
+        left = m.left().node();
+        right = m.right().ResolvedValue();
+      } else {
+        HeapObjectBinopMatcher m2(node);
+        if (m2.right().HasResolvedValue()) {
+          left = m2.left().node();
+          right = m2.right().ResolvedValue();
+        }
+      }
+    }
+    if (!right.is_null() && roots_table.IsRootHandle(right, &root_index)) {
+      DCHECK_NE(left, nullptr);
+      InstructionCode opcode =
+          kX64Cmp32 | AddressingModeField::encode(kMode_Root);
+      return VisitCompare(
+          selector, opcode,
+          g.TempImmediate(
+              TurboAssemblerBase::RootRegisterOffsetForRootIndex(root_index)),
+          g.UseRegister(left), cont);
+    }
+  }
+  VisitWordCompare(selector, node, kX64Cmp32, cont);
+}
+
+// Shared routine for comparison with zero.
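+// If the zero check feeds an equality branch and the value is produced by
+// one of the binops/shifts listed below in the same block (with no other
+// uses), the flags already set by that instruction are reused and the
+// explicit cmp/test is elided.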
+void VisitCompareZero(InstructionSelector* selector, Node* user, Node* node,
+                      InstructionCode opcode, FlagsContinuation* cont) {
+  X64OperandGenerator g(selector);
+  if (cont->IsBranch() &&
+      (cont->condition() == kNotEqual || cont->condition() == kEqual)) {
+    switch (node->opcode()) {
+#define FLAGS_SET_BINOP_LIST(V)        \
+  V(kInt32Add, VisitBinop, kX64Add32)  \
+  V(kInt32Sub, VisitBinop, kX64Sub32)  \
+  V(kWord32And, VisitBinop, kX64And32) \
+  V(kWord32Or, VisitBinop, kX64Or32)   \
+  V(kInt64Add, VisitBinop, kX64Add)    \
+  V(kInt64Sub, VisitBinop, kX64Sub)    \
+  V(kWord64And, VisitBinop, kX64And)   \
+  V(kWord64Or, VisitBinop, kX64Or)
+#define FLAGS_SET_BINOP(opcode, Visit, archOpcode)           \
+  case IrOpcode::opcode:                                     \
+    if (selector->IsOnlyUserOfNodeInSameBlock(user, node)) { \
+      return Visit(selector, node, archOpcode, cont);        \
+    }                                                        \
+    break;
+      FLAGS_SET_BINOP_LIST(FLAGS_SET_BINOP)
+#undef FLAGS_SET_BINOP_LIST
+#undef FLAGS_SET_BINOP
+
+#define TRY_VISIT_WORD32_SHIFT TryVisitWordShift<Int32BinopMatcher, 32>
+#define TRY_VISIT_WORD64_SHIFT TryVisitWordShift<Int64BinopMatcher, 64>
+// Skip Word64Sar/Word32Sar since there is no instruction reduction in most
+// cases.
+#define FLAGS_SET_SHIFT_LIST(V)                    \
+  V(kWord32Shl, TRY_VISIT_WORD32_SHIFT, kX64Shl32) \
+  V(kWord32Shr, TRY_VISIT_WORD32_SHIFT, kX64Shr32) \
+  V(kWord64Shl, TRY_VISIT_WORD64_SHIFT, kX64Shl)   \
+  V(kWord64Shr, TRY_VISIT_WORD64_SHIFT, kX64Shr)
+#define FLAGS_SET_SHIFT(opcode, TryVisit, archOpcode)         \
+  case IrOpcode::opcode:                                      \
+    if (selector->IsOnlyUserOfNodeInSameBlock(user, node)) {  \
+      if (TryVisit(selector, node, archOpcode, cont)) return; \
+    }                                                         \
+    break;
+      FLAGS_SET_SHIFT_LIST(FLAGS_SET_SHIFT)
+#undef TRY_VISIT_WORD32_SHIFT
+#undef TRY_VISIT_WORD64_SHIFT
+#undef FLAGS_SET_SHIFT_LIST
+#undef FLAGS_SET_SHIFT
+      default:
+        break;
+    }
+  }
+  int effect_level = selector->GetEffectLevel(node, cont);
+  if (node->opcode() == IrOpcode::kLoad) {
+    switch (LoadRepresentationOf(node->op()).representation()) {
+      case MachineRepresentation::kWord8:
+        if (opcode == kX64Cmp32) {
+          opcode = kX64Cmp8;
+        } else if (opcode == kX64Test32) {
+          opcode = kX64Test8;
+        }
+        break;
+      case MachineRepresentation::kWord16:
+        if (opcode == kX64Cmp32) {
+          opcode = kX64Cmp16;
+        } else if (opcode == kX64Test32) {
+          opcode = kX64Test16;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+  if (g.CanBeMemoryOperand(opcode, user, node, effect_level)) {
+    VisitCompareWithMemoryOperand(selector, opcode, node, g.TempImmediate(0),
+                                  cont);
+  } else {
+    VisitCompare(selector, opcode, g.Use(node), g.TempImmediate(0), cont);
+  }
+}
+
+// Shared routine for multiple float32 compare operations (inputs commuted).
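+// The operands are swapped so that "a < b" style checks map onto
+// above-style conditions on the (v)ucomiss flags, which also come out false
+// when either input is NaN; the float64 variant below relies on the same
+// trick.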
+void VisitFloat32Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Node* const left = node->InputAt(0);
+  Node* const right = node->InputAt(1);
+  InstructionCode const opcode =
+      selector->IsSupported(AVX) ? kAVXFloat32Cmp : kSSEFloat32Cmp;
+  VisitCompare(selector, opcode, right, left, cont, false);
+}
+
+// Shared routine for multiple float64 compare operations (inputs commuted).
+void VisitFloat64Compare(InstructionSelector* selector, Node* node,
+                         FlagsContinuation* cont) {
+  Node* const left = node->InputAt(0);
+  Node* const right = node->InputAt(1);
+  InstructionCode const opcode =
+      selector->IsSupported(AVX) ? kAVXFloat64Cmp : kSSEFloat64Cmp;
+  VisitCompare(selector, opcode, right, left, cont, false);
+}
+
+// Shared routine for Word32/Word64 Atomic Binops
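+// The value and base are kept in distinct registers, a scratch register is
+// reserved, and the result is pinned to rax; this matches the cmpxchg-based
+// read-modify-write loop the code generator emits for these opcodes.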
+void VisitAtomicBinop(InstructionSelector* selector, Node* node,
+                      ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  AddressingMode addressing_mode;
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegister(value), g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {g.DefineAsFixed(node, rax)};
+  InstructionOperand temps[] = {g.TempRegister()};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
+                 arraysize(temps), temps);
+}
+
+// Shared routine for Word32/Word64 Atomic CompareExchange
+void VisitAtomicCompareExchange(InstructionSelector* selector, Node* node,
+                                ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* old_value = node->InputAt(2);
+  Node* new_value = node->InputAt(3);
+  AddressingMode addressing_mode;
+  InstructionOperand inputs[] = {
+      g.UseFixed(old_value, rax), g.UseUniqueRegister(new_value),
+      g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {g.DefineAsFixed(node, rax)};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs);
+}
+
+// Shared routine for Word32/Word64 Atomic Exchange
+void VisitAtomicExchange(InstructionSelector* selector, Node* node,
+                         ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  Node* base = node->InputAt(0);
+  Node* index = node->InputAt(1);
+  Node* value = node->InputAt(2);
+  AddressingMode addressing_mode;
+  InstructionOperand inputs[] = {
+      g.UseUniqueRegister(value), g.UseUniqueRegister(base),
+      g.GetEffectiveIndexOperand(index, &addressing_mode)};
+  InstructionOperand outputs[] = {g.DefineSameAsFirst(node)};
+  InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
+  selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs);
+}
+
+}  // namespace
+
+// Shared routine for word comparison against zero.
+void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
+                                               FlagsContinuation* cont) {
+  // Try to combine with comparisons against 0 by simply inverting the branch.
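+  // E.g. Branch(Word32Equal(x, #0)) becomes a branch on x with the
+  // continuation negated; chained equal-to-zero checks keep flipping it.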
+  while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
+    Int32BinopMatcher m(value);
+    if (!m.right().Is(0)) break;
+
+    user = value;
+    value = m.left().node();
+    cont->Negate();
+  }
+
+  if (CanCover(user, value)) {
+    switch (value->opcode()) {
+      case IrOpcode::kWord32Equal:
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        return VisitWord32EqualImpl(this, value, cont);
+      case IrOpcode::kInt32LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, kX64Cmp32, cont);
+      case IrOpcode::kInt32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kX64Cmp32, cont);
+      case IrOpcode::kUint32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, kX64Cmp32, cont);
+      case IrOpcode::kUint32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kX64Cmp32, cont);
+      case IrOpcode::kWord64Equal: {
+        cont->OverwriteAndNegateIfEqual(kEqual);
+        Int64BinopMatcher m(value);
+        if (m.right().Is(0)) {
+          // Try to combine the branch with a comparison.
+          Node* const user = m.node();
+          Node* const value = m.left().node();
+          if (CanCover(user, value)) {
+            switch (value->opcode()) {
+              case IrOpcode::kInt64Sub:
+                return VisitWordCompare(this, value, kX64Cmp, cont);
+              case IrOpcode::kWord64And:
+                return VisitWordCompare(this, value, kX64Test, cont);
+              default:
+                break;
+            }
+          }
+          return VisitCompareZero(this, user, value, kX64Cmp, cont);
+        }
+        return VisitWord64EqualImpl(this, value, cont);
+      }
+      case IrOpcode::kInt64LessThan:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThan);
+        return VisitWordCompare(this, value, kX64Cmp, cont);
+      case IrOpcode::kInt64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kX64Cmp, cont);
+      case IrOpcode::kUint64LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
+        return VisitWordCompare(this, value, kX64Cmp, cont);
+      case IrOpcode::kUint64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
+        return VisitWordCompare(this, value, kX64Cmp, cont);
+      case IrOpcode::kFloat32Equal:
+        cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThan:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat32LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
+        return VisitFloat32Compare(this, value, cont);
+      case IrOpcode::kFloat64Equal:
+        cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kFloat64LessThan: {
+        Float64BinopMatcher m(value);
+        if (m.left().Is(0.0) && m.right().IsFloat64Abs()) {
+          // This matches the pattern
+          //
+          //   Float64LessThan(#0.0, Float64Abs(x))
+          //
+          // which TurboFan generates for NumberToBoolean in the general case,
+          // and which evaluates to false if x is 0, -0 or NaN. We can compile
+          // this to a simple (v)ucomisd using not_equal flags condition, which
+          // avoids the costly Float64Abs.
+          cont->OverwriteAndNegateIfEqual(kNotEqual);
+          InstructionCode const opcode =
+              IsSupported(AVX) ? kAVXFloat64Cmp : kSSEFloat64Cmp;
+          return VisitCompare(this, opcode, m.left().node(),
+                              m.right().InputAt(0), cont, false);
+        }
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
+        return VisitFloat64Compare(this, value, cont);
+      }
+      case IrOpcode::kFloat64LessThanOrEqual:
+        cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
+        return VisitFloat64Compare(this, value, cont);
+      case IrOpcode::kProjection:
+        // Check if this is the overflow output projection of an
+        // <Operation>WithOverflow node.
+        if (ProjectionIndexOf(value->op()) == 1u) {
+          // We cannot combine the <Operation>WithOverflow with this branch
+          // unless the 0th projection (the use of the actual value of the
+          // <Operation>) is either nullptr, which means there's no use of
+          // the actual value, or was already defined, which means it is
+          // scheduled *AFTER* this branch.
+          Node* const node = value->InputAt(0);
+          Node* const result = NodeProperties::FindProjection(node, 0);
+          if (result == nullptr || IsDefined(result)) {
+            switch (node->opcode()) {
+              case IrOpcode::kInt32AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kX64Add32, cont);
+              case IrOpcode::kInt32SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kX64Sub32, cont);
+              case IrOpcode::kInt32MulWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kX64Imul32, cont);
+              case IrOpcode::kInt64AddWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kX64Add, cont);
+              case IrOpcode::kInt64SubWithOverflow:
+                cont->OverwriteAndNegateIfEqual(kOverflow);
+                return VisitBinop(this, node, kX64Sub, cont);
+              default:
+                break;
+            }
+          }
+        }
+        break;
+      case IrOpcode::kInt32Sub:
+        return VisitWordCompare(this, value, kX64Cmp32, cont);
+      case IrOpcode::kWord32And:
+        return VisitWordCompare(this, value, kX64Test32, cont);
+      case IrOpcode::kStackPointerGreaterThan:
+        cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
+        return VisitStackPointerGreaterThan(value, cont);
+      default:
+        break;
+    }
+  }
+
+  // Branch could not be combined with a compare, emit compare against 0.
+  VisitCompareZero(this, user, value, kX64Cmp32, cont);
+}
+
+void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
+  X64OperandGenerator g(this);
+  InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
+
+  // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
+  if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
+    static const size_t kMaxTableSwitchValueRange = 2 << 16;
+    size_t table_space_cost = 4 + sw.value_range();
+    size_t table_time_cost = 3;
+    size_t lookup_space_cost = 3 + 2 * sw.case_count();
+    size_t lookup_time_cost = sw.case_count();
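+    // E.g. for 8 cases spanning a value range of 10, the table variant costs
+    // (4 + 10) + 3 * 3 = 23 versus (3 + 2 * 8) + 3 * 8 = 43 for the binary
+    // search, so a jump table is emitted (provided min_value and the value
+    // range also pass the checks below).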
+    if (sw.case_count() > 4 &&
+        table_space_cost + 3 * table_time_cost <=
+            lookup_space_cost + 3 * lookup_time_cost &&
+        sw.min_value() > std::numeric_limits<int32_t>::min() &&
+        sw.value_range() <= kMaxTableSwitchValueRange) {
+      InstructionOperand index_operand = g.TempRegister();
+      if (sw.min_value()) {
+        // The leal automatically zero extends, so the result is a valid
+        // 64-bit index.
+        Emit(kX64Lea32 | AddressingModeField::encode(kMode_MRI), index_operand,
+             value_operand, g.TempImmediate(-sw.min_value()));
+      } else {
+        // Zero extend, because we use it as a 64-bit index into the jump
+        // table.
+        if (ZeroExtendsWord32ToWord64(node->InputAt(0))) {
+          // Input value has already been zero-extended.
+          index_operand = value_operand;
+        } else {
+          Emit(kX64Movl, index_operand, value_operand);
+        }
+      }
+      // Generate a table lookup.
+      return EmitTableSwitch(sw, index_operand);
+    }
+  }
+
+  // Generate a tree of conditional jumps.
+  return EmitBinarySearchSwitch(sw, value_operand);
+}
+
+void InstructionSelector::VisitWord32Equal(Node* const node) {
+  Node* user = node;
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int32BinopMatcher m(user);
+  if (m.right().Is(0)) {
+    return VisitWordCompareZero(m.node(), m.left().node(), &cont);
+  }
+  VisitWord32EqualImpl(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, kX64Cmp32, &cont);
+}
+
+void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kX64Cmp32, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, kX64Cmp32, &cont);
+}
+
+void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kX64Cmp32, &cont);
+}
+
+void InstructionSelector::VisitWord64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
+  Int64BinopMatcher m(node);
+  if (m.right().Is(0)) {
+    // Try to combine the equality check with a comparison.
+    Node* const user = m.node();
+    Node* const value = m.left().node();
+    if (CanCover(user, value)) {
+      switch (value->opcode()) {
+        case IrOpcode::kInt64Sub:
+          return VisitWordCompare(this, value, kX64Cmp, &cont);
+        case IrOpcode::kWord64And:
+          return VisitWordCompare(this, value, kX64Test, &cont);
+        default:
+          break;
+      }
+    }
+  }
+  VisitWord64EqualImpl(this, node, &cont);
+}
+
+void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kX64Add32, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kX64Add32, &cont);
+}
+
+void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
+  if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
+    FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
+    return VisitBinop(this, node, kX64Sub32, &cont);
+  }
+  FlagsContinuation cont;
+  VisitBinop(this, node, kX64Sub32, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
+}
+
+void InstructionSelector::VisitInt64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThan(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
+}
+
+void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
+  VisitWordCompare(this, node, kX64Cmp, &cont);
+}
+
+void InstructionSelector::VisitFloat32Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThan(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
+  VisitFloat32Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64Equal(Node* node) {
+  FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThan(Node* node) {
+  Float64BinopMatcher m(node);
+  if (m.left().Is(0.0) && m.right().IsFloat64Abs()) {
+    // This matches the pattern
+    //
+    //   Float64LessThan(#0.0, Float64Abs(x))
+    //
+    // which TurboFan generates for NumberToBoolean in the general case,
+    // and which evaluates to false if x is 0, -0 or NaN. We can compile
+    // this to a simple (v)ucomisd using not_equal flags condition, which
+    // avoids the costly Float64Abs.
+    FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, node);
+    InstructionCode const opcode =
+        IsSupported(AVX) ? kAVXFloat64Cmp : kSSEFloat64Cmp;
+    return VisitCompare(this, opcode, m.left().node(), m.right().InputAt(0),
+                        &cont, false);
+  }
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
+  FlagsContinuation cont =
+      FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
+  VisitFloat64Compare(this, node, &cont);
+}
+
+void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
+  X64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Float64Matcher mleft(left);
+  if (mleft.HasResolvedValue() &&
+      (bit_cast<uint64_t>(mleft.ResolvedValue()) >> 32) == 0u) {
+    Emit(kSSEFloat64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
+    return;
+  }
+  Emit(kSSEFloat64InsertLowWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.Use(right));
+}
+
+void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
+  X64OperandGenerator g(this);
+  Node* left = node->InputAt(0);
+  Node* right = node->InputAt(1);
+  Emit(kSSEFloat64InsertHighWord32, g.DefineSameAsFirst(node),
+       g.UseRegister(left), g.Use(right));
+}
+
+void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kSSEFloat64SilenceNaN, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitMemoryBarrier(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64MFence, g.NoOutput());
+}
+
+void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
+         load_rep.representation() == MachineRepresentation::kWord16 ||
+         load_rep.representation() == MachineRepresentation::kWord32);
+  USE(load_rep);
+  VisitLoad(node);
+}
+
+void InstructionSelector::VisitWord64AtomicLoad(Node* node) {
+  LoadRepresentation load_rep = LoadRepresentationOf(node->op());
+  USE(load_rep);
+  VisitLoad(node);
+}
+
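+// An atomic store is implemented as an exchange: on x64 an xchg with a
+// memory operand has an implicit lock prefix, which gives the store the
+// required sequentially consistent semantics.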
+void InstructionSelector::VisitWord32AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kWord32AtomicExchangeInt8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kWord32AtomicExchangeInt16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kWord32AtomicExchangeWord32;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicStore(Node* node) {
+  MachineRepresentation rep = AtomicStoreRepresentationOf(node->op());
+  ArchOpcode opcode;
+  switch (rep) {
+    case MachineRepresentation::kWord8:
+      opcode = kX64Word64AtomicExchangeUint8;
+      break;
+    case MachineRepresentation::kWord16:
+      opcode = kX64Word64AtomicExchangeUint16;
+      break;
+    case MachineRepresentation::kWord32:
+      opcode = kX64Word64AtomicExchangeUint32;
+      break;
+    case MachineRepresentation::kWord64:
+      opcode = kX64Word64AtomicExchangeUint64;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Uint8()) {
+    opcode = kX64Word64AtomicExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kX64Word64AtomicExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kX64Word64AtomicExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kX64Word64AtomicExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = kWord32AtomicCompareExchangeInt8;
+  } else if (type == MachineType::Uint8()) {
+    opcode = kWord32AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Int16()) {
+    opcode = kWord32AtomicCompareExchangeInt16;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kWord32AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = kWord32AtomicCompareExchangeWord32;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Uint8()) {
+    opcode = kX64Word64AtomicCompareExchangeUint8;
+  } else if (type == MachineType::Uint16()) {
+    opcode = kX64Word64AtomicCompareExchangeUint16;
+  } else if (type == MachineType::Uint32()) {
+    opcode = kX64Word64AtomicCompareExchangeUint32;
+  } else if (type == MachineType::Uint64()) {
+    opcode = kX64Word64AtomicCompareExchangeUint64;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicCompareExchange(this, node, opcode);
+}
+
+void InstructionSelector::VisitWord32AtomicBinaryOperation(
+    Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
+    ArchOpcode uint16_op, ArchOpcode word32_op) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Int8()) {
+    opcode = int8_op;
+  } else if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Int16()) {
+    opcode = int16_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
+    opcode = word32_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                   \
+  void InstructionSelector::VisitWord32Atomic##op(Node* node) {  \
+    VisitWord32AtomicBinaryOperation(                            \
+        node, kWord32Atomic##op##Int8, kWord32Atomic##op##Uint8, \
+        kWord32Atomic##op##Int16, kWord32Atomic##op##Uint16,     \
+        kWord32Atomic##op##Word32);                              \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+void InstructionSelector::VisitWord64AtomicBinaryOperation(
+    Node* node, ArchOpcode uint8_op, ArchOpcode uint16_op, ArchOpcode uint32_op,
+    ArchOpcode word64_op) {
+  MachineType type = AtomicOpType(node->op());
+  ArchOpcode opcode;
+  if (type == MachineType::Uint8()) {
+    opcode = uint8_op;
+  } else if (type == MachineType::Uint16()) {
+    opcode = uint16_op;
+  } else if (type == MachineType::Uint32()) {
+    opcode = uint32_op;
+  } else if (type == MachineType::Uint64()) {
+    opcode = word64_op;
+  } else {
+    UNREACHABLE();
+  }
+  VisitAtomicBinop(this, node, opcode);
+}
+
+#define VISIT_ATOMIC_BINOP(op)                                           \
+  void InstructionSelector::VisitWord64Atomic##op(Node* node) {          \
+    VisitWord64AtomicBinaryOperation(                                    \
+        node, kX64Word64Atomic##op##Uint8, kX64Word64Atomic##op##Uint16, \
+        kX64Word64Atomic##op##Uint32, kX64Word64Atomic##op##Uint64);     \
+  }
+VISIT_ATOMIC_BINOP(Add)
+VISIT_ATOMIC_BINOP(Sub)
+VISIT_ATOMIC_BINOP(And)
+VISIT_ATOMIC_BINOP(Or)
+VISIT_ATOMIC_BINOP(Xor)
+#undef VISIT_ATOMIC_BINOP
+
+#define SIMD_BINOP_SSE_AVX_LIST(V) \
+  V(F64x2Add)                      \
+  V(F64x2Sub)                      \
+  V(F64x2Mul)                      \
+  V(F64x2Div)                      \
+  V(F64x2Eq)                       \
+  V(F64x2Ne)                       \
+  V(F64x2Lt)                       \
+  V(F64x2Le)                       \
+  V(F32x4Add)                      \
+  V(F32x4Sub)                      \
+  V(F32x4Mul)                      \
+  V(F32x4Div)                      \
+  V(F32x4Eq)                       \
+  V(F32x4Ne)                       \
+  V(F32x4Lt)                       \
+  V(F32x4Le)                       \
+  V(I64x2Add)                      \
+  V(I64x2Sub)                      \
+  V(I64x2Eq)                       \
+  V(I32x4Add)                      \
+  V(I32x4AddHoriz)                 \
+  V(I32x4Sub)                      \
+  V(I32x4Mul)                      \
+  V(I32x4MinS)                     \
+  V(I32x4MaxS)                     \
+  V(I32x4Eq)                       \
+  V(I32x4GtS)                      \
+  V(I32x4MinU)                     \
+  V(I32x4MaxU)                     \
+  V(I32x4DotI16x8S)                \
+  V(I16x8SConvertI32x4)            \
+  V(I16x8UConvertI32x4)            \
+  V(I16x8Add)                      \
+  V(I16x8AddSatS)                  \
+  V(I16x8AddHoriz)                 \
+  V(I16x8Sub)                      \
+  V(I16x8SubSatS)                  \
+  V(I16x8Mul)                      \
+  V(I16x8MinS)                     \
+  V(I16x8MaxS)                     \
+  V(I16x8Eq)                       \
+  V(I16x8GtS)                      \
+  V(I16x8AddSatU)                  \
+  V(I16x8SubSatU)                  \
+  V(I16x8MinU)                     \
+  V(I16x8MaxU)                     \
+  V(I16x8RoundingAverageU)         \
+  V(I8x16SConvertI16x8)            \
+  V(I8x16UConvertI16x8)            \
+  V(I8x16Add)                      \
+  V(I8x16AddSatS)                  \
+  V(I8x16Sub)                      \
+  V(I8x16SubSatS)                  \
+  V(I8x16MinS)                     \
+  V(I8x16MaxS)                     \
+  V(I8x16Eq)                       \
+  V(I8x16GtS)                      \
+  V(I8x16AddSatU)                  \
+  V(I8x16SubSatU)                  \
+  V(I8x16MinU)                     \
+  V(I8x16MaxU)                     \
+  V(I8x16RoundingAverageU)         \
+  V(S128And)                       \
+  V(S128Or)                        \
+  V(S128Xor)
+
+#define SIMD_BINOP_LIST(V) \
+  V(F64x2Min)              \
+  V(F64x2Max)              \
+  V(F32x4AddHoriz)         \
+  V(F32x4Min)              \
+  V(F32x4Max)              \
+  V(I32x4GeS)              \
+  V(I32x4GeU)              \
+  V(I16x8GeS)              \
+  V(I16x8GeU)              \
+  V(I8x16GeS)              \
+  V(I8x16GeU)
+
+#define SIMD_BINOP_ONE_TEMP_LIST(V) \
+  V(I32x4Ne)                        \
+  V(I32x4GtU)                       \
+  V(I16x8Ne)                        \
+  V(I16x8GtU)                       \
+  V(I8x16Ne)                        \
+  V(I8x16GtU)
+
+#define SIMD_UNOP_LIST(V)   \
+  V(F64x2Sqrt)              \
+  V(F32x4SConvertI32x4)     \
+  V(F32x4Abs)               \
+  V(F32x4Neg)               \
+  V(F32x4Sqrt)              \
+  V(F32x4RecipApprox)       \
+  V(F32x4RecipSqrtApprox)   \
+  V(I64x2Neg)               \
+  V(I64x2BitMask)           \
+  V(I32x4SConvertI16x8Low)  \
+  V(I32x4SConvertI16x8High) \
+  V(I32x4Neg)               \
+  V(I32x4UConvertI16x8Low)  \
+  V(I32x4UConvertI16x8High) \
+  V(I32x4Abs)               \
+  V(I32x4BitMask)           \
+  V(I16x8SConvertI8x16Low)  \
+  V(I16x8SConvertI8x16High) \
+  V(I16x8Neg)               \
+  V(I16x8UConvertI8x16Low)  \
+  V(I16x8UConvertI8x16High) \
+  V(I16x8Abs)               \
+  V(I8x16Neg)               \
+  V(I8x16Abs)               \
+  V(I8x16BitMask)           \
+  V(S128Not)
+
+#define SIMD_SHIFT_OPCODES(V) \
+  V(I64x2Shl)                 \
+  V(I64x2ShrU)                \
+  V(I32x4Shl)                 \
+  V(I32x4ShrS)                \
+  V(I32x4ShrU)                \
+  V(I16x8Shl)                 \
+  V(I16x8ShrS)                \
+  V(I16x8ShrU)
+
+#define SIMD_NARROW_SHIFT_OPCODES(V) \
+  V(I8x16Shl)                        \
+  V(I8x16ShrU)
+
+#define SIMD_ANYTRUE_LIST(V) \
+  V(V32x4AnyTrue)            \
+  V(V16x8AnyTrue)            \
+  V(V8x16AnyTrue)
+
+#define SIMD_ALLTRUE_LIST(V) \
+  V(V32x4AllTrue)            \
+  V(V16x8AllTrue)            \
+  V(V8x16AllTrue)
+
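+// All-zero and all-ones constants are special-cased so they can be
+// materialized with a single instruction instead of being assembled from
+// four 32-bit immediates.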
+void InstructionSelector::VisitS128Const(Node* node) {
+  X64OperandGenerator g(this);
+  static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
+  uint32_t val[kUint32Immediates];
+  memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
+  // If all bytes are zero or all ones, avoid emitting code for a generic
+  // constant.
+  bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
+  bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
+                  val[2] == UINT32_MAX && val[3] == UINT32_MAX;
+  InstructionOperand dst = g.DefineAsRegister(node);
+  if (all_zeros) {
+    Emit(kX64S128Zero, dst);
+  } else if (all_ones) {
+    Emit(kX64S128AllOnes, dst);
+  } else {
+    Emit(kX64S128Const, dst, g.UseImmediate(val[0]), g.UseImmediate(val[1]),
+         g.UseImmediate(val[2]), g.UseImmediate(val[3]));
+  }
+}
+
+void InstructionSelector::VisitS128Zero(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64S128Zero, g.DefineAsRegister(node));
+}
+
+#define SIMD_TYPES_FOR_SPLAT(V) \
+  V(I64x2)                      \
+  V(I32x4)                      \
+  V(I16x8)                      \
+  V(I8x16)
+
+// Splat with an optimization for const 0.
+#define VISIT_SIMD_SPLAT(Type)                                               \
+  void InstructionSelector::Visit##Type##Splat(Node* node) {                 \
+    X64OperandGenerator g(this);                                             \
+    Node* input = node->InputAt(0);                                          \
+    if (g.CanBeImmediate(input) && g.GetImmediateIntegerValue(input) == 0) { \
+      Emit(kX64S128Zero, g.DefineAsRegister(node));                          \
+    } else {                                                                 \
+      Emit(kX64##Type##Splat, g.DefineAsRegister(node), g.Use(input));       \
+    }                                                                        \
+  }
+SIMD_TYPES_FOR_SPLAT(VISIT_SIMD_SPLAT)
+#undef VISIT_SIMD_SPLAT
+#undef SIMD_TYPES_FOR_SPLAT
+
+void InstructionSelector::VisitF64x2Splat(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64F64x2Splat, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitF32x4Splat(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand dst =
+      IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
+  Emit(kX64F32x4Splat, dst, g.UseRegister(node->InputAt(0)));
+}
+
+#define SIMD_VISIT_EXTRACT_LANE(Type, Sign, Op)                               \
+  void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) {      \
+    X64OperandGenerator g(this);                                              \
+    int32_t lane = OpParameter<int32_t>(node->op());                          \
+    Emit(kX64##Op, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), \
+         g.UseImmediate(lane));                                               \
+  }
+SIMD_VISIT_EXTRACT_LANE(F64x2, , F64x2ExtractLane)
+SIMD_VISIT_EXTRACT_LANE(F32x4, , F32x4ExtractLane)
+SIMD_VISIT_EXTRACT_LANE(I64x2, , I64x2ExtractLane)
+SIMD_VISIT_EXTRACT_LANE(I32x4, , I32x4ExtractLane)
+SIMD_VISIT_EXTRACT_LANE(I16x8, S, I16x8ExtractLaneS)
+SIMD_VISIT_EXTRACT_LANE(I16x8, U, Pextrw)
+SIMD_VISIT_EXTRACT_LANE(I8x16, S, I8x16ExtractLaneS)
+SIMD_VISIT_EXTRACT_LANE(I8x16, U, Pextrb)
+#undef SIMD_VISIT_EXTRACT_LANE
+
+void InstructionSelector::VisitF32x4ReplaceLane(Node* node) {
+  X64OperandGenerator g(this);
+  int32_t lane = OpParameter<int32_t>(node->op());
+  Emit(kX64F32x4ReplaceLane, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
+       g.Use(node->InputAt(1)));
+}
+
+#define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE)                               \
+  void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) {          \
+    X64OperandGenerator g(this);                                            \
+    int32_t lane = OpParameter<int32_t>(node->op());                        \
+    Emit(OPCODE, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), \
+         g.UseImmediate(lane), g.Use(node->InputAt(1)));                    \
+  }
+
+#define SIMD_TYPES_FOR_REPLACE_LANE(V) \
+  V(F64x2, kX64Pinsrq)                 \
+  V(I64x2, kX64Pinsrq)                 \
+  V(I32x4, kX64Pinsrd)                 \
+  V(I16x8, kX64Pinsrw)                 \
+  V(I8x16, kX64Pinsrb)
+
+SIMD_TYPES_FOR_REPLACE_LANE(VISIT_SIMD_REPLACE_LANE)
+#undef SIMD_TYPES_FOR_REPLACE_LANE
+#undef VISIT_SIMD_REPLACE_LANE
+
+#define VISIT_SIMD_SHIFT(Opcode)                                            \
+  void InstructionSelector::Visit##Opcode(Node* node) {                     \
+    X64OperandGenerator g(this);                                            \
+    InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node)    \
+                                              : g.DefineSameAsFirst(node);  \
+    if (g.CanBeImmediate(node->InputAt(1))) {                               \
+      Emit(kX64##Opcode, dst, g.UseRegister(node->InputAt(0)),              \
+           g.UseImmediate(node->InputAt(1)));                               \
+    } else {                                                                \
+      InstructionOperand temps[] = {g.TempSimd128Register(),                \
+                                    g.TempRegister()};                      \
+      Emit(kX64##Opcode, dst, g.UseUniqueRegister(node->InputAt(0)),        \
+           g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
+    }                                                                       \
+  }
+SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
+#undef VISIT_SIMD_SHIFT
+#undef SIMD_SHIFT_OPCODES
+
+#define VISIT_SIMD_NARROW_SHIFT(Opcode)                                       \
+  void InstructionSelector::Visit##Opcode(Node* node) {                       \
+    X64OperandGenerator g(this);                                              \
+    InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
+    if (g.CanBeImmediate(node->InputAt(1))) {                                 \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                           \
+           g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), \
+           arraysize(temps), temps);                                          \
+    } else {                                                                  \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                           \
+           g.UseUniqueRegister(node->InputAt(0)),                             \
+           g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);   \
+    }                                                                         \
+  }
+SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT)
+#undef VISIT_SIMD_NARROW_SHIFT
+#undef SIMD_NARROW_SHIFT_OPCODES
+
+#define VISIT_SIMD_UNOP(Opcode)                         \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    X64OperandGenerator g(this);                        \
+    Emit(kX64##Opcode, g.DefineAsRegister(node),        \
+         g.UseRegister(node->InputAt(0)));              \
+  }
+SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
+#undef VISIT_SIMD_UNOP
+#undef SIMD_UNOP_LIST
+
+#define VISIT_SIMD_BINOP(Opcode)                                            \
+  void InstructionSelector::Visit##Opcode(Node* node) {                     \
+    X64OperandGenerator g(this);                                            \
+    Emit(kX64##Opcode, g.DefineSameAsFirst(node),                           \
+         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
+  }
+SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
+#undef VISIT_SIMD_BINOP
+#undef SIMD_BINOP_LIST
+
+#define VISIT_SIMD_BINOP(Opcode)                                              \
+  void InstructionSelector::Visit##Opcode(Node* node) {                       \
+    X64OperandGenerator g(this);                                              \
+    if (IsSupported(AVX)) {                                                   \
+      Emit(kX64##Opcode, g.DefineAsRegister(node),                            \
+           g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
+    } else {                                                                  \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                           \
+           g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); \
+    }                                                                         \
+  }
+SIMD_BINOP_SSE_AVX_LIST(VISIT_SIMD_BINOP)
+#undef VISIT_SIMD_BINOP
+#undef SIMD_BINOP_SSE_AVX_LIST
+
+#define VISIT_SIMD_BINOP_ONE_TEMP(Opcode)                                  \
+  void InstructionSelector::Visit##Opcode(Node* node) {                    \
+    X64OperandGenerator g(this);                                           \
+    InstructionOperand temps[] = {g.TempSimd128Register()};                \
+    Emit(kX64##Opcode, g.DefineSameAsFirst(node),                          \
+         g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+         arraysize(temps), temps);                                         \
+  }
+SIMD_BINOP_ONE_TEMP_LIST(VISIT_SIMD_BINOP_ONE_TEMP)
+#undef VISIT_SIMD_BINOP_ONE_TEMP
+#undef SIMD_BINOP_ONE_TEMP_LIST
+
+#define VISIT_SIMD_ANYTRUE(Opcode)                      \
+  void InstructionSelector::Visit##Opcode(Node* node) { \
+    X64OperandGenerator g(this);                        \
+    Emit(kX64##Opcode, g.DefineAsRegister(node),        \
+         g.UseUniqueRegister(node->InputAt(0)));        \
+  }
+SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE)
+#undef VISIT_SIMD_ANYTRUE
+#undef SIMD_ANYTRUE_LIST
+
+#define VISIT_SIMD_ALLTRUE(Opcode)                                        \
+  void InstructionSelector::Visit##Opcode(Node* node) {                   \
+    X64OperandGenerator g(this);                                          \
+    InstructionOperand temps[] = {g.TempSimd128Register()};               \
+    Emit(kX64##Opcode, g.DefineAsRegister(node),                          \
+         g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
+  }
+SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
+#undef VISIT_SIMD_ALLTRUE
+#undef SIMD_ALLTRUE_LIST
+
+void InstructionSelector::VisitS128Select(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64S128Select, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+       g.UseRegister(node->InputAt(2)));
+}
+
+namespace {
+void VisitSignSelect(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  X64OperandGenerator g(selector);
+  // signselect(x, y, -1) = x
+  // pblendvb(dst, x, y, -1) = dst <- y, so we need to swap x and y.
+  if (selector->IsSupported(AVX)) {
+    selector->Emit(
+        opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(2)));
+  } else {
+    selector->Emit(
+        opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(1)),
+        g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(2), xmm0));
+  }
+}
+}  // namespace
+
+void InstructionSelector::VisitI8x16SignSelect(Node* node) {
+  VisitSignSelect(this, node, kX64I8x16SignSelect);
+}
+
+void InstructionSelector::VisitI16x8SignSelect(Node* node) {
+  VisitSignSelect(this, node, kX64I16x8SignSelect);
+}
+
+void InstructionSelector::VisitI32x4SignSelect(Node* node) {
+  VisitSignSelect(this, node, kX64I32x4SignSelect);
+}
+
+void InstructionSelector::VisitI64x2SignSelect(Node* node) {
+  VisitSignSelect(this, node, kX64I64x2SignSelect);
+}
+
+void InstructionSelector::VisitS128AndNot(Node* node) {
+  X64OperandGenerator g(this);
+  // andnps a b computes ~a & b, but we want a & ~b, so swap the inputs.
+  Emit(kX64S128AndNot, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(0)));
+}
+
+void InstructionSelector::VisitF64x2Abs(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempDoubleRegister()};
+  Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitF64x2Neg(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempDoubleRegister()};
+  Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
+  X64OperandGenerator g(this);
+  Emit(kX64F32x4UConvertI32x4, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)));
+}
+
+#define VISIT_SIMD_QFMOP(Opcode)                                             \
+  void InstructionSelector::Visit##Opcode(Node* node) {                      \
+    X64OperandGenerator g(this);                                             \
+    if (CpuFeatures::IsSupported(FMA3)) {                                    \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                          \
+           g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), \
+           g.UseRegister(node->InputAt(2)));                                 \
+    } else {                                                                 \
+      InstructionOperand temps[] = {g.TempSimd128Register()};                \
+      Emit(kX64##Opcode, g.DefineSameAsFirst(node),                          \
+           g.UseUniqueRegister(node->InputAt(0)),                            \
+           g.UseUniqueRegister(node->InputAt(1)),                            \
+           g.UseRegister(node->InputAt(2)), arraysize(temps), temps);        \
+    }                                                                        \
+  }
+VISIT_SIMD_QFMOP(F64x2Qfma)
+VISIT_SIMD_QFMOP(F64x2Qfms)
+VISIT_SIMD_QFMOP(F32x4Qfma)
+VISIT_SIMD_QFMOP(F32x4Qfms)
+#undef VISIT_SIMD_QFMOP
+
+void InstructionSelector::VisitI64x2ShrS(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempRegister()};
+  // Fix the shift operand to rcx so that sarq_cl can be used in codegen.
+  Emit(kX64I64x2ShrS, g.DefineSameAsFirst(node),
+       g.UseUniqueRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), rcx),
+       arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI64x2Mul(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  Emit(kX64I64x2Mul, g.DefineSameAsFirst(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kX64I32x4SConvertF32x4, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register(),
+                                g.TempSimd128Register()};
+  Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI16x8BitMask(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kX64I16x8BitMask, g.DefineAsRegister(node),
+       g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI8x16Mul(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kX64I8x16Mul, g.DefineSameAsFirst(node),
+       g.UseUniqueRegister(node->InputAt(0)),
+       g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+}
+
+void InstructionSelector::VisitI8x16ShrS(Node* node) {
+  X64OperandGenerator g(this);
+  if (g.CanBeImmediate(node->InputAt(1))) {
+    Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
+         g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)));
+  } else {
+    InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
+    Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
+         g.UseUniqueRegister(node->InputAt(0)),
+         g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
+  }
+}
+
+void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
+  UNREACHABLE();
+}
+
+namespace {
+
+// Returns true if shuffle can be decomposed into two 16x4 half shuffles
+// followed by a 16x8 blend.
+// E.g. [3 2 1 0 15 14 13 12].
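+// For that example each lane index stays in the same half as its position
+// (index & 0x4 == i & 0x4), and the computed blend mask is 0xF0: lanes 4-7
+// come from the second input (indices > 7).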
+bool TryMatch16x8HalfShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
+  *blend_mask = 0;
+  for (int i = 0; i < 8; i++) {
+    if ((shuffle16x8[i] & 0x4) != (i & 0x4)) return false;
+    *blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
+  }
+  return true;
+}
+
+struct ShuffleEntry {
+  uint8_t shuffle[kSimd128Size];
+  ArchOpcode opcode;
+  bool src0_needs_reg;
+  bool src1_needs_reg;
+};
+
+// Shuffles that map to architecture-specific instruction sequences. These are
+// matched very early, so we shouldn't include shuffles that match better in
+// later tests, like 32x4 and 16x8 shuffles. In general, these patterns should
+// map to either a single instruction, or be finer grained, such as zip/unzip or
+// transpose patterns.
+static const ShuffleEntry arch_shuffles[] = {
+    {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
+     kX64S64x2UnpackLow,
+     true,
+     true},
+    {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
+     kX64S64x2UnpackHigh,
+     true,
+     true},
+    {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+     kX64S32x4UnpackLow,
+     true,
+     true},
+    {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+     kX64S32x4UnpackHigh,
+     true,
+     true},
+    {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+     kX64S16x8UnpackLow,
+     true,
+     true},
+    {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+     kX64S16x8UnpackHigh,
+     true,
+     true},
+    {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
+     kX64S8x16UnpackLow,
+     true,
+     true},
+    {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
+     kX64S8x16UnpackHigh,
+     true,
+     true},
+
+    {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+     kX64S16x8UnzipLow,
+     true,
+     true},
+    {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+     kX64S16x8UnzipHigh,
+     true,
+     true},
+    {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
+     kX64S8x16UnzipLow,
+     true,
+     true},
+    {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
+     kX64S8x16UnzipHigh,
+     true,
+     true},
+    {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
+     kX64S8x16TransposeLow,
+     true,
+     true},
+    {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
+     kX64S8x16TransposeHigh,
+     true,
+     true},
+    {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
+     kX64S8x8Reverse,
+     true,
+     true},
+    {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
+     kX64S8x4Reverse,
+     true,
+     true},
+    {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
+     kX64S8x2Reverse,
+     true,
+     true}};
+
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+                         size_t num_entries, bool is_swizzle,
+                         const ShuffleEntry** arch_shuffle) {
+  uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
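+  // For swizzles both inputs are the same node, so a lane index and the one
+  // 16 higher are interchangeable; masking with 15 (31 for a genuine
+  // two-input shuffle) makes the comparison below ignore that distinction.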
+  for (size_t i = 0; i < num_entries; ++i) {
+    const ShuffleEntry& entry = table[i];
+    int j = 0;
+    for (; j < kSimd128Size; ++j) {
+      if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
+        break;
+      }
+    }
+    if (j == kSimd128Size) {
+      *arch_shuffle = &entry;
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace
+
+void InstructionSelector::VisitI8x16Shuffle(Node* node) {
+  uint8_t shuffle[kSimd128Size];
+  bool is_swizzle;
+  CanonicalizeShuffle(node, shuffle, &is_swizzle);
+
+  int imm_count = 0;
+  static const int kMaxImms = 6;
+  uint32_t imms[kMaxImms];
+  int temp_count = 0;
+  static const int kMaxTemps = 2;
+  InstructionOperand temps[kMaxTemps];
+
+  X64OperandGenerator g(this);
+  // Swizzles don't generally need DefineSameAsFirst to avoid a move.
+  bool no_same_as_first = is_swizzle;
+  // We generally need UseRegister for input0, Use for input1.
+  // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet, so
+  // we retain this logic (the shuffle match clauses below keep setting these
+  // flags) but ignore it when selecting registers or slots.
+  bool src0_needs_reg = true;
+  bool src1_needs_reg = false;
+  ArchOpcode opcode = kX64I8x16Shuffle;  // general shuffle is the default
+
+  uint8_t offset;
+  uint8_t shuffle32x4[4];
+  uint8_t shuffle16x8[8];
+  int index;
+  const ShuffleEntry* arch_shuffle;
+  if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
+    // Swap inputs from the normal order for (v)palignr.
+    SwapShuffleInputs(node);
+    is_swizzle = false;        // It's simpler to just handle the general case.
+    no_same_as_first = false;  // SSE requires same-as-first.
+    // TODO(v8:9608): also see v8:9083
+    src1_needs_reg = true;
+    opcode = kX64S8x16Alignr;
+    // palignr takes a single imm8 offset.
+    imms[imm_count++] = offset;
+  } else if (TryMatchArchShuffle(shuffle, arch_shuffles,
+                                 arraysize(arch_shuffles), is_swizzle,
+                                 &arch_shuffle)) {
+    opcode = arch_shuffle->opcode;
+    src0_needs_reg = arch_shuffle->src0_needs_reg;
+    // SSE can't take advantage of both operands being in registers and needs
+    // same-as-first.
+    src1_needs_reg = arch_shuffle->src1_needs_reg;
+    no_same_as_first = false;
+  } else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+    uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
+    if (is_swizzle) {
+      if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
+        // Bypass normal shuffle code generation in this case.
+        EmitIdentity(node);
+        return;
+      } else {
+        // pshufd takes a single imm8 shuffle mask.
+        opcode = kX64S32x4Swizzle;
+        no_same_as_first = true;
+        // TODO(v8:9083): This doesn't strictly require a register, but we
+        // force swizzles to always use one until generation of incorrect
+        // memory operands can be fixed.
+        src0_needs_reg = true;
+        imms[imm_count++] = shuffle_mask;
+      }
+    } else {
+      // Two-operand shuffle.
+      // A blend is more efficient than a general 32x4 shuffle; try it first.
+      if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
+        opcode = kX64S16x8Blend;
+        uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
+        imms[imm_count++] = blend_mask;
+      } else {
+        opcode = kX64S32x4Shuffle;
+        no_same_as_first = true;
+        // TODO(v8:9083): src0 and src1 are used by pshufd in codegen, which
+        // requires memory operands to be 16-byte aligned. Since we cannot
+        // guarantee that yet, force both into registers here.
+        src0_needs_reg = true;
+        src1_needs_reg = true;
+        imms[imm_count++] = shuffle_mask;
+        uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
+        imms[imm_count++] = blend_mask;
+      }
+    }
+  } else if (wasm::SimdShuffle::TryMatch16x8Shuffle(shuffle, shuffle16x8)) {
+    uint8_t blend_mask;
+    if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
+      opcode = kX64S16x8Blend;
+      blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
+      imms[imm_count++] = blend_mask;
+    } else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
+      opcode = kX64S16x8Dup;
+      src0_needs_reg = false;
+      imms[imm_count++] = index;
+    } else if (TryMatch16x8HalfShuffle(shuffle16x8, &blend_mask)) {
+      opcode = is_swizzle ? kX64S16x8HalfShuffle1 : kX64S16x8HalfShuffle2;
+      // Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
+      no_same_as_first = true;
+      src0_needs_reg = false;
+      uint8_t mask_lo = wasm::SimdShuffle::PackShuffle4(shuffle16x8);
+      uint8_t mask_hi = wasm::SimdShuffle::PackShuffle4(shuffle16x8 + 4);
+      imms[imm_count++] = mask_lo;
+      imms[imm_count++] = mask_hi;
+      if (!is_swizzle) imms[imm_count++] = blend_mask;
+    }
+  } else if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
+    opcode = kX64S8x16Dup;
+    no_same_as_first = false;
+    src0_needs_reg = true;
+    imms[imm_count++] = index;
+  }
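+  // General case: the full 16-lane pattern is handed to the code generator as
+  // four 32-bit immediates (Pack4Lanes appears to pack four byte-sized lane
+  // indices into each), plus a Simd128 temp, presumably for the shuffle mask
+  // it builds from them.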
+  if (opcode == kX64I8x16Shuffle) {
+    // Use same-as-first for general swizzle, but not shuffle.
+    no_same_as_first = !is_swizzle;
+    src0_needs_reg = !no_same_as_first;
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 4);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 8);
+    imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 12);
+    temps[temp_count++] = g.TempSimd128Register();
+  }
+
+  // Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
+  // move instruction in the CodeGenerator.
+  Node* input0 = node->InputAt(0);
+  InstructionOperand dst =
+      no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
+  // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
+  InstructionOperand src0 = g.UseUniqueRegister(input0);
+  USE(src0_needs_reg);
+
+  int input_count = 0;
+  InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
+  inputs[input_count++] = src0;
+  if (!is_swizzle) {
+    Node* input1 = node->InputAt(1);
+    // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
+    inputs[input_count++] = g.UseUniqueRegister(input1);
+    USE(src1_needs_reg);
+  }
+  for (int i = 0; i < imm_count; ++i) {
+    inputs[input_count++] = g.UseImmediate(imms[i]);
+  }
+  Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
+}
+
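+// Note: pshufb zeroes a destination lane when bit 7 of the corresponding index
+// byte is set, so out-of-range swizzle indices (>= 16) have to be folded into
+// that form; the Simd128 temp below presumably holds the adjusted index
+// vector, with the unique-register constraint keeping the original index input
+// intact.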
+void InstructionSelector::VisitI8x16Swizzle(Node* node) {
+  X64OperandGenerator g(this);
+  InstructionOperand temps[] = {g.TempSimd128Register()};
+  Emit(kX64I8x16Swizzle, g.DefineSameAsFirst(node),
+       g.UseRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(1)),
+       arraysize(temps), temps);
+}
+
+namespace {
+void VisitPminOrPmax(InstructionSelector* selector, Node* node,
+                     ArchOpcode opcode) {
+  // Due to the way minps/minpd work, we want the dst to be the same as the
+  // second input: b = pmin(a, b) maps directly to minps b, a.
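+  // (minps b, a evaluates to (b < a) ? b : a, with the second source a
+  // returned for NaNs and equal inputs, which matches the Wasm pmin semantics
+  // b < a ? b : a once the operands are swapped.)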
+  X64OperandGenerator g(selector);
+  selector->Emit(opcode, g.DefineSameAsFirst(node),
+                 g.UseRegister(node->InputAt(1)),
+                 g.UseRegister(node->InputAt(0)));
+}
+}  // namespace
+
+void InstructionSelector::VisitF32x4Pmin(Node* node) {
+  VisitPminOrPmax(this, node, kX64F32x4Pmin);
+}
+
+void InstructionSelector::VisitF32x4Pmax(Node* node) {
+  VisitPminOrPmax(this, node, kX64F32x4Pmax);
+}
+
+void InstructionSelector::VisitF64x2Pmin(Node* node) {
+  VisitPminOrPmax(this, node, kX64F64x2Pmin);
+}
+
+void InstructionSelector::VisitF64x2Pmax(Node* node) {
+  VisitPminOrPmax(this, node, kX64F64x2Pmax);
+}
+
+// static
+MachineOperatorBuilder::Flags
+InstructionSelector::SupportedMachineOperatorFlags() {
+  MachineOperatorBuilder::Flags flags =
+      MachineOperatorBuilder::kWord32ShiftIsSafe |
+      MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz |
+      MachineOperatorBuilder::kWord32Rol | MachineOperatorBuilder::kWord64Rol;
+  if (CpuFeatures::IsSupported(POPCNT)) {
+    flags |= MachineOperatorBuilder::kWord32Popcnt |
+             MachineOperatorBuilder::kWord64Popcnt;
+  }
+  if (CpuFeatures::IsSupported(SSE4_1)) {
+    flags |= MachineOperatorBuilder::kFloat32RoundDown |
+             MachineOperatorBuilder::kFloat64RoundDown |
+             MachineOperatorBuilder::kFloat32RoundUp |
+             MachineOperatorBuilder::kFloat64RoundUp |
+             MachineOperatorBuilder::kFloat32RoundTruncate |
+             MachineOperatorBuilder::kFloat64RoundTruncate |
+             MachineOperatorBuilder::kFloat32RoundTiesEven |
+             MachineOperatorBuilder::kFloat64RoundTiesEven;
+  }
+  return flags;
+}
+
+// static
+MachineOperatorBuilder::AlignmentRequirements
+InstructionSelector::AlignmentRequirements() {
+  return MachineOperatorBuilder::AlignmentRequirements::
+      FullUnalignedAccessSupport();
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/x64/unwinding-info-writer-x64.cc b/src/compiler/backend/x64/unwinding-info-writer-x64.cc
new file mode 100644
index 0000000..84619d6
--- /dev/null
+++ b/src/compiler/backend/x64/unwinding-info-writer-x64.cc
@@ -0,0 +1,96 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/backend/x64/unwinding-info-writer-x64.h"
+#include "src/compiler/backend/instruction.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+void UnwindingInfoWriter::BeginInstructionBlock(int pc_offset,
+                                                const InstructionBlock* block) {
+  if (!enabled()) return;
+
+  block_will_exit_ = false;
+
+  DCHECK_LT(block->rpo_number().ToInt(),
+            static_cast<int>(block_initial_states_.size()));
+  const BlockInitialState* initial_state =
+      block_initial_states_[block->rpo_number().ToInt()];
+  if (!initial_state) return;
+  if (initial_state->register_ != eh_frame_writer_.base_register() &&
+      initial_state->offset_ != eh_frame_writer_.base_offset()) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    eh_frame_writer_.SetBaseAddressRegisterAndOffset(initial_state->register_,
+                                                     initial_state->offset_);
+  } else if (initial_state->register_ != eh_frame_writer_.base_register()) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    eh_frame_writer_.SetBaseAddressRegister(initial_state->register_);
+  } else if (initial_state->offset_ != eh_frame_writer_.base_offset()) {
+    eh_frame_writer_.AdvanceLocation(pc_offset);
+    eh_frame_writer_.SetBaseAddressOffset(initial_state->offset_);
+  }
+
+  tracking_fp_ = initial_state->tracking_fp_;
+}
+
+void UnwindingInfoWriter::EndInstructionBlock(const InstructionBlock* block) {
+  if (!enabled() || block_will_exit_) return;
+
+  for (const RpoNumber& successor : block->successors()) {
+    int successor_index = successor.ToInt();
+    DCHECK_LT(successor_index, static_cast<int>(block_initial_states_.size()));
+    const BlockInitialState* existing_state =
+        block_initial_states_[successor_index];
+    // If we already had an entry for this block, check that the values match
+    // the ones we are trying to insert.
+    if (existing_state) {
+      DCHECK(existing_state->register_ == eh_frame_writer_.base_register());
+      DCHECK_EQ(existing_state->offset_, eh_frame_writer_.base_offset());
+      DCHECK_EQ(existing_state->tracking_fp_, tracking_fp_);
+    } else {
+      block_initial_states_[successor_index] = zone_->New<BlockInitialState>(
+          eh_frame_writer_.base_register(), eh_frame_writer_.base_offset(),
+          tracking_fp_);
+    }
+  }
+}
+
+void UnwindingInfoWriter::MarkFrameConstructed(int pc_base) {
+  if (!enabled()) return;
+
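+  // The pc offsets below assume the standard frame-construction encoding:
+  // push rbp is 1 byte and mov rbp, rsp is 3 bytes, so each AdvanceLocation
+  // lands just past the corresponding instruction.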
+  // push rbp
+  eh_frame_writer_.AdvanceLocation(pc_base + 1);
+  eh_frame_writer_.IncreaseBaseAddressOffset(kInt64Size);
+  // <base address> points at the bottom of the current frame on x64 and
+  // <base register> is rsp, which points to the top of the frame by definition.
+  // Thus, the distance between <base address> and the top is -<base offset>.
+  int top_of_stack = -eh_frame_writer_.base_offset();
+  eh_frame_writer_.RecordRegisterSavedToStack(rbp, top_of_stack);
+
+  // mov rbp, rsp
+  eh_frame_writer_.AdvanceLocation(pc_base + 4);
+  eh_frame_writer_.SetBaseAddressRegister(rbp);
+
+  tracking_fp_ = true;
+}
+
+void UnwindingInfoWriter::MarkFrameDeconstructed(int pc_base) {
+  if (!enabled()) return;
+
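+  // As in MarkFrameConstructed, the offsets assume the standard epilogue
+  // encoding: mov rsp, rbp is 3 bytes and pop rbp is 1 byte.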
+  // mov rsp, rbp
+  eh_frame_writer_.AdvanceLocation(pc_base + 3);
+  eh_frame_writer_.SetBaseAddressRegister(rsp);
+
+  // pop rbp
+  eh_frame_writer_.AdvanceLocation(pc_base + 4);
+  eh_frame_writer_.IncreaseBaseAddressOffset(-kInt64Size);
+
+  tracking_fp_ = false;
+}
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
diff --git a/src/compiler/backend/x64/unwinding-info-writer-x64.h b/src/compiler/backend/x64/unwinding-info-writer-x64.h
new file mode 100644
index 0000000..c85ad46
--- /dev/null
+++ b/src/compiler/backend/x64/unwinding-info-writer-x64.h
@@ -0,0 +1,80 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_BACKEND_X64_UNWINDING_INFO_WRITER_X64_H_
+#define V8_COMPILER_BACKEND_X64_UNWINDING_INFO_WRITER_X64_H_
+
+#include "src/diagnostics/eh-frame.h"
+#include "src/flags/flags.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+class InstructionBlock;
+
+class UnwindingInfoWriter {
+ public:
+  explicit UnwindingInfoWriter(Zone* zone)
+      : zone_(zone),
+        eh_frame_writer_(zone),
+        tracking_fp_(false),
+        block_will_exit_(false),
+        block_initial_states_(zone) {
+    if (enabled()) eh_frame_writer_.Initialize();
+  }
+
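+  // While the frame pointer is not being tracked the unwind base address is
+  // expressed relative to rsp, so stack-pointer adjustments have to be
+  // mirrored in the eh_frame base offset.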
+  void MaybeIncreaseBaseOffsetAt(int pc_offset, int base_delta) {
+    if (enabled() && !tracking_fp_) {
+      eh_frame_writer_.AdvanceLocation(pc_offset);
+      eh_frame_writer_.IncreaseBaseAddressOffset(base_delta);
+    }
+  }
+
+  void SetNumberOfInstructionBlocks(int number) {
+    if (enabled()) block_initial_states_.resize(number);
+  }
+
+  void BeginInstructionBlock(int pc_offset, const InstructionBlock* block);
+  void EndInstructionBlock(const InstructionBlock* block);
+
+  void MarkFrameConstructed(int pc_base);
+  void MarkFrameDeconstructed(int pc_base);
+
+  void MarkBlockWillExit() { block_will_exit_ = true; }
+
+  void Finish(int code_size) {
+    if (enabled()) eh_frame_writer_.Finish(code_size);
+  }
+
+  EhFrameWriter* eh_frame_writer() {
+    return enabled() ? &eh_frame_writer_ : nullptr;
+  }
+
+ private:
+  bool enabled() const { return FLAG_perf_prof_unwinding_info; }
+
+  class BlockInitialState : public ZoneObject {
+   public:
+    BlockInitialState(Register reg, int offset, bool tracking_fp)
+        : register_(reg), offset_(offset), tracking_fp_(tracking_fp) {}
+
+    Register register_;
+    int offset_;
+    bool tracking_fp_;
+  };
+
+  Zone* zone_;
+  EhFrameWriter eh_frame_writer_;
+  bool tracking_fp_;
+  bool block_will_exit_;
+
+  ZoneVector<const BlockInitialState*> block_initial_states_;
+};
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_COMPILER_BACKEND_X64_UNWINDING_INFO_WRITER_X64_H_