Opt Compiler: Arm64: Add support for more IRs plus various fixes.

Add support for more IRs and update others:
- Implement HShl, HShr and HUShr (via HandleShift) and HRem for int and long.
- Extend HCompare and HTypeConversion to floating-point types.
- Generate suspend checks at loop back edges and on method entry.
- Add a stack overflow check to the frame entry (implicit probe by default,
  explicit comparison behind kExplicitStackOverflowCheck).
- Handle constant and non-materialized conditions in HIf.
- Use HeapOperand for heap references; fix register widths in MarkGCCard
  and VisitInvokeVirtual.

Change-Id: Iae1bef01dc3c0d238a46fbd2800e71c38288b1d2
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 8d43a5d..943f71f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -38,7 +38,7 @@
 
 namespace arm64 {
 
-// TODO: clean-up some of the constant definitions.
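+// When true, GenerateFrameEntry emits an explicit comparison of sp against
+// Thread::stack_end_ instead of the implicit probe load.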
+static constexpr bool kExplicitStackOverflowCheck = false;
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
 
@@ -167,7 +167,7 @@
   return MemOperand(sp, location.GetStackIndex());
 }
 
-MemOperand HeapOperand(const Register& base, size_t offset) {
+MemOperand HeapOperand(const Register& base, size_t offset = 0) {
   // A heap reference must be 32bit, so fit in a W register.
   DCHECK(base.IsW());
   return MemOperand(base.X(), offset);
@@ -393,6 +393,20 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
 };
 
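+// Slow path for the explicit stack overflow check in GenerateFrameEntry; it
+// calls the runtime to throw a StackOverflowError.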
+class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  StackOverflowCheckSlowPathARM64() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64);
+};
+
 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction,
@@ -418,7 +432,6 @@
     return &return_label_;
   }
 
-
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -437,7 +450,7 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
     __ Brk(__LINE__);  // TODO: Unimplemented TypeCheckSlowPathARM64.
-    __ b(GetExitLabel());
+    __ B(GetExitLabel());
   }
 
  private:
@@ -486,12 +499,23 @@
 }
 
 void CodeGeneratorARM64::GenerateFrameEntry() {
-  // TODO: Add proper support for the stack overflow check.
-  UseScratchRegisterScope temps(GetVIXLAssembler());
-  Register temp = temps.AcquireX();
-  __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
-  __ Ldr(temp, MemOperand(temp, 0));
-  RecordPcInfo(nullptr, 0);
+  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
+  if (do_overflow_check) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    Register temp = temps.AcquireX();
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64();
+      AddSlowPath(slow_path);
+
+      __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value()));
+      __ Cmp(sp, temp);
+      __ B(lo, slow_path->GetEntryLabel());
+    } else {
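+      // Implicit check: probe the stack by loading from below sp; a load that
+      // hits the protected region faults, and the fault handler throws.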
+      __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+      __ Ldr(wzr, MemOperand(temp, 0));
+      RecordPcInfo(nullptr, 0);
+    }
+  }
 
   CPURegList preserved_regs = GetFramePreservedRegisters();
   int frame_size = GetFrameSize();
@@ -594,12 +618,12 @@
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
-  Register temp = temps.AcquireX();
+  Register temp = temps.AcquireW();   // Index into the card table, 32-bit.
   vixl::Label done;
   __ Cbz(value, &done);
   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
-  __ Strb(card, MemOperand(card, temp));
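+  // The card base register doubles as the value stored: its low byte is the
+  // dirty card marker.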
+  __ Strb(card, MemOperand(card, temp.X()));
   __ Bind(&done);
 }
 
@@ -607,7 +631,7 @@
   // Block reserved registers:
   //   ip0 (VIXL temporary)
   //   ip1 (VIXL temporary)
-  //   xSuspend (Suspend counter)
+  //   tr (ART thread register)
   //   lr
   // sp is not part of the allocatable registers, so we don't need to block it.
   // TODO: Avoid blocking callee-saved registers, and instead preserve them
@@ -778,12 +802,14 @@
                                        uint32_t dex_pc) {
   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   __ Blr(lr);
-  RecordPcInfo(instruction, dex_pc);
-  DCHECK(instruction->IsSuspendCheck()
-      || instruction->IsBoundsCheck()
-      || instruction->IsNullCheck()
-      || instruction->IsDivZeroCheck()
-      || !IsLeafMethod());
+  if (instruction != nullptr) {
+    RecordPcInfo(instruction, dex_pc);
+    DCHECK(instruction->IsSuspendCheck()
+        || instruction->IsBoundsCheck()
+        || instruction->IsNullCheck()
+        || instruction->IsDivZeroCheck()
+        || !IsLeafMethod());
+  }
 }
 
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -793,12 +819,30 @@
   __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
   __ Cmp(temp, mirror::Class::kStatusInitialized);
   __ B(lt, slow_path->GetEntryLabel());
-  // Even if the initialized flag is set, we may be in a situation where caches are not synced
-  // properly. Therefore, we do a memory fence.
-  __ Dmb(InnerShareable, BarrierAll);
+  // Even if the initialized flag is set, a memory barrier is required so that
+  // loads after this check observe the stores made during initialization.
+  __ Dmb(InnerShareable, BarrierReads);
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                         HBasicBlock* successor) {
+  SuspendCheckSlowPathARM64* slow_path =
+      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+  codegen_->AddSlowPath(slow_path);
+  UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+  Register temp = temps.AcquireW();
+
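+  // Load the 16-bit thread flags; any non-zero value requests a suspend.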
+  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
+  if (successor == nullptr) {
+    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ B(slow_path->GetEntryLabel());
+    // slow_path will return to GetLabelOf(successor).
+  }
+}
+
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
       : HGraphVisitor(graph),
@@ -807,10 +851,6 @@
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   M(ParallelMove)                                          \
-  M(Rem)                                                   \
-  M(Shl)                                                   \
-  M(Shr)                                                   \
-  M(UShr)                                                  \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
@@ -903,6 +943,63 @@
   }
 }
 
+void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  Primitive::Type type = instr->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  Primitive::Type type = instr->GetType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      Register dst = OutputRegister(instr);
+      Register lhs = InputRegisterAt(instr, 0);
+      Operand rhs = InputOperandAt(instr, 1);
+      if (rhs.IsImmediate()) {
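+        // Constant shifts are masked to the type width, per Java semantics.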
+        uint32_t shift_value = (type == Primitive::kPrimInt)
+          ? static_cast<uint32_t>(rhs.immediate() & kMaxIntShiftValue)
+          : static_cast<uint32_t>(rhs.immediate() & kMaxLongShiftValue);
+        if (instr->IsShl()) {
+          __ Lsl(dst, lhs, shift_value);
+        } else if (instr->IsShr()) {
+          __ Asr(dst, lhs, shift_value);
+        } else {
+          __ Lsr(dst, lhs, shift_value);
+        }
+      } else {
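+        // Register shifts: the A64 shift instructions take the count modulo
+        // the register size, which matches Java semantics.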
+        Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+
+        if (instr->IsShl()) {
+          __ Lsl(dst, lhs, rhs_reg);
+        } else if (instr->IsShr()) {
+          __ Asr(dst, lhs, rhs_reg);
+        } else {
+          __ Lsr(dst, lhs, rhs_reg);
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift operation type " << type;
+  }
+}
+
 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -933,17 +1030,17 @@
   Register obj = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source(obj);
+  MemOperand source = HeapOperand(obj);
   UseScratchRegisterScope temps(GetVIXLAssembler());
 
   if (index.IsConstant()) {
     offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = MemOperand(obj, offset);
+    source = HeapOperand(obj, offset);
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
     __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(type)));
-    source = MemOperand(temp, offset);
+    source = HeapOperand(temp, offset);
   }
 
   codegen_->Load(type, OutputCPURegister(instruction), source);
@@ -988,17 +1085,17 @@
     CPURegister value = InputCPURegisterAt(instruction, 2);
     Location index = locations->InAt(1);
     size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
-    MemOperand destination(obj);
+    MemOperand destination = HeapOperand(obj);
     UseScratchRegisterScope temps(GetVIXLAssembler());
 
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
-      destination = MemOperand(obj, offset);
+      destination = HeapOperand(obj, offset);
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       Register index_reg = InputRegisterAt(instruction, 1);
       __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
-      destination = MemOperand(temp, offset);
+      destination = HeapOperand(temp, offset);
     }
 
     codegen_->Store(value_type, value, destination);
@@ -1065,29 +1162,59 @@
   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
 }
 
-void LocationsBuilderARM64::VisitCompare(HCompare* instruction) {
+void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorARM64::VisitCompare(HCompare* instruction) {
-  Primitive::Type in_type = instruction->InputAt(0)->GetType();
-
-  DCHECK_EQ(in_type, Primitive::kPrimLong);
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
     case Primitive::kPrimLong: {
-      vixl::Label done;
-      Register result = OutputRegister(instruction);
-      Register left = InputRegisterAt(instruction, 0);
-      Operand right = InputOperandAt(instruction, 1);
-      __ Subs(result.X(), left, right);
-      __ B(eq, &done);
-      __ Mov(result, 1);
-      __ Cneg(result, result, le);
-      __ Bind(&done);
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
+
+  //  0 if: left == right
+  //  1 if: left  > right
+  // -1 if: left  < right
+  switch (in_type) {
+    case Primitive::kPrimLong: {
+      Register result = OutputRegister(compare);
+      Register left = InputRegisterAt(compare, 0);
+      Operand right = InputOperandAt(compare, 1);
+
+      __ Cmp(left, right);
+      __ Cset(result, ne);
+      __ Cneg(result, result, lt);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      Register result = OutputRegister(compare);
+      FPRegister left = InputFPRegisterAt(compare, 0);
+      FPRegister right = InputFPRegisterAt(compare, 1);
+
+      __ Fcmp(left, right);
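+      // For NaN inputs the unordered flags make the result default to the
+      // bias: +1 with gt bias, -1 otherwise.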
+      if (compare->IsGtBias()) {
+        __ Cset(result, ne);
+      } else {
+        __ Csetm(result, ne);
+      }
+      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
       break;
     }
     default:
@@ -1116,7 +1243,7 @@
   Condition cond = ARM64Condition(instruction->GetCondition());
 
   __ Cmp(lhs, rhs);
-  __ Csel(res, vixl::Assembler::AppropriateZeroRegFor(res), Operand(1), InvertCondition(cond));
+  __ Cset(res, cond);
 }
 
 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
@@ -1241,8 +1368,20 @@
 
 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
   HBasicBlock* successor = got->GetSuccessor();
-  // TODO: Support for suspend checks emission.
-  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+  DCHECK(!successor->IsExitBlock());
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+  HLoopInformation* info = block->GetLoopInformation();
+
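+  // At a back edge the suspend check is emitted here, after clearing the loop
+  // phi spill slots from its stack map.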
+  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
   }
 }
@@ -1250,27 +1389,32 @@
 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
   HCondition* condition = cond->AsCondition();
   vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
   vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
 
-  // TODO: Support constant condition input in VisitIf.
-
-  if (condition->NeedsMaterialization()) {
+  if (cond->IsIntConstant()) {
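+    // Constant condition: branch unconditionally to the taken successor, or
+    // simply fall through.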
+    int32_t cond_value = cond->AsIntConstant()->GetValue();
+    if (cond_value == 1) {
+      if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
+        __ B(true_target);
+      }
+      return;
+    } else {
+      DCHECK_EQ(cond_value, 0);
+    }
+  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
     // The condition instruction has been materialized, compare the output to 0.
     Location cond_val = if_instr->GetLocations()->InAt(0);
     DCHECK(cond_val.IsRegister());
     __ Cbnz(InputRegisterAt(if_instr, 0), true_target);
-
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
@@ -1288,7 +1432,6 @@
       __ B(arm64_cond, true_target);
     }
   }
-
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
     __ B(false_target);
   }
@@ -1301,8 +1444,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
-  MemOperand field = MemOperand(InputRegisterAt(instruction, 0),
-                                instruction->GetFieldOffset().Uint32Value());
+  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
   codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
 }
 
@@ -1316,7 +1458,7 @@
   Primitive::Type field_type = instruction->GetFieldType();
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Register obj = InputRegisterAt(instruction, 0);
-  codegen_->Store(field_type, value, MemOperand(obj, instruction->GetFieldOffset().Uint32Value()));
+  codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
   if (field_type == Primitive::kPrimNot) {
     codegen_->MarkGCCard(obj, Register(value));
   }
@@ -1345,7 +1487,7 @@
   __ Cbz(obj, &done);
 
   // Compare the class of `obj` with `cls`.
-  __ Ldr(out, MemOperand(obj, mirror::Object::ClassOffset().Int32Value()));
+  __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset()));
   __ Cmp(out, cls);
   if (instruction->IsClassFinal()) {
     // Classes must be equal for the instanceof to succeed.
@@ -1452,14 +1594,12 @@
   // temp = method;
   codegen_->LoadCurrentMethod(temp);
   // temp = temp->dex_cache_resolved_methods_;
-  __ Ldr(temp, MemOperand(temp.X(),
-                          mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
   // temp = temp[index_in_cache];
-  __ Ldr(temp, MemOperand(temp.X(), index_in_cache));
+  __ Ldr(temp, HeapOperand(temp, index_in_cache));
   // lr = temp->entry_point_from_quick_compiled_code_;
-  __ Ldr(lr, MemOperand(temp.X(),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kArm64WordSize).SizeValue()));
+  __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                          kArm64WordSize)));
   // lr();
   __ Blr(lr);
 
@@ -1470,7 +1610,7 @@
 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
     invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   Offset class_offset = mirror::Object::ClassOffset();
@@ -1478,16 +1618,16 @@
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
-    __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex()));
-    __ Ldr(temp.W(), MemOperand(temp, class_offset.SizeValue()));
+    __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex()));
+    __ Ldr(temp, HeapOperand(temp, class_offset));
   } else {
     DCHECK(receiver.IsRegister());
-    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+    __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
   }
   // temp = temp->GetMethodAt(method_offset);
-  __ Ldr(temp.W(), MemOperand(temp, method_offset));
+  __ Ldr(temp, HeapOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
-  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
+  __ Ldr(lr, HeapOperand(temp, entry_point.SizeValue()));
   // lr();
   __ Blr(lr);
   DCHECK(!codegen_->IsLeafMethod());
@@ -1512,7 +1652,7 @@
     DCHECK(cls->CanCallRuntime());
     codegen_->LoadCurrentMethod(out);
     __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+    __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
 
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -1561,7 +1701,7 @@
   codegen_->LoadCurrentMethod(out);
   __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset()));
   __ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
   __ Cbz(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1803,6 +1943,43 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void LocationsBuilderARM64::VisitRem(HRem* rem) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register dividend = InputRegisterAt(rem, 0);
+      Register divisor = InputRegisterAt(rem, 1);
+      Register output = OutputRegister(rem);
+      Register temp = temps.AcquireSameSizeAs(output);
+
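+      // temp = dividend / divisor; output = dividend - temp * divisor.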
+      __ Sdiv(temp, dividend, divisor);
+      __ Msub(output, temp, divisor, dividend);
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   Primitive::Type return_type = instruction->InputAt(0)->GetType();
@@ -1825,6 +2002,22 @@
   __ Br(lr);
 }
 
+void LocationsBuilderARM64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARM64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
 void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
   Primitive::Type field_type = store->InputAt(1)->GetType();
@@ -1869,9 +2062,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
-  Register cls = InputRegisterAt(instruction, 0);
-  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), MemOperand(cls, offset));
+  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
 }
 
 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -1884,10 +2076,10 @@
 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Register cls = InputRegisterAt(instruction, 0);
-  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
 
-  codegen_->Store(field_type, value, MemOperand(cls, offset));
+  codegen_->Store(field_type, value, HeapOperand(cls, offset));
   if (field_type == Primitive::kPrimNot) {
     codegen_->MarkGCCard(cls, Register(value));
   }
@@ -1898,14 +2090,17 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
-  // TODO: Improve support for suspend checks.
-  SuspendCheckSlowPathARM64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, nullptr);
-  codegen_->AddSlowPath(slow_path);
-
-  __ Subs(wSuspend, wSuspend, 1);
-  __ B(slow_path->GetEntryLabel(), le);
-  __ Bind(slow_path->GetReturnLabel());
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
 }
 
 void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
@@ -1962,17 +2157,34 @@
     int result_size = Primitive::ComponentSize(result_type);
     int input_size = Primitive::ComponentSize(input_type);
     int min_size = kBitsPerByte * std::min(result_size, input_size);
+    Register output = OutputRegister(conversion);
+    Register source = InputRegisterAt(conversion, 0);
     if ((result_type == Primitive::kPrimChar) ||
         ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
     } else {
-      __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+      __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
     }
-    return;
+  } else if (IsFPType(result_type) && IsIntegralType(input_type)) {
+    CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong);
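+    // Signed integer to floating point.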
+    __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
+  } else if (IsIntegralType(result_type) && IsFPType(input_type)) {
+    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
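+    // Floating point to signed integer, rounding toward zero.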
+    __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
+  } else if (IsFPType(result_type) && IsFPType(input_type)) {
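+    // Conversion between float and double.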
+    __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
+  } else {
+    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+                << " to " << result_type;
   }
+}
 
-  LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
-             << " to " << result_type;
+void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
 }
 
 void LocationsBuilderARM64::VisitXor(HXor* instruction) {