Record implicit null checks at the actual invoke time.

ImplicitNullChecks are recorded only for instructions directly (see NB
below) preceeded by NullChecks in the graph. This way we avoid recording
redundant safepoints and minimize the code size increase.

NB: ParallalelMoves might be inserted by the register allocator between
the NullChecks and their uses. These modify the environment and the
correct action would be to reverse their modification. This will be
addressed in a follow-up CL.

Change-Id: Ie50006e5a4bd22932dcf11348f5a655d253cd898
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 306845b..0109363 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -997,11 +997,12 @@
   }
 }
 
-void CodeGeneratorARM64::LoadAcquire(Primitive::Type type,
+void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                      CPURegister dst,
                                      const MemOperand& src) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp_base = temps.AcquireX();
+  Primitive::Type type = instruction->GetType();
 
   DCHECK(!src.IsRegisterOffset());
   DCHECK(!src.IsPreIndex());
@@ -1013,16 +1014,20 @@
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldarb(Register(dst), base);
+      MaybeRecordImplicitNullCheck(instruction);
       break;
     case Primitive::kPrimByte:
       __ Ldarb(Register(dst), base);
+      MaybeRecordImplicitNullCheck(instruction);
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimChar:
       __ Ldarh(Register(dst), base);
+      MaybeRecordImplicitNullCheck(instruction);
       break;
     case Primitive::kPrimShort:
       __ Ldarh(Register(dst), base);
+      MaybeRecordImplicitNullCheck(instruction);
       __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
       break;
     case Primitive::kPrimInt:
@@ -1030,6 +1035,7 @@
     case Primitive::kPrimLong:
       DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
       __ Ldar(Register(dst), base);
+      MaybeRecordImplicitNullCheck(instruction);
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
@@ -1038,6 +1044,7 @@
 
       Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
       __ Ldar(temp, base);
+      MaybeRecordImplicitNullCheck(instruction);
       __ Fmov(FPRegister(dst), temp);
       break;
     }
@@ -1399,6 +1406,7 @@
   }
 
   codegen_->Load(type, OutputCPURegister(instruction), source);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
@@ -1410,6 +1418,7 @@
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
   __ Ldr(OutputRegister(instruction),
          HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
@@ -1454,6 +1463,7 @@
     }
 
     codegen_->Store(value_type, value, destination);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -1816,14 +1826,17 @@
 
   if (instruction->IsVolatile()) {
     if (kUseAcquireRelease) {
-      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
       codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       // For IRIW sequential consistency kLoadAny is not sufficient.
       GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
   } else {
     codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -1843,13 +1856,16 @@
   if (instruction->IsVolatile()) {
     if (kUseAcquireRelease) {
       codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
       codegen_->Store(field_type, value, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
   } else {
     codegen_->Store(field_type, value, HeapOperand(obj, offset));
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
@@ -1953,6 +1969,7 @@
   } else {
     __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
   }
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, HeapOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -2018,6 +2035,7 @@
     DCHECK(receiver.IsRegister());
     __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
   }
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, HeapOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -2293,6 +2311,9 @@
 }
 
 void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+    return;
+  }
   Location obj = instruction->GetLocations()->InAt(0);
 
   __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
@@ -2305,13 +2326,8 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  if (obj.IsRegister()) {
-    __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
-  } else {
-    DCHECK(obj.IsConstant()) << obj;
-    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
-    __ B(slow_path->GetEntryLabel());
-  }
+
+  __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
 }
 
 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
@@ -2512,7 +2528,8 @@
 
   if (instruction->IsVolatile()) {
     if (kUseAcquireRelease) {
-      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
       codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
       // For IRIW sequential consistency kLoadAny is not sufficient.