Avoid read barrier for image HLoadClass

Concurrent copying baker:

X86_64 core-optimizing-pic.oat: 28583112 -> 27906824 (2.4% smaller)

Around 0.4% of 2.4% is from re-enabling kBootImageLinkTimeAddress,
kBootImageLinkTimePcRelative, and kBootImageAddress.

N6P boot.oat 32: 73042140 -> 71891956 (1.57% smaller)
N6P boot.oat 64: 83831608 -> 82531456 (1.55% smaller)

EAAC: 1252 -> 1245 (32 samples)

Bug: 29516974

Test: test-art-host CC baker, N6P booting

Change-Id: I9a196cf0157058836981c43c93872e9f0c4919aa
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5230269..a21a09e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5408,17 +5408,6 @@
 
 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    switch (desired_class_load_kind) {
-      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      case HLoadClass::LoadKind::kBootImageAddress:
-        // TODO: Implement for read barrier.
-        return HLoadClass::LoadKind::kDexCacheViaMethod;
-      default:
-        break;
-    }
-  }
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
       break;
@@ -5454,11 +5443,12 @@
     return;
   }
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
-  if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) {
+  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet());  // No caller-save registers.
   }
 
@@ -5482,6 +5472,7 @@
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5490,16 +5481,20 @@
       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
       GenerateGcRootFieldLoad(
-          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+          cls,
+          out_loc,
+          Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
+          /*fixup_label*/nullptr,
+          requires_read_barrier);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(!requires_read_barrier);
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
       codegen_->RecordTypePatch(cls);
       break;
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(!requires_read_barrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ movl(out, Immediate(address));  // Zero-extended.
@@ -5511,11 +5506,19 @@
       // /* GcRoot<mirror::Class> */ out = *address
       if (IsUint<32>(cls->GetAddress())) {
         Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
-        GenerateGcRootFieldLoad(cls, out_loc, address);
+        GenerateGcRootFieldLoad(cls,
+                                out_loc,
+                                address,
+                                /*fixup_label*/nullptr,
+                                requires_read_barrier);
       } else {
         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
         __ movq(out, Immediate(cls->GetAddress()));
-        GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0));
+        GenerateGcRootFieldLoad(cls,
+                                out_loc,
+                                Address(out, 0),
+                                /*fixup_label*/nullptr,
+                                requires_read_barrier);
       }
       generate_null_check = !cls->IsInDexCache();
       break;
@@ -5526,7 +5529,7 @@
       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                           /* no_rip */ false);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label);
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, requires_read_barrier);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5539,7 +5542,11 @@
                       ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
       // /* GcRoot<mirror::Class> */ out = out[type_index]
       GenerateGcRootFieldLoad(
-          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+          cls,
+          out_loc,
+          Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
+          /*fixup_label*/nullptr,
+          requires_read_barrier);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -6387,9 +6394,11 @@
 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                              Location root,
                                                              const Address& address,
-                                                             Label* fixup_label) {
+                                                             Label* fixup_label,
+                                                             bool requires_read_barrier) {
   CpuRegister root_reg = root.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (requires_read_barrier) {
+    DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used: