Keep list of dex files for oat file in CompilerDriver.

Use this list to improve invoke-static/-direct dispatch for
intra-oat calls.

Also fix a latent ArmBaseRelativePatcher::ReserveSpaceEnd()
bug exposed by a buggy early version of this CL: when we
have unresolved patches at the end of all code, we need to
emit a final thunk. Though the OatWriter will try to patch
the unresolved call to a trampoline at the beginning of the
oat file, that trampoline may be too far and the relative
patcher doesn't know about it anyway, so it needs to assume
that a thunk is needed.

This reduces the overall size of the oat files present in the
dalvik cache on Nexus 9 after first boot by over 1MiB (AOSP
ToT, aosp_flounder-userdebug build).

Change-Id: I98604b70cb17377eed057c1c23971865cf344e43
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 8750aa8..fb116bb 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -375,6 +375,7 @@
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
+      dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
@@ -1371,8 +1372,7 @@
 }
 
 DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
-  // Currently only image dex caches have fixed array layout.
-  return IsImage() && GetSupportBootImageFixup()
+  return ContainsElement(GetDexFilesForOatFile(), dex_file)
       ? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
       : DexCacheArraysLayout();
 }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 485cdcf..4ed4dc6 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
+#include "utils/array_ref.h"
 #include "utils/dex_cache_arrays_layout.h"
 
 namespace art {
@@ -101,7 +102,20 @@
 
   ~CompilerDriver();
 
-  void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+  // Set dex files that will be stored in the oat file after being compiled.
+  void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
+    dex_files_for_oat_file_ = &dex_files;
+  }
+
+  // Get the dex files that will be stored in the oat file after being compiled.
+  ArrayRef<const DexFile* const> GetDexFilesForOatFile() const {
+    return (dex_files_for_oat_file_ != nullptr)
+        ? ArrayRef<const DexFile* const>(*dex_files_for_oat_file_)
+        : ArrayRef<const DexFile* const>();
+  }
+
+  void CompileAll(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
@@ -661,6 +675,9 @@
 
   bool support_boot_image_fixup_;
 
+  // List of dex files that will be stored in the oat file.
+  const std::vector<const DexFile*>* dex_files_for_oat_file_;
+
   CompiledMethodStorage compiled_method_storage_;
 
   friend class CompileClassVisitor;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e31a7a..21d582e 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,6 +76,7 @@
       for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
         dex_file->EnableWrite();
       }
+      compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
       t.NewTiming("WriteElf");
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index ac38f3d..13754fd 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -36,7 +36,8 @@
   // of code. To avoid any alignment discrepancies for the final chunk, we always align the
   // offset after reserving of writing any chunk.
   uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
-  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u),
+  bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
+                                                MethodReference(nullptr, 0u),
                                                 aligned_offset);
   if (needs_thunk) {
     thunk_locations_.push_back(aligned_offset);
@@ -94,7 +95,8 @@
   // We need the MethodReference for that.
   if (!unprocessed_patches_.empty() &&
       next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
-    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref,
+    bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset,
+                                                  method_ref,
                                                   next_aligned_offset);
     if (needs_thunk) {
       // A single thunk will cover all pending patches.
@@ -156,7 +158,10 @@
         // If still unresolved, check if we have a thunk within range.
         if (thunk_locations_.empty() ||
             patch_offset - thunk_locations_.back() > max_negative_displacement_) {
-          return next_aligned_offset - patch_offset > max_positive_displacement_;
+          // No thunk in range, we need a thunk if the next aligned offset
+          // is out of range, or if we're at the end of all code.
+          return (next_aligned_offset - patch_offset > max_positive_displacement_) ||
+              (quick_code_offset == next_aligned_offset);  // End of code.
         }
       } else {
         uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 5515313..a259cda 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -233,6 +233,36 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_method3 = 3u * 2u;  // After NOPs.
+  ArrayRef<const uint8_t> method3_code(method3_raw_code);
+  ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+  LinkerPatch method3_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
+  bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+                                            ArrayRef<const LinkerPatch>(),
+                                            method3_code,
+                                            ArrayRef<const LinkerPatch>(method3_patches),
+                                            just_over_max_negative_disp - bl_offset_in_method3);
+  ASSERT_FALSE(thunk_in_gap);  // There should be a thunk but it should be after method3.
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t method3_offset = GetMethodOffset(3u);
+  uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2);
+  uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */);
+  ASSERT_EQ(diff & 1u, 0u);
+  ASSERT_LT(diff >> 1, 1u << 8);  // Simple encoding, (diff >> 1) fits into 8 bits.
+  auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 3u * 2u;  // After NOPs.
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 2a426b5..0bfef5e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -386,6 +386,39 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) {
+  constexpr uint32_t missing_method_index = 1024u;
+  auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0);
+  constexpr uint32_t bl_offset_in_last_method = 1u * 4u;  // After NOPs.
+  ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+  ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+  LinkerPatch last_method_patches[] = {
+      LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index),
+  };
+
+  constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
+  uint32_t last_method_idx = Create2MethodsWithGap(
+      kNopCode, ArrayRef<const LinkerPatch>(), last_method_code,
+      ArrayRef<const LinkerPatch>(last_method_patches),
+      just_over_max_negative_disp - bl_offset_in_last_method);
+  uint32_t method1_offset = GetMethodOffset(1u);
+  uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+  ASSERT_EQ(method1_offset,
+            last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp);
+  ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+  // Check linked code.
+  uint32_t thunk_offset =
+      CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
+  uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
+  ASSERT_EQ(diff & 3u, 0u);
+  ASSERT_LT(diff, 128 * MB);
+  auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+                                ArrayRef<const uint8_t>(expected_code)));
+  EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
 TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) {
   auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0);
   constexpr uint32_t bl_offset_in_method1 = 1u * 4u;  // After NOPs.
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 06576cc..ea3cb66 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -98,6 +98,7 @@
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
+    compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
   }
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 6494964..a128079 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -20,6 +20,7 @@
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "driver/compiler_driver.h"
 #include "nodes.h"
+#include "runtime.h"
 
 namespace art {
 
@@ -78,7 +79,13 @@
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
   } else {
+    bool use_pc_relative_instructions =
+        ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) &&
+        ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file);
     if (direct_method != 0u) {  // Should we use a direct pointer to the method?
+      // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
+      // kDirectAddress would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJit());
       if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
         method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
         method_load_data = direct_method;
@@ -87,24 +94,25 @@
       }
     } else {  // Use dex cache.
       DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
-      DexCacheArraysLayout layout =
-          compiler_driver_->GetDexCacheArraysLayout(target_method.dex_file);
-      if (layout.Valid()) {  // Can we use PC-relative access to the dex cache arrays?
+      if (use_pc_relative_instructions) {  // Can we use PC-relative access to the dex cache arrays?
+        DCHECK(!Runtime::Current()->UseJit());
         method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+        DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
+                                    &graph_->GetDexFile());
         method_load_data = layout.MethodOffset(target_method.dex_method_index);
       } else {  // We must go through the ArtMethod's pointer to resolved methods.
         method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
       }
     }
     if (direct_code != 0u) {  // Should we use a direct pointer to the code?
+      // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
+      // while kCallDirect would be fine for image methods, we don't support it at the moment.
+      DCHECK(!Runtime::Current()->UseJit());
       if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
         code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
         direct_code_ptr = direct_code;
-      } else if (compiler_driver_->IsImage() ||
-          target_method.dex_file == &graph_->GetDexFile()) {
+      } else if (use_pc_relative_instructions) {
         // Use PC-relative calls for invokes within a multi-dex oat file.
-        // TODO: Recognize when the target dex file is within the current oat file for
-        // app compilation. At the moment we recognize only the boot image as multi-dex.
         code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
       } else {  // The direct pointer will be known at link time.
         // NOTE: This is used for app->boot calls when compiling an app against
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 384b879..3ebd2f3 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1495,6 +1495,7 @@
                                      swap_fd_,
                                      profile_file_));
 
+    driver_->SetDexFilesForOatFile(dex_files_);
     driver_->CompileAll(class_loader, dex_files_, timings_);
   }