Keep list of dex files for oat file in CompilerDriver.
Use this list to improve invoke-static/-direct dispatch for
intra-oat calls.
Also fix a latent ArmBaseRelativePatcher::ReserveSpaceEnd()
bug exposed by a buggy early version of this CL: when we
have unresolved patches at the end of all code, we need to
emit a final thunk. Though the OatWriter will try to patch
the unresolved call to a trampoline at the beginning of the
oat file, that trampoline may be too far and the relative
patcher doesn't know about it anyway, so it needs to assume
that a thunk is needed.
This reduces the overall size of oat files present in dalvik
cache on Nexus 9 after first boot by over 1MiB, AOSP ToT,
aosp_flounder-userdebug build.
Change-Id: I98604b70cb17377eed057c1c23971865cf344e43
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 8750aa8..fb116bb 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -375,6 +375,7 @@
timings_logger_(timer),
compiler_context_(nullptr),
support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
+ dex_files_for_oat_file_(nullptr),
compiled_method_storage_(swap_fd) {
DCHECK(compiler_options_ != nullptr);
DCHECK(verification_results_ != nullptr);
@@ -1371,8 +1372,7 @@
}
DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
- // Currently only image dex caches have fixed array layout.
- return IsImage() && GetSupportBootImageFixup()
+ return ContainsElement(GetDexFilesForOatFile(), dex_file)
? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
: DexCacheArraysLayout();
}
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 485cdcf..4ed4dc6 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,7 @@
#include "runtime.h"
#include "safe_map.h"
#include "thread_pool.h"
+#include "utils/array_ref.h"
#include "utils/dex_cache_arrays_layout.h"
namespace art {
@@ -101,7 +102,20 @@
~CompilerDriver();
- void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+ // Set dex files that will be stored in the oat file after being compiled.
+ void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) {
+ dex_files_for_oat_file_ = &dex_files;
+ }
+
+ // Get dex file that will be stored in the oat file after being compiled.
+ ArrayRef<const DexFile* const> GetDexFilesForOatFile() const {
+ return (dex_files_for_oat_file_ != nullptr)
+ ? ArrayRef<const DexFile* const>(*dex_files_for_oat_file_)
+ : ArrayRef<const DexFile* const>();
+ }
+
+ void CompileAll(jobject class_loader,
+ const std::vector<const DexFile*>& dex_files,
TimingLogger* timings)
REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
@@ -661,6 +675,9 @@
bool support_boot_image_fixup_;
+ // List of dex files that will be stored in the oat file.
+ const std::vector<const DexFile*>* dex_files_for_oat_file_;
+
CompiledMethodStorage compiled_method_storage_;
friend class CompileClassVisitor;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e31a7a..21d582e 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,6 +76,7 @@
for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
dex_file->EnableWrite();
}
+ compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
t.NewTiming("WriteElf");
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index ac38f3d..13754fd 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -36,7 +36,8 @@
// of code. To avoid any alignment discrepancies for the final chunk, we always align the
// offset after reserving of writing any chunk.
uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
- bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u),
+ bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
+ MethodReference(nullptr, 0u),
aligned_offset);
if (needs_thunk) {
thunk_locations_.push_back(aligned_offset);
@@ -94,7 +95,8 @@
// We need the MethodReference for that.
if (!unprocessed_patches_.empty() &&
next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
- bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref,
+ bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset,
+ method_ref,
next_aligned_offset);
if (needs_thunk) {
// A single thunk will cover all pending patches.
@@ -156,7 +158,10 @@
// If still unresolved, check if we have a thunk within range.
if (thunk_locations_.empty() ||
patch_offset - thunk_locations_.back() > max_negative_displacement_) {
- return next_aligned_offset - patch_offset > max_positive_displacement_;
+ // No thunk in range, we need a thunk if the next aligned offset
+ // is out of range, or if we're at the end of all code.
+ return (next_aligned_offset - patch_offset > max_positive_displacement_) ||
+ (quick_code_offset == next_aligned_offset); // End of code.
}
} else {
uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 5515313..a259cda 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -233,6 +233,36 @@
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
+TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) {
+ constexpr uint32_t missing_method_index = 1024u;
+ auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method3_code(method3_raw_code);
+ ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+ LinkerPatch method3_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
+ };
+
+ constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(kNopCode,
+ ArrayRef<const LinkerPatch>(),
+ method3_code,
+ ArrayRef<const LinkerPatch>(method3_patches),
+ just_over_max_negative_disp - bl_offset_in_method3);
+ ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2.
+ ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+ // Check linked code.
+ uint32_t method3_offset = GetMethodOffset(3u);
+ uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2);
+ uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */);
+ ASSERT_EQ(diff & 1u, 0u);
+ ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits.
+ auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+ EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs.
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 2a426b5..0bfef5e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -386,6 +386,39 @@
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
+TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) {
+ constexpr uint32_t missing_method_index = 1024u;
+ auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs.
+ ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+ ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+ LinkerPatch last_method_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index),
+ };
+
+ constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
+ uint32_t last_method_idx = Create2MethodsWithGap(
+ kNopCode, ArrayRef<const LinkerPatch>(), last_method_code,
+ ArrayRef<const LinkerPatch>(last_method_patches),
+ just_over_max_negative_disp - bl_offset_in_last_method);
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ ASSERT_EQ(method1_offset,
+ last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp);
+ ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first);
+
+ // Check linked code.
+ uint32_t thunk_offset =
+ CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
+ uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
+ ASSERT_EQ(diff & 3u, 0u);
+ ASSERT_LT(diff, 128 * MB);
+ auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+ ArrayRef<const uint8_t>(expected_code)));
+ EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) {
auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0);
constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs.
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 06576cc..ea3cb66 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -98,6 +98,7 @@
jobject class_loader = nullptr;
if (kCompile) {
TimingLogger timings2("OatTest::WriteRead", false, false);
+ compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
}
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 6494964..a128079 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -20,6 +20,7 @@
#include "utils/dex_cache_arrays_layout-inl.h"
#include "driver/compiler_driver.h"
#include "nodes.h"
+#include "runtime.h"
namespace art {
@@ -78,7 +79,13 @@
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
} else {
+ bool use_pc_relative_instructions =
+ ((direct_method == 0u || direct_code == static_cast<uintptr_t>(-1))) &&
+ ContainsElement(compiler_driver_->GetDexFilesForOatFile(), target_method.dex_file);
if (direct_method != 0u) { // Should we use a direct pointer to the method?
+ // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while
+ // kDirectAddress would be fine for image methods, we don't support it at the moment.
+ DCHECK(!Runtime::Current()->UseJit());
if (direct_method != static_cast<uintptr_t>(-1)) { // Is the method pointer known now?
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
method_load_data = direct_method;
@@ -87,24 +94,25 @@
}
} else { // Use dex cache.
DCHECK_EQ(target_method.dex_file, &graph_->GetDexFile());
- DexCacheArraysLayout layout =
- compiler_driver_->GetDexCacheArraysLayout(target_method.dex_file);
- if (layout.Valid()) { // Can we use PC-relative access to the dex cache arrays?
+ if (use_pc_relative_instructions) { // Can we use PC-relative access to the dex cache arrays?
+ DCHECK(!Runtime::Current()->UseJit());
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+ DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
+ &graph_->GetDexFile());
method_load_data = layout.MethodOffset(target_method.dex_method_index);
} else { // We must go through the ArtMethod's pointer to resolved methods.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
}
}
if (direct_code != 0u) { // Should we use a direct pointer to the code?
+ // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
+ // while kCallDirect would be fine for image methods, we don't support it at the moment.
+ DCHECK(!Runtime::Current()->UseJit());
if (direct_code != static_cast<uintptr_t>(-1)) { // Is the code pointer known now?
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
direct_code_ptr = direct_code;
- } else if (compiler_driver_->IsImage() ||
- target_method.dex_file == &graph_->GetDexFile()) {
+ } else if (use_pc_relative_instructions) {
// Use PC-relative calls for invokes within a multi-dex oat file.
- // TODO: Recognize when the target dex file is within the current oat file for
- // app compilation. At the moment we recognize only the boot image as multi-dex.
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
} else { // The direct pointer will be known at link time.
// NOTE: This is used for app->boot calls when compiling an app against
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 384b879..3ebd2f3 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1495,6 +1495,7 @@
swap_fd_,
profile_file_));
+ driver_->SetDexFilesForOatFile(dex_files_);
driver_->CompileAll(class_loader, dex_files_, timings_);
}