Revert "Revert "Use trampolines for calls to helpers""
This reverts commit 081f73e888b3c246cf7635db37b7f1105cf1a2ff.
Change-Id: Ibd777f8ce73cf8ed6c4cb81d50bf6437ac28cb61
Conflicts:
compiler/dex/quick/mir_to_lir.h
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index b66082d..ba15d2a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -22,6 +22,8 @@
#include <vector>
#include <unistd.h>
+#include "arch/arm/final_relocations_arm.h"
+#include "base/hex_dump.h"
#include "base/stl_util.h"
#include "base/timing_logger.h"
#include "class_linker.h"
@@ -504,6 +506,7 @@
UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
PreCompile(class_loader, dex_files, thread_pool.get(), timings);
Compile(class_loader, dex_files, thread_pool.get(), timings);
+ PostCompile();
if (dump_stats_) {
stats_->Dump();
}
@@ -615,6 +618,10 @@
UpdateImageClasses(timings);
}
+void CompilerDriver::PostCompile() {
+ BuildEntrypointTrampolineCode();
+}
+
bool CompilerDriver::IsImageClass(const char* descriptor) const {
if (!IsImage()) {
return true;
@@ -1238,6 +1245,27 @@
return result;
}
+uint32_t CompilerDriver::AddEntrypointTrampoline(uint32_t entrypoint) {
+ return entrypoint_trampolines_.AddEntrypoint(Thread::Current(), entrypoint);
+}
+
+
+void CompilerDriver::BuildEntrypointTrampolineCode() {
+ const auto& table = entrypoint_trampolines_.GetTrampolineTable();
+ for (uint32_t offset : table) {
+ switch (instruction_set_) {
+ case kArm:
+ // Intentional fall through.
+ case kThumb2:
+ BuildArmEntrypointTrampolineCall(ThreadOffset<4>(offset));
+ break;
+ default:
+ UNIMPLEMENTED(FATAL) << "No entrypoint trampolines for this architecture";
+ }
+ }
+}
+
+
void CompilerDriver::AddCodePatch(const DexFile* dex_file,
uint16_t referrer_class_def_idx,
uint32_t referrer_method_idx,
@@ -2071,4 +2099,17 @@
}
return !compile;
}
+
+FinalEntrypointRelocationSet* CompilerDriver::AllocateFinalEntrypointRelocationSet(
+ CompilationUnit* cu) const {
+ switch (instruction_set_) {
+ case kArm:
+ case kThumb2:
+ return new FinalEntrypointRelocationSetArm(this);
+ default:
+ UNIMPLEMENTED(FATAL) << "Cannot allocate FinalEntrypointRelocationSet for non-ARM";
+ return nullptr;
+ }
+}
+
} // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d49523a..dcf99b8 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -601,6 +601,118 @@
// Should the compiler run on this method given profile information?
bool SkipCompilation(const std::string& method_name);
+ // Entrypoint trampolines.
+ //
+ // The idea here is that we can save code size by collecting the branches
+ // to the entrypoints (helper functions called by the generated code) into a
+ // table and then branching relative to that table from the code. On ARM 32 this
+ // will save 2 bytes per call. Only the entrypoints used by the program (the whole
+ // program - these are global) are in this table and are in no particular order.
+ //
+ // The trampolines will be placed right at the start of the .text section in the file
+ // and will consist of a table of instructions, each of which will branch relative to
+ // the thread register (r9 on ARM) to an entrypoint. On ARM this would look like:
+ //
+ // trampolines:
+ // 1: ldr pc, [r9, #40]
+ // 2: ldr pc, [r9, #8]
+ // ...
+ //
+ // Then a call to an entrypoint would be an immediate BL instruction to the appropriate
+ // label (1 or 2 in the above example). Because the entrypoint table has the lower bit
+ // of the address already set, the ldr pc will switch from ARM to Thumb for the entrypoint as
+ // necessary.
+ //
+ // On ARM, the range of a BL instruction is +-32MB (+-16MB in Thumb2), so this is more than
+ // enough for an immediate BL instruction in the generated code.
+ //
+ // The actual address of the trampoline for a particular entrypoint is not known until
+ // the OAT file is written and we know the addresses of all the branch instructions in
+ // the program. At this point we can rewrite the BL instruction to have the correct relative
+ // offset.
+ class EntrypointTrampolines {
+ public:
+ EntrypointTrampolines() : current_offset_(0), lock_("Entrypoint Trampolines") {}
+ ~EntrypointTrampolines() {}
+
+ // Add a trampoline and return the offset added. If it already exists
+ // return the offset it was added at previously.
+ uint32_t AddEntrypoint(Thread* self, uint32_t ep) LOCKS_EXCLUDED(lock_) {
+ MutexLock mu(self, lock_);
+ Trampolines::iterator tramp = trampolines_.find(ep);
+ if (tramp == trampolines_.end()) {
+ trampolines_[ep] = current_offset_;
+ trampoline_table_.push_back(ep);
+ LOG(DEBUG) << "adding new trampoline for " << ep << " at offset " << current_offset_;
+ return current_offset_++;
+ } else {
+ return tramp->second;
+ }
+ }
+
+ const std::vector<uint32_t>& GetTrampolineTable() const {
+ return trampoline_table_;
+ }
+
+ uint32_t GetTrampolineTableSize() const {
+ return current_offset_;
+ }
+
+ private:
+ uint32_t current_offset_;
+ // Maps an entrypoint offset to its slot index in the trampoline table.
+ typedef std::map<uint32_t, uint32_t> Trampolines;
+ Trampolines trampolines_ GUARDED_BY(lock_);
+
+ // Table of all registered offsets in order of registration.
+ std::vector<uint32_t> trampoline_table_;
+ Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ };
+
+ uint32_t AddEntrypointTrampoline(uint32_t entrypoint);
+
+ const std::vector<uint32_t>& GetEntrypointTrampolineTable() const {
+ return entrypoint_trampolines_.GetTrampolineTable();
+ }
+
+ uint32_t GetEntrypointTrampolineTableSize() const {
+ uint32_t size = entrypoint_trampolines_.GetTrampolineTableSize();
+ switch (instruction_set_) {
+ case kThumb2:
+ case kArm:
+ return size * 4;
+ default:
+ return size;
+ }
+ }
+
+ // Get the maximum offset between entrypoint trampoline islands. Different architectures
+ // have limitations on the max offset for a call instruction. This function is used
+ // to determine when we need to generate a new trampoline island in the output to keep
+ // subsequent calls in range.
+ size_t GetMaxEntrypointTrampolineOffset() const {
+ switch (instruction_set_) {
+ case kThumb2:
+ case kArm:
+ // On Thumb2, the max range of a BL instruction is 16MB. Give it a little wiggle room.
+ return 15*MB;
+ default:
+ // Returning 0 means we won't generate a trampoline island.
+ return 0;
+ }
+ }
+
+ void BuildEntrypointTrampolineCode();
+
+ // Architecture specific Entrypoint trampoline builder.
+ void BuildArmEntrypointTrampolineCall(ThreadOffset<4> offset);
+
+ const std::vector<uint8_t>& GetEntrypointTrampolineTableCode() const {
+ return entrypoint_trampoline_code_;
+ }
+
+ FinalEntrypointRelocationSet* AllocateFinalEntrypointRelocationSet(CompilationUnit* cu) const;
+
private:
// These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
// The only external contract is that unresolved method has flags 0 and resolved non-0.
@@ -638,6 +750,7 @@
LOCKS_EXCLUDED(Locks::mutator_lock_);
void LoadImageClasses(TimingLogger* timings);
+ void PostCompile() LOCKS_EXCLUDED(Locks::mutator_lock_);
// Attempt to resolve all type, methods, fields, and strings
// referenced from code in the dex file following PathClassLoader
@@ -798,6 +911,10 @@
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_;
DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_cfi_info_;
+ EntrypointTrampolines entrypoint_trampolines_;
+
+ std::vector<uint8_t> entrypoint_trampoline_code_;
+
DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
};
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 20c6bc8..52248a6 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,8 @@
small_method_threshold_(kDefaultSmallMethodThreshold),
tiny_method_threshold_(kDefaultTinyMethodThreshold),
num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
- generate_gdb_information_(false)
+ generate_gdb_information_(false),
+ generate_helper_trampolines_(false)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(false)
#endif
@@ -62,7 +63,8 @@
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- bool generate_gdb_information
+ bool generate_gdb_information,
+ bool generate_helper_trampolines
#ifdef ART_SEA_IR_MODE
, bool sea_ir_mode
#endif
@@ -73,7 +75,8 @@
small_method_threshold_(small_method_threshold),
tiny_method_threshold_(tiny_method_threshold),
num_dex_methods_threshold_(num_dex_methods_threshold),
- generate_gdb_information_(generate_gdb_information)
+ generate_gdb_information_(generate_gdb_information),
+ generate_helper_trampolines_(generate_helper_trampolines)
#ifdef ART_SEA_IR_MODE
, sea_ir_mode_(sea_ir_mode)
#endif
@@ -140,6 +143,10 @@
return generate_gdb_information_;
}
+ bool GenerateHelperTrampolines() const {
+ return generate_helper_trampolines_;
+ }
+
private:
CompilerFilter compiler_filter_;
size_t huge_method_threshold_;
@@ -148,6 +155,7 @@
size_t tiny_method_threshold_;
size_t num_dex_methods_threshold_;
bool generate_gdb_information_;
+ bool generate_helper_trampolines_;
#ifdef ART_SEA_IR_MODE
bool sea_ir_mode_;