Use trampolines for calls to helpers

This is an ARM-specific optimization to the compiler
that uses trampoline islands to make calls to runtime
helper functions.  The intention is to reduce the size
of the generated code (by 2 bytes per call) without
affecting performance.

By default this is on when generating an OAT file.  It is
off when compiling to memory.

To switch this off in dex2oat, use the command line option:
--no-helper-trampolines

Also enhances the disassembler to print the trampoline
entry on the BL instruction, like this:

0xb6a850c0: f7ffff9e  bl      -196 (0xb6a85000)  ; pTestSuspend

Bug: 12607709
Change-Id: I9202bdb7cf21252ad807bd48701f1f6ce8e3d0fe
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a241d51..8bf3b04 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -24,6 +24,8 @@
 #include <unistd.h>
 #include <utility>
 
+#include "arch/arm/final_relocations_arm.h"
+#include "base/hex_dump.h"
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
@@ -506,6 +508,7 @@
   UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
+  PostCompile();
   if (dump_stats_) {
     stats_->Dump();
   }
@@ -617,6 +620,10 @@
   UpdateImageClasses(timings);
 }
 
+void CompilerDriver::PostCompile() {  // Called right after Compile() in this patch.
+  BuildEntrypointTrampolineCode();  // Processes the recorded trampoline table.
+}
+
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
   if (!IsImage()) {
     return true;
@@ -1240,6 +1247,25 @@
   return result;
 }
 
+uint32_t CompilerDriver::AddEntrypointTrampoline(uint32_t entrypoint) {  // Forwards to table.
+  return entrypoint_trampolines_.AddEntrypoint(Thread::Current(), entrypoint);
+}  // NOTE(review): meaning of the returned uint32_t is defined by AddEntrypoint(); confirm.
+
+
+void CompilerDriver::BuildEntrypointTrampolineCode() {  // Handles every table entry.
+  const auto& table = entrypoint_trampolines_.GetTrampolineTable();
+  for (uint32_t offset : table) {  // Each entry is wrapped in a ThreadOffset<4> below.
+    switch (instruction_set_) {
+      case kThumb2:  // NOTE(review): kArm is not handled here; confirm that is intended.
+        BuildArmEntrypointTrampolineCall(ThreadOffset<4>(offset));
+        break;
+      default:  // Reached only when the table has entries for another instruction set.
+        UNIMPLEMENTED(FATAL) << "No entrypoint trampolines for this architecture";
+    }
+  }
+}
+
+
 void CompilerDriver::AddCodePatch(const DexFile* dex_file,
                                   uint16_t referrer_class_def_idx,
                                   uint32_t referrer_method_idx,
@@ -2150,4 +2176,17 @@
   }
   return !compile;
 }
+
+FinalEntrypointRelocationSet* CompilerDriver::AllocateFinalEntrypointRelocationSet(
+    CompilationUnit* cu) const {  // |cu| is not used in this body.
+  switch (instruction_set_) {
+    case kArm:  // Intentional fall-through: kArm and kThumb2 share one implementation.
+    case kThumb2:
+      return new FinalEntrypointRelocationSetArm(this);  // Raw new; presumably caller-owned.
+    default:
+      UNIMPLEMENTED(FATAL) << "Cannot allocate FinalEntrypointRelocationSet for non-ARM";
+      return nullptr;  // Presumably unreachable after FATAL; satisfies the return type.
+  }
+}
+
 }  // namespace art