Revert "Revert "Use trampolines for calls to helpers""

This reverts commit 081f73e888b3c246cf7635db37b7f1105cf1a2ff.

Change-Id: Ibd777f8ce73cf8ed6c4cb81d50bf6437ac28cb61

Conflicts:
	compiler/dex/quick/mir_to_lir.h
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index c9acd66..8c5c6c5 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -496,6 +496,7 @@
   kThumb2LdrdPcRel8,  // ldrd rt, rt2, pc +-/1024.
   kThumb2LdrdI8,     // ldrd rt, rt2, [rn +-/1024].
   kThumb2StrdI8,     // strd rt, rt2, [rn +-/1024].
+  kThumb2BlTramp,    // Thumb2 BL to trampoline
   kArmLast,
 };
 
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index f77b0a6..0e1275d 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1035,6 +1035,11 @@
                  kFmtBitBlt, 7, 0,
                  IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
                  "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+    ENCODING_MAP(kThumb2BlTramp, 0xf000d000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl", "!0t", 4, kFixupTrampCall),
 };
 
 // new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1224,6 +1229,7 @@
   while (true) {
     offset_adjustment = 0;
     AssemblerStatus res = kSuccess;  // Assume success
+
     generation ^= 1;
     // Note: nodes requring possible fixup linked in ascending order.
     lir = first_fixup_;
@@ -1577,6 +1583,17 @@
           }
           break;
         }
+        case kFixupTrampCall: {
+          // This is a call to a trampoline.  The value for the trampoline call needs
+          // both the offset into the code and the trampoline to call.  It will be
+          // added to the list of calls when we actually insert this instruction into
+          // the code_buffer (when we have a stable instruction stream).
+          uint32_t instoffset = lir->offset;
+          // LOG(INFO) << "adding trampoline call: offset: " << instoffset
+          //           << " entrypoint: " << lir->operands[0];
+          trampoline_calls_.push_back(TrampolineCall(instoffset, lir->operands[0]));
+          break;
+        }
         default:
           LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
       }
@@ -1595,6 +1612,7 @@
       starting_offset += offset_adjustment;
       data_offset_ = (starting_offset + 0x3) & ~0x3;
       AssignDataOffsets();
+      trampoline_calls_.clear();            // These are invalid now.
     }
   }
 
@@ -1675,5 +1693,4 @@
 
   total_size_ = AssignFillArrayDataOffset(offset);
 }
-
 }  // namespace art
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b..1b485a3 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
@@ -468,4 +469,34 @@
   NewLIR1(kThumbBx, rARM_LR);
 }
 
+// Entrypoint calls.
+RegStorage ArmMir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+  const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
+  if (compiler_options.GenerateHelperTrampolines()) {
+    return RegStorage::InvalidReg();
+  } else {
+    return LoadHelper(helper_offset);
+  }
+}
+
+LIR* ArmMir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+    bool use_link) {
+  LIR* call_inst = nullptr;
+  if (use_link) {
+    const CompilerOptions& compiler_options = cu_->compiler_driver->GetCompilerOptions();
+    if (compiler_options.GenerateHelperTrampolines()) {
+      call_inst = OpThreadMem(kOpBlx, helper_offset);
+    } else {
+      call_inst = OpReg(kOpBlx, r_tgt);
+      FreeTemp(r_tgt);
+    }
+  } else {
+    call_inst = OpReg(kOpBx, r_tgt);
+    FreeTemp(r_tgt);
+  }
+  if (safepoint_pc) {
+    MarkSafepointPC(call_inst);
+  }
+  return call_inst;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 13fa635..3c0aa03 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -195,6 +195,11 @@
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
 
+    // Entrypoint calls.
+    RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+      bool safepoint_pc, bool use_link);
+
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2..8e6d9a8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1111,8 +1111,14 @@
 }
 
 LIR* ArmMir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
-  LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
-  return NULL;
+  if (op == kOpBlx) {
+    const uint32_t trampoline = cu_->compiler_driver->AddEntrypointTrampoline(
+        thread_offset.Int32Value());
+    return NewLIR1(kThumb2BlTramp, trampoline);
+  } else {
+    LOG(FATAL) << "Invalid opcode for OpThreadMem on Arm";
+    return NULL;
+  }
 }
 
 LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e6b8f0..b163ef7 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1012,11 +1012,26 @@
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
   }
 
+  // All relocations
+  UniquePtr<FinalRelocations> all_relocs(new FinalRelocations());
+
+  // Build the final relocations for this method.
+  if (trampoline_calls_.size() != 0) {
+    FinalEntrypointRelocationSet* ep_relocs =
+      cu_->compiler_driver->AllocateFinalEntrypointRelocationSet(cu_);
+    for (size_t i = 0 ; i < trampoline_calls_.size(); ++i) {
+      const TrampolineCall& call = trampoline_calls_[i];
+      ep_relocs->Add(call.code_offset_, call.trampoline_offset_);
+    }
+    all_relocs->push_back(ep_relocs);
+  }
+
   UniquePtr<std::vector<uint8_t> > cfi_info(ReturnCallFrameInformation());
   CompiledMethod* result =
       new CompiledMethod(*cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
-                         vmap_encoder.GetData(), native_gc_map_, cfi_info.get());
+                         vmap_encoder.GetData(), native_gc_map_, cfi_info.get(),
+                         all_relocs.release());
   return result;
 }
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4522379..2afc43c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -17,6 +17,7 @@
 #include "dex/compiler_internals.h"
 #include "dex/quick/arm/arm_lir.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
@@ -968,12 +969,20 @@
         void Compile() {
           GenerateTargetLabel();
 
-          RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
+          const CompilerOptions& compiler_options =
+            m2l_->cu_->compiler_driver->GetCompilerOptions();
+          if (compiler_options.GenerateHelperTrampolines()) {
+            m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+            m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pResolveString),
+                             true);
+          } else {
+            RegStorage r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pResolveString));
 
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);   // .eq
-          LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
-          m2l_->MarkSafepointPC(call_inst);
-          m2l_->FreeTemp(r_tgt);
+            m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);
+            LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+            m2l_->MarkSafepointPC(call_inst);
+            m2l_->FreeTemp(r_tgt);
+          }
 
           m2l_->OpUnconditionalBranch(cont_);
         }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d827568..c91818b 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -19,6 +19,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
 #include "mirror/array.h"
@@ -62,25 +63,19 @@
 
 /*
  * To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actual call to the helper.  Because x86
- * has a memory call operation, part 1 is a NOP for x86.  For other targets,
- * load arguments between the two parts.
+ * the helper target address, and the actual call to the helper.
+ * These functions can be overridden by architecture specific codegen.
  */
 RegStorage Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
-  return (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) ? RegStorage::InvalidReg() : LoadHelper(helper_offset);
+  return LoadHelper(helper_offset);
 }
 
 /* NOTE: if r_tgt is a temp, it will be freed following use */
 LIR* Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
                          bool use_link) {
-  LIR* call_inst;
   OpKind op = use_link ? kOpBlx : kOpBx;
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-    call_inst = OpThreadMem(op, helper_offset);
-  } else {
-    call_inst = OpReg(op, r_tgt);
-    FreeTemp(r_tgt);
-  }
+  LIR* call_inst = OpReg(op, r_tgt);
+  FreeTemp(r_tgt);
   if (safepoint_pc) {
     MarkSafepointPC(call_inst);
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6dbeb34..5d8a6af 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -328,6 +328,18 @@
       LIR* const cont_;
     };
 
+    // This holds the data for a call to a trampoline.  An instruction is making a call
+    // to something through a trampoline and this holds the offset into the code containing
+    // the instruction, and which trampoline offset to call.
+    struct TrampolineCall {
+      TrampolineCall(uint32_t code_offset, uint32_t trampoline_offset) : code_offset_(code_offset),
+         trampoline_offset_(trampoline_offset) {
+      }
+
+      uint32_t code_offset_;          // Offset of instruction in method code stream (bytes).
+      uint32_t trampoline_offset_;    // Which trampoline to call.
+    };
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
@@ -615,11 +627,11 @@
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
     virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                        RegLocation rl_src1, RegLocation rl_src2);
+    virtual LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+                    bool use_link = true);
+    virtual RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
 
     // Shared by all targets - implemented in gen_invoke.cc.
-    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
-                    bool use_link = true);
-    RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
     void CallRuntimeHelper(ThreadOffset<4> helper_offset, bool safepoint_pc);
     void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
     void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
@@ -1280,6 +1292,7 @@
     LIR* last_lir_insn_;
 
     GrowableArray<LIRSlowPath*> slow_paths_;
+    std::vector<TrampolineCall> trampoline_calls_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 729b30d..2bd2caa 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -278,4 +278,18 @@
   NewLIR0(kX86Ret);
 }
 
+RegStorage X86Mir2Lir::CallHelperSetup(ThreadOffset<4> helper_offset) {
+  return RegStorage::InvalidReg();
+}
+
+LIR* X86Mir2Lir::CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
+    bool use_link) {
+  LIR* call_inst = OpThreadMem(use_link ? kOpBlx : kOpBx, helper_offset);
+  if (safepoint_pc) {
+    MarkSafepointPC(call_inst);
+  }
+  return call_inst;
+}
+
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index af2a140..e913d1d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -328,6 +328,11 @@
      */
     std::vector<uint8_t>* ReturnCallFrameInformation();
 
+    // Entrypoint calls.
+    RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+    LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset,
+      bool safepoint_pc, bool use_link);
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);