GenSpecialCase support for x86

Moved GenSpecialCase from ARM-specific code to common code so that it
can be used by the x86 quick backend as well.

Change-Id: I728733e8f4c4da99af6091ef77e5c76ae0fee850
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 10bcdb9..dfd8e63 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -881,6 +881,23 @@
   }
 }
 
+MIR* MIRGraph::GetNextUnconditionalMir(BasicBlock* bb, MIR* current) {
+  MIR* next_mir = nullptr;
+
+  if (current != nullptr) {
+    next_mir = current->next;
+  }
+
+  if (next_mir == nullptr) {
+    // Only look for next MIR that follows unconditionally.
+    if ((bb->taken == NullBasicBlockId) && (bb->fall_through != NullBasicBlockId)) {
+      next_mir = GetBasicBlock(bb->fall_through)->first_mir_insn;
+    }
+  }
+
+  return next_mir;
+}
+
 char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
   DecodedInstruction insn = mir->dalvikInsn;
   std::string str;
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index f8706c4..e866612 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -721,6 +721,17 @@
   void AppendMIR(BasicBlock* bb, MIR* mir);
   void PrependMIR(BasicBlock* bb, MIR* mir);
   void InsertMIRAfter(BasicBlock* bb, MIR* current_mir, MIR* new_mir);
+
+  /**
+   * @brief Used to obtain the next MIR that follows unconditionally.
+   * @details A nullptr result does not guarantee that no MIR follows; only
+   * the rest of the block and its unconditional fall-through are examined.
+   * @param bb The basic block of "current" MIR.
+   * @param current The MIR for which to find an unconditional follower.
+   * @return Returns the following MIR if one can be found.
+   */
+  MIR* GetNextUnconditionalMir(BasicBlock* bb, MIR* current);
+
   char* GetDalvikDisassembly(const MIR* mir);
   void ReplaceSpecialChars(std::string& str);
   std::string GetSSAName(int ssa_reg);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index a30e80a..b36dde9 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -18,225 +18,11 @@
 
 #include "arm_lir.h"
 #include "codegen_arm.h"
-#include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
 
-// TODO: generalize & move to RegUtil.cc
-// The number of dalvik registers passed in core registers.
-constexpr int kInArgsInCoreRegs = 3;
-// The core register corresponding to the first (index 0) input argument.
-constexpr int kInArg0CoreReg = r1;  // r0 is Method*.
-// Offset, in words, for getting args from stack (even core reg args have space on stack).
-constexpr int kInArgToStackOffset = 1;
-
-/* Lock argument if it's in register. */
-void ArmMir2Lir::LockArg(int in_position, bool wide) {
-  if (in_position < kInArgsInCoreRegs) {
-    LockTemp(kInArg0CoreReg + in_position);
-  }
-  if (wide && in_position + 1 < kInArgsInCoreRegs) {
-    LockTemp(kInArg0CoreReg + in_position + 1);
-  }
-}
-
-/* Load argument into register. LockArg(in_position, wide) must have been previously called. */
-int ArmMir2Lir::LoadArg(int in_position, bool wide) {
-  if (in_position < kInArgsInCoreRegs) {
-    int low_reg = kInArg0CoreReg + in_position;
-    if (!wide) {
-      return low_reg;
-    }
-    int high_reg = (in_position != kInArgsInCoreRegs - 1) ? low_reg + 1 : LoadArg(in_position + 1);
-    return (low_reg & 0xff) | ((high_reg & 0xff) << 8);
-  }
-  int low_reg = AllocTemp();
-  int offset = (in_position + kInArgToStackOffset) * sizeof(uint32_t);
-  if (!wide) {
-    LoadWordDisp(rARM_SP, offset, low_reg);
-    return low_reg;
-  }
-  int high_reg = AllocTemp();
-  LoadBaseDispWide(rARM_SP, offset, low_reg, high_reg, INVALID_SREG);
-  return (low_reg & 0xff) | ((high_reg & 0xff) << 8);
-}
-
-void ArmMir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
-  int reg = kInArg0CoreReg + in_position;
-  int offset = (in_position + kInArgToStackOffset) * sizeof(uint32_t);
-  if (!rl_dest.wide) {
-    if (in_position < kInArgsInCoreRegs) {
-      OpRegCopy(rl_dest.low_reg, reg);
-    } else {
-      LoadWordDisp(rARM_SP, offset, rl_dest.low_reg);
-    }
-  } else {
-    if (in_position < kInArgsInCoreRegs - 1) {
-      OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, reg, reg + 1);
-    } else if (in_position == kInArgsInCoreRegs - 1) {
-      OpRegCopy(rl_dest.low_reg, reg);
-      LoadWordDisp(rARM_SP, offset + sizeof(uint32_t), rl_dest.high_reg);
-    } else {
-      LoadBaseDispWide(rARM_SP, offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
-    }
-  }
-}
-
-/* Find the next MIR, which may be in a following basic block */
-// TODO: make this a utility in mir_graph.
-MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) {
-  BasicBlock* bb = *p_bb;
-  MIR* orig_mir = mir;
-  while (bb != NULL) {
-    if (mir != NULL) {
-      mir = mir->next;
-    }
-    if (mir != NULL) {
-      return mir;
-    } else {
-      bb = mir_graph_->GetBasicBlock(bb->fall_through);
-      *p_bb = bb;
-      if (bb) {
-         mir = bb->first_mir_insn;
-         if (mir != NULL) {
-           return mir;
-         }
-      }
-    }
-  }
-  return orig_mir;
-}
-
-/* Used for the "verbose" listing */
-// TODO:  move to common code
-void ArmMir2Lir::GenPrintLabel(MIR* mir) {
-  /* Mark the beginning of a Dalvik instruction for line tracking */
-  if (cu_->verbose) {
-    char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
-    MarkBoundary(mir->offset, inst_str);
-  }
-}
-
-MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
-  // FastInstance() already checked by DexFileMethodInliner.
-  const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static || data.object_arg != 0) {
-    return NULL;  // The object is not "this" and has to be null-checked.
-  }
-
-  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-  bool wide = (data.op_size == kLong);
-
-  // Point of no return - no aborts after this
-  ArmMir2Lir::GenPrintLabel(mir);
-  LockArg(data.object_arg);
-  RegLocation rl_dest = wide ? GetReturnWide(false) : GetReturn(false);
-  int reg_obj = LoadArg(data.object_arg);
-  if (wide) {
-    LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
-  } else {
-    LoadBaseDisp(reg_obj, data.field_offset, rl_dest.low_reg, kWord, INVALID_SREG);
-  }
-  if (data.is_volatile) {
-    GenMemBarrier(kLoadLoad);
-  }
-  return GetNextMir(bb, mir);
-}
-
-MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
-  // FastInstance() already checked by DexFileMethodInliner.
-  const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static || data.object_arg != 0) {
-    return NULL;  // The object is not "this" and has to be null-checked.
-  }
-
-  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-  bool wide = (data.op_size == kLong);
-
-  // Point of no return - no aborts after this
-  ArmMir2Lir::GenPrintLabel(mir);
-  LockArg(data.object_arg);
-  LockArg(data.src_arg, wide);
-  int reg_obj = LoadArg(data.object_arg);
-  int reg_src = LoadArg(data.src_arg, wide);
-  if (data.is_volatile) {
-    GenMemBarrier(kStoreStore);
-  }
-  if (wide) {
-    StoreBaseDispWide(reg_obj, data.field_offset, reg_src & 0xff, reg_src >> 8);
-  } else {
-    StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
-  }
-  if (data.is_volatile) {
-    GenMemBarrier(kLoadLoad);
-  }
-  if (data.is_object) {
-    MarkGCCard(reg_src, reg_obj);
-  }
-  return GetNextMir(bb, mir);
-}
-
-MIR* ArmMir2Lir::SpecialIdentity(MIR* mir, const InlineMethod& special) {
-  const InlineReturnArgData& data = special.d.return_data;
-  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-  bool wide = (data.op_size == kLong);
-
-  // Point of no return - no aborts after this
-  ArmMir2Lir::GenPrintLabel(mir);
-  LockArg(data.arg, wide);
-  RegLocation rl_dest = wide ? GetReturnWide(false) : GetReturn(false);
-  LoadArgDirect(data.arg, rl_dest);
-  return mir;
-}
-
-/*
- * Special-case code genration for simple non-throwing leaf methods.
- */
-void ArmMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
-                                const InlineMethod& special) {
-  DCHECK(special.flags & kInlineSpecial);
-  current_dalvik_offset_ = mir->offset;
-  MIR* next_mir = NULL;
-  switch (special.opcode) {
-    case kInlineOpNop:
-      DCHECK(mir->dalvikInsn.opcode == Instruction::RETURN_VOID);
-      next_mir = mir;
-      break;
-    case kInlineOpConst:
-      ArmMir2Lir::GenPrintLabel(mir);
-      LoadConstant(rARM_RET0, static_cast<int>(special.d.data));
-      next_mir = GetNextMir(&bb, mir);
-      break;
-    case kInlineOpIGet:
-      next_mir = SpecialIGet(&bb, mir, special);
-      break;
-    case kInlineOpIPut:
-      next_mir = SpecialIPut(&bb, mir, special);
-      break;
-    case kInlineOpReturnArg:
-      next_mir = SpecialIdentity(mir, special);
-      break;
-    default:
-      return;
-  }
-  if (next_mir != NULL) {
-    current_dalvik_offset_ = next_mir->offset;
-    if (special.opcode != kInlineOpReturnArg) {
-      ArmMir2Lir::GenPrintLabel(next_mir);
-    }
-    NewLIR1(kThumbBx, rARM_LR);
-    core_spill_mask_ = 0;
-    num_core_spills_ = 0;
-    fp_spill_mask_ = 0;
-    num_fp_spills_ = 0;
-    frame_size_ = 0;
-    core_vmap_table_.clear();
-    fp_vmap_table_.clear();
-  }
-}
-
 /*
  * The sparse table in the literal pool is an array of <key,displacement>
  * pairs.  For each set, we'll load them as a pair using ldmia.
@@ -610,4 +396,8 @@
   }
 }
 
+void ArmMir2Lir::GenSpecialExitSequence() {
+  NewLIR1(kThumbBx, rARM_LR);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 7ee241c..65dee80 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -52,6 +52,7 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
+    int GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -122,6 +123,7 @@
     void GenDivZeroCheck(int reg_lo, int reg_hi);
     void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
     void GenExitSequence();
+    void GenSpecialExitSequence();
     void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
@@ -136,7 +138,6 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
     // Required for target - single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
@@ -170,7 +171,6 @@
     LIR* LoadBaseDispBody(int rBase, int displacement, int r_dest, int r_dest_hi, OpSize size,
                           int s_reg);
     LIR* StoreBaseDispBody(int rBase, int displacement, int r_src, int r_src_hi, OpSize size);
-    void GenPrintLabel(MIR* mir);
     LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift);
     LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kArmLast];
@@ -185,13 +185,6 @@
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
-    void LockArg(int in_position, bool wide = false);
-    int LoadArg(int in_position, bool wide = false);
-    void LoadArgDirect(int in_position, RegLocation rl_dest);
-    MIR* GetNextMir(BasicBlock** p_bb, MIR* mir);
-    MIR* SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special);
-    MIR* SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special);
-    MIR* SpecialIdentity(MIR* mir, const InlineMethod& special);
     LIR* LoadFPConstantValue(int r_dest, int value);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index ceec7d5..83431ad 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -83,6 +83,19 @@
   return res;
 }
 
+int ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+  switch (arg_num) {
+    case 0:
+      return rARM_ARG1;
+    case 1:
+      return rARM_ARG2;
+    case 2:
+      return rARM_ARG3;
+    default:
+      return INVALID_REG;
+  }
+}
 
 // Create a double from a pair of singles.
 int ArmMir2Lir::S2d(int low_reg, int high_reg) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 05eb360..c5dccda 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1017,19 +1017,13 @@
   /* Allocate Registers using simple local allocation scheme */
   SimpleRegAlloc();
 
-  /*
-   * Custom codegen for special cases.  If for any reason the
-   * special codegen doesn't succeed, first_lir_insn_ will be
-   * set to NULL;
-   */
-  // TODO: Clean up GenSpecial() and return true only if special implementation is emitted.
-  // Currently, GenSpecial() returns IsSpecial() but doesn't check after SpecialMIR2LIR().
+  /* First try the custom light codegen for special cases. */
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+  bool special_worked = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
       ->GenSpecial(this, cu_->method_idx);
 
-  /* Convert MIR to LIR, etc. */
-  if (first_lir_insn_ == NULL) {
+  /* Take normal path for converting MIR to LIR only if the special codegen did not succeed. */
+  if (special_worked == false) {
     MethodMIR2LIR();
   }
 
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 389dd9a..46d846a 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -407,9 +407,7 @@
     }
     special = it->second;
   }
-  // TODO: Return true only if special implementation is emitted.
-  backend->SpecialMIR2LIR(special);
-  return true;
+  return backend->SpecialMIR2LIR(special);
 }
 
 uint32_t DexFileMethodInliner::FindClassIndex(const DexFile* dex_file, IndexCache* cache,
@@ -596,7 +594,7 @@
   if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
     return false;  // Returning non-null reference constant?
   }
-  result->opcode = kInlineOpConst;
+  result->opcode = kInlineOpNonWideConst;
   result->flags = kInlineSpecial;
   result->d.data = static_cast<uint64_t>(vB);
   return true;
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index fb7528e..f4c2d67 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -57,7 +57,7 @@
 
   kInlineOpNop,
   kInlineOpReturnArg,
-  kInlineOpConst,
+  kInlineOpNonWideConst,
   kInlineOpIGet,
   kInlineOpIPut,
 };
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index ee61c8b..5fa4596 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -293,10 +293,10 @@
     StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
   }
 
-  if (cu_->num_ins == 0)
+  if (cu_->num_ins == 0) {
     return;
-  const int num_arg_regs = 3;
-  static SpecialTargetRegister arg_regs[] = {kArg1, kArg2, kArg3};
+  }
+
   int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
   /*
    * Copy incoming arguments to their proper home locations.
@@ -312,15 +312,17 @@
    */
   for (int i = 0; i < cu_->num_ins; i++) {
     PromotionMap* v_map = &promotion_map_[start_vreg + i];
-    if (i < num_arg_regs) {
+    int reg = GetArgMappingToPhysicalReg(i);
+
+    if (reg != INVALID_REG) {
       // If arriving in register
       bool need_flush = true;
       RegLocation* t_loc = &ArgLocs[i];
       if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(v_map->core_reg, TargetReg(arg_regs[i]));
+        OpRegCopy(v_map->core_reg, reg);
         need_flush = false;
       } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(v_map->FpReg, TargetReg(arg_regs[i]));
+        OpRegCopy(v_map->FpReg, reg);
         need_flush = false;
       } else {
         need_flush = true;
@@ -350,8 +352,7 @@
         }
       }
       if (need_flush) {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-                      TargetReg(arg_regs[i]), kWord);
+        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kWord);
       }
     } else {
       // If arriving in frame & promoted
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 2e385a3..a663519 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -23,9 +23,10 @@
 
 namespace art {
 
-void MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
+bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                  const InlineMethod& special) {
-    // TODO
+  // TODO
+  return false;
 }
 
 /*
@@ -345,4 +346,8 @@
   OpReg(kOpBx, r_RA);
 }
 
+void MipsMir2Lir::GenSpecialExitSequence() {
+  OpReg(kOpBx, r_RA);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 11b8f83..dad8a3b 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -52,6 +52,7 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
+    int GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -121,6 +122,7 @@
     void GenDivZeroCheck(int reg_lo, int reg_hi);
     void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
     void GenExitSequence();
+    void GenSpecialExitSequence();
     void GenFillArrayData(uint32_t table_offset, RegLocation rl_src);
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
@@ -133,7 +135,7 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
-    void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
     // Required for target - single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index b744adc..224e8f2 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -86,6 +86,20 @@
   return res;
 }
 
+int MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+  switch (arg_num) {
+    case 0:
+      return rMIPS_ARG1;
+    case 1:
+      return rMIPS_ARG2;
+    case 2:
+      return rMIPS_ARG3;
+    default:
+      return INVALID_REG;
+  }
+}
+
 // Create a double from a pair of singles.
 int MipsMir2Lir::S2d(int low_reg, int high_reg) {
   return MIPS_S2D(low_reg, high_reg);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index ae54fb8..8c2ed36 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -16,12 +16,244 @@
 
 #include "dex/compiler_internals.h"
 #include "dex/dataflow_iterator-inl.h"
+#include "dex/quick/dex_file_method_inliner.h"
 #include "mir_to_lir-inl.h"
 #include "object_utils.h"
 #include "thread-inl.h"
 
 namespace art {
 
+void Mir2Lir::LockArg(int in_position, bool wide) {
+  int reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+  int reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : INVALID_REG;
+
+  if (reg_arg_low != INVALID_REG) {
+    LockTemp(reg_arg_low);
+  }
+  if (reg_arg_high != INVALID_REG && reg_arg_low != reg_arg_high) {
+    LockTemp(reg_arg_high);
+  }
+}
+
+int Mir2Lir::LoadArg(int in_position, bool wide) {
+  int reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+  int reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : INVALID_REG;
+
+  int offset = StackVisitor::GetOutVROffset(in_position);
+  if (cu_->instruction_set == kX86) {
+    /*
+     * On x86, the call instruction pushes the return address, which moves the stack pointer.
+     * Thus, we add another 4 bytes to reach the outs of the caller (the ins of the callee).
+     * TODO: This needs to be revisited for 64-bit.
+     */
+    offset += sizeof(uint32_t);
+  }
+
+  // If the VR is wide and there is no register for high part, we need to load it.
+  if (wide && reg_arg_high == INVALID_REG) {
+    // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
+    if (reg_arg_low == INVALID_REG) {
+      int new_regs = AllocTypedTempPair(false, kAnyReg);
+      DECODE_REG_PAIR(new_regs, reg_arg_low, reg_arg_high);
+      LoadBaseDispWide(TargetReg(kSp), offset, reg_arg_low, reg_arg_high, INVALID_SREG);
+    } else {
+      reg_arg_high = AllocTemp();
+      int offset_high = offset + sizeof(uint32_t);
+      LoadWordDisp(TargetReg(kSp), offset_high, reg_arg_high);
+    }
+  }
+
+  // If the low part is not in a register yet, we need to load it.
+  if (reg_arg_low == INVALID_REG) {
+    reg_arg_low = AllocTemp();
+    LoadWordDisp(TargetReg(kSp), offset, reg_arg_low);
+  }
+
+  if (wide) {
+    return ENCODE_REG_PAIR(reg_arg_low, reg_arg_high);
+  } else {
+    return reg_arg_low;
+  }
+}
+
+void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+  int offset = StackVisitor::GetOutVROffset(in_position);
+  if (cu_->instruction_set == kX86) {
+    /*
+     * On x86, the call instruction pushes the return address, which moves the stack pointer.
+     * Thus, we add another 4 bytes to reach the outs of the caller (the ins of the callee).
+     * TODO: This needs to be revisited for 64-bit.
+     */
+    offset += sizeof(uint32_t);
+  }
+
+  if (!rl_dest.wide) {
+    int reg = GetArgMappingToPhysicalReg(in_position);
+    if (reg != INVALID_REG) {
+      OpRegCopy(rl_dest.low_reg, reg);
+    } else {
+      LoadWordDisp(TargetReg(kSp), offset, rl_dest.low_reg);
+    }
+  } else {
+    int reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+    int reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
+
+    if (reg_arg_low != INVALID_REG && reg_arg_high != INVALID_REG) {
+      OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, reg_arg_low, reg_arg_high);
+    } else if (reg_arg_low != INVALID_REG && reg_arg_high == INVALID_REG) {
+      OpRegCopy(rl_dest.low_reg, reg_arg_low);
+      int offset_high = offset + sizeof(uint32_t);
+      LoadWordDisp(TargetReg(kSp), offset_high, rl_dest.high_reg);
+    } else if (reg_arg_low == INVALID_REG && reg_arg_high != INVALID_REG) {
+      OpRegCopy(rl_dest.high_reg, reg_arg_high);
+      LoadWordDisp(TargetReg(kSp), offset, rl_dest.low_reg);
+    } else {
+      LoadBaseDispWide(TargetReg(kSp), offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
+    }
+  }
+}
+
+bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    // The object is not "this" and has to be null-checked.
+    return false;
+  }
+
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool wide = (data.op_size == kLong);
+  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
+
+  // Point of no return - no aborts after this
+  GenPrintLabel(mir);
+  LockArg(data.object_arg);
+  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  int reg_obj = LoadArg(data.object_arg);
+  if (wide) {
+    LoadBaseDispWide(reg_obj, data.field_offset, rl_dest.low_reg, rl_dest.high_reg, INVALID_SREG);
+  } else {
+    LoadBaseDisp(reg_obj, data.field_offset, rl_dest.low_reg, kWord, INVALID_SREG);
+  }
+  if (data.is_volatile) {
+    GenMemBarrier(kLoadLoad);
+  }
+  return true;
+}
+
+bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    // The object is not "this" and has to be null-checked.
+    return false;
+  }
+
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool wide = (data.op_size == kLong);
+
+  // Point of no return - no aborts after this
+  GenPrintLabel(mir);
+  LockArg(data.object_arg);
+  LockArg(data.src_arg, wide);
+  int reg_obj = LoadArg(data.object_arg);
+  int reg_src = LoadArg(data.src_arg, wide);
+  if (data.is_volatile) {
+    GenMemBarrier(kStoreStore);
+  }
+  if (wide) {
+    int low_reg, high_reg;
+    DECODE_REG_PAIR(reg_src, low_reg, high_reg);
+    StoreBaseDispWide(reg_obj, data.field_offset, low_reg, high_reg);
+  } else {
+    StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
+  }
+  if (data.is_volatile) {
+    GenMemBarrier(kLoadLoad);
+  }
+  if (data.is_object) {
+    MarkGCCard(reg_src, reg_obj);
+  }
+  return true;
+}
+
+bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
+  const InlineReturnArgData& data = special.d.return_data;
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool wide = (data.op_size == kLong);
+  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
+
+  // Point of no return - no aborts after this
+  GenPrintLabel(mir);
+  LockArg(data.arg, wide);
+  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  LoadArgDirect(data.arg, rl_dest);
+  return true;
+}
+
+/*
+ * Special-case code generation for simple non-throwing leaf methods.
+ */
+bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special) {
+  DCHECK(special.flags & kInlineSpecial);
+  current_dalvik_offset_ = mir->offset;
+  MIR* return_mir = nullptr;
+  bool successful = false;
+
+  switch (special.opcode) {
+    case kInlineOpNop:
+      successful = true;
+      DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID);
+      return_mir = mir;
+      break;
+    case kInlineOpNonWideConst: {
+      successful = true;
+      RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F');
+      GenPrintLabel(mir);
+      LoadConstant(rl_dest.low_reg, static_cast<int>(special.d.data));
+      return_mir = mir_graph_->GetNextUnconditionalMir(bb, mir);
+      break;
+    }
+    case kInlineOpReturnArg:
+      successful = GenSpecialIdentity(mir, special);
+      return_mir = mir;
+      break;
+    case kInlineOpIGet:
+      successful = GenSpecialIGet(mir, special);
+      return_mir = mir_graph_->GetNextUnconditionalMir(bb, mir);
+      break;
+    case kInlineOpIPut:
+      successful = GenSpecialIPut(mir, special);
+      return_mir = mir_graph_->GetNextUnconditionalMir(bb, mir);
+      break;
+    default:
+      break;
+  }
+
+  if (successful) {
+    // Handle verbosity for return MIR.
+    if (return_mir != nullptr) {
+      current_dalvik_offset_ = return_mir->offset;
+      // Not handling special identity case because it already generated code as part
+      // of the return. The label should have been added before any code was generated.
+      if (special.opcode != kInlineOpReturnArg) {
+        GenPrintLabel(return_mir);
+      }
+    }
+    GenSpecialExitSequence();
+
+    core_spill_mask_ = 0;
+    num_core_spills_ = 0;
+    fp_spill_mask_ = 0;
+    num_fp_spills_ = 0;
+    frame_size_ = 0;
+    core_vmap_table_.clear();
+    fp_vmap_table_.clear();
+  }
+
+  return successful;
+}
+
 /*
  * Target-independent code generation.  Use only high-level
  * load/store utilities here, or target-dependent genXX() handlers
@@ -693,6 +925,14 @@
   }
 }
 
+void Mir2Lir::GenPrintLabel(MIR* mir) {
+  // Mark the beginning of a Dalvik instruction for line tracking.
+  if (cu_->verbose) {
+     char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
+     MarkBoundary(mir->offset, inst_str);
+  }
+}
+
 // Handle the content in each basic block.
 bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
   if (bb->block_type == kDead) return false;
@@ -745,11 +985,8 @@
     current_dalvik_offset_ = mir->offset;
     int opcode = mir->dalvikInsn.opcode;
 
-    // Mark the beginning of a Dalvik instruction for line tracking.
-    if (cu_->verbose) {
-       char* inst_str = mir_graph_->GetDalvikDisassembly(mir);
-       MarkBoundary(mir->offset, inst_str);
-    }
+    GenPrintLabel(mir);
+
     // Remember the first LIR for this block.
     if (head_lir == NULL) {
       head_lir = &block_label_list_[bb->id];
@@ -786,7 +1023,7 @@
   return false;
 }
 
-void Mir2Lir::SpecialMIR2LIR(const InlineMethod& special) {
+bool Mir2Lir::SpecialMIR2LIR(const InlineMethod& special) {
   cu_->NewTimingSplit("SpecialMIR2LIR");
   // Find the first DalvikByteCode block.
   int num_reachable_blocks = mir_graph_->GetNumReachableBlocks();
@@ -800,7 +1037,7 @@
     }
   }
   if (bb == NULL) {
-    return;
+    return false;
   }
   DCHECK_EQ(bb->start_offset, 0);
   DCHECK(bb->first_mir_insn != NULL);
@@ -813,7 +1050,7 @@
   ResetDefTracking();
   ClobberAllRegs();
 
-  GenSpecialCase(bb, mir, special);
+  return GenSpecialCase(bb, mir, special);
 }
 
 void Mir2Lir::MethodMIR2LIR() {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5d4439f..729aaee 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -185,6 +185,13 @@
 #define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
                                  ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
 
+#define ENCODE_REG_PAIR(low_reg, high_reg) (((low_reg) & 0xff) | (((high_reg) & 0xff) << 8))
+#define DECODE_REG_PAIR(both_regs, low_reg, high_reg) \
+  do { /* Unpacks an ENCODE_REG_PAIR value; note that both_regs is evaluated twice. */ \
+    (low_reg) = (both_regs) & 0xff; \
+    (high_reg) = ((both_regs) >> 8) & 0xff; \
+  } while (false)
+
 // Mask to denote sreg as the start of a double.  Must not interfere with low 16 bits.
 #define STARTING_DOUBLE_SREG 0x10000
 
@@ -738,7 +745,7 @@
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
     void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
     bool MethodBlockCodeGen(BasicBlock* bb);
-    void SpecialMIR2LIR(const InlineMethod& special);
+    bool SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
     /*
@@ -809,6 +816,7 @@
     virtual int AllocTypedTempPair(bool fp_hint, int reg_class) = 0;
     virtual int S2d(int low_reg, int high_reg) = 0;
     virtual int TargetReg(SpecialTargetRegister reg) = 0;
+    virtual int GetArgMappingToPhysicalReg(int arg_num) = 0;
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
@@ -949,8 +957,6 @@
                                  RegLocation rl_src) = 0;
     virtual void GenSparseSwitch(MIR* mir, DexOffset table_offset,
                                  RegLocation rl_src) = 0;
-    virtual void GenSpecialCase(BasicBlock* bb, MIR* mir,
-                                const InlineMethod& special) = 0;
     virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
     virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
@@ -1084,6 +1090,30 @@
                                             uint32_t type_idx, RegLocation rl_dest,
                                             RegLocation rl_src);
 
+    /**
+     * @brief Used to insert a marker that can be used to associate MIR with LIR.
+     * @details Only inserts marker if verbosity is enabled.
+     * @param mir The mir that is currently being generated.
+     */
+    void GenPrintLabel(MIR* mir);
+
+    /**
+     * @brief Used to generate return sequence when there is no frame.
+     * @details Assumes that the return registers have already been populated.
+     */
+    virtual void GenSpecialExitSequence() = 0;
+
+    /**
+     * @brief Used to generate code for special methods that are known to be
+     * small enough to work in frameless mode.
+     * @param bb The basic block of the first MIR.
+     * @param mir The first MIR of the special method.
+     * @param special Information about the special method.
+     * @return Returns whether or not this was handled successfully. Returns false
+     * if caller should punt to normal MIR2LIR conversion.
+     */
+    virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
   private:
     void ClobberBody(RegisterInfo* p);
     void ResetDefBody(RegisterInfo* p) {
@@ -1095,6 +1125,55 @@
       current_dalvik_offset_ = dexpc;
     }
 
+    /**
+     * @brief Used to lock the register holding argument in_position, if it was passed in one.
+     * @details Does nothing if the argument is passed via stack.
+     * @param in_position The argument number whose register to lock.
+     * @param wide Whether the argument is wide.
+     */
+    void LockArg(int in_position, bool wide = false);
+
+    /**
+     * @brief Used to load VR argument to a physical register.
+     * @details The load is only done if the argument is not already in a physical register.
+     * LockArg must have been previously called.
+     * @param in_position The argument number to load.
+     * @param wide Whether the argument is 64-bit or not.
+     * @return Returns the register (or register pair) for the loaded argument.
+     */
+    int LoadArg(int in_position, bool wide = false);
+
+    /**
+     * @brief Used to load a VR argument directly to a specified register location.
+     * @param in_position The argument number to place in register.
+     * @param rl_dest The register location where to place argument.
+     */
+    void LoadArgDirect(int in_position, RegLocation rl_dest);
+
+    /**
+     * @brief Used to generate LIR for special getter method.
+     * @param mir The mir that represents the iget.
+     * @param special Information about the special getter method.
+     * @return Returns whether LIR was successfully generated.
+     */
+    bool GenSpecialIGet(MIR* mir, const InlineMethod& special);
+
+    /**
+     * @brief Used to generate LIR for special setter method.
+     * @param mir The mir that represents the iput.
+     * @param special Information about the special setter method.
+     * @return Returns whether LIR was successfully generated.
+     */
+    bool GenSpecialIPut(MIR* mir, const InlineMethod& special);
+
+    /**
+     * @brief Used to generate LIR for special return-args method.
+     * @param mir The mir that represents the return of argument.
+     * @param special Information about the special return-args method.
+     * @return Returns whether LIR was successfully generated.
+     */
+    bool GenSpecialIdentity(MIR* mir, const InlineMethod& special);
+
 
   public:
     // TODO: add accessors for these.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 7f646e0..0613cdf 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -22,11 +22,6 @@
 
 namespace art {
 
-void X86Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
-                                const InlineMethod& special) {
-  // TODO
-}
-
 /*
  * The sparse table in the literal pool is an array of <key,displacement>
  * pairs.
@@ -255,4 +250,8 @@
   NewLIR0(kX86Ret);
 }
 
+void X86Mir2Lir::GenSpecialExitSequence() {
+  NewLIR0(kX86Ret);  // Just the return LIR; nothing else is emitted here.
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 70263d8..6100a1d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -52,6 +52,7 @@
     int AllocTypedTempPair(bool fp_hint, int reg_class);
     int S2d(int low_reg, int high_reg);
     int TargetReg(SpecialTargetRegister reg);
+    int GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -123,6 +124,7 @@
     void GenDivZeroCheck(int reg_lo, int reg_hi);
     void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
     void GenExitSequence();
+    void GenSpecialExitSequence();
     void GenFillArrayData(DexOffset table_offset, RegLocation rl_src);
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
@@ -135,7 +137,7 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
     /*
      * @brief Generate a two address long operation with a constant value
      * @param rl_dest location of result
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 9dd6116..9eb112b 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -167,7 +167,14 @@
       NewLIR2(kX86MovdrxRR, dest_hi, src_lo);
     } else {
       // Handle overlap
-      if (src_hi == dest_lo) {
+      if (src_hi == dest_lo && src_lo == dest_hi) {
+        // Deal with cycles.
+        int temp_reg = AllocTemp();
+        OpRegCopy(temp_reg, dest_hi);
+        OpRegCopy(dest_hi, dest_lo);
+        OpRegCopy(dest_lo, temp_reg);
+        FreeTemp(temp_reg);
+      } else if (src_hi == dest_lo) {
         OpRegCopy(dest_hi, src_hi);
         OpRegCopy(dest_lo, src_lo);
       } else {
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 1893ffc..8e04e64 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -92,6 +92,21 @@
   return res;
 }
 
+int X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  // 32-bit internal ABI: only arguments 0..2 map to registers (ARG1..ARG3); others are invalid.
+  // TODO: This is not 64-bit compliant and depends on new internal ABI.
+  if (arg_num == 0) {
+    return rX86_ARG1;
+  }
+  if (arg_num == 1) {
+    return rX86_ARG2;
+  }
+  if (arg_num == 2) {
+    return rX86_ARG3;
+  }
+  return INVALID_REG;
+}
+
 // Create a double from a pair of singles.
 int X86Mir2Lir::S2d(int low_reg, int high_reg) {
   return X86_S2D(low_reg, high_reg);
diff --git a/runtime/stack.h b/runtime/stack.h
index 8466069..2d56a74 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -654,10 +654,16 @@
     }  else if (reg < num_regs) {
       return locals_start + (reg * sizeof(uint32_t));
     } else {
-      return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + sizeof(uint32_t);  // Dalvik in.
+      // Handle ins.
+      return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + sizeof(StackReference<mirror::ArtMethod>);
     }
   }
 
+  static int GetOutVROffset(uint16_t out_num) {
+    // Per the stack model, outs start just above the Method ptr, one uint32_t slot each.
+    return sizeof(StackReference<mirror::ArtMethod>) + (out_num * sizeof(uint32_t));
+  }
+
   uintptr_t GetCurrentQuickFramePc() const {
     return cur_quick_frame_pc_;
   }