X86 invocation tidy-up.

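Switch x86 invoke generation to the shared GenInvoke.cc and delete the
x86-specific stub copy. Add a kX86CallT encoding for calls through
fs:[disp] (the fs segment base is Thread::Current()) plus opThreadMem
and opMem call helpers, implement loadConstantNoClobber with mov/xor,
and fill in the x86 callee-save clobber and call-temp lock/free
routines.

A minimal usage sketch of the new thread-call helper (the entrypoint
offset here is only illustrative):

    // Emits "call fs:[disp]" via the new kX86CallT encoding.
    opThreadMem(cUnit, kOpBlx, OFFSETOF_MEMBER(Thread, pMemcpy));
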
Change-Id: I5bbea741e6434f9172e4041e562a5b15e2f37f95
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index f2c9c71..be0c9f4 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -88,23 +88,6 @@
 
 void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset);
 
-/*
- * In the Arm code a it is typical to use the link register
- * to hold the target address.  However, for X86 we must
- * ensure that all branch instructions can be restarted if
- * there is a trap in the shadow.  Allocate a temp register.
- */
-int loadHelper(CompilationUnit* cUnit, int offset)
-{
-    UNIMPLEMENTED(WARNING);
-    return 0;
-#if 0
-    int tReg = oatAllocTemp(cUnit);
-    loadWordDisp(cUnit, rSELF, offset, tReg);
-    return tReg;
-#endif
-}
-
 void spillCoreRegs(CompilationUnit* cUnit) {
   if (cUnit->numCoreSpills == 0) {
     return;
@@ -143,7 +126,6 @@
       LOG(FATAL) << "Bad opcode: " << op;
       break;
   }
-  DCHECK((EncodingMap[opcode].flags & IS_BINARY_OP) != 0);
   newLIR2(cUnit, opcode, rDest, threadOffset);
 }
 
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index f3f3957..d64db02 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -283,12 +283,13 @@
   EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF),
 #undef EXT_0F_ENCODING_MAP
 
-  { kX86Jcc,   kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x70, 0, 0, 0, 0, 0 }, "Jcc", "!1c" },
-  { kX86Jmp,   kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp", "" },
-  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH,               { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "" },
-  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH,               { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "" },
-  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH,               { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "" },
-  { kX86Ret,   kNullary,NO_OPERAND | IS_BRANCH,               { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" },
+  { kX86Jcc,   kJcc,  IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0,             0, 0x70, 0, 0, 0, 0, 0 }, "Jcc", "!1c" },
+  { kX86Jmp,   kJmp,  IS_UNARY_OP  | IS_BRANCH | NEEDS_FIXUP, { 0,             0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp", "" },
+  { kX86CallR, kCall, IS_UNARY_OP  | IS_BRANCH,               { 0,             0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" },
+  { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD,     { 0,             0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
+  { kX86CallA, kCall, IS_QUAD_OP   | IS_BRANCH | IS_LOAD,     { 0,             0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
+  { kX86CallT, kCall, IS_UNARY_OP  | IS_BRANCH | IS_LOAD,     { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallT", "fs:[!0d]" },
+  { kX86Ret,   kNullary,NO_OPERAND | IS_BRANCH,               { 0,             0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" },
 };
 
 static size_t computeSize(X86EncodingMap* entry, int displacement, bool has_sib) {
@@ -413,6 +414,8 @@
           return computeSize(entry, lir->operands[1], false);
         case kX86CallA:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
           return computeSize(entry, lir->operands[3], true);
+        case kX86CallT:  // lir operands - 0: disp
+          return computeSize(entry, lir->operands[0], true);
         default:
           break;
       }
@@ -671,7 +674,8 @@
   }
 }
 
-void emitUnimplemented(CompilationUnit* cUnit, LIR* lir) {
+void emitUnimplemented(CompilationUnit* cUnit, const X86EncodingMap* entry, LIR* lir) {
+  UNIMPLEMENTED(WARNING) << "Unimplemented encoding for: " << entry->name;
   for (int i = 0; i < oatGetInsnSize(lir); ++i) {
     cUnit->codeBuffer.push_back(0xCC);  // push breakpoint instruction - int 3
   }
@@ -749,8 +753,7 @@
         emitRegImm(cUnit, entry, lir->operands[0], lir->operands[1]);
         break;
       default:
-        UNIMPLEMENTED(WARNING) << "Unimplemented encoding for: " << entry->name;
-        emitUnimplemented(cUnit, lir);
+        emitUnimplemented(cUnit, entry, lir);
         break;
     }
     CHECK_EQ(static_cast<size_t>(oatGetInsnSize(lir)),
diff --git a/src/compiler/codegen/x86/GenInvoke.cc b/src/compiler/codegen/x86/GenInvoke.cc
deleted file mode 100644
index e19afdd..0000000
--- a/src/compiler/codegen/x86/GenInvoke.cc
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace art {
-
-/*
- * This source files contains "gen" codegen routines that should
- * be applicable to most targets.  Only mid-level support utilities
- * and "op" calls may be used here.
- */
-
-
-/*
- * x86 targets will likely be different enough to need their own
- * invoke gen routies.
- */
-typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
-                            uint32_t methodIdx);
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- */
-void flushIns(CompilationUnit* cUnit)
-{
-    if (cUnit->numIns == 0)
-        return;
-    int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
-    /*
-     * Arguments passed in registers should be flushed
-     * to their backing locations in the frame for now.
-     * Also, we need to do initial assignment for promoted
-     * arguments.  NOTE: an older version of dx had an issue
-     * in which it would reuse static method argument registers.
-     * This could result in the same Dalvik virtual register
-     * being promoted to both core and fp regs.  In those
-     * cases, copy argument to both.  This will be uncommon
-     * enough that it isn't worth attempting to optimize.
-     */
-    for (int i = 0; i < cUnit->numIns; i++) {
-        PromotionMap vMap = cUnit->promotionMap[startVReg + i];
-        if (i == 0 || i == 1) {
-            // If arriving in register
-            if (vMap.coreLocation == kLocPhysReg) {
-                opRegCopy(cUnit, vMap.coreReg, i == 0 ? rARG1 : rARG2);
-            }
-            if (vMap.fpLocation == kLocPhysReg) {
-                opRegCopy(cUnit, vMap.fpReg, i == 0 ? rARG1 : rARG2);
-            }
-            // Also put a copy in memory in case we're partially promoted
-            storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                          i == 0 ? rARG1 : rARG2, kWord);
-        } else {
-            // If arriving in frame & promoted
-            if (vMap.coreLocation == kLocPhysReg) {
-                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                             vMap.coreReg);
-            }
-            if (vMap.fpLocation == kLocPhysReg) {
-                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                             vMap.fpReg);
-            }
-        }
-    }
-}
-
-/*
- * Bit of a hack here - in leiu of a real scheduling pass,
- * emit the next instruction in static & direct invoke sequences.
- */
-int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
-                   int state, uint32_t dexIdx, uint32_t unused)
-{
-    UNIMPLEMENTED(WARNING) << "nextSDCallInsn";
-    return 0;
-#if 0
-    switch(state) {
-        case 0:  // Get the current Method* [sets rARG0]
-            loadCurrMethodDirect(cUnit, rARG0);
-            break;
-        case 1:  // Get method->dex_cache_resolved_methods_
-            loadWordDisp(cUnit, rARG0,
-                Method::DexCacheResolvedMethodsOffset().Int32Value(),
-                rARG0);
-            break;
-        case 2:  // Grab target method*
-            loadWordDisp(cUnit, rARG0,
-                Array::DataOffset(sizeof(Object*)).Int32Value() + dexIdx * 4,
-                rARG0);
-            break;
-        case 3:  // Grab the code from the method*
-            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-        default:
-            return -1;
-    }
-    return state + 1;
-#endif
-}
-
-/*
- * Bit of a hack here - in leiu of a real scheduling pass,
- * emit the next instruction in a virtual invoke sequence.
- * We can use rLR as a temp prior to target address loading
- * Note also that we'll load the first argument ("this") into
- * rARG1 here rather than the standard loadArgRegs.
- */
-int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
-                  int state, uint32_t dexIdx, uint32_t methodIdx)
-{
-    UNIMPLEMENTED(WARNING) << "nextVCallInsn";
-    return 0;
-#if 0
-    RegLocation rlArg;
-    /*
-     * This is the fast path in which the target virtual method is
-     * fully resolved at compile time.
-     */
-    switch(state) {
-        case 0:  // Get "this" [set rARG1]
-            rlArg = oatGetSrc(cUnit, mir, 0);
-            loadValueDirectFixed(cUnit, rlArg, rARG1);
-            break;
-        case 1: // Is "this" null? [use rARG1]
-            genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
-            // get this->klass_ [use rARG1, set rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-        case 2: // Get this->klass_->vtable [usr rINVOKE_TGT, set rINVOKE_TGT]
-            loadWordDisp(cUnit, rINVOKE_TGT, Class::VTableOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-        case 3: // Get target method [use rINVOKE_TGT, set rARG0]
-            loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
-                         Array::DataOffset(sizeof(Object*)).Int32Value(),
-                         rARG0);
-            break;
-        case 4: // Get the compiled code address [uses rARG0, sets rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-        default:
-            return -1;
-    }
-    return state + 1;
-#endif
-}
-
-/*
- * Interleave launch code for INVOKE_SUPER.  See comments
- * for nextVCallIns.
- */
-int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
-                      int state, uint32_t dexIdx, uint32_t methodIdx)
-{
-    UNIMPLEMENTED(WARNING) << "nextSuperCallInsn";
-    return 0;
-#if 0
-    /*
-     * This is the fast path in which the target virtual method is
-     * fully resolved at compile time.  Note also that this path assumes
-     * that the check to verify that the target method index falls
-     * within the size of the super's vtable has been done at compile-time.
-     */
-    RegLocation rlArg;
-    switch(state) {
-        case 0: // Get current Method* [set rARG0]
-            loadCurrMethodDirect(cUnit, rARG0);
-            // Load "this" [set rARG1]
-            rlArg = oatGetSrc(cUnit, mir, 0);
-            loadValueDirectFixed(cUnit, rlArg, rARG1);
-            // Get method->declaring_class_ [use rARG0, set rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG0,
-                         Method::DeclaringClassOffset().Int32Value(),
-                         rINVOKE_TGT);
-            // Is "this" null? [use rARG1]
-            genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
-            break;
-        case 1: // method->declaring_class_->super_class [use/set rINVOKE_TGT]
-            loadWordDisp(cUnit, rINVOKE_TGT,
-                         Class::SuperClassOffset().Int32Value(), rINVOKE_TGT);
-            break;
-        case 2: // Get ...->super_class_->vtable [u/s rINVOKE_TGT]
-            loadWordDisp(cUnit, rINVOKE_TGT,
-                         Class::VTableOffset().Int32Value(), rINVOKE_TGT);
-            break;
-        case 3: // Get target method [use rINVOKE_TGT, set rARG0]
-            loadWordDisp(cUnit, rINVOKE_TGT, (methodIdx * 4) +
-                         Array::DataOffset(sizeof(Object*)).Int32Value(),
-                         rARG0);
-            break;
-        case 4: // target compiled code address [uses rARG0, sets rINVOKE_TGT]
-            loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
-                         rINVOKE_TGT);
-            break;
-        default:
-            return -1;
-    }
-    return state + 1;
-#endif
-}
-
-int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
-                     int state, uint32_t dexIdx, uint32_t methodIdx)
-{
-    UNIMPLEMENTED(WARNING) << "nextInvokeInsnSP";
-    return 0;
-#if 0
-    /*
-     * This handles the case in which the base method is not fully
-     * resolved at compile time, we bail to a runtime helper.
-     */
-    if (state == 0) {
-        // Load trampoline target
-        loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT);
-        // Load rARG0 with method index
-        loadConstant(cUnit, rARG0, dexIdx);
-        return 1;
-    }
-    return -1;
-#endif
-}
-
-int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
-                         int state, uint32_t dexIdx, uint32_t methodIdx)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                         uint32_t dexIdx, uint32_t methodIdx)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                        uint32_t dexIdx, uint32_t methodIdx)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, int state,
-                    uint32_t dexIdx, uint32_t methodIdx)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-/*
- * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
- * which will locate the target and continue on via a tail call.
- */
-int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, int state,
-                          uint32_t dexIdx, uint32_t unused)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
-                                         int state, uint32_t dexIdx,
-                                         uint32_t unused)
-{
-  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck);
-  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
-}
-
-int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
-                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
-                uint32_t methodIdx, bool skipThis)
-{
-    UNIMPLEMENTED(WARNING) << "loadArgRegs";
-    return 0;
-#if 0
-    int nextReg = rARG1;
-    int nextArg = 0;
-    if (skipThis) {
-        nextReg++;
-        nextArg++;
-    }
-    for (; (nextReg <= rARG3) && (nextArg < mir->ssaRep->numUses); nextReg++) {
-        RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++);
-        rlArg = oatUpdateRawLoc(cUnit, rlArg);
-        if (rlArg.wide && (nextReg <= rARG2)) {
-            loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1);
-            nextReg++;
-            nextArg++;
-        } else {
-            rlArg.wide = false;
-            loadValueDirectFixed(cUnit, rlArg, nextReg);
-        }
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-    }
-    return callState;
-#endif
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * rARG1 .. rARG3.  On entry rARG0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.  Note, this may also be called
- * for "range" variants if the number of arguments is 5 or fewer.
- */
-int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
-                         DecodedInstruction* dInsn, int callState,
-                         LIR** pcrLabel, NextCallInsn nextCallInsn,
-                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
-{
-    UNIMPLEMENTED(WARNING) << "genDalvikArgsNoRange";
-    return 0;
-#if 0
-    RegLocation rlArg;
-
-    /* If no arguments, just return */
-    if (dInsn->vA == 0)
-        return callState;
-
-    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-
-    DCHECK_LE(dInsn->vA, 5U);
-    if (dInsn->vA > 3) {
-        uint32_t nextUse = 3;
-        //Detect special case of wide arg spanning arg3/arg4
-        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
-        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
-        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
-        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
-            rlUse2.wide) {
-            int reg;
-            // Wide spans, we need the 2nd half of uses[2].
-            rlArg = oatUpdateLocWide(cUnit, rlUse2);
-            if (rlArg.location == kLocPhysReg) {
-                reg = rlArg.highReg;
-            } else {
-                // rARG2 & rARG3 can safely be used here
-                reg = rARG3;
-                loadWordDisp(cUnit, rSP,
-                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
-                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
-                                         methodIdx);
-            }
-            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
-            storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord);
-            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-            nextUse++;
-        }
-        // Loop through the rest
-        while (nextUse < dInsn->vA) {
-            int lowReg;
-            int highReg;
-            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
-            rlArg = oatUpdateRawLoc(cUnit, rlArg);
-            if (rlArg.location == kLocPhysReg) {
-                lowReg = rlArg.lowReg;
-                highReg = rlArg.highReg;
-            } else {
-                lowReg = rARG2;
-                highReg = rARG3;
-                if (rlArg.wide) {
-                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
-                } else {
-                    loadValueDirectFixed(cUnit, rlArg, lowReg);
-                }
-                callState = nextCallInsn(cUnit, mir, callState, dexIdx,
-                                         methodIdx);
-            }
-            int outsOffset = (nextUse + 1) * 4;
-            if (rlArg.wide) {
-                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
-                nextUse += 2;
-            } else {
-                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
-                nextUse++;
-            }
-            callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-        }
-    }
-
-    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
-                            dexIdx, methodIdx, skipThis);
-
-    if (pcrLabel) {
-        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
-    }
-    return callState;
-#endif
-}
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in rARG1-rARG3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in rARG1-rARG3
- *
- */
-int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
-                       DecodedInstruction* dInsn, int callState,
-                       LIR** pcrLabel, NextCallInsn nextCallInsn,
-                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
-{
-    UNIMPLEMENTED(WARNING) << "genDalvikArgsRange";
-    return 0;
-#if 0
-    int firstArg = dInsn->vC;
-    int numArgs = dInsn->vA;
-
-    // If we can treat it as non-range (Jumbo ops will use range form)
-    if (numArgs <= 5)
-        return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
-                                    nextCallInsn, dexIdx, methodIdx,
-                                    skipThis);
-    /*
-     * Make sure range list doesn't span the break between in normal
-     * Dalvik vRegs and the ins.
-     */
-    int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
-    int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
-    if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
-        LOG(FATAL) << "Argument list spanned locals & args";
-    }
-
-    /*
-     * First load the non-register arguments.  Both forms expect all
-     * of the source arguments to be in their home frame location, so
-     * scan the sReg names and flush any that have been promoted to
-     * frame backing storage.
-     */
-    // Scan the rest of the args - if in physReg flush to memory
-    for (int nextArg = 0; nextArg < numArgs;) {
-        RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
-        if (loc.wide) {
-            loc = oatUpdateLocWide(cUnit, loc);
-            if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
-                storeBaseDispWide(cUnit, rSP,
-                                  oatSRegOffset(cUnit, loc.sRegLow),
-                                  loc.lowReg, loc.highReg);
-            }
-            nextArg += 2;
-        } else {
-            loc = oatUpdateLoc(cUnit, loc);
-            if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
-                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
-                              loc.lowReg, kWord);
-            }
-            nextArg++;
-        }
-    }
-
-    int startOffset = oatSRegOffset(cUnit,
-        cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
-    int outsOffset = 4 /* Method* */ + (3 * 4);
-#if defined(TARGET_MIPS)
-    // Generate memcpy
-    opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
-    opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
-    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
-    loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
-    callRuntimeHelper(cUnit, rTgt);
-    // Restore Method*
-    loadCurrMethodDirect(cUnit, rARG0);
-#else
-    if (numArgs >= 20) {
-        // Generate memcpy
-        opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
-        opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
-        int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
-        loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
-        callRuntimeHelper(cUnit, rTgt);
-        // Restore Method*
-        loadCurrMethodDirect(cUnit, rARG0);
-    } else {
-        // Use vldm/vstm pair using rARG3 as a temp
-        int regsLeft = std::min(numArgs - 3, 16);
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
-        LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
-        //TUNING: loosen barrier
-        ld->defMask = ENCODE_ALL;
-        setMemRefType(ld, true /* isLoad */, kDalvikReg);
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-        opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-        LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
-        setMemRefType(st, false /* isLoad */, kDalvikReg);
-        st->defMask = ENCODE_ALL;
-        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-    }
-#endif
-
-    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
-                            dexIdx, methodIdx, skipThis);
-
-    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
-    if (pcrLabel) {
-        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), rARG1, mir);
-    }
-    return callState;
-#endif
-}
-
-}  // namespace art
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index f3fc84a..96fa08a 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -92,41 +92,30 @@
  * 1) rDest is freshly returned from oatAllocTemp or
  * 2) The codegen is under fixed register usage
  */
-LIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest,
-                               int value)
-{
-    UNIMPLEMENTED(WARNING) << "loadConstantNoClobber";
-    return NULL;
-#if 0
-    LIR *res;
+LIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest, int value) {
+  LIR *res;
 
-    int rDestSave = rDest;
-    int isFpReg = FPREG(rDest);
-    if (isFpReg) {
-        DCHECK(SINGLEREG(rDest));
-        rDest = oatAllocTemp(cUnit);
-    }
+  int rDestSave = rDest;
+  int isFpReg = FPREG(rDest);
+  if (isFpReg) {
+    DCHECK(SINGLEREG(rDest));
+    rDest = oatAllocTemp(cUnit);
+  }
 
-    /* See if the value can be constructed cheaply */
-    if (value == 0) {
-        res = newLIR2(cUnit, kX86Move, rDest, r_ZERO);
-    } else if ((value > 0) && (value <= 65535)) {
-        res = newLIR3(cUnit, kX86Ori, rDest, r_ZERO, value);
-    } else if ((value < 0) && (value >= -32768)) {
-        res = newLIR3(cUnit, kX86Addiu, rDest, r_ZERO, value);
-    } else {
-        res = newLIR2(cUnit, kX86Lui, rDest, value>>16);
-        if (value & 0xffff)
-            newLIR3(cUnit, kX86Ori, rDest, rDest, value);
-    }
+  /* See if the value can be constructed cheaply */
+  if (value == 0) {
+    res = newLIR2(cUnit, kX86Xor32RR, rDest, rDest);
+  } else {
+    res = newLIR2(cUnit, kX86Mov32RI, rDest, value);
+  }
 
-    if (isFpReg) {
-        newLIR2(cUnit, kX86Mtc1, rDest, rDestSave);
-        oatFreeTemp(cUnit, rDest);
-    }
+  if (isFpReg) {
+    UNIMPLEMENTED(FATAL);
+    newLIR2(cUnit, kX86Mov32RR, rDest, rDestSave);
+    oatFreeTemp(cUnit, rDest);
+  }
 
-    return res;
-#endif
+  return res;
 }
 
 LIR* opBranchUnconditional(CompilationUnit *cUnit, OpKind op) {
@@ -311,6 +300,28 @@
   return opRegImm(cUnit, op, rDest, value);
 }
 
+LIR* opThreadMem(CompilationUnit* cUnit, OpKind op, int threadOffset) {
+  X86OpCode opcode = kX86Bkpt;
+  switch (op) {
+    case kOpBlx: opcode = kX86CallT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  return newLIR1(cUnit, opcode, threadOffset);
+}
+
+LIR* opMem(CompilationUnit* cUnit, OpKind op, int rBase, int disp) {
+  X86OpCode opcode = kX86Bkpt;
+  switch (op) {
+    case kOpBlx: opcode = kX86CallM;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+  }
+  return newLIR2(cUnit, opcode, rBase, disp);
+}
+
 LIR *loadConstantValueWide(CompilationUnit *cUnit, int rDestLo,
                                      int rDestHi, int valLo, int valHi)
 {
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index e7f18ca..5542317 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -447,9 +447,6 @@
   bool srcFP = FPREG(srcLo) && FPREG(srcHi);
   assert(FPREG(srcLo) == FPREG(srcHi));
   assert(FPREG(destLo) == FPREG(destHi));
-  LOG(INFO) << "RegCopyWide: destLo=" << destLo << " destHi=" << destHi
-      << " srcLo=" << srcLo << " srcHi=" << srcHi
-      << " dFP=" << destFP << " sFP=" << srcFP;
   if (destFP) {
     if (srcFP) {
       opRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi));
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index c805333..a767ff8 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -232,6 +232,7 @@
 #define rARG2 rCX
 #define rRET0 rAX
 #define rRET1 rDX
+#define rINVOKE_TGT rAX
 
 #define isPseudoOpcode(opCode) ((int)(opCode) < 0)
 
@@ -442,6 +443,7 @@
     kX86CallM,  // call [base + disp]; lir operands - 0: base, 1: disp
     kX86CallA,  // call [base + index * scale + disp]
                 // lir operands - 0: base, 1: index, 2: scale, 3: disp
+    kX86CallT,  // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
     kX86Ret,    // ret; no lir operands
     kX86Last
 };
diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc
index ff5391d..466faa4 100644
--- a/src/compiler/codegen/x86/X86RallocUtil.cc
+++ b/src/compiler/codegen/x86/X86RallocUtil.cc
@@ -96,48 +96,10 @@
 /* Clobber all regs that might be used by an external C call */
 extern void oatClobberCalleeSave(CompilationUnit *cUnit)
 {
-    UNIMPLEMENTED(WARNING) << "oatClobberCalleeSave";
-#if 0
-    oatClobber(cUnit, r_ZERO);
-    oatClobber(cUnit, r_AT);
-    oatClobber(cUnit, r_V0);
-    oatClobber(cUnit, r_V1);
-    oatClobber(cUnit, r_A0);
-    oatClobber(cUnit, r_A1);
-    oatClobber(cUnit, r_A2);
-    oatClobber(cUnit, r_A3);
-    oatClobber(cUnit, r_T0);
-    oatClobber(cUnit, r_T1);
-    oatClobber(cUnit, r_T2);
-    oatClobber(cUnit, r_T3);
-    oatClobber(cUnit, r_T4);
-    oatClobber(cUnit, r_T5);
-    oatClobber(cUnit, r_T6);
-    oatClobber(cUnit, r_T7);
-    oatClobber(cUnit, r_T8);
-    oatClobber(cUnit, r_T9);
-    oatClobber(cUnit, r_K0);
-    oatClobber(cUnit, r_K1);
-    oatClobber(cUnit, r_GP);
-    oatClobber(cUnit, r_FP);
-    oatClobber(cUnit, r_RA);
-    oatClobber(cUnit, r_F0);
-    oatClobber(cUnit, r_F1);
-    oatClobber(cUnit, r_F2);
-    oatClobber(cUnit, r_F3);
-    oatClobber(cUnit, r_F4);
-    oatClobber(cUnit, r_F5);
-    oatClobber(cUnit, r_F6);
-    oatClobber(cUnit, r_F7);
-    oatClobber(cUnit, r_F8);
-    oatClobber(cUnit, r_F9);
-    oatClobber(cUnit, r_F10);
-    oatClobber(cUnit, r_F11);
-    oatClobber(cUnit, r_F12);
-    oatClobber(cUnit, r_F13);
-    oatClobber(cUnit, r_F14);
-    oatClobber(cUnit, r_F15);
-#endif
+    oatClobber(cUnit, rBX);
+    oatClobber(cUnit, rBP);
+    oatClobber(cUnit, rSI);
+    oatClobber(cUnit, rDI);
 }
 
 extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit) {
@@ -170,25 +132,17 @@
 /* To be used when explicitly managing register use */
 extern void oatLockCallTemps(CompilationUnit* cUnit)
 {
-    UNIMPLEMENTED(WARNING) << "oatLockCallTemps";
-#if 0
     oatLockTemp(cUnit, rARG0);
     oatLockTemp(cUnit, rARG1);
     oatLockTemp(cUnit, rARG2);
-    oatLockTemp(cUnit, rARG3);
-#endif
 }
 
 /* To be used when explicitly managing register use */
 extern void oatFreeCallTemps(CompilationUnit* cUnit)
 {
-    UNIMPLEMENTED(WARNING) << "oatFreeCallTemps";
-#if 0
     oatFreeTemp(cUnit, rARG0);
     oatFreeTemp(cUnit, rARG1);
     oatFreeTemp(cUnit, rARG2);
-    oatFreeTemp(cUnit, rARG3);
-#endif
 }
 
 /* Convert an instruction to a NOP */
diff --git a/src/compiler/codegen/x86/x86/Codegen.cc b/src/compiler/codegen/x86/x86/Codegen.cc
index dd9217d..f8719d9 100644
--- a/src/compiler/codegen/x86/x86/Codegen.cc
+++ b/src/compiler/codegen/x86/x86/Codegen.cc
@@ -37,7 +37,7 @@
 /* Target independent gen routines */
 #include "../../GenCommon.cc"
 /* Shared invoke gen routines */
-#include "../GenInvoke.cc"
+#include "../../GenInvoke.cc"
 /* X86-specific factory utilities */
 #include "../ArchFactory.cc"