Various optimization fixes

Multiple problems surfaced when register promotion was enabled.  This
CL takes care of a few, but more remain.  The main problems dealt with
here are related to not having data types handy on invokes.  Solved by
interpreting the shorty of the target and updating the operand names
appropriately.

The other problem was a little nastier.  The codegen infrastructure wasn't
expecting wide results to overlap source operands.  For example:

    add-long (v0,v1) = (v1,v2) + (v3,v4)

In the old world, the result pair would start with a fresh name
and temps.  In the new world, though, the register promotion mechanism
retains the mappings.  Not a difficult problem to solve, but I'll
need to very carefully examine the existing long op generators to
make sure they can handle overlaps.

Change-Id: I019607620f7a78cadc9e7c57f247806d0a68243d
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 5a38c47..934139b 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -120,6 +120,8 @@
         const Method* calleeMethod;
         // Used by the inlined invoke to find the class and method pointers
         CallsiteInfo* callsiteInfo;
+        // Used to quickly locate all Phi opcodes
+        struct MIR* phiNext;
     } meta;
 } MIR;
 
@@ -223,6 +225,8 @@
     int* SSALastDefs;                   // length == method->registersSize
     ArenaBitVector* isConstantV;        // length == numSSAReg
     int* constantValues;                // length == numSSAReg
+    int* phiAliasMap;                   // length == numSSAReg
+    MIR* phiList;
 
     /* Map SSA names to location */
     RegLocation* regLocation;
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 97a5e7b..65aa6d8 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -1954,9 +1954,9 @@
     int i;
 
     mir->ssaRep->numUses = numUses;
-    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, false);
+    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true);
     // NOTE: will be filled in during type & size inference pass
-    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, false);
+    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true);
 
     for (i = 0; i < numUses; i++) {
         handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->arg[i], i);
@@ -1971,9 +1971,9 @@
     int i;
 
     mir->ssaRep->numUses = numUses;
-    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, false);
+    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true);
     // NOTE: will be filled in during type & size inference pass
-    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, false);
+    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true);
 
     for (i = 0; i < numUses; i++) {
         handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+i, i);
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index a829f13..eef6888 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -702,6 +702,7 @@
     cUnit.printMe = compiler.IsVerbose();
     cUnit.printMeVerbose = compiler.IsVerbose();
     cUnit.disableOpt = 0 |
+         (1 << kTrackLiveTemps) |
          (1 << kLoadStoreElimination) |
          (1 << kLoadHoisting) |
          (1 << kSuppressLoads) |
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index 7111f6d..e7844b6 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -28,6 +28,26 @@
     return change;
 }
 
+STATIC bool remapNames(CompilationUnit* cUnit, BasicBlock* bb)
+{
+    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
+        bb->blockType != kExitBlock)
+        return false;
+
+    for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        SSARepresentation *ssaRep = mir->ssaRep;
+        if (ssaRep) {
+            for (int i = 0; i < ssaRep->numUses; i++) {
+                ssaRep->uses[i] = cUnit->phiAliasMap[ssaRep->uses[i]];
+            }
+            for (int i = 0; i < ssaRep->numDefs; i++) {
+                ssaRep->defs[i] = cUnit->phiAliasMap[ssaRep->defs[i]];
+            }
+        }
+    }
+    return false;
+}
+
 /*
  * Infer types and sizes.  We don't need to track change on sizes,
  * as it doesn't propagate.  We're guaranteed at least one pass through
@@ -191,6 +211,11 @@
         }
     }
 
+    /* Remap names */
+    oatDataFlowAnalysisDispatcher(cUnit, remapNames,
+                                  kPreOrderDFSTraversal,
+                                  false /* isIterative */);
+
     /* Do type & size inference pass */
     oatDataFlowAnalysisDispatcher(cUnit, inferTypeAndSize,
                                   kPreOrderDFSTraversal,
diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc
index 35f43ac..d35a8c7 100644
--- a/src/compiler/SSATransformation.cc
+++ b/src/compiler/SSATransformation.cc
@@ -488,6 +488,8 @@
             phi->dalvikInsn.opcode = (Opcode)kMirOpPhi;
             phi->dalvikInsn.vA = dalvikReg;
             phi->offset = phiBB->startOffset;
+            phi->meta.phiNext = cUnit->phiList;
+            cUnit->phiList = phi;
             oatPrependMIR(phiBB, phi);
         }
     }
diff --git a/src/compiler/codegen/Optimizer.h b/src/compiler/codegen/Optimizer.h
index 23f0f17..c946e93 100644
--- a/src/compiler/codegen/Optimizer.h
+++ b/src/compiler/codegen/Optimizer.h
@@ -29,6 +29,7 @@
     kSuppressLoads,
     kNullCheckElimination,
     kPromoteRegs,
+    kTrackLiveTemps,
 };
 
 /* Forward declarations */
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index e436eea..5308f7c 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -20,6 +20,9 @@
 #include "../../Dalvik.h"
 #include "../../CompilerInternals.h"
 
+// Set to 1 to measure cost of suspend check
+#define NO_SUSPEND 0
+
 /*
  * Runtime register usage conventions.
  *
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 0c99e18..0a5fc7e 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -198,6 +198,7 @@
         loadWordDisp(cUnit, rBase, art::Array::DataOffset().Int32Value() +
                       sizeof(int32_t*)* typeIdx, rBase);
         // TUNING: fast path should fall through
+        // TUNING: Try a conditional skip here, might be faster
         ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rBase, 0);
         loadWordDisp(cUnit, rSELF,
                      OFFSETOF_MEMBER(Thread, pInitializeStaticStorage), rLR);
@@ -536,19 +537,29 @@
     return state + 1;
 }
 
-/* Load up to 3 arguments in r1..r3 */
 STATIC int loadArgRegs(CompilationUnit* cUnit, MIR* mir,
-                       DecodedInstruction* dInsn, int callState,
-                       int *args, NextCallInsn nextCallInsn, ArmLIR* rollback)
+                          DecodedInstruction* dInsn, int callState,
+                          NextCallInsn nextCallInsn, ArmLIR* rollback,
+                          bool skipThis)
 {
-    for (int i = 0; i < 3; i++) {
-        if (args[i] != INVALID_REG) {
-            // Arguments are treated as a series of untyped 32-bit values.
-            RegLocation rlArg = oatGetRawSrc(cUnit, mir, i);
+    int nextReg = r1;
+    int nextArg = 0;
+    if (skipThis) {
+        nextReg++;
+        nextArg++;
+    }
+    for (; (nextReg <= r3) && (nextArg < mir->ssaRep->numUses); nextReg++) {
+        RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++);
+        rlArg = oatUpdateRawLoc(cUnit, rlArg);
+        if (rlArg.wide && (nextReg <= r2)) {
+            loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1);
+            nextReg++;
+            nextArg++;
+        } else {
             rlArg.wide = false;
-            loadValueDirectFixed(cUnit, rlArg, r1 + i);
-            callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
+            loadValueDirectFixed(cUnit, rlArg, nextReg);
         }
+        callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
     }
     return callState;
 }
@@ -724,7 +735,6 @@
                                 bool skipThis)
 {
     RegLocation rlArg;
-    int registerArgs[3];
 
     /* If no arguments, just return */
     if (dInsn->vA == 0)
@@ -732,40 +742,66 @@
 
     callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
 
-    /*
-     * Load frame arguments arg4 & arg5 first. Coded a little odd to
-     * pre-schedule the method pointer target.
-     */
-    for (unsigned int i=3; i < dInsn->vA; i++) {
-        int reg;
-        // Treating args as untyped 32-bit chunks
-        rlArg = oatGetRawSrc(cUnit, mir, i);
-        rlArg.wide = false;
-        rlArg = oatUpdateLoc(cUnit, rlArg);
-        if (rlArg.location == kLocPhysReg) {
-            reg = rlArg.lowReg;
-        } else {
-            // r3 is the last arg register loaded, so can safely be used here
-            reg = r3;
-            loadValueDirectFixed(cUnit, rlArg, reg);
+    DCHECK_LE(dInsn->vA, 5U);
+    if (dInsn->vA > 3) {
+        uint32_t nextUse = 3;
+        //Detect special case of wide arg spanning arg3/arg4
+        RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
+        RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
+        RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
+        if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) &&
+            rlUse2.wide) {
+            int reg;
+            // Wide spans, we need the 2nd half of uses[2].
+            rlArg = oatUpdateLocWide(cUnit, rlUse2);
+            if (rlArg.location == kLocPhysReg) {
+                reg = rlArg.highReg;
+            } else {
+                // r2 & r3 can safely be used here
+                reg = r3;
+                loadWordDisp(cUnit, rSP, rlArg.spOffset + 4, reg);
+                callState = nextCallInsn(cUnit, mir, dInsn, callState,
+                                         rollback);
+            }
+            storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord);
+            storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord);
+            callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
+            nextUse++;
+        }
+        // Loop through the rest
+        while (nextUse < dInsn->vA) {
+            int lowReg;
+            int highReg;
+            rlArg = oatGetRawSrc(cUnit, mir, nextUse);
+            rlArg = oatUpdateRawLoc(cUnit, rlArg);
+            if (rlArg.location == kLocPhysReg) {
+                lowReg = rlArg.lowReg;
+                highReg = rlArg.highReg;
+            } else {
+                lowReg = r2;
+                highReg = r3;
+                if (rlArg.wide) {
+                    loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
+                } else {
+                    loadValueDirectFixed(cUnit, rlArg, lowReg);
+                }
+                callState = nextCallInsn(cUnit, mir, dInsn, callState,
+                                         rollback);
+            }
+            int outsOffset = (nextUse + 1) * 4;
+            if (rlArg.wide) {
+                storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
+                nextUse += 2;
+            } else {
+                storeWordDisp(cUnit, rSP, outsOffset, lowReg);
+                nextUse++;
+            }
             callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
         }
-        storeBaseDisp(cUnit, rSP, (i + 1) * 4, reg, kWord);
-        callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
     }
 
-    /* Load register arguments r1..r3 */
-    for (unsigned int i = 0; i < 3; i++) {
-        if (i < dInsn->vA)
-            registerArgs[i] = (isRange) ? dInsn->vC + i : i;
-        else
-            registerArgs[i] = INVALID_REG;
-    }
-    if (skipThis) {
-        registerArgs[0] = INVALID_REG;
-    }
-    callState = loadArgRegs(cUnit, mir, dInsn, callState, registerArgs,
-                            nextCallInsn, rollback);
+    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
+                            rollback, skipThis);
 
     //TODO: better to move this into CallInsn lists
     // Load direct & need a "this" null check?
@@ -797,7 +833,6 @@
 {
     int firstArg = dInsn->vC;
     int numArgs = dInsn->vA;
-    int registerArgs[3];
 
     // If we can treat it as non-range (Jumbo ops will use range form)
     if (numArgs <= 5)
@@ -820,23 +855,21 @@
      * frame backing storage.
      */
     // Scan the rest of the args - if in physReg flush to memory
-    for (int i = 3; i < numArgs; i++) {
-        RegLocation loc = oatGetRawSrc(cUnit, mir, i);
+    for (int nextArg = 0; nextArg < numArgs;) {
+        RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
         if (loc.wide) {
             loc = oatUpdateLocWide(cUnit, loc);
-            if (loc.location == kLocPhysReg) {  // TUNING: if dirty?
+            if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
                 storeBaseDispWide(cUnit, rSP, loc.spOffset, loc.lowReg,
                                   loc.highReg);
-                callState = nextCallInsn(cUnit, mir, dInsn, callState,
-                                         rollback);
             }
+            nextArg += 2;
         } else {
             loc = oatUpdateLoc(cUnit, loc);
-            if (loc.location == kLocPhysReg) {  // TUNING: if dirty?
+            if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
                 storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
-                callState = nextCallInsn(cUnit, mir, dInsn, callState,
-                                         rollback);
             }
+            nextArg++;
         }
     }
 
@@ -869,18 +902,8 @@
         callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
     }
 
-    // Handle the 1st 3 in r1, r2 & r3
-    for (unsigned int i = 0; i < 3; i++) {
-       if (i < dInsn->vA)
-            registerArgs[i] = dInsn->vC + i;
-        else
-            registerArgs[i] = INVALID_REG;
-    }
-    if (skipThis) {
-        registerArgs[0] = INVALID_REG;
-    }
-    callState = loadArgRegs(cUnit, mir, dInsn, callState, registerArgs,
-                            nextCallInsn, rollback);
+    callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
+                            rollback, skipThis);
 
     callState = nextCallInsn(cUnit, mir, dInsn, callState, rollback);
     return callState;
@@ -907,6 +930,7 @@
     ArmLIR* nullCk;
     ArmLIR** pNullCk = direct ? &nullCk : NULL;
     NextCallInsn nextCallInsn = nextSDCallInsn;
+    oatFlushAllRegs(cUnit);    /* Everything to home location */
 
     // Explicit register usage
     oatLockCallTemps(cUnit);
@@ -937,6 +961,7 @@
     DecodedInstruction* dInsn = &mir->dalvikInsn;
     int callState = 0;
     ArmLIR* nullCk;
+    oatFlushAllRegs(cUnit);    /* Everything to home location */
 
     // Explicit register usage
     oatLockCallTemps(cUnit);
@@ -963,12 +988,12 @@
 {
     DecodedInstruction* dInsn = &mir->dalvikInsn;
     int callState = 0;
-    ArmLIR* nullCk;
     ArmLIR* rollback;
     art::ClassLinker* class_linker = art::Runtime::Current()->GetClassLinker();
     Method* baseMethod = class_linker->ResolveMethod(dInsn->vB, cUnit->method, false);
     NextCallInsn nextCallInsn;
     bool fastPath = true;
+    oatFlushAllRegs(cUnit);    /* Everything to home location */
 
     // Explicit register usage
     oatLockCallTemps(cUnit);
@@ -996,10 +1021,10 @@
         rollback->defMask = -1;
     }
     if (mir->dalvikInsn.opcode == OP_INVOKE_SUPER)
-        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, NULL,
                                          false, nextCallInsn, rollback, true);
     else
-        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, NULL,
                                        nextCallInsn, rollback, true);
     // Finish up any of the call sequence not interleaved in arg loading
     while (callState >= 0) {
@@ -1015,11 +1040,11 @@
 {
     DecodedInstruction* dInsn = &mir->dalvikInsn;
     int callState = 0;
-    ArmLIR* nullCk;
     ArmLIR* rollback;
     art::ClassLinker* class_linker = art::Runtime::Current()->GetClassLinker();
     Method* method = class_linker->ResolveMethod(dInsn->vB, cUnit->method, false);
     NextCallInsn nextCallInsn;
+    oatFlushAllRegs(cUnit);    /* Everything to home location */
 
     // Explicit register usage
     oatLockCallTemps(cUnit);
@@ -1035,10 +1060,10 @@
         rollback = NULL;
     }
     if (mir->dalvikInsn.opcode == OP_INVOKE_VIRTUAL)
-        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, NULL,
                                          false, nextCallInsn, rollback, true);
     else
-        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, NULL,
                                        nextCallInsn, rollback, true);
     // Finish up any of the call sequence not interleaved in arg loading
     while (callState >= 0) {
@@ -1109,6 +1134,7 @@
             break;
 
         case OP_RETURN_VOID:
+            genSuspendPoll(cUnit, mir);
             break;
 
         case OP_RETURN:
@@ -1889,7 +1915,9 @@
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
 
         oatResetRegPool(cUnit);
-        oatClobberAllRegs(cUnit);
+        if (cUnit->disableOpt & (1 << kTrackLiveTemps)) {
+            oatClobberAllRegs(cUnit);
+        }
 
         if (cUnit->disableOpt & (1 << kSuppressLoads)) {
             oatResetDefTracking(cUnit);
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 43b8ddc..c247fe7 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -828,9 +828,22 @@
     rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
     rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
     rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
-    opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
-                rlSrc2.highReg);
+    // The longs may overlap - use intermediate temp if so
+    if (rlResult.lowReg == rlSrc1.highReg) {
+        //FIXME: review all long arithmetic ops - there may be more of these
+        int tReg = oatAllocTemp(cUnit);
+        genRegCopy(cUnit, tReg, rlSrc1.highReg);
+        opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg,
+                    rlSrc2.lowReg);
+        opRegRegReg(cUnit, secondOp, rlResult.highReg, tReg,
+                    rlSrc2.highReg);
+        oatFreeTemp(cUnit, tReg);
+    } else {
+        opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg,
+                    rlSrc2.lowReg);
+        opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
+                    rlSrc2.highReg);
+    }
     /*
      * NOTE: If rlDest refers to a frame variable in a large frame, the
      * following storeValueWide might need to allocate a temp register.
@@ -864,6 +877,10 @@
     oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
     // Keep special registers from being allocated
     for (int i = 0; i < numReserved; i++) {
+        if (NO_SUSPEND && (reservedRegs[i] == rSUSPEND)) {
+            //To measure cost of suspend check
+            continue;
+        }
         oatMarkInUse(cUnit, reservedRegs[i]);
     }
     // Mark temp regs - all others not in use can be used for promotion
@@ -873,6 +890,22 @@
     for (int i = 0; i < numFPTemps; i++) {
         oatMarkTemp(cUnit, fpTemps[i]);
     }
+    // Construct the alias map.
+    cUnit->phiAliasMap = (int*)oatNew(cUnit->numSSARegs *
+                                      sizeof(cUnit->phiAliasMap[0]), false);
+    for (int i = 0; i < cUnit->numSSARegs; i++) {
+        cUnit->phiAliasMap[i] = i;
+    }
+    for (MIR* phi = cUnit->phiList; phi; phi = phi->meta.phiNext) {
+        int defReg = phi->ssaRep->defs[0];
+        for (int i = 0; i < phi->ssaRep->numUses; i++) {
+           for (int j = 0; j < cUnit->numSSARegs; j++) {
+               if (cUnit->phiAliasMap[j] == phi->ssaRep->uses[i]) {
+                   cUnit->phiAliasMap[j] = defReg;
+               }
+           }
+        }
+    }
 }
 
 /*
@@ -1674,7 +1707,7 @@
 /* Check if we need to check for pending suspend request */
 STATIC void genSuspendTest(CompilationUnit* cUnit, MIR* mir)
 {
-    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+    if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
@@ -1693,7 +1726,7 @@
 /* Check for pending suspend request.  */
 STATIC void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
 {
-    if (mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
+    if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
     oatLockCallTemps(cUnit);   // Explicit register usage
diff --git a/src/dex_verifier.cc b/src/dex_verifier.cc
index ee81543..329508f 100644
--- a/src/dex_verifier.cc
+++ b/src/dex_verifier.cc
@@ -4364,6 +4364,10 @@
 
   if (res_class == NULL) {
     //*failure = VERIFY_ERROR_NO_CLASS;
+#if 1
+    // FIXME - is this correct?  Inserted as a workaround?
+    Thread::Current()->ClearException();
+#endif
     LOG(ERROR) << "VFY: can't find class with index 0x" << std::hex << class_idx << std::dec;
     return NULL;
   }