Merge "Fixed test for static final fields that don't use <clinit>." into dalvik-dev
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index 1d969e7..c0dcaf7 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -110,5 +110,6 @@
                            const ArenaBitVector* bv, int length);
 void oatGetBlockName(struct BasicBlock* bb, char* name);
 const char* oatGetShortyFromTargetIdx(CompilationUnit*, int);
+void oatDumpRegLocTable(struct RegLocation*, int);
 
 #endif  // ART_SRC_COMPILER_COMPILER_UTILITY_H_
diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h
index e2cb1ce..e87da88 100644
--- a/src/compiler/codegen/Ralloc.h
+++ b/src/compiler/codegen/Ralloc.h
@@ -137,7 +137,7 @@
 extern RegLocation oatGetReturnWide(CompilationUnit* cUnit);
 
 /* Clobber all regs that might be used by an external C call */
-extern void oatClobberCallRegs(CompilationUnit* cUnit);
+extern void oatClobberCalleeSave(CompilationUnit* cUnit);
 
 extern RegisterInfo *oatIsTemp(CompilationUnit* cUnit, int reg);
 
@@ -232,4 +232,7 @@
 
 extern void oatDoPromotion(CompilationUnit* cUnit);
 extern int oatVRegOffset(CompilationUnit* cUnit, int reg);
+extern void oatDumpCoreRegPool(CompilationUnit* cUnit);      // dump the core register pool
+extern void oatDumpFpRegPool(CompilationUnit* cUnit);        // dump the FP register pool
+extern bool oatCheckCorePoolSanity(CompilationUnit* cUnit);  // returns true; intended for use inside DCHECK()
 #endif // ART_SRC_COMPILER_RALLOC_H_
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 69b98d4..47d5ac0 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -87,6 +87,16 @@
     LOG(INFO) << "================================================";
 }
 
+void oatDumpCoreRegPool(CompilationUnit* cUnit)  // Dump all numCoreRegs entries of the core register pool
+{
+    dumpRegPool(cUnit->regPool->coreRegs, cUnit->regPool->numCoreRegs);
+}
+
+void oatDumpFpRegPool(CompilationUnit* cUnit)  // Dump all numFPRegs entries of the floating-point register pool
+{
+    dumpRegPool(cUnit->regPool->FPRegs, cUnit->regPool->numFPRegs);
+}
+
 /* Get info for a reg. */
 STATIC RegisterInfo* getRegInfo(CompilationUnit* cUnit, int reg)
 {
@@ -154,12 +164,8 @@
     for (i=0; i< numRegs; i++) {
         if (p[i].reg == reg) {
             if (p[i].isTemp) {
-                if (p[i].isTemp && p[i].live && p[i].dirty) {
-                    if (p[i].pair) {
-                        oatFlushRegWide(cUnit, p[i].reg, p[i].partner);
-                    } else {
-                        oatFlushReg(cUnit, p[i].reg);
-                    }
+                if (p[i].live && p[i].dirty) {
+                    LOG(FATAL) << "Live & dirty temp in clobber";
                 }
                 p[i].live = false;
                 p[i].sReg = INVALID_SREG;
@@ -376,6 +382,7 @@
         next++;
     }
     if (required) {
+        oatCodegenDump(cUnit);
         dumpRegPool(cUnit->regPool->coreRegs,
                     cUnit->regPool->numCoreRegs);
         LOG(FATAL) << "No free temp registers";
@@ -427,7 +434,7 @@
         }
         next += 2;
     }
-    LOG(FATAL) << "No free temp registers";
+    LOG(FATAL) << "No free temp registers (pair)";
     return -1;
 }
 
@@ -605,6 +612,7 @@
         DCHECK_EQ(sReg1, sReg2);
         for (p = start; ;p = p->next) {
             ((ArmLIR *)p)->flags.isNop = true;
+            ((ArmLIR *)p)->flags.squashed = true;
             if (p == finish)
                 break;
         }
@@ -669,8 +677,8 @@
 extern void oatResetDefLoc(CompilationUnit* cUnit, RegLocation rl)
 {
     DCHECK(!rl.wide);
-    if (!(cUnit->disableOpt & (1 << kSuppressLoads))) {
-        RegisterInfo* p = getRegInfo(cUnit, rl.lowReg);
+    RegisterInfo* p = oatIsTemp(cUnit, rl.lowReg);
+    if (p && !(cUnit->disableOpt & (1 << kSuppressLoads))) {
         DCHECK(!p->pair);
         nullifyRange(cUnit, p->defStart, p->defEnd,
                      p->sReg, rl.sRegLow);
@@ -681,11 +689,15 @@
 extern void oatResetDefLocWide(CompilationUnit* cUnit, RegLocation rl)
 {
     DCHECK(rl.wide);
-    if (!(cUnit->disableOpt & (1 << kSuppressLoads))) {
-        RegisterInfo* p = getRegInfo(cUnit, rl.lowReg);
-        DCHECK(p->pair);
-        nullifyRange(cUnit, p->defStart, p->defEnd,
-                     p->sReg, rl.sRegLow);
+    RegisterInfo* pLow = oatIsTemp(cUnit, rl.lowReg);
+    RegisterInfo* pHigh = oatIsTemp(cUnit, rl.highReg);
+    if (pLow && !(cUnit->disableOpt & (1 << kSuppressLoads))) {
+        DCHECK(pLow->pair);
+        nullifyRange(cUnit, pLow->defStart, pLow->defEnd,
+                     pLow->sReg, rl.sRegLow);
+    }
+    if (pHigh && !(cUnit->disableOpt & (1 << kSuppressLoads))) {
+        DCHECK(pHigh->pair);
     }
     oatResetDef(cUnit, rl.lowReg);
     oatResetDef(cUnit, rl.highReg);
@@ -865,6 +877,7 @@
 extern RegLocation oatUpdateLoc(CompilationUnit* cUnit, RegLocation loc)
 {
     DCHECK(!loc.wide);
+    DCHECK(oatCheckCorePoolSanity(cUnit));
     if (loc.location == kLocDalvikFrame) {
         RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
         if (infoLo) {
@@ -881,11 +894,39 @@
     return loc;
 }
 
+bool oatCheckCorePoolSanity(CompilationUnit* cUnit)  // Always returns true so it can be wrapped in DCHECK()
+{
+   for (int i = 0; i < cUnit->regPool->numCoreRegs; i++) {  // must restart at 0 on every call
+       if (cUnit->regPool->coreRegs[i].pair) {  // a paired reg must agree with its partner
+           int myReg = cUnit->regPool->coreRegs[i].reg;
+           int mySreg = cUnit->regPool->coreRegs[i].sReg;
+           int partnerReg = cUnit->regPool->coreRegs[i].partner;
+           RegisterInfo* partner = getRegInfo(cUnit, partnerReg);
+           DCHECK(partner != NULL);
+           DCHECK(partner->pair);
+           DCHECK_EQ(myReg, partner->partner);  // the partner link must point back at us
+           int partnerSreg = partner->sReg;
+           if (mySreg == INVALID_SREG) {
+               DCHECK_EQ(partnerSreg, INVALID_SREG);
+           } else {
+               int diff = mySreg - partnerSreg;
+               DCHECK((diff == -1) || (diff == 1));  // the two halves hold adjacent sRegs
+           }
+       }
+       if (!cUnit->regPool->coreRegs[i].live) {  // a non-live reg must carry no def range
+           DCHECK(cUnit->regPool->coreRegs[i].defStart == NULL);
+           DCHECK(cUnit->regPool->coreRegs[i].defEnd == NULL);
+       }
+   }
+   return true;
+}
+
 /* see comments for updateLoc */
 extern RegLocation oatUpdateLocWide(CompilationUnit* cUnit,
                                     RegLocation loc)
 {
     DCHECK(loc.wide);
+    DCHECK(oatCheckCorePoolSanity(cUnit));
     if (loc.location == kLocDalvikFrame) {
         // Are the dalvik regs already live in physical registers?
         RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index be1ab1e..3ceffae 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -306,8 +306,6 @@
 void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr)
 {
     ArmLIR* lir = (ArmLIR*) arg;
-    if (lir->flags.isNop)
-        return;
     int offset = lir->generic.offset;
     int dest = lir->operands[0];
     const bool dumpNop = false;
@@ -374,8 +372,10 @@
                 buildInsnString(EncodingMap[lir->opcode].fmt, lir, opOperands,
                                 baseAddr, 256);
                 char tBuf[256];
-                snprintf(tBuf, 256, "%p (%04x): %-9s%s%s", baseAddr + offset, offset,
-                         opName, opOperands, lir->flags.isNop ? "(nop)" : "");
+                snprintf(tBuf, 256, "%p (%04x): %-9s%s%s%s",
+                         baseAddr + offset, offset,
+                         opName, opOperands, lir->flags.isNop ? "(nop)" : "",
+                         lir->flags.squashed ? "(squashed)" : "");
                 LOG(INFO) << tBuf;
             }
             break;
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 5308f7c..8f71571 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -840,9 +840,10 @@
     struct {
         bool isNop:1;           // LIR is optimized away
         bool insertWrapper:1;   // insert branch to emulate memory accesses
+        bool squashed:1;        // Eliminated def
         unsigned int age:4;     // default is 0, set lazily by the optimizer
         unsigned int size:3;    // bytes (2 for thumb, 2/4 for thumb2)
-        unsigned int unused:23;
+        unsigned int unused:22;
     } flags;
     int aliasInfo;              // For Dalvik register & litpool disambiguation
     u8 useMask;                 // Resource mask for use
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 3de0e79..84c3792 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -277,7 +277,7 @@
 }
 
 /* Clobber all regs that might be used by an external C call */
-extern void oatClobberCallRegs(CompilationUnit *cUnit)
+extern void oatClobberCalleeSave(CompilationUnit *cUnit)
 {
     oatClobber(cUnit, r0);
     oatClobber(cUnit, r1);
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index aeb0134..9e026be 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -20,8 +20,23 @@
                                    INVALID_REG, INVALID_SREG, 0,
                                    kLocDalvikFrame, INVALID_REG, INVALID_REG,
                                    INVALID_OFFSET};
-STATIC const RegLocation retLoc = LOC_DALVIK_RETURN_VAL;
-STATIC const RegLocation retLocWide = LOC_DALVIK_RETURN_VAL_WIDE;
+
+/* Build the wide (register-pair) return location and record its registers as in use */
+STATIC RegLocation getRetLocWide(CompilationUnit* cUnit)
+{
+    RegLocation wideRet = LOC_DALVIK_RETURN_VAL_WIDE;
+    oatLockTemp(cUnit, wideRet.lowReg);   /* lock the low half ... */
+    oatLockTemp(cUnit, wideRet.highReg);  /* ... and the high half */
+    oatMarkPair(cUnit, wideRet.lowReg, wideRet.highReg);
+    return wideRet;
+}
+
+STATIC RegLocation getRetLoc(CompilationUnit* cUnit)  /* single-register return location */
+{
+    RegLocation narrowRet = LOC_DALVIK_RETURN_VAL;
+    oatLockTemp(cUnit, narrowRet.lowReg);  /* lock the return register before handing it out */
+    return narrowRet;
+}
 
 /*
  * Let helper function take care of everything.  Will call
@@ -38,7 +53,6 @@
     loadConstant(cUnit, r0, mir->dalvikInsn.vC);  // arg0 <- type_id
     loadValueDirectFixed(cUnit, rlSrc, r2);       // arg2 <- count
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     RegLocation rlResult = oatGetReturn(cUnit);
     storeValue(cUnit, rlDest, rlResult);
 }
@@ -170,19 +184,18 @@
     int fieldIdx = mir->dalvikInsn.vB;
     uint32_t typeIdx;
     Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
+    oatFlushAllRegs(cUnit);
     if (SLOW_FIELD_PATH || field == NULL) {
         // Slow path
         LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
             << " unresolved at compile time";
         int funcOffset = isObject ? OFFSETOF_MEMBER(Thread, pSetObjStatic)
                                   : OFFSETOF_MEMBER(Thread, pSet32Static);
-        oatFlushAllRegs(cUnit);
         loadWordDisp(cUnit, rSELF, funcOffset, rLR);
         loadConstant(cUnit, r0, mir->dalvikInsn.vB);
         loadCurrMethodDirect(cUnit, r1);
         loadValueDirect(cUnit, rlSrc, r2);
         callRuntimeHelper(cUnit, rLR);
-        oatClobberCallRegs(cUnit);
     } else {
         // fast path
         int fieldOffset = field->GetOffset().Int32Value();
@@ -209,6 +222,11 @@
         branchOver->generic.target = (LIR*)skipTarget;
         rlSrc = oatGetSrc(cUnit, mir, 0);
         rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
+#if ANDROID_SMP != 0
+        if (field->IsVolatile()) {
+            oatGenMemBarrier(cUnit, kST);
+        }
+#endif
         storeWordDisp(cUnit, rBase, fieldOffset, rlSrc.lowReg);
 #if ANDROID_SMP != 0
         if (field->IsVolatile()) {
@@ -227,16 +245,20 @@
     int fieldIdx = mir->dalvikInsn.vB;
     uint32_t typeIdx;
     Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
-    if (SLOW_FIELD_PATH || field == NULL) {
+    oatFlushAllRegs(cUnit);
+#if ANDROID_SMP != 0
+    bool isVolatile = (field == NULL) || field->IsVolatile();
+#else
+    bool isVolatile = false;
+#endif
+    if (SLOW_FIELD_PATH || field == NULL || isVolatile) {
         LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
             << " unresolved at compile time";
-        oatFlushAllRegs(cUnit);
         loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pSet64Static), rLR);
         loadConstant(cUnit, r0, mir->dalvikInsn.vB);
         loadCurrMethodDirect(cUnit, r1);
         loadValueDirectWideFixed(cUnit, rlSrc, r2, r3);
         callRuntimeHelper(cUnit, rLR);
-        oatClobberCallRegs(cUnit);
     } else {
         // fast path
         int fieldOffset = field->GetOffset().Int32Value();
@@ -264,11 +286,6 @@
         rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
         storeBaseDispWide(cUnit, rBase, fieldOffset, rlSrc.lowReg,
                           rlSrc.highReg);
-#if ANDROID_SMP != 0
-        if (field->IsVolatile()) {
-            oatGenMemBarrier(cUnit, kSY);
-        }
-#endif
         oatFreeTemp(cUnit, rBase);
     }
 }
@@ -280,10 +297,15 @@
     int fieldIdx = mir->dalvikInsn.vB;
     uint32_t typeIdx;
     Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
-    if (SLOW_FIELD_PATH || field == NULL) {
+#if ANDROID_SMP != 0
+    bool isVolatile = (field == NULL) || field->IsVolatile();
+#else
+    bool isVolatile = false;
+#endif
+    oatFlushAllRegs(cUnit);
+    if (SLOW_FIELD_PATH || field == NULL || isVolatile) {
         LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
             << " unresolved at compile time";
-        oatFlushAllRegs(cUnit);
         loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pGet64Static), rLR);
         loadConstant(cUnit, r0, mir->dalvikInsn.vB);
         loadCurrMethodDirect(cUnit, r1);
@@ -315,11 +337,6 @@
         branchOver->generic.target = (LIR*)skipTarget;
         rlDest = oatGetDestWide(cUnit, mir, 0, 1);
         RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
-#if ANDROID_SMP != 0
-        if (field->IsVolatile()) {
-            oatGenMemBarrier(cUnit, kSY);
-        }
-#endif
         loadBaseDispWide(cUnit, NULL, rBase, fieldOffset, rlResult.lowReg,
                          rlResult.highReg, INVALID_SREG);
         oatFreeTemp(cUnit, rBase);
@@ -335,13 +352,13 @@
     Field* field = FindFieldWithResolvedStaticStorage(cUnit->method, fieldIdx, typeIdx);
     bool isObject = ((mir->dalvikInsn.opcode == OP_SGET_OBJECT) ||
                      (mir->dalvikInsn.opcode == OP_SGET_OBJECT_VOLATILE));
+    oatFlushAllRegs(cUnit);
     if (SLOW_FIELD_PATH || field == NULL) {
         LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
             << " unresolved at compile time";
         // Slow path
         int funcOffset = isObject ? OFFSETOF_MEMBER(Thread, pGetObjStatic)
                                   : OFFSETOF_MEMBER(Thread, pGet32Static);
-        oatFlushAllRegs(cUnit);
         loadWordDisp(cUnit, rSELF, funcOffset, rLR);
         loadConstant(cUnit, r0, mir->dalvikInsn.vB);
         loadCurrMethodDirect(cUnit, r1);
@@ -950,6 +967,7 @@
     genShowTarget(cUnit);
 #endif
     opReg(cUnit, kOpBlx, rLR);
+    oatClobberCalleeSave(cUnit);
 }
 
 /*
@@ -982,6 +1000,7 @@
     genShowTarget(cUnit);
 #endif
     opReg(cUnit, kOpBlx, rLR);
+    oatClobberCalleeSave(cUnit);
 }
 
 STATIC void genInvokeSuper(CompilationUnit* cUnit, MIR* mir)
@@ -1034,6 +1053,7 @@
     genShowTarget(cUnit);
 #endif
     opReg(cUnit, kOpBlx, rLR);
+    oatClobberCalleeSave(cUnit);
 }
 
 STATIC void genInvokeVirtual(CompilationUnit* cUnit, MIR* mir)
@@ -1073,6 +1093,7 @@
     genShowTarget(cUnit);
 #endif
     opReg(cUnit, kOpBlx, rLR);
+    oatClobberCalleeSave(cUnit);
 }
 
 STATIC bool compileDalvikInstruction(CompilationUnit* cUnit, MIR* mir,
@@ -1140,44 +1161,25 @@
         case OP_RETURN:
         case OP_RETURN_OBJECT:
             genSuspendPoll(cUnit, mir);
-            storeValue(cUnit, retLoc, rlSrc[0]);
+            storeValue(cUnit, getRetLoc(cUnit), rlSrc[0]);
             break;
 
         case OP_RETURN_WIDE:
             genSuspendPoll(cUnit, mir);
-            rlDest = retLocWide;
-            rlDest.fp = rlSrc[0].fp;
-            storeValueWide(cUnit, rlDest, rlSrc[0]);
+            storeValueWide(cUnit, getRetLocWide(cUnit), rlSrc[0]);
             break;
 
         case OP_MOVE_RESULT_WIDE:
             if (mir->optimizationFlags & MIR_INLINED)
                 break;  // Nop - combined w/ previous invoke
-            /*
-             * Somewhat hacky here.   Because we're now passing
-             * return values in registers, we have to let the
-             * register allocation utilities know that the return
-             * registers are live and may not be used for address
-             * formation in storeValueWide.
-             */
-            DCHECK(retLocWide.lowReg == r0);
-            DCHECK(retLocWide.highReg == r1);
-            oatLockTemp(cUnit, retLocWide.lowReg);
-            oatLockTemp(cUnit, retLocWide.highReg);
-            storeValueWide(cUnit, rlDest, retLocWide);
-            oatFreeTemp(cUnit, retLocWide.lowReg);
-            oatFreeTemp(cUnit, retLocWide.highReg);
+            storeValueWide(cUnit, rlDest, getRetLocWide(cUnit));
             break;
 
         case OP_MOVE_RESULT:
         case OP_MOVE_RESULT_OBJECT:
             if (mir->optimizationFlags & MIR_INLINED)
                 break;  // Nop - combined w/ previous invoke
-            /* See comment for OP_MOVE_RESULT_WIDE */
-            DCHECK(retLoc.lowReg == r0);
-            oatLockTemp(cUnit, retLoc.lowReg);
-            storeValue(cUnit, rlDest, retLoc);
-            oatFreeTemp(cUnit, retLoc.lowReg);
+            storeValue(cUnit, rlDest, getRetLoc(cUnit));
             break;
 
         case OP_MOVE:
@@ -1848,7 +1850,10 @@
     labelList[blockId].opcode = kArmPseudoNormalBlockLabel;
     oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
 
+    /* Reset local optimization data on block boundaries */
+    oatResetRegPool(cUnit);
     oatClobberAllRegs(cUnit);
+    oatResetDefTracking(cUnit);
 
     ArmLIR* headLIR = NULL;
 
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index e3893d3..d81c200 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -57,6 +57,7 @@
 
 STATIC ArmLIR* callRuntimeHelper(CompilationUnit* cUnit, int reg)
 {
+    oatClobberCalleeSave(cUnit);  // clobber the call-used regs here so callers need not do it after each helper call
     return opReg(cUnit, kOpBlx, reg);
 }
 
@@ -373,7 +374,6 @@
     // Materialize a pointer to the fill data image
     newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
 }
 
 /*
@@ -406,6 +406,7 @@
 STATIC void getFieldOffset(CompilationUnit* cUnit, MIR* mir)
 {
     int fieldIdx = mir->dalvikInsn.vC;
+    oatFlushAllRegs(cUnit);
     LOG(INFO) << "Field " << fieldNameFromIndex(cUnit->method, fieldIdx)
         << " unresolved at compile time";
     oatLockCallTemps(cUnit);  // Explicit register usage
@@ -499,9 +500,12 @@
         genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir);/* null obj? */
 
         if (isVolatile) {
-            oatGenMemBarrier(cUnit, kSY);
+            oatGenMemBarrier(cUnit, kST);
         }
         storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
+        if (isVolatile) {
+            oatGenMemBarrier(cUnit, kSY);
+        }
     }
     if (isObject) {
         /* NOTE: marking card based on object head */
@@ -512,10 +516,15 @@
 STATIC void genIGetWide(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                         RegLocation rlObj)
 {
+    RegLocation rlResult;
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
         GetResolvedField(mir->dalvikInsn.vC);
-    RegLocation rlResult;
-    if (fieldPtr == NULL) {
+#if ANDROID_SMP != 0
+    bool isVolatile = (fieldPtr == NULL) || fieldPtr->IsVolatile();
+#else
+    bool isVolatile = false;
+#endif
+    if ((fieldPtr == NULL) || isVolatile) {
         getFieldOffset(cUnit, mir);
         // Field offset in r0
         rlObj = loadValue(cUnit, rlObj, kCoreReg);
@@ -524,13 +533,8 @@
         opRegReg(cUnit, kOpAdd, r0, rlObj.lowReg);
         loadPair(cUnit, r0, rlResult.lowReg, rlResult.highReg);
         oatGenMemBarrier(cUnit, kSY);
-        storeValue(cUnit, rlDest, rlResult);
+        storeValueWide(cUnit, rlDest, rlResult);
     } else {
-#if ANDROID_SMP != 0
-        bool isVolatile = fieldPtr->IsVolatile();
-#else
-        bool isVolatile = false;
-#endif
         int fieldOffset = fieldPtr->GetOffset().Int32Value();
         rlObj = loadValue(cUnit, rlObj, kCoreReg);
         int regPtr = oatAllocTemp(cUnit);
@@ -543,10 +547,6 @@
 
         loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
 
-        if (isVolatile) {
-            oatGenMemBarrier(cUnit, kSY);
-        }
-
         oatFreeTemp(cUnit, regPtr);
         storeValueWide(cUnit, rlDest, rlResult);
     }
@@ -557,7 +557,12 @@
 {
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
         GetResolvedField(mir->dalvikInsn.vC);
-    if (fieldPtr == NULL) {
+#if ANDROID_SMP != 0
+    bool isVolatile = (fieldPtr == NULL) || fieldPtr->IsVolatile();
+#else
+    bool isVolatile = false;
+#endif
+    if ((fieldPtr == NULL) || isVolatile) {
         getFieldOffset(cUnit, mir);
         // Field offset in r0
         rlObj = loadValue(cUnit, rlObj, kCoreReg);
@@ -567,11 +572,6 @@
         oatGenMemBarrier(cUnit, kSY);
         storePair(cUnit, r0, rlSrc.lowReg, rlSrc.highReg);
     } else {
-#if ANDROID_SMP != 0
-        bool isVolatile = fieldPtr->IsVolatile();
-#else
-        bool isVolatile = false;
-#endif
         int fieldOffset = fieldPtr->GetOffset().Int32Value();
 
         rlObj = loadValue(cUnit, rlObj, kCoreReg);
@@ -581,9 +581,6 @@
         regPtr = oatAllocTemp(cUnit);
         opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
 
-        if (isVolatile) {
-            oatGenMemBarrier(cUnit, kSY);
-        }
         storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
 
         oatFreeTemp(cUnit, regPtr);
@@ -607,6 +604,7 @@
         storeValue(cUnit, rlDest, rlResult);
     } else {
         // Slow path.  Must test at runtime
+        oatFlushAllRegs(cUnit);
         ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, rlResult.lowReg,
                                           0);
         // Resolved, store and hop over following code
@@ -621,7 +619,6 @@
         genRegCopy(cUnit, r1, mReg);
         loadConstant(cUnit, r0, mir->dalvikInsn.vB);
         callRuntimeHelper(cUnit, rLR);
-        oatClobberCallRegs(cUnit);
         RegLocation rlResult = oatGetReturn(cUnit);
         storeValue(cUnit, rlDest, rlResult);
         // Rejoin code paths
@@ -663,13 +660,13 @@
     loadCurrMethodDirect(cUnit, r1);              // arg1 <= Method*
     loadConstant(cUnit, r0, mir->dalvikInsn.vB);  // arg0 <- type_id
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     RegLocation rlResult = oatGetReturn(cUnit);
     storeValue(cUnit, rlDest, rlResult);
 }
 
 void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
+    oatFlushAllRegs(cUnit);
     loadWordDisp(cUnit, rSELF,
                  OFFSETOF_MEMBER(Thread, pDeliverException), rLR);
     loadValueDirectFixed(cUnit, rlSrc, r0);  // Get exception object
@@ -679,6 +676,7 @@
 STATIC void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                           RegLocation rlSrc)
 {
+    oatFlushAllRegs(cUnit);
     // May generate a call - use explicit registers
     oatLockCallTemps(cUnit);
     art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
@@ -721,7 +719,6 @@
     genRegCopy(cUnit, r0, r3);
     genRegCopy(cUnit, r1, r2);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     /* branch target here */
     ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
     target->defMask = ENCODE_ALL;
@@ -733,6 +730,7 @@
 
 STATIC void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
+    oatFlushAllRegs(cUnit);
     // May generate a call - use explicit registers
     oatLockCallTemps(cUnit);
     art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
@@ -773,7 +771,6 @@
     genRegCopy(cUnit, r0, r1);
     genRegCopy(cUnit, r1, r2);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     /* branch target here */
     ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
     target->defMask = ENCODE_ALL;
@@ -805,10 +802,12 @@
 STATIC void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
                         RegLocation rlFree)
 {
-    if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
+    if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg) &&
+        (rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg)) {
+        // rlFree shares no register with rlKeep: release both of its halves
         oatFreeTemp(cUnit, rlFree.lowReg);
-    if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
-        oatFreeTemp(cUnit, rlFree.lowReg);
+        oatFreeTemp(cUnit, rlFree.highReg);
+    }
 }
 
 STATIC void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
@@ -1108,7 +1107,6 @@
         loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
     }
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     if (tgtSize == 1) {
         RegLocation rlResult;
         rlDest = oatGetDest(cUnit, mir, 0);
@@ -1163,7 +1161,6 @@
     loadValueDirectFixed(cUnit, rlSrc1, r0);
     loadValueDirectFixed(cUnit, rlSrc2, r1);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     rlResult = oatGetReturn(cUnit);
     storeValue(cUnit, rlDest, rlResult);
     return false;
@@ -1209,7 +1206,6 @@
     loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
     loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     rlResult = oatGetReturnWide(cUnit);
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
@@ -1278,6 +1274,7 @@
     int lenOffset = Array::LengthOffset().Int32Value();
     int dataOffset = Array::DataOffset().Int32Value();
 
+    oatFlushAllRegs(cUnit);
     /* Make sure it's a legal object Put. Use direct regs at first */
     loadValueDirectFixed(cUnit, rlArray, r1);
     loadValueDirectFixed(cUnit, rlSrc, r0);
@@ -1289,7 +1286,8 @@
     /* Get the array's clazz */
     loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), r1);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
+    oatFreeTemp(cUnit, r0);
+    oatFreeTemp(cUnit, r1);
 
     // Now, redo loadValues in case they didn't survive the call
 
@@ -1479,7 +1477,6 @@
     loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
     loadValueDirect(cUnit, rlShift, r2);
     callRuntimeHelper(cUnit, rLR);
-    oatClobberCallRegs(cUnit);
     RegLocation rlResult = oatGetReturnWide(cUnit);
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
@@ -1593,7 +1590,6 @@
         loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
         loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
         callRuntimeHelper(cUnit, rLR);
-        oatClobberCallRegs(cUnit);
         if (retReg == r0)
             rlResult = oatGetReturnWide(cUnit);
         else
@@ -1715,7 +1711,6 @@
             genImmedCheck(cUnit, kArmCondEq, r1, 0, mir, kArmThrowDivZero);
         }
         callRuntimeHelper(cUnit, rLR);
-        oatClobberCallRegs(cUnit);
         if (retReg == r0)
             rlResult = oatGetReturn(cUnit);
         else
@@ -1731,6 +1726,7 @@
     if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
+    oatFlushAllRegs(cUnit);
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
     ArmLIR* branch = opCondBranch(cUnit, kArmCondEq);
     ArmLIR* retLab = newLIR0(cUnit, kArmPseudoTargetLabel);
@@ -1750,6 +1746,7 @@
     if (NO_SUSPEND || mir->optimizationFlags & MIR_IGNORE_SUSPEND_CHECK) {
         return;
     }
+    oatFlushAllRegs(cUnit);
     oatLockCallTemps(cUnit);   // Explicit register usage
     int rSuspendCount = r1;
     ArmLIR* ld;
@@ -2003,7 +2000,6 @@
             loadWordDisp(cUnit, rSELF, funcOffset, rLR);
             loadConstant(cUnit, r1, lit);
             callRuntimeHelper(cUnit, rLR);
-            oatClobberCallRegs(cUnit);
             if (isDiv)
                 rlResult = oatGetReturn(cUnit);
             else