Restore card marking, minor tuning

Restore GC card marks that were mistakenly dropped during an
earlier restructuring.  Add debugging code to gather opcode
frequency statistics.  Minor tuning for code size.

Change-Id: I117f62c29e29250277166e7f005706e27998f77a
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 68a38a5..238e3ff 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -127,6 +127,7 @@
     kDebugVerifyDataflow,
     kDebugShowMemoryUsage,
     kDebugShowNops,
+    kDebugCountOpcodes,
 };
 
 extern uint32_t compilerDebugFlags;
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index b512c5b..184d4db 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -320,6 +320,7 @@
     bool hasLoop;                       // Contains a loop
     bool hasInvoke;                     // Contains an invoke instruction
     bool heapMemOp;                     // Mark mem ops for self verification
+    bool qdMode;                        // Compile for code size/compile time
     bool usesLinkRegister;              // For self-verification only
     bool methodTraceSupport;            // For TraceView profiling
     struct RegisterPool* regPool;
@@ -425,6 +426,7 @@
      struct ArenaMemBlock* currentArena;
      int numArenaBlocks;
      struct Memstats* mstats;
+     int* opcodeCount;    // Count Dalvik opcodes for tuning
 #ifndef NDEBUG
     /*
      * Sanity checking for the register temp tracking.  The same ssa
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index cca891d..3e0fd3a 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -51,6 +51,7 @@
      //(1 << kDebugVerifyDataflow) |
      //(1 << kDebugShowMemoryUsage) |
      //(1 << kDebugShowNops) |
+     //(1 << kDebugCountOpcodes) |
      0;
 
 inline bool contentIsInsn(const u2* codePtr) {
@@ -803,6 +804,12 @@
             (1 << kTrackLiveTemps));
     }
 
+    /* Gathering opcode stats? */
+    if (compilerDebugFlags & (1 << kDebugCountOpcodes)) {
+        cUnit->opcodeCount = (int*)oatNew(cUnit.get(),
+            kNumPackedOpcodes * sizeof(int), true, kAllocMisc);
+    }
+
     /* Assume non-throwing leaf */
     cUnit->attrs = (METHOD_IS_LEAF | METHOD_IS_THROW_FREE);
 
@@ -867,6 +874,9 @@
         insn->offset = curOffset;
         int width = parseInsn(cUnit.get(), codePtr, &insn->dalvikInsn, false);
         insn->width = width;
+        if (cUnit->opcodeCount != NULL) {
+            cUnit->opcodeCount[static_cast<int>(insn->dalvikInsn.opcode)]++;
+        }
 
         /* Terminate when the data section is seen */
         if (width == 0)
@@ -948,16 +958,21 @@
               ((cUnit->numBlocks > MANY_BLOCKS_INITIALIZER) &&
                PrettyMethod(method_idx, dex_file).find("init>") !=
                std::string::npos)) {
-            cUnit->disableDataflow = true;
-            // Disable optimization which require dataflow/ssa
-            cUnit->disableOpt |=
-                (1 << kNullCheckElimination) |
-                (1 << kBBOpt) |
-                (1 << kPromoteRegs);
-            if (cUnit->printMe) {
-                LOG(INFO) << "Compiler: " << PrettyMethod(method_idx, dex_file)
-                          << " too big: " << cUnit->numBlocks;
-            }
+            cUnit->qdMode = true;
+        }
+    }
+
+    if (cUnit->qdMode) {
+        cUnit->disableDataflow = true;
+        // Disable optimization which require dataflow/ssa
+        cUnit->disableOpt |=
+            (1 << kNullCheckElimination) |
+            (1 << kBBOpt) |
+            (1 << kPromoteRegs);
+        if (cUnit->printMe) {
+            LOG(INFO) << "QD mode enabled: "
+                      << PrettyMethod(method_idx, dex_file)
+                      << " too big: " << cUnit->numBlocks;
         }
     }
 
@@ -1011,6 +1026,17 @@
         if (cUnit->printMe) {
             oatCodegenDump(cUnit.get());
         }
+
+        if (cUnit->opcodeCount != NULL) {
+            LOG(INFO) << "Opcode Count";
+            for (int i = 0; i < kNumPackedOpcodes; i++) {
+                if (cUnit->opcodeCount[i] != 0) {
+                    LOG(INFO) << "-C- "
+                              <<Instruction::Name(static_cast<Instruction::Code>(i))
+                              << " " << cUnit->opcodeCount[i];
+                }
+            }
+        }
     }
 
     // Combine vmap tables - core regs, then fp regs - into vmapTable
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 52ff3d7..cc0d624 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1049,6 +1049,9 @@
             if (isVolatile) {
                 oatGenMemBarrier(cUnit, kSY);
             }
+            if (isObject) {
+                markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
+            }
         }
     } else {
         int setterOffset = isLongOrDouble ? OFFSETOF_MEMBER(Thread, pSet64Instance) :
@@ -1434,6 +1437,7 @@
     }
     storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
                      scale, kWord);
+    markGCCard(cUnit, rlSrc.lowReg, rlArray.lowReg);
 }
 
 /*
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index ffff7d2..b3fa739 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -1100,6 +1100,7 @@
     int regBias = cUnit->numCompilerTemps + 1;
     int dalvikRegs = cUnit->numDalvikRegisters;
     int numRegs = dalvikRegs + regBias;
+    const int promotionThreshold = 2;
 
     // Allow target code to add any special registers
     oatAdjustSpillMask(cUnit);
@@ -1164,7 +1165,8 @@
 
     if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
         // Promote fpRegs
-        for (int i = 0; (i < numRegs) && (fpRegs[i].count > 0); i++) {
+        for (int i = 0; (i < numRegs) &&
+                        (fpRegs[i].count >= promotionThreshold ); i++) {
             int pMapIdx = SRegToPMap(cUnit, fpRegs[i].sReg);
             if (cUnit->promotionMap[pMapIdx].fpLocation != kLocPhysReg) {
                 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
@@ -1176,7 +1178,8 @@
         }
 
         // Promote core regs
-        for (int i = 0; (i < numRegs) && (coreRegs[i].count > 0); i++) {
+        for (int i = 0; (i < numRegs) &&
+                        (coreRegs[i].count > promotionThreshold); i++) {
             int pMapIdx = SRegToPMap(cUnit, coreRegs[i].sReg);
             if (cUnit->promotionMap[pMapIdx].coreLocation !=
                     kLocPhysReg) {
@@ -1186,8 +1189,17 @@
                 }
             }
         }
+    } else if (cUnit->qdMode) {
+        oatAllocPreservedCoreReg(cUnit, cUnit->methodSReg);
+        for (int i = 0; i < numRegs; i++) {
+            int reg = oatAllocPreservedCoreReg(cUnit, i);
+            if (reg < 0) {
+               break;  // No more left
+            }
+        }
     }
 
+
     // Now, update SSA names to new home locations
     for (int i = 0; i < cUnit->numSSARegs; i++) {
         RegLocation *curr = &cUnit->regLocation[i];