Polish JIT trace profiling and fix an unchaining bug when profiling is enabled.
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index b0c40e6..715f750 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -168,6 +168,8 @@
     totalStats->compiledDalvikSize += methodStats->compiledDalvikSize;
     totalStats->nativeSize += methodStats->nativeSize;
 
+    /* Enable the following when fine-tuning the JIT performance */
+#if 0
     int limit = (methodStats->dalvikSize >> 2) * 3;
 
     /* If over 3/4 of the Dalvik code is compiled, print something */
@@ -178,6 +180,7 @@
              methodStats->dalvikSize,
              methodStats->nativeSize);
     }
+#endif
     return 0;
 }
 
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
index 9642c7d..3b3c161 100644
--- a/vm/compiler/codegen/armv5te/Assemble.c
+++ b/vm/compiler/codegen/armv5te/Assemble.c
@@ -292,7 +292,7 @@
 /* Write the numbers in the literal pool to the codegen stream */
 static void installDataContent(CompilationUnit *cUnit)
 {
-    int *dataPtr = (int *) (cUnit->baseAddr + cUnit->dataOffset);
+    int *dataPtr = (int *) ((char *) cUnit->baseAddr + cUnit->dataOffset);
     Armv5teLIR *dataLIR = (Armv5teLIR *) cUnit->wordList;
     while (dataLIR) {
         *dataPtr++ = dataLIR->operands[0];
@@ -479,8 +479,14 @@
     assert(chainCellOffsetLIR->opCode == ARMV5TE_16BIT_DATA &&
            chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
 
-    /* Replace the CHAIN_CELL_OFFSET_TAG with the real value */
-    chainCellOffsetLIR->operands[0] = chainCellOffset;
+    /*
+     * Replace the CHAIN_CELL_OFFSET_TAG with the real value. If trace
+     * profiling is enabled, subtract 4 (occupied by the counter word) from
+     * the absolute offset as the value stored in chainCellOffsetLIR is the
+     * delta from &chainCellOffsetLIR to &ChainCellCounts.
+     */
+    chainCellOffsetLIR->operands[0] =
+        gDvmJit.profile ? (chainCellOffset - 4) : chainCellOffset;
 
     offset += sizeof(chainCellCounts) + descSize;
 
@@ -546,7 +552,7 @@
 
     /* Flush dcache and invalidate the icache to maintain coherence */
     cacheflush((long)cUnit->baseAddr,
-               (long)(cUnit->baseAddr + offset), 0);
+               (long)((char *) cUnit->baseAddr + offset), 0);
 }
 
 static u4 assembleBXPair(int branchOffset)
@@ -689,7 +695,7 @@
     u2* pChainCellOffset = (u2*)((char*)codeAddr - 3);
     u2 chainCellOffset = *pChainCellOffset;
     ChainCellCounts *pChainCellCounts =
-          (ChainCellCounts*)((char*)codeAddr + chainCellOffset -3);
+          (ChainCellCounts*)((char*)codeAddr + chainCellOffset - 3);
     int cellSize;
     u4* pChainCells;
     u4* pStart;
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index ffdb283..54d24af 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -151,16 +151,83 @@
 
     pExecutionCount = (u4*) (traceBase);
     pCellOffset = (u2*) (traceBase + 4);
-    pCellCounts = (ChainCellCounts*) (traceBase + *pCellOffset);
+    pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
     desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
     method = desc->method;
-    LOGD("TRACEPROFILE 0x%08x % 10d %s%s [0x%x,%d]", (int)traceBase,
-          *pExecutionCount, method->clazz->descriptor, method->name,
+    char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype);
+    LOGD("TRACEPROFILE 0x%08x % 10d %s%s;%s [0x%x,%d]", (int)traceBase,
+          *pExecutionCount,
+          method->clazz->descriptor, method->name, methodDesc,
           desc->trace[0].frag.startOffset,
           desc->trace[0].frag.numInsts);
+    free(methodDesc);
     return *pExecutionCount;
 }
 
+/* Return a trace's execution count, or 0 if dPC or codeAddress is unset */
+static inline int getProfileCount(const JitEntry *entry)
+{
+    if (entry->dPC == 0 || entry->codeAddress == 0)
+        return 0;
+    /*
+     * The codeAddress field has the low bit set to mark thumb
+     * mode.  Subtracting 7 strips that bit and (presumably) skips
+     * the 4-byte counter and 2-byte chain cell offset preceding the
+     * code.  See the diagram in Assemble.c for the trace layout.
+     */
+    u4 *pExecutionCount = (u4 *) ((char*)entry->codeAddress - 7);
+
+    return *pExecutionCount;    /* NOTE(review): u4 narrowed to int */
+}
+
+/* qsort callback: orders JitEntry records by descending profile count */
+static int sortTraceProfileCount(const void *entry1, const void *entry2)
+{
+    const JitEntry *jitEntry1 = entry1;
+    const JitEntry *jitEntry2 = entry2;
+
+    int count1 = getProfileCount(jitEntry1);
+    int count2 = getProfileCount(jitEntry2);
+    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
+}
+
+/* Dump every trace profile, highest count first, then log the average */
+static void sortAndPrintTraceProfiles()
+{
+    JitEntry *sortedEntries;
+    int numTraces = 0;
+    unsigned long counts = 0;
+    unsigned int i;
+
+    /* Hold tableLock for the whole dump so the table is not changing */
+    dvmLockMutex(&gDvmJit.tableLock);
+
+    /* Sort a heap copy of the table, descending; give up if malloc fails */
+    sortedEntries = malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
+    if (sortedEntries == NULL)
+        goto done;
+    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
+           sizeof(JitEntry) * gDvmJit.jitTableSize);
+    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
+          sortTraceProfileCount);
+
+    /* Dump each used entry (dPC != 0) and sum the counts it returns */
+    for (i=0; i < gDvmJit.jitTableSize; i++) {
+        if (sortedEntries[i].dPC != 0) {
+            counts += dvmCompilerDumpTraceProfile(&sortedEntries[i]);
+            numTraces++;
+        }
+    }
+    if (numTraces == 0)
+        numTraces = 1;  /* keeps the division below from dividing by zero */
+    LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
+
+    free(sortedEntries);
+done:
+    dvmUnlockMutex(&gDvmJit.tableLock);
+    return;
+}
+
 /* Dumps debugging & tuning stats to the log */
 void dvmJitStats()
 {
@@ -196,18 +263,8 @@
              gDvmJit.invokeChain, gDvmJit.invokePredictedChain,
              gDvmJit.invokeNative, gDvmJit.returnOp);
 #endif
-       if (gDvmJit.profile) {
-           int numTraces = 0;
-           long counts = 0;
-           for (i=0; i < (int) gDvmJit.jitTableSize; i++) {
-              if (gDvmJit.pJitEntryTable[i].dPC != 0) {
-                  counts += dvmCompilerDumpTraceProfile( &gDvmJit.pJitEntryTable[i] );
-                  numTraces++;
-              }
-           }
-        if (numTraces == 0)
-              numTraces = 1;
-        LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
+        if (gDvmJit.profile) {
+            sortAndPrintTraceProfiles();
         }
     }
 }