Polish JIT trace profiling and fix an unchaining bug when profiling is enabled.
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index b0c40e6..715f750 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -168,6 +168,8 @@
totalStats->compiledDalvikSize += methodStats->compiledDalvikSize;
totalStats->nativeSize += methodStats->nativeSize;
+ /* Enable the following when fine-tuning the JIT performance */
+#if 0
int limit = (methodStats->dalvikSize >> 2) * 3;
/* If over 3/4 of the Dalvik code is compiled, print something */
@@ -178,6 +180,7 @@
methodStats->dalvikSize,
methodStats->nativeSize);
}
+#endif
return 0;
}
diff --git a/vm/compiler/codegen/armv5te/Assemble.c b/vm/compiler/codegen/armv5te/Assemble.c
index 9642c7d..3b3c161 100644
--- a/vm/compiler/codegen/armv5te/Assemble.c
+++ b/vm/compiler/codegen/armv5te/Assemble.c
@@ -292,7 +292,7 @@
/* Write the numbers in the literal pool to the codegen stream */
static void installDataContent(CompilationUnit *cUnit)
{
- int *dataPtr = (int *) (cUnit->baseAddr + cUnit->dataOffset);
+ int *dataPtr = (int *) ((char *) cUnit->baseAddr + cUnit->dataOffset);
Armv5teLIR *dataLIR = (Armv5teLIR *) cUnit->wordList;
while (dataLIR) {
*dataPtr++ = dataLIR->operands[0];
@@ -479,8 +479,14 @@
assert(chainCellOffsetLIR->opCode == ARMV5TE_16BIT_DATA &&
chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
- /* Replace the CHAIN_CELL_OFFSET_TAG with the real value */
- chainCellOffsetLIR->operands[0] = chainCellOffset;
+ /*
+ * Replace the CHAIN_CELL_OFFSET_TAG with the real value. If trace
+ * profiling is enabled, subtract 4 (occupied by the counter word) from
+ * the absolute offset as the value stored in chainCellOffsetLIR is the
+ * delta from &chainCellOffsetLIR to &ChainCellCounts.
+ */
+ chainCellOffsetLIR->operands[0] =
+ gDvmJit.profile ? (chainCellOffset - 4) : chainCellOffset;
offset += sizeof(chainCellCounts) + descSize;
@@ -546,7 +552,7 @@
/* Flush dcache and invalidate the icache to maintain coherence */
cacheflush((long)cUnit->baseAddr,
- (long)(cUnit->baseAddr + offset), 0);
+ (long)((char *) cUnit->baseAddr + offset), 0);
}
static u4 assembleBXPair(int branchOffset)
@@ -689,7 +695,7 @@
u2* pChainCellOffset = (u2*)((char*)codeAddr - 3);
u2 chainCellOffset = *pChainCellOffset;
ChainCellCounts *pChainCellCounts =
- (ChainCellCounts*)((char*)codeAddr + chainCellOffset -3);
+ (ChainCellCounts*)((char*)codeAddr + chainCellOffset - 3);
int cellSize;
u4* pChainCells;
u4* pStart;
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index ffdb283..54d24af 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -151,16 +151,83 @@
pExecutionCount = (u4*) (traceBase);
pCellOffset = (u2*) (traceBase + 4);
- pCellCounts = (ChainCellCounts*) (traceBase + *pCellOffset);
+ pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
method = desc->method;
- LOGD("TRACEPROFILE 0x%08x % 10d %s%s [0x%x,%d]", (int)traceBase,
- *pExecutionCount, method->clazz->descriptor, method->name,
+ char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype);
+ LOGD("TRACEPROFILE 0x%08x % 10d %s%s;%s [0x%x,%d]", (int)traceBase,
+ *pExecutionCount,
+ method->clazz->descriptor, method->name, methodDesc,
desc->trace[0].frag.startOffset,
desc->trace[0].frag.numInsts);
+ free(methodDesc);
return *pExecutionCount;
}
+/* Read a trace's execution counter; 0 if dPC or codeAddress is unset */
+static inline int getProfileCount(const JitEntry *entry)
+{
+ if (entry->dPC == 0 || entry->codeAddress == 0)
+ return 0;
+ /*
+ * The codeAddress field has the low bit set to mark thumb mode, so
+ * it must be stripped before reconstructing the trace data. The u4
+ * counter is read 7 bytes below codeAddress (presumably 1 thumb bit +
+ * 2-byte chain cell offset + 4-byte counter; see Assemble.c diagram).
+ */
+ u4 *pExecutionCount = (u4 *) ((char*)entry->codeAddress - 7);
+
+ return *pExecutionCount;
+}
+
+/* qsort callback: orders JitEntry items by descending profile count */
+static int sortTraceProfileCount(const void *entry1, const void *entry2)
+{
+ const JitEntry *jitEntry1 = entry1;
+ const JitEntry *jitEntry2 = entry2;
+
+ int count1 = getProfileCount(jitEntry1);
+ int count2 = getProfileCount(jitEntry2);
+ return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
+}
+
+/* Dump all trace profiles, highest count first, then log the average */
+static void sortAndPrintTraceProfiles()
+{
+ JitEntry *sortedEntries;
+ int numTraces = 0;
+ unsigned long counts = 0;
+ unsigned int i;
+
+ /* Hold gDvmJit.tableLock for the whole copy-and-dump sequence */
+ dvmLockMutex(&gDvmJit.tableLock);
+
+ /* Sort a private copy of the table in descending count order */
+ sortedEntries = malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
+ if (sortedEntries == NULL)
+ goto done;
+ memcpy(sortedEntries, gDvmJit.pJitEntryTable,
+ sizeof(JitEntry) * gDvmJit.jitTableSize);
+ qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
+ sortTraceProfileCount);
+
+ /* Dump the sorted entries, skipping slots whose dPC is 0 */
+ for (i=0; i < gDvmJit.jitTableSize; i++) {
+ if (sortedEntries[i].dPC != 0) {
+ counts += dvmCompilerDumpTraceProfile(&sortedEntries[i]);
+ numTraces++;
+ }
+ }
+ if (numTraces == 0)
+ numTraces = 1;
+ LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
+
+ free(sortedEntries);
+done:
+ dvmUnlockMutex(&gDvmJit.tableLock);
+ return;
+}
+
/* Dumps debugging & tuning stats to the log */
void dvmJitStats()
{
@@ -196,18 +263,8 @@
gDvmJit.invokeChain, gDvmJit.invokePredictedChain,
gDvmJit.invokeNative, gDvmJit.returnOp);
#endif
- if (gDvmJit.profile) {
- int numTraces = 0;
- long counts = 0;
- for (i=0; i < (int) gDvmJit.jitTableSize; i++) {
- if (gDvmJit.pJitEntryTable[i].dPC != 0) {
- counts += dvmCompilerDumpTraceProfile( &gDvmJit.pJitEntryTable[i] );
- numTraces++;
- }
- }
- if (numTraces == 0)
- numTraces = 1;
- LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
+ if (gDvmJit.profile) {
+ sortAndPrintTraceProfiles();
}
}
}