Continuing evolution of Thumb2 support.
Bug fix for local optimization
Enable partial floating point store sinking (with significant perf gain!)
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c
index 5f43b87..11aaedd 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.c
+++ b/vm/compiler/codegen/arm/LocalOptimizations.c
@@ -18,6 +18,27 @@
#include "vm/compiler/CompilerInternals.h"
#include "ArmLIR.h"
+ArmLIR* dvmCompilerGenCopy(CompilationUnit *cUnit, int rDest, int rSrc); /* NOTE(review): the load-replacement code in this patch calls dvmCompilerRegCopy, not dvmCompilerGenCopy - confirm this prototype names the intended helper */
+
+/* Does this LIR load a Dalvik register, i.e. is it an rFP-based load? */
+static inline bool isDalvikLoad(ArmLIR *lir)
+{
+    const int op = lir->opCode;
+    const bool rFPBased = (lir->operands[1] == rFP);
+    const bool plainLoad = (op == THUMB_LDR_RRI5) || (op == THUMB2_LDR_RRI12);
+    const bool coprocLoad = (op == THUMB2_VLDRS) || (op == THUMB2_VLDRD);
+    return rFPBased && (plainLoad || coprocLoad);
+}
+
+/* Does this LIR store a Dalvik register, i.e. is it an rFP-based store? */
+static inline bool isDalvikStore(ArmLIR *lir)
+{
+    const int op = lir->opCode;
+    const bool plainStore = (op == THUMB_STR_RRI5) || (op == THUMB2_STR_RRI12);
+    const bool coprocStore = (op == THUMB2_VSTRS) || (op == THUMB2_VSTRD);
+    return (lir->operands[1] == rFP) && (plainStore || coprocStore);
+}
+
/*
* Perform a pass of top-down walk to
* 1) Eliminate redundant loads and stores
@@ -37,8 +58,7 @@
if (thisLIR->age >= cUnit->optRound) {
continue;
}
- if (thisLIR->opCode == THUMB_STR_RRI5 &&
- thisLIR->operands[1] == rFP) {
+ if (isDalvikStore(thisLIR)) {
int dRegId = thisLIR->operands[2];
int nativeRegId = thisLIR->operands[0];
ArmLIR *checkLIR;
@@ -49,16 +69,17 @@
checkLIR = NEXT_LIR(checkLIR)) {
/* Check if a Dalvik register load is redundant */
- if (checkLIR->opCode == THUMB_LDR_RRI5 &&
- checkLIR->operands[1] == rFP &&
- checkLIR->operands[2] == dRegId) {
+ if (isDalvikLoad(checkLIR) &&
+ checkLIR->operands[2] == dRegId ) {
+ if (FPREG(nativeRegId) != FPREG(checkLIR->operands[0])) {
+ break; // TODO: handle gen<=>float copies
+ }
/* Insert a move to replace the load */
if (checkLIR->operands[0] != nativeRegId) {
- ArmLIR *moveLIR =
- dvmCompilerNew(sizeof(ArmLIR), true);
- moveLIR->opCode = THUMB_MOV_RR;
- moveLIR->operands[0] = checkLIR->operands[0];
- moveLIR->operands[1] = nativeRegId;
+ ArmLIR *moveLIR;
+ moveLIR = dvmCompilerRegCopy(cUnit,
+ checkLIR->operands[0],
+ nativeRegId);
/*
* Insertion is guaranteed to succeed since checkLIR
* is never the first LIR on the list
@@ -70,8 +91,7 @@
continue;
/* Found a true output dependency - nuke the previous store */
- } else if (checkLIR->opCode == THUMB_STR_RRI5 &&
- checkLIR->operands[1] == rFP &&
+ } else if (isDalvikStore(checkLIR) &&
checkLIR->operands[2] == dRegId) {
thisLIR->isNop = true;
break;
@@ -82,10 +102,6 @@
/* Last instruction reached */
stopHere |= checkLIR->generic.next == NULL;
- /* Store data is clobbered */
- stopHere |= (EncodingMap[checkLIR->opCode].flags &
- CLOBBER_DEST) != 0 &&
- checkLIR->operands[0] == nativeRegId;
/*
* Conservatively assume there is a memory dependency
* for st/ld multiples and reg+reg address mode
@@ -93,16 +109,21 @@
stopHere |= checkLIR->opCode == THUMB_STMIA ||
checkLIR->opCode == THUMB_LDMIA ||
checkLIR->opCode == THUMB_STR_RRR ||
- checkLIR->opCode == THUMB_LDR_RRR;
+ checkLIR->opCode == THUMB_LDR_RRR ||
+ checkLIR->opCode == THUMB2_VLDRD ||
+ checkLIR->opCode == THUMB2_VSTRD;
+;
-// FIXME: need to enhance this code to sink & play well with coprocessor ld/str
- stopHere |= checkLIR->opCode == THUMB2_VSTRS ||
- checkLIR->opCode == THUMB2_VSTRD ||
- checkLIR->opCode == THUMB2_VLDRS ||
- checkLIR->opCode == THUMB2_VLDRD;
+ if (!isPseudoOpCode(checkLIR->opCode)) {
- stopHere |= (EncodingMap[checkLIR->opCode].flags &
- IS_BRANCH) != 0;
+ /* Store data is clobbered */
+ stopHere |= (EncodingMap[checkLIR->opCode].flags &
+ CLOBBER_DEST) != 0 &&
+ checkLIR->operands[0] == nativeRegId;
+
+ stopHere |= (EncodingMap[checkLIR->opCode].flags &
+ IS_BRANCH) != 0;
+ }
/* Found a new place to put the store - move it here */
if (stopHere == true) {