Merge "Remove some SOA from JNI internal." into dalvik-dev
diff --git a/src/compiler/codegen/arm/fp_arm.cc b/src/compiler/codegen/arm/fp_arm.cc
index 5e0e73d..57c55cc 100644
--- a/src/compiler/codegen/arm/fp_arm.cc
+++ b/src/compiler/codegen/arm/fp_arm.cc
@@ -259,6 +259,7 @@
if (is_double) {
rl_src1 = LoadValueWide(cu, rl_src1, kFPReg);
rl_src2 = LoadValueWide(cu, rl_src2, kFPReg);
+ // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
ClobberSReg(cu, rl_dest.s_reg_low);
rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
LoadConstant(cu, rl_result.low_reg, default_result);
@@ -267,6 +268,7 @@
} else {
rl_src1 = LoadValue(cu, rl_src1, kFPReg);
rl_src2 = LoadValue(cu, rl_src2, kFPReg);
+ // In case result vreg is also a src vreg, break association to avoid useless copy by EvalLoc()
ClobberSReg(cu, rl_dest.s_reg_low);
rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
LoadConstant(cu, rl_result.low_reg, default_result);
diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h
index 0698156..e512803 100644
--- a/src/compiler/codegen/codegen.h
+++ b/src/compiler/codegen/codegen.h
@@ -211,6 +211,7 @@
bool GenInlinedDoubleCvt(CompilationUnit *cu, CallInfo* info);
bool GenInlinedIndexOf(CompilationUnit* cu, CallInfo* info, bool zero_based);
bool GenInlinedStringCompareTo(CompilationUnit* cu, CallInfo* info);
+ bool GenInlinedCurrentThread(CompilationUnit* cu, CallInfo* info);
bool GenIntrinsic(CompilationUnit* cu, CallInfo* info);
// Shared by all targets - implemented in gen_loadstore.cc.
diff --git a/src/compiler/codegen/gen_invoke.cc b/src/compiler/codegen/gen_invoke.cc
index 41924e2..afaa053 100644
--- a/src/compiler/codegen/gen_invoke.cc
+++ b/src/compiler/codegen/gen_invoke.cc
@@ -18,6 +18,7 @@
#include "../compiler_ir.h"
#include "ralloc_util.h"
#include "codegen_util.h"
+#include "x86/codegen_x86.h"
namespace art {
@@ -1105,6 +1106,20 @@
return true;
}
+/*
+ * Inline expansion of "java.lang.Thread java.lang.Thread.currentThread()": loads the word found
+ * at Thread::PeerOffset() for the current thread into the inline target and stores it as the
+ * result of the call. Always returns true.
+ */
+bool Codegen::GenInlinedCurrentThread(CompilationUnit* cu, CallInfo* info) {
+  RegLocation rl_dest = InlineTarget(cu, info);
+  RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+  int offset = Thread::PeerOffset().Int32Value();
+  if (cu->instruction_set == kThumb2) {
+    // Thumb2: load the word at [TargetReg(kSelf) + offset].
+    LoadWordDisp(cu, TargetReg(kSelf), offset, rl_result.low_reg);
+  } else {
+    // Only x86 is handled besides Thumb2; any other instruction set fails this CHECK.
+    CHECK(cu->instruction_set == kX86);
+    // OpRegThreadMem is reached through X86Codegen, hence the downcast. static_cast keeps the
+    // conversion checked by the compiler (a C-style cast would silently reinterpret).
+    static_cast<X86Codegen*>(this)->OpRegThreadMem(cu, kOpMov, rl_result.low_reg, offset);
+  }
+  StoreValue(cu, rl_dest, rl_result);
+  return true;
+}
+
bool Codegen::GenIntrinsic(CompilationUnit* cu, CallInfo* info)
{
if (info->opt_flags & MIR_INLINED) {
@@ -1172,6 +1187,9 @@
if (tgt_method == "int java.lang.String.length()") {
return GenInlinedStringIsEmptyOrLength(cu, info, false /* is_empty */);
}
+ if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") {
+ return GenInlinedCurrentThread(cu, info);
+ }
} else if (tgt_method.find("boolean sun.misc.Unsafe.compareAndSwap") != std::string::npos) {
if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
return GenInlinedCas32(cu, info, false);
diff --git a/src/compiler/codegen/local_optimizations.cc b/src/compiler/codegen/local_optimizations.cc
index ec915f0..69b5d8e 100644
--- a/src/compiler/codegen/local_optimizations.cc
+++ b/src/compiler/codegen/local_optimizations.cc
@@ -20,7 +20,7 @@
#define DEBUG_OPT(X)
-/* Check RAW, WAR, and WAR dependency on the register operands */
+/* Check RAW, WAR, and WAW dependency on the register operands */
#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \
((use | def) & check->def_mask))
diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc
index 999c652..1a3a413 100644
--- a/src/compiler/codegen/ralloc_util.cc
+++ b/src/compiler/codegen/ralloc_util.cc
@@ -124,7 +124,17 @@
}
}
-/* Clobber any temp associated with an s_reg. Could be in either class */
+/*
+ * Break the association between a Dalvik vreg and a physical temp register of either register
+ * class.
+ * TODO: Ideally, the public version of this code should not exist. Besides its local usage
+ * in the register utilities, it is also used by code gen routines to work around a deficiency in
+ * local register allocation, which fails to distinguish between the "in" and "out" identities
+ * of Dalvik vregs. This can result in useless register copies when the same Dalvik vreg
+ * is used both as the source and destination register of an operation in which the type
+ * changes (for example: INT_TO_FLOAT v1, v1). Revisit when improved register allocation is
+ * addressed.
+ */
void ClobberSReg(CompilationUnit* cu, int s_reg)
{
#ifndef NDEBUG
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index dba4953..4ef186a 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -18,6 +18,7 @@
#define ART_SRC_COMPILER_CODEGEN_X86_CODEGENX86_H_
#include "../../compiler_internals.h"
+#include "x86_lir.h"
namespace art {
diff --git a/src/compiler/codegen/x86/fp_x86.cc b/src/compiler/codegen/x86/fp_x86.cc
index 78c737d..6bfe9a2 100644
--- a/src/compiler/codegen/x86/fp_x86.cc
+++ b/src/compiler/codegen/x86/fp_x86.cc
@@ -158,6 +158,7 @@
case Instruction::FLOAT_TO_INT: {
rl_src = LoadValue(cu, rl_src, kFPReg);
src_reg = rl_src.low_reg;
+ // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
ClobberSReg(cu, rl_dest.s_reg_low);
rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
int temp_reg = AllocTempFloat(cu);
@@ -179,6 +180,7 @@
case Instruction::DOUBLE_TO_INT: {
rl_src = LoadValueWide(cu, rl_src, kFPReg);
src_reg = rl_src.low_reg;
+ // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
ClobberSReg(cu, rl_dest.s_reg_low);
rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
int temp_reg = AllocTempDouble(cu) | X86_FP_DOUBLE;
@@ -245,6 +247,7 @@
rl_src2 = LoadValueWide(cu, rl_src2, kFPReg);
src_reg2 = S2d(rl_src2.low_reg, rl_src2.high_reg);
}
+ // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
ClobberSReg(cu, rl_dest.s_reg_low);
RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
LoadConstantNoClobber(cu, rl_result.low_reg, unordered_gt ? 1 : 0);
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index 190208b..bd3a7fa 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -43,16 +43,6 @@
* x = y return 0
* x < y return -1
* x > y return 1
- *
- * slt t0, x.hi, y.hi; # (x.hi < y.hi) ? 1:0
- * sgt t1, x.hi, y.hi; # (y.hi > x.hi) ? 1:0
- * subu res, t0, t1 # res = -1:1:0 for [ < > = ]
- * bnez res, finish
- * sltu t0, x.lo, y.lo
- * sgtu r1, x.lo, y.lo
- * subu res, t0, t1
- * finish:
- *
*/
void X86Codegen::GenCmpLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
@@ -335,6 +325,8 @@
bool X86Codegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
+ // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
+ // enough.
FlushAllRegs(cu);
LockCallTemps(cu); // Prepare for explicit register usage
LoadValueDirectWideFixed(cu, rl_src1, r0, r1);
@@ -351,6 +343,8 @@
bool X86Codegen::GenSubLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
+ // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
+ // enough.
FlushAllRegs(cu);
LockCallTemps(cu); // Prepare for explicit register usage
LoadValueDirectWideFixed(cu, rl_src1, r0, r1);
@@ -367,13 +361,15 @@
bool X86Codegen::GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
+ // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
+ // enough.
FlushAllRegs(cu);
LockCallTemps(cu); // Prepare for explicit register usage
LoadValueDirectWideFixed(cu, rl_src1, r0, r1);
LoadValueDirectWideFixed(cu, rl_src2, r2, r3);
- // Compute (r1:r0) = (r1:r0) + (r2:r3)
- OpRegReg(cu, kOpAnd, r0, r2); // r0 = r0 - r2
- OpRegReg(cu, kOpAnd, r1, r3); // r1 = r1 - r3 - CF
+ // Compute (r1:r0) = (r1:r0) & (r2:r3)
+ OpRegReg(cu, kOpAnd, r0, r2); // r0 = r0 & r2
+ OpRegReg(cu, kOpAnd, r1, r3); // r1 = r1 & r3
RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(cu, rl_dest, rl_result);
@@ -383,13 +379,15 @@
bool X86Codegen::GenOrLong(CompilationUnit* cu, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2)
{
+ // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
+ // enough.
FlushAllRegs(cu);
LockCallTemps(cu); // Prepare for explicit register usage
LoadValueDirectWideFixed(cu, rl_src1, r0, r1);
LoadValueDirectWideFixed(cu, rl_src2, r2, r3);
- // Compute (r1:r0) = (r1:r0) + (r2:r3)
- OpRegReg(cu, kOpOr, r0, r2); // r0 = r0 - r2
- OpRegReg(cu, kOpOr, r1, r3); // r1 = r1 - r3 - CF
+ // Compute (r1:r0) = (r1:r0) | (r2:r3)
+ OpRegReg(cu, kOpOr, r0, r2); // r0 = r0 | r2
+ OpRegReg(cu, kOpOr, r1, r3); // r1 = r1 | r3
RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(cu, rl_dest, rl_result);
@@ -399,13 +397,15 @@
bool X86Codegen::GenXorLong(CompilationUnit* cu, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_src2)
{
+ // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
+ // enough.
FlushAllRegs(cu);
LockCallTemps(cu); // Prepare for explicit register usage
LoadValueDirectWideFixed(cu, rl_src1, r0, r1);
LoadValueDirectWideFixed(cu, rl_src2, r2, r3);
- // Compute (r1:r0) = (r1:r0) + (r2:r3)
- OpRegReg(cu, kOpXor, r0, r2); // r0 = r0 - r2
- OpRegReg(cu, kOpXor, r1, r3); // r1 = r1 - r3 - CF
+ // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
+ OpRegReg(cu, kOpXor, r0, r2); // r0 = r0 ^ r2
+ OpRegReg(cu, kOpXor, r1, r3); // r1 = r1 ^ r3
RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(cu, rl_dest, rl_result);
@@ -431,6 +431,7 @@
X86OpCode opcode = kX86Bkpt;
switch (op) {
case kOpCmp: opcode = kX86Cmp32RT; break;
+ case kOpMov: opcode = kX86Mov32RT; break;
default:
LOG(FATAL) << "Bad opcode: " << op;
break;
diff --git a/src/thread.cc b/src/thread.cc
index 72ceaf0..75d0468 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -1636,6 +1636,7 @@
DO_THREAD_OFFSET(state_and_flags_);
DO_THREAD_OFFSET(card_table_);
DO_THREAD_OFFSET(exception_);
+ DO_THREAD_OFFSET(opeer_);
DO_THREAD_OFFSET(jni_env_);
DO_THREAD_OFFSET(self_);
DO_THREAD_OFFSET(stack_end_);
diff --git a/src/thread.h b/src/thread.h
index 4c065c5..8b9c81d 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -442,6 +442,10 @@
return ThreadOffset(OFFSETOF_MEMBER(Thread, exception_));
}
+ // Returns the offset of the opeer_ member within Thread, wrapped as a ThreadOffset.
+ static ThreadOffset PeerOffset() {
+ return ThreadOffset(OFFSETOF_MEMBER(Thread, opeer_));
+ }
+
static ThreadOffset ThinLockIdOffset() {
return ThreadOffset(OFFSETOF_MEMBER(Thread, thin_lock_id_));
}