Support GenSelect for x86
kMirOpSelect is an extended MIR that is generated in order
to remove trivial diamond shapes where the conditional is an
if-eqz or if-nez and, on each path, there is a move or
const bytecode with the same destination register.
This patch enables x86 to generate code for this extended MIR.
A) Handling the constant specialization of kMirOpSelect:
1) When the true case is zero and result_reg is not the same as src_reg:
xor result_reg, result_reg
cmp $0, src_reg
mov t1, $false_case
cmovnz result_reg, t1
2) When the false case is zero and result_reg is not the same as src_reg:
xor result_reg, result_reg
cmp $0, src_reg
mov t1, $true_case
cmovz result_reg, t1
3) All other cases (we do compare first to set eflags):
cmp $0, src_reg
mov result_reg, $true_case
mov t1, $false_case
cmovnz result_reg, t1
B) Handling the move specialization of kMirOpSelect:
1) When the true case is already in place:
cmp $0, src_reg
cmovnz result_reg, false_reg
2) When the false case is already in place:
cmp $0, src_reg
cmovz result_reg, true_reg
3) When neither case is in place:
cmp $0, src_reg
mov result_reg, true_reg
cmovnz result_reg, false_reg
Change-Id: Ic7c50823208fe82019916476a0a77c6a271679fe
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5c41520..0d53d4c 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -323,9 +323,10 @@
break;
}
// Is this the select pattern?
- // TODO: flesh out support for Mips and X86. NOTE: llvm's select op doesn't quite work here.
+ // TODO: flesh out support for Mips. NOTE: llvm's select op doesn't quite work here.
// TUNING: expand to support IF_xx compare & branches
- if (!(cu_->compiler_backend == kPortable) && (cu_->instruction_set == kThumb2) &&
+ if ((cu_->compiler_backend != kPortable) &&
+ (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86) &&
((mir->dalvikInsn.opcode == Instruction::IF_EQZ) ||
(mir->dalvikInsn.opcode == Instruction::IF_NEZ))) {
BasicBlock* ft = GetBasicBlock(bb->fall_through);
@@ -391,6 +392,11 @@
}
}
if (const_form) {
+      /*
+       * TODO: If both constants are the same value, then instead of generating
+       * a select, we should simply generate a const bytecode. This should be
+       * considered after inlining, which can lead to a CFG of this form.
+       */
// "true" set val in vB
mir->dalvikInsn.vB = if_true->dalvikInsn.vB;
// "false" set val in vC
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index b59ec5e..60b783d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -796,7 +796,14 @@
virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
bool is_double) = 0;
virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
+
+ /**
+ * @brief Lowers the kMirOpSelect MIR into LIR.
+ * @param bb The basic block in which the MIR is from.
+ * @param mir The MIR whose opcode is kMirOpSelect.
+ */
virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+
virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
virtual void GenMoveException(RegLocation rl_dest) = 0;
virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ccae130..3f5f33c 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -180,7 +180,97 @@
}
void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
- UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect";
+ RegLocation rl_result;
+ RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+ RegLocation rl_dest = mir_graph_->GetDest(mir);
+ rl_src = LoadValue(rl_src, kCoreReg);
+
+ // The kMirOpSelect has two variants, one for constants and one for moves.
+ const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
+
+ if (is_constant_case) {
+ int true_val = mir->dalvikInsn.vB;
+ int false_val = mir->dalvikInsn.vC;
+ rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     * 2) When the false case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $true_case
+     *     cmovz result_reg, t1
+     * 3) All other cases (we do compare first to set eflags):
+     *     cmp $0, src_reg
+     *     mov result_reg, $true_case
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     */
+ const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg);
+ const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
+ const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
+ const bool catch_all_case = !(true_zero_case || false_zero_case);
+
+ if (true_zero_case || false_zero_case) {
+ OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+ }
+
+ if (true_zero_case || false_zero_case || catch_all_case) {
+ OpRegImm(kOpCmp, rl_src.low_reg, 0);
+ }
+
+ if (catch_all_case) {
+ OpRegImm(kOpMov, rl_result.low_reg, true_val);
+ }
+
+ if (true_zero_case || false_zero_case || catch_all_case) {
+ int immediateForTemp = false_zero_case ? true_val : false_val;
+ int temp1_reg = AllocTemp();
+ OpRegImm(kOpMov, temp1_reg, immediateForTemp);
+
+ ConditionCode cc = false_zero_case ? kCondEq : kCondNe;
+ OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg);
+
+ FreeTemp(temp1_reg);
+ }
+ } else {
+ RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
+ RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
+ rl_true = LoadValue(rl_true, kCoreReg);
+ rl_false = LoadValue(rl_false, kCoreReg);
+ rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is already in place:
+     *     cmp $0, src_reg
+     *     cmovnz result_reg, false_reg
+     * 2) When the false case is already in place:
+     *     cmp $0, src_reg
+     *     cmovz result_reg, true_reg
+     * 3) When neither case is in place:
+     *     cmp $0, src_reg
+     *     mov result_reg, true_reg
+     *     cmovnz result_reg, false_reg
+     */
+
+ // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
+ OpRegImm(kOpCmp, rl_src.low_reg, 0);
+
+ if (rl_result.low_reg == rl_true.low_reg) {
+ OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+ } else if (rl_result.low_reg == rl_false.low_reg) {
+ OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg);
+ } else {
+ OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+ OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+ }
+ }
+
+ StoreValue(rl_dest, rl_result);
}
void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index a2c215c..0ef4034 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -141,7 +141,14 @@
case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
- case kOpMov: return LoadConstantNoClobber(r_dest_src1, value);
+ case kOpMov:
+      /*
+       * Moving the constant zero into a register can be specialized as an xor of the register.
+       * However, that sets eflags while the move does not. For that reason, always do
+       * the move here; callers that do not care about eflags should call LoadConstantNoClobber instead.
+       */
+ opcode = kX86Mov32RI;
+ break;
case kOpMul:
opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
return NewLIR3(opcode, r_dest_src1, r_dest_src1, value);