Support GenSelect for x86

kMirOpSelect is an extended MIR that is generated to remove
trivial diamond shapes where the conditional is an if-eqz or
if-nez and each of the paths contains a move or const bytecode
with the same destination register.

This patch enables x86 to generate code for this extended MIR.
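
For illustration (hypothetical virtual registers), the kind of diamond
that gets collapsed into a single kMirOpSelect looks roughly like this
in Dalvik bytecode:

    if-eqz v1, :else
    const/4 v0, 0x3
    goto :done
    :else
    const/4 v0, 0x5
    :done

Both arms write only v0, so the whole diamond can be expressed as a
single select on v1.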

A) Handling the constant specialization of kMirOpSelect:
  1) When the true case is zero and result_reg is not the same as src_reg:
      xor result_reg, result_reg
      cmp $0, src_reg
      mov t1, $false_case
      cmovnz result_reg, t1
  2) When the false case is zero and result_reg is not the same as src_reg:
      xor result_reg, result_reg
      cmp $0, src_reg
      mov t1, $true_case
      cmovz result_reg, t1
  3) All other cases (we do compare first to set eflags):
      cmp $0, src_reg
      mov result_reg, $true_case
      mov t1, $false_case
      cmovnz result_reg, t1
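
As a concrete (hypothetical) instance of case 1 above, lowering
v0 = (v1 == 0) ? 0 : 7 with v1 held in src_reg would produce roughly:
      xor result_reg, result_reg
      cmp $0, src_reg
      mov t1, $7
      cmovnz result_reg, t1
The xor is emitted before the cmp because xor writes eflags while mov
does not; the same constraint is why case 3 does the compare first.
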
B) Handling the move specialization of kMirOpSelect:
  1) When the true case is already in place:
      cmp $0, src_reg
      cmovnz result_reg, false_reg
  2) When the false case is already in place:
      cmp $0, src_reg
      cmovz result_reg, true_reg
  3) When neither case is in place:
      cmp $0, src_reg
      mov result_reg, true_reg
      cmovnz result_reg, false_reg
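
For example (hypothetical register assignment), if v0 = (v1 == 0) ? v2 : v3
is lowered with v1 in ecx, v2 in eax, v3 in edx, and the result also
allocated to eax, then case 1 applies and only two instructions are needed:
      cmp $0, ecx
      cmovnz eax, edx
In every variant the branch is replaced by a conditional move, so the
diamond disappears from the generated code entirely.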

Change-Id: Ic7c50823208fe82019916476a0a77c6a271679fe
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5c41520..0d53d4c 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -323,9 +323,10 @@
           break;
       }
       // Is this the select pattern?
-      // TODO: flesh out support for Mips and X86.  NOTE: llvm's select op doesn't quite work here.
+      // TODO: flesh out support for Mips.  NOTE: llvm's select op doesn't quite work here.
       // TUNING: expand to support IF_xx compare & branches
-      if (!(cu_->compiler_backend == kPortable) && (cu_->instruction_set == kThumb2) &&
+      if ((cu_->compiler_backend != kPortable) &&
+          (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86) &&
           ((mir->dalvikInsn.opcode == Instruction::IF_EQZ) ||
           (mir->dalvikInsn.opcode == Instruction::IF_NEZ))) {
         BasicBlock* ft = GetBasicBlock(bb->fall_through);
@@ -391,6 +392,11 @@
                 }
               }
               if (const_form) {
+                /*
+                 * TODO: If both constants have the same value, then instead of generating
+                 * a select, we should simply generate a const bytecode. This should be
+                 * considered after inlining, which can lead to a CFG of this form.
+                 */
                 // "true" set val in vB
                 mir->dalvikInsn.vB = if_true->dalvikInsn.vB;
                 // "false" set val in vC
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index b59ec5e..60b783d 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -796,7 +796,14 @@
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                      bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
+
+    /**
+     * @brief Lowers the kMirOpSelect MIR into LIR.
+     * @param bb The basic block containing the MIR.
+     * @param mir The MIR whose opcode is kMirOpSelect.
+     */
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index ccae130..3f5f33c 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -180,7 +180,97 @@
 }
 
 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect";
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+
+  // The kMirOpSelect has two variants, one for constants and one for moves.
+  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
+
+  if (is_constant_case) {
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     * 2) When the false case is zero and result_reg is not the same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $true_case
+     *     cmovz result_reg, t1
+     * 3) All other cases (we do compare first to set eflags):
+     *     cmp $0, src_reg
+     *     mov result_reg, $true_case
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     */
+    const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg);
+    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
+    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
+    const bool catch_all_case = !(true_zero_case || false_zero_case);
+
+    if (true_zero_case || false_zero_case) {
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      OpRegImm(kOpCmp, rl_src.low_reg, 0);
+    }
+
+    if (catch_all_case) {
+      OpRegImm(kOpMov, rl_result.low_reg, true_val);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      int immediate_for_temp = false_zero_case ? true_val : false_val;
+      int temp1_reg = AllocTemp();
+      OpRegImm(kOpMov, temp1_reg, immediate_for_temp);
+
+      ConditionCode cc = false_zero_case ? kCondEq : kCondNe;
+      OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg);
+
+      FreeTemp(temp1_reg);
+    }
+  } else {
+    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
+    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is already in place:
+     *     cmp $0, src_reg
+     *     cmovnz result_reg, false_reg
+     * 2) When the false case is already in place:
+     *     cmp $0, src_reg
+     *     cmovz result_reg, true_reg
+     * 3) When neither case is in place:
+     *     cmp $0, src_reg
+     *     mov result_reg, true_reg
+     *     cmovnz result_reg, false_reg
+     */
+
+    // kMirOpSelect is generated only for conditionals that compare against zero.
+    OpRegImm(kOpCmp, rl_src.low_reg, 0);
+
+    if (rl_result.low_reg == rl_true.low_reg) {
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    } else if (rl_result.low_reg == rl_false.low_reg) {
+      OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg);
+    } else {
+      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    }
+  }
+
+  StoreValue(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index a2c215c..0ef4034 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -141,7 +141,14 @@
     case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
     case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
     case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov: return LoadConstantNoClobber(r_dest_src1, value);
+    case kOpMov:
+      /*
+       * Moving the constant zero into a register could be specialized as an xor of the register.
+       * However, that would set eflags while the move does not. For that reason, always emit the
+       * move here; callers that do not need eflags preserved should call LoadConstantNoClobber.
+       */
+      opcode = kX86Mov32RI;
+      break;
     case kOpMul:
       opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
       return NewLIR3(opcode, r_dest_src1, r_dest_src1, value);