ART: inline Math.Max/Min (float and double)

This implements the inlined version of Math.Max/Min intrinsics.

Change-Id: I2db8fa7603db3cdf01016ec26811a96f91b1e6ed
Signed-off-by: Alexei Zavjalov <alexei.zavjalov@intel.com>
Signed-off-by: Shou, Yixin <yixin.shou@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index ebe3f0a..efd9079 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -367,7 +367,11 @@
   EXT_0F_ENCODING_MAP(Ucomiss,   0x00, 0x2E, SETS_CCODES|REG_USE0),
   EXT_0F_ENCODING_MAP(Comisd,    0x66, 0x2F, SETS_CCODES|REG_USE0),
   EXT_0F_ENCODING_MAP(Comiss,    0x00, 0x2F, SETS_CCODES|REG_USE0),
+  EXT_0F_ENCODING_MAP(Orpd,      0x66, 0x56, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Orps,      0x00, 0x56, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Andpd,     0x66, 0x54, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Andps,     0x00, 0x54, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Xorpd,     0x66, 0x57, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Xorps,     0x00, 0x57, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addsd,     0xF2, 0x58, REG_DEF0_USE0),
   EXT_0F_ENCODING_MAP(Addss,     0xF3, 0x58, REG_DEF0_USE0),
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index cf4521a..a67a5c8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -173,6 +173,7 @@
   void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
   bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
+  bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
   bool GenInlinedSqrt(CallInfo* info);
   bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index fc65deb..62053fd 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -705,4 +705,77 @@
   }
 }
 
+bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+  if (is_double) {
+    RegLocation rl_src1 = LoadValueWide(info->args[0], kFPReg);
+    RegLocation rl_src2 = LoadValueWide(info->args[2], kFPReg);
+    RegLocation rl_dest = InlineTargetWide(info);
+    RegLocation rl_result = EvalLocWide(rl_dest, kFPReg, true);
+
+    // If result aliases src2, swap the sources so the copy below cannot clobber src2.
+    if (rl_result.reg == rl_src2.reg) {
+        std::swap(rl_src2.reg, rl_src1.reg);
+    }
+
+    OpRegCopyWide(rl_result.reg, rl_src1.reg);
+    NewLIR2(kX86UcomisdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    // Unordered compare (parity condition) means either arg is NaN: return NaN.
+    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
+    // Ordered and unequal: take cond1 if src2 is the answer, cond2 if result already holds it.
+    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
+    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
+    // Equal, but maybe 0.0 vs -0.0: OR yields -0.0 for min, AND yields 0.0 for max.
+    NewLIR2((is_min) ? kX86OrpdRR : kX86AndpdRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
+    // NaN path: overwrite the result with a double quiet-NaN bit pattern.
+    branch_nan->target = NewLIR0(kPseudoTargetLabel);
+    LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
+    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
+    // cond1 path: src2 holds the min/max, so copy it into the result.
+    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
+    OpRegCopyWide(rl_result.reg, rl_src2.reg);
+    // cond2 path: the result reg already holds the correct operand, nothing to emit.
+    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
+    // Common exit label for the NaN and equal-operand paths.
+    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
+    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    RegLocation rl_src1 = LoadValue(info->args[0], kFPReg);
+    RegLocation rl_src2 = LoadValue(info->args[1], kFPReg);
+    RegLocation rl_dest = InlineTarget(info);
+    RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+
+    // If result aliases src2, swap the sources so OpRegCopy below cannot clobber src2.
+    if (rl_result.reg == rl_src2.reg) {
+        std::swap(rl_src2.reg, rl_src1.reg);
+    }
+
+    OpRegCopy(rl_result.reg, rl_src1.reg);
+    NewLIR2(kX86UcomissRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    // Unordered compare (parity condition) means either arg is NaN: return NaN.
+    LIR* branch_nan = NewLIR2(kX86Jcc8, 0, kX86CondP);
+    // Ordered and unequal: take cond1 if src2 is the answer, cond2 if result already holds it.
+    LIR* branch_cond1 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondA : kX86CondB);
+    LIR* branch_cond2 = NewLIR2(kX86Jcc8, 0, (is_min) ? kX86CondB : kX86CondA);
+    // Equal, but maybe 0.0 vs -0.0: OR yields -0.0 for min, AND yields 0.0 for max.
+    NewLIR2((is_min) ? kX86OrpsRR : kX86AndpsRR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
+    LIR* branch_exit_equal = NewLIR1(kX86Jmp8, 0);
+    // NaN path: overwrite the result with a float quiet-NaN bit pattern.
+    branch_nan->target = NewLIR0(kPseudoTargetLabel);
+    LoadConstantNoClobber(rl_result.reg, 0x7fc00000);
+    LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
+    // cond1 path: src2 holds the min/max, so copy it into the result.
+    branch_cond1->target = NewLIR0(kPseudoTargetLabel);
+    OpRegCopy(rl_result.reg, rl_src2.reg);
+    // cond2 path: the result reg already holds the correct operand, nothing to emit.
+    branch_cond2->target = NewLIR0(kPseudoTargetLabel);
+    // Common exit label for the NaN and equal-operand paths.
+    branch_exit_nan->target = NewLIR0(kPseudoTargetLabel);
+    branch_exit_equal->target = NewLIR0(kPseudoTargetLabel);
+    StoreValue(rl_dest, rl_result);
+  }
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 047a65d..bae01d9 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -1050,6 +1050,7 @@
         ->IsIntrinsic(index, &method)) {
       switch (method.opcode) {
         case kIntrinsicAbsDouble:
+        case kIntrinsicMinMaxDouble:
           store_method_addr_ = true;
           break;
         default:
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 17f9b91..500c6b8 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -534,10 +534,14 @@
   Binary0fOpCode(kX86Ucomiss),  // unordered float compare
   Binary0fOpCode(kX86Comisd),   // double compare
   Binary0fOpCode(kX86Comiss),   // float compare
-  Binary0fOpCode(kX86Orps),     // or of floating point registers
-  Binary0fOpCode(kX86Xorps),    // xor of floating point registers
-  Binary0fOpCode(kX86Addsd),    // double add
-  Binary0fOpCode(kX86Addss),    // float add
+  Binary0fOpCode(kX86Orpd),     // double logical OR
+  Binary0fOpCode(kX86Orps),     // float logical OR
+  Binary0fOpCode(kX86Andpd),    // double logical AND
+  Binary0fOpCode(kX86Andps),    // float logical AND
+  Binary0fOpCode(kX86Xorpd),    // double logical XOR
+  Binary0fOpCode(kX86Xorps),    // float logical XOR
+  Binary0fOpCode(kX86Addsd),    // double ADD
+  Binary0fOpCode(kX86Addss),    // float ADD
   Binary0fOpCode(kX86Mulsd),    // double multiply
   Binary0fOpCode(kX86Mulss),    // float multiply
   Binary0fOpCode(kX86Cvtsd2ss),  // double to float