Support float & double negation in the optimizing compiler.

- Add support for the neg-float and neg-double Dex
  instructions in the optimizing compiler.
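- Generate x86, x86-64 and ARM (but not ARM64) code for
  float and double HNeg nodes.  On x86 and x86-64, negation
  is currently computed as a subtraction from zero (0 - x),
  so negating +0.0 yields +0.0 rather than -0.0; the TODO in
  the x86-64 code generator describes the sign-bit XOR
  alternative.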
- Add related tests to test/415-optimizing-arith-neg.

Change-Id: I29739a86e13dbe6f64e191641d01637c867cba6c
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e43841a..a4af900 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -896,6 +896,16 @@
       break;
     }
 
+    case Instruction::NEG_FLOAT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
+    case Instruction::NEG_DOUBLE: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
     case Instruction::NOT_INT: {
       Unop_12x<HNot>(instruction, Primitive::kPrimInt);
       break;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index dd595d9..1e9fad0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1189,7 +1189,8 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
 
     default:
@@ -1229,8 +1230,14 @@
       break;
 
     case Primitive::kPrimFloat:
+      DCHECK(in.IsFpuRegister());
+      __ vnegs(out.As<SRegister>(), in.As<SRegister>());
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      DCHECK(in.IsFpuRegisterPair());
+      __ vnegd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
       break;
 
     default:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b2d9187..42e6072 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1116,7 +1116,10 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      // Output overlaps as we need a fresh (zero-initialized)
+      // register to perform subtraction from zero.
+      locations->SetOut(Location::RequiresFpuRegister());
       break;
 
     default:
@@ -1131,11 +1134,13 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
       DCHECK(in.IsRegister());
+      DCHECK(in.Equals(out));
       __ negl(out.As<Register>());
       break;
 
     case Primitive::kPrimLong:
       DCHECK(in.IsRegisterPair());
+      DCHECK(in.Equals(out));
       __ negl(out.AsRegisterPairLow<Register>());
       // Negation is similar to subtraction from zero.  The least
       // significant byte triggers a borrow when it is different from
@@ -1147,8 +1152,19 @@
       break;
 
     case Primitive::kPrimFloat:
+      DCHECK(!in.Equals(out));
+      // out = 0
+      __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>());
+      // out = out - in
+      __ subss(out.As<XmmRegister>(), in.As<XmmRegister>());
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      DCHECK(!in.Equals(out));
+      // out = 0
+      __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>());
+      // out = out - in
+      __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>());
       break;
 
     default:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2bd76c1..4401b9a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1102,7 +1102,10 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      // Output overlaps as we need a fresh (zero-initialized)
+      // register to perform subtraction from zero.
+      locations->SetOut(Location::RequiresFpuRegister());
       break;
 
     default:
@@ -1117,17 +1120,49 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
       DCHECK(in.IsRegister());
+      DCHECK(in.Equals(out));
       __ negl(out.As<CpuRegister>());
       break;
 
     case Primitive::kPrimLong:
       DCHECK(in.IsRegister());
+      DCHECK(in.Equals(out));
       __ negq(out.As<CpuRegister>());
       break;
 
     case Primitive::kPrimFloat:
+      DCHECK(in.IsFpuRegister());
+      DCHECK(out.IsFpuRegister());
+      DCHECK(!in.Equals(out));
+      // TODO: Instead of computing negation as a subtraction from
+      // zero, implement it with an exclusive or with value 0x80000000
+      // (mask for bit 31, representing the sign of a single-precision
+      // floating-point number), fetched from a constant pool:
+      //
+      //   xorps out, [RIP:...] // value at RIP is 0x80 00 00 00
+
+      // out = 0
+      __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>());
+      // out = out - in
+      __ subss(out.As<XmmRegister>(), in.As<XmmRegister>());
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      DCHECK(in.IsFpuRegister());
+      DCHECK(out.IsFpuRegister());
+      DCHECK(!in.Equals(out));
+      // TODO: Instead of computing negation as a subtraction from
+      // zero, implement it with an exclusive or with value
+      // 0x8000000000000000 (mask for bit 63, representing the sign of
+      // a double-precision floating-point number), fetched from a
+      // constant pool:
+      //
+      //   xorpd out, [RIP:...] // value at RIP is 0x80 00 00 00 00 00 00 00
+
+      // out = 0
+      __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>());
+      // out = out - in
+      __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>());
       break;
 
     default: