Add support for floats and doubles.

- Follows Quick conventions.
- Currently only works with the baseline register allocator.

Change-Id: Ie4b8e298f4f5e1cd82364da83e4344d4fc3621a3
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 5015bd0..5bcc65b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -102,28 +102,19 @@
 
   uint32_t pos = 1;
   for (int i = 0; i < number_of_parameters; i++) {
-    switch (shorty[pos++]) {
-      case 'F':
-      case 'D': {
-        return false;
-      }
-
-      default: {
-        // integer and reference parameters.
-        HParameterValue* parameter =
-            new (arena_) HParameterValue(parameter_index++, Primitive::GetType(shorty[pos - 1]));
-        entry_block_->AddInstruction(parameter);
-        HLocal* local = GetLocalAt(locals_index++);
-        // Store the parameter value in the local that the dex code will use
-        // to reference that parameter.
-        entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
-        if (parameter->GetType() == Primitive::kPrimLong) {
-          i++;
-          locals_index++;
-          parameter_index++;
-        }
-        break;
-      }
+    HParameterValue* parameter =
+        new (arena_) HParameterValue(parameter_index++, Primitive::GetType(shorty[pos++]));
+    entry_block_->AddInstruction(parameter);
+    HLocal* local = GetLocalAt(locals_index++);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
+    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
+    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
+        || (parameter->GetType() == Primitive::kPrimDouble);
+    if (is_wide) {
+      i++;
+      locals_index++;
+      parameter_index++;
     }
   }
   return true;
@@ -402,10 +393,8 @@
   uint32_t argument_index = start_index;
   for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    if (!IsTypeSupported(type)) {
-      return false;
-    }
-    if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) {
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range && is_wide && args[i] + 1 != args[i + 1]) {
       LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
                    << " at " << dex_offset;
       // We do not implement non sequential register pair.
@@ -413,15 +402,11 @@
     }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
     invoke->SetArgumentAt(argument_index, arg);
-    if (type == Primitive::kPrimLong) {
+    if (is_wide) {
       i++;
     }
   }
 
-  if (!IsTypeSupported(return_type)) {
-    return false;
-  }
-
   DCHECK_EQ(argument_index, number_of_arguments);
   current_block_->AddInstruction(invoke);
   return true;
@@ -648,17 +633,22 @@
     }
 
     case Instruction::RETURN: {
-      BuildReturn(instruction, Primitive::kPrimInt);
+      DCHECK_NE(return_type_, Primitive::kPrimNot);
+      DCHECK_NE(return_type_, Primitive::kPrimLong);
+      DCHECK_NE(return_type_, Primitive::kPrimDouble);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, Primitive::kPrimNot);
+      DCHECK(return_type_ == Primitive::kPrimNot);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, Primitive::kPrimLong);
+      DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
@@ -698,6 +688,16 @@
       break;
     }
 
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
     case Instruction::SUB_INT: {
       Binop_23x<HSub>(instruction, Primitive::kPrimInt);
       break;
@@ -718,6 +718,16 @@
       break;
     }
 
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
     case Instruction::SUB_INT_2ADDR: {
       Binop_12x<HSub>(instruction, Primitive::kPrimInt);
       break;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index e143786..7d7b188 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
 #include "dex_file.h"
+#include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "primitive.h"
@@ -32,9 +33,9 @@
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(ArenaAllocator* arena,
-                DexCompilationUnit* dex_compilation_unit = nullptr,
-                const DexFile* dex_file = nullptr,
-                CompilerDriver* driver = nullptr)
+                DexCompilationUnit* dex_compilation_unit,
+                const DexFile* dex_file,
+                CompilerDriver* driver)
       : arena_(arena),
         branch_targets_(arena, 0),
         locals_(arena, 0),
@@ -46,7 +47,24 @@
         constant1_(nullptr),
         dex_file_(dex_file),
         dex_compilation_unit_(dex_compilation_unit),
-        compiler_driver_(driver) {}
+        compiler_driver_(driver),
+        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])) {}
+
+  // Only for unit testing.
+  HGraphBuilder(ArenaAllocator* arena, Primitive::Type return_type = Primitive::kPrimInt)
+      : arena_(arena),
+        branch_targets_(arena, 0),
+        locals_(arena, 0),
+        entry_block_(nullptr),
+        exit_block_(nullptr),
+        current_block_(nullptr),
+        graph_(nullptr),
+        constant0_(nullptr),
+        constant1_(nullptr),
+        dex_file_(nullptr),
+        dex_compilation_unit_(nullptr),
+        compiler_driver_(nullptr),
+        return_type_(return_type) {}
 
   HGraph* BuildGraph(const DexFile::CodeItem& code);
 
@@ -128,6 +146,7 @@
   const DexFile* const dex_file_;
   DexCompilationUnit* const dex_compilation_unit_;
   CompilerDriver* const compiler_driver_;
+  const Primitive::Type return_type_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a9a7b3..e6fe067 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -189,6 +189,9 @@
       if (loc.GetPolicy() == Location::kRequiresRegister) {
         loc = Location::RegisterLocation(
             AllocateFreeRegister(input->GetType(), blocked_registers_));
+      } else if (loc.GetPolicy() == Location::kRequiresFpuRegister) {
+        loc = Location::FpuRegisterLocation(
+            AllocateFreeRegister(input->GetType(), blocked_registers_));
       } else {
         DCHECK_EQ(loc.GetPolicy(), Location::kAny);
         HLoadLocal* load = input->AsLoadLocal();
@@ -223,6 +226,10 @@
         result_location = Location::RegisterLocation(
             AllocateFreeRegister(instruction->GetType(), blocked_registers_));
         break;
+      case Location::kRequiresFpuRegister:
+        result_location = Location::FpuRegisterLocation(
+            AllocateFreeRegister(instruction->GetType(), blocked_registers_));
+        break;
       case Location::kSameAsFirstInput:
         result_location = locations->InAt(0);
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b58f3b3..c7623fe 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -203,28 +203,42 @@
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
-template <typename T>
+template <typename C, typename F>
 class CallingConvention {
  public:
-  CallingConvention(const T* registers, int number_of_registers)
-      : registers_(registers), number_of_registers_(number_of_registers) {}
+  CallingConvention(const C* registers,
+                    size_t number_of_registers,
+                    const F* fpu_registers,
+                    size_t number_of_fpu_registers)
+      : registers_(registers),
+        number_of_registers_(number_of_registers),
+        fpu_registers_(fpu_registers),
+        number_of_fpu_registers_(number_of_fpu_registers) {}
 
   size_t GetNumberOfRegisters() const { return number_of_registers_; }
+  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
 
-  T GetRegisterAt(size_t index) const {
+  C GetRegisterAt(size_t index) const {
     DCHECK_LT(index, number_of_registers_);
     return registers_[index];
   }
 
-  uint8_t GetStackOffsetOf(size_t index) const {
+  F GetFpuRegisterAt(size_t index) const {
+    DCHECK_LT(index, number_of_fpu_registers_);
+    return fpu_registers_[index];
+  }
+
+  size_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
     // Add one for the method pointer.
     return (index + 1) * kVRegSize;
   }
 
  private:
-  const T* registers_;
+  const C* registers_;
   const size_t number_of_registers_;
+  const F* fpu_registers_;
+  const size_t number_of_fpu_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9d875df..a01e19d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,12 +47,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr DRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, DRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -218,6 +222,10 @@
   return blocked_registers + kNumberOfAllocIds;
 }
 
+static bool* GetBlockedDRegisters(bool* blocked_registers) {
+  return blocked_registers + kNumberOfCoreRegisters + kNumberOfSRegisters;
+}
+
 ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type,
                                                        bool* blocked_registers) const {
   switch (type) {
@@ -262,8 +270,10 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      int reg = AllocateFreeRegisterInternal(GetBlockedDRegisters(blocked_registers), kNumberOfDRegisters);
+      return ArmManagedRegister::FromDRegister(static_cast<DRegister>(reg));
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -274,6 +284,7 @@
 
 void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const {
   bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+  bool* blocked_fpu_registers = GetBlockedDRegisters(blocked_registers);
 
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs[R1_R2] = true;
@@ -300,6 +311,15 @@
   blocked_registers[R8] = true;
   blocked_registers[R10] = true;
   blocked_registers[R11] = true;
+
+  blocked_fpu_registers[D8] = true;
+  blocked_fpu_registers[D9] = true;
+  blocked_fpu_registers[D10] = true;
+  blocked_fpu_registers[D11] = true;
+  blocked_fpu_registers[D12] = true;
+  blocked_fpu_registers[D13] = true;
+  blocked_fpu_registers[D14] = true;
+  blocked_fpu_registers[D15] = true;
 }
 
 size_t CodeGeneratorARM::GetNumberOfRegisters() const {
@@ -348,16 +368,14 @@
 Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -378,6 +396,7 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
     case Primitive::kPrimNot: {
       uint32_t index = gp_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -387,7 +406,8 @@
       }
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble: {
       uint32_t index = gp_index_;
       gp_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
@@ -400,11 +420,6 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
-
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
@@ -419,14 +434,32 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+    } else if (source.IsFpuRegister()) {
+      __ vmovrs(destination.AsArm().AsCoreRegister(),
+                source.AsArm().AsOverlappingDRegisterLow());
     } else {
       __ ldr(destination.AsArm().AsCoreRegister(), Address(SP, source.GetStackIndex()));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ vmovsr(destination.AsArm().AsOverlappingDRegisterLow(),
+                source.AsArm().AsCoreRegister());
+    } else if (source.IsFpuRegister()) {
+      __ vmovs(destination.AsArm().AsOverlappingDRegisterLow(),
+               source.AsArm().AsOverlappingDRegisterLow());
+    } else {
+      __ vldrs(destination.AsArm().AsOverlappingDRegisterLow(),
+               Address(SP, source.GetStackIndex()));
+    }
   } else {
     DCHECK(destination.IsStackSlot());
     if (source.IsRegister()) {
       __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
+    } else if (source.IsFpuRegister()) {
+      __ vstrs(source.AsArm().AsOverlappingDRegisterLow(),
+               Address(SP, destination.GetStackIndex()));
     } else {
+      DCHECK(source.IsStackSlot());
       __ ldr(IP, Address(SP, source.GetStackIndex()));
       __ str(IP, Address(SP, destination.GetStackIndex()));
     }
@@ -441,6 +474,8 @@
     if (source.IsRegister()) {
       __ Mov(destination.AsArm().AsRegisterPairLow(), source.AsArm().AsRegisterPairLow());
       __ Mov(destination.AsArm().AsRegisterPairHigh(), source.AsArm().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint32_t argument_index = source.GetQuickParameterIndex();
       InvokeDexCallingConvention calling_convention;
@@ -458,6 +493,12 @@
                           SP, source.GetStackIndex());
       }
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsDoubleStackSlot()) {
+      __ vldrd(destination.AsArm().AsDRegister(), Address(SP, source.GetStackIndex()));
+    } else {
+      LOG(FATAL) << "Unimplemented";
+    }
   } else if (destination.IsQuickParameter()) {
     InvokeDexCallingConvention calling_convention;
     uint32_t argument_index = destination.GetQuickParameterIndex();
@@ -465,6 +506,8 @@
       __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow());
       __ str(source.AsArm().AsRegisterPairHigh(),
              Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
@@ -489,6 +532,8 @@
       __ ldr(R0,
              Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
       __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+    } else if (source.IsFpuRegister()) {
+      __ vstrd(source.AsArm().AsDRegister(), Address(SP, destination.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(IP, Address(SP, source.GetStackIndex()));
@@ -535,15 +580,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move32(location, Location::StackSlot(stack_slot));
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, Location::DoubleStackSlot(stack_slot));
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
@@ -554,15 +601,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimNot:
       case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
         Move32(location, locations->Out());
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, locations->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -764,15 +813,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
   }
 }
 
@@ -816,10 +867,12 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(0, ArmCoreLocation(R0));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(
           0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
       break;
@@ -838,10 +891,12 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsCoreRegister(), R0);
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsRegisterPair(), R0_R1);
         break;
 
@@ -911,20 +966,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetOut(ArmCoreLocation(R0));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
       break;
 
     case Primitive::kPrimVoid:
       break;
-
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
-      break;
   }
 }
 
@@ -967,15 +1019,16 @@
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
@@ -1003,15 +1056,20 @@
              ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairHigh()));
       break;
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+      __ vadds(locations->Out().AsArm().AsOverlappingDRegisterLow(),
+               locations->InAt(0).AsArm().AsOverlappingDRegisterLow(),
+               locations->InAt(1).AsArm().AsOverlappingDRegisterLow());
+      break;
+
+    case Primitive::kPrimDouble:
+      __ vaddd(locations->Out().AsArm().AsDRegister(),
+               locations->InAt(0).AsArm().AsDRegister(),
+               locations->InAt(1).AsArm().AsDRegister());
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8c86b7a..b5de8ed 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -32,11 +32,16 @@
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr DRegister kParameterFpuRegisters[] = { };
+static constexpr size_t kParameterFpuRegistersLength = 0;
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, DRegister> {
  public:
   InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+      : CallingConvention(kParameterCoreRegisters,
+                          kParameterCoreRegistersLength,
+                          kParameterFpuRegisters,
+                          kParameterFpuRegistersLength) {}
 
   RegisterPair GetRegisterPairAt(size_t argument_index) {
     DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9fb4cc2..1c4b400 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,12 +47,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -193,6 +197,10 @@
   return blocked_registers + kNumberOfAllocIds;
 }
 
+static bool* GetBlockedXmmRegisters(bool* blocked_registers) {
+  return blocked_registers + kNumberOfCpuRegisters;
+}
+
 ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type,
                                                        bool* blocked_registers) const {
   switch (type) {
@@ -238,8 +246,11 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      XmmRegister reg = static_cast<XmmRegister>(AllocateFreeRegisterInternal(
+          GetBlockedXmmRegisters(blocked_registers), kNumberOfXmmRegisters));
+      return X86ManagedRegister::FromXmmRegister(reg);
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -316,16 +327,14 @@
 Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -346,6 +355,7 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
     case Primitive::kPrimNot: {
       uint32_t index = gp_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -355,7 +365,8 @@
       }
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble: {
       uint32_t index = gp_index_;
       gp_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
@@ -368,11 +379,6 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
-
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
@@ -387,13 +393,27 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movd(destination.AsX86().AsCpuRegister(), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex()));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ movd(destination.AsX86().AsXmmRegister(), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movaps(destination.AsX86().AsXmmRegister(), source.AsX86().AsXmmRegister());
+    } else {
+      DCHECK(source.IsStackSlot());
+      __ movss(destination.AsX86().AsXmmRegister(), Address(ESP, source.GetStackIndex()));
+    }
   } else {
+    DCHECK(destination.IsStackSlot());
     if (source.IsRegister()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movss(Address(ESP, destination.GetStackIndex()), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ pushl(Address(ESP, source.GetStackIndex()));
@@ -410,6 +430,8 @@
     if (source.IsRegister()) {
       __ movl(destination.AsX86().AsRegisterPairLow(), source.AsX86().AsRegisterPairLow());
       __ movl(destination.AsX86().AsRegisterPairHigh(), source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint32_t argument_index = source.GetQuickParameterIndex();
       InvokeDexCallingConvention calling_convention;
@@ -430,6 +452,8 @@
       __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow());
       __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)),
               source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(calling_convention.GetRegisterAt(argument_index),
@@ -437,6 +461,12 @@
       __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
       __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsDoubleStackSlot()) {
+      __ movsd(destination.AsX86().AsXmmRegister(), Address(ESP, source.GetStackIndex()));
+    } else {
+      LOG(FATAL) << "Unimplemented";
+    }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegister()) {
@@ -448,9 +478,10 @@
       uint32_t argument_index = source.GetQuickParameterIndex();
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(argument_index));
-      __ pushl(Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
-      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
+      DCHECK_EQ(calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize(),
+                static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize)));
+    } else if (source.IsFpuRegister()) {
+      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ pushl(Address(ESP, source.GetStackIndex()));
@@ -479,6 +510,7 @@
       __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
     }
   } else if (instruction->AsLoadLocal() != nullptr) {
+    int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -486,12 +518,13 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
-        Move32(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+      case Primitive::kPrimFloat:
+        Move32(location, Location::StackSlot(slot));
         break;
 
       case Primitive::kPrimLong:
-        Move64(location, Location::DoubleStackSlot(
-            GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+      case Primitive::kPrimDouble:
+        Move64(location, Location::DoubleStackSlot(slot));
         break;
 
       default:
@@ -506,15 +539,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move32(location, instruction->GetLocations()->Out());
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, instruction->GetLocations()->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -631,15 +666,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
   }
   store->SetLocations(locations);
 }
@@ -772,8 +809,14 @@
           0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(
+          0, Location::FpuRegisterLocation(X86ManagedRegister::FromXmmRegister(XMM0)));
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
   }
 }
 
@@ -793,8 +836,13 @@
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsRegisterPair(), EAX_EDX);
         break;
 
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsXmmRegister(), XMM0);
+        break;
+
       default:
-        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+        LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
     }
   }
   codegen_->GenerateFrameExit();
@@ -865,7 +913,7 @@
 
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      locations->SetOut(Location::FpuRegisterLocation(X86ManagedRegister::FromXmmRegister(XMM0)));
       break;
   }
 
@@ -907,64 +955,74 @@
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
   }
 }
 
 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
+  Location first = locations->InAt(0);  // Left operand; every case adds into it.
+  Location second = locations->InAt(1);  // Right operand.
+
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
-                locations->Out().AsX86().AsCpuRegister());
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
-                locations->InAt(1).AsX86().AsCpuRegister());
-      } else if (locations->InAt(1).IsConstant()) {
-        HConstant* instruction = locations->InAt(1).GetConstant();
+      DCHECK_EQ(first.AsX86().AsCpuRegister(), locations->Out().AsX86().AsCpuRegister());
+      if (second.IsRegister()) {
+        __ addl(first.AsX86().AsCpuRegister(), second.AsX86().AsCpuRegister());
+      } else if (second.IsConstant()) {
+        HConstant* instruction = second.GetConstant();
         Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(), imm);
+        __ addl(first.AsX86().AsCpuRegister(), imm);
       } else {
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
-                Address(ESP, locations->InAt(1).GetStackIndex()));
+        __ addl(first.AsX86().AsCpuRegister(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
+      DCHECK_EQ(first.AsX86().AsRegisterPair(),
                 locations->Out().AsX86().AsRegisterPair());
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-                locations->InAt(1).AsX86().AsRegisterPairLow());
-        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-                locations->InAt(1).AsX86().AsRegisterPairHigh());
+      if (second.IsRegister()) {
+        __ addl(first.AsX86().AsRegisterPairLow(), second.AsX86().AsRegisterPairLow());
+        __ adcl(first.AsX86().AsRegisterPairHigh(), second.AsX86().AsRegisterPairHigh());
       } else {
-        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-                Address(ESP, locations->InAt(1).GetStackIndex()));
-        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-                Address(ESP, locations->InAt(1).GetHighStackIndex(kX86WordSize)));
+        __ addl(first.AsX86().AsRegisterPairLow(), Address(ESP, second.GetStackIndex()));
+        __ adcl(first.AsX86().AsRegisterPairHigh(),  // High halves, plus the low-half carry.
+                Address(ESP, second.GetHighStackIndex(kX86WordSize)));
       }
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat: {  // Single-precision add (addss).
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsX86().AsXmmRegister(), second.AsX86().AsXmmRegister());
+      } else {  // Not an XMM register: read as an ESP-relative stack slot (not checked).
+        __ addss(first.AsX86().AsXmmRegister(), Address(ESP, second.GetStackIndex()));
+      }
       break;
+    }
+
+    case Primitive::kPrimDouble: {  // Double-precision add (addsd).
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsX86().AsXmmRegister(), second.AsX86().AsXmmRegister());
+      } else {  // Not an XMM register: read as an ESP-relative stack slot (not checked).
+        __ addsd(first.AsX86().AsXmmRegister(), Address(ESP, second.GetStackIndex()));
+      }
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 23145bf..3e2ca90 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -32,11 +32,16 @@
 static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr XmmRegister kParameterFpuRegisters[] = { };
+static constexpr size_t kParameterFpuRegistersLength = 0;
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+  InvokeDexCallingConvention() : CallingConvention(
+      kParameterCoreRegisters,
+      kParameterCoreRegistersLength,
+      kParameterFpuRegisters,
+      kParameterFpuRegistersLength) {}
 
   RegisterPair GetRegisterPairAt(size_t argument_index) {
     DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a4efe4f..cbf0630 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,12 +51,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -219,8 +223,11 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      size_t reg = AllocateFreeRegisterInternal(
+          blocked_registers + kNumberOfCpuRegisters, kNumberOfFloatRegisters);
+      return X86_64ManagedRegister::FromXmmRegister(static_cast<FloatRegister>(reg));
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -243,6 +250,12 @@
   blocked_registers[R13] = true;
   blocked_registers[R14] = true;
   blocked_registers[R15] = true;
+
+  bool* blocked_xmm_registers = blocked_registers + kNumberOfCpuRegisters;
+  blocked_xmm_registers[XMM12] = true;
+  blocked_xmm_registers[XMM13] = true;
+  blocked_xmm_registers[XMM14] = true;
+  blocked_xmm_registers[XMM15] = true;
 }
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {
@@ -291,16 +304,14 @@
 Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -321,15 +332,36 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movd(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsXmmRegister());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ movd(destination.AsX86_64().AsXmmRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movaps(destination.AsX86_64().AsXmmRegister(), source.AsX86_64().AsXmmRegister());
+    } else if (source.IsStackSlot()) {
+      __ movss(destination.AsX86_64().AsXmmRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movsd(destination.AsX86_64().AsXmmRegister(),
+               Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
-      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.AsX86_64().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -338,7 +370,11 @@
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegister()) {
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.AsX86_64().AsXmmRegister());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -373,15 +409,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected local type " << instruction->GetType();
     }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
@@ -393,11 +431,13 @@
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
       case Primitive::kPrimLong:
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
         Move(location, instruction->GetLocations()->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -512,15 +552,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
   }
 }
 
@@ -679,8 +721,14 @@
       locations->SetInAt(0, X86_64CpuLocation(RAX));
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0,
+          Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+      LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   }
 }
 
@@ -697,8 +745,14 @@
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX);
         break;
 
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsXmmRegister().AsFloatRegister(),
+                  XMM0);
+        break;
+
       default:
-        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+        LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
     }
   }
   codegen_->GenerateFrameExit();
@@ -734,10 +788,27 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
+    case Primitive::kPrimFloat: {
+      uint32_t index = fp_index_++;
+      stack_index_++;
+      if (index < calling_convention.GetNumberOfFpuRegisters()) {
+        return Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(
+            calling_convention.GetFpuRegisterAt(index)));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
+      }
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t index = fp_index_++;
+      stack_index_ += 2;
+      if (index < calling_convention.GetNumberOfFpuRegisters()) {
+        return Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(
+            calling_convention.GetFpuRegisterAt(index)));
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
+      }
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
@@ -807,7 +878,8 @@
 
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      locations->SetOut(
+          Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(XMM0)));
       break;
   }
 }
@@ -845,6 +917,7 @@
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
+
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -852,52 +925,67 @@
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
-  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
-            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+  Location first = locations->InAt(0);  // Left operand; every case adds into it.
+  Location second = locations->InAt(1);  // Right operand.
+
+  DCHECK(first.Equals(locations->Out()));  // The sum is written over the first input.
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-                locations->InAt(1).AsX86_64().AsCpuRegister());
-      } else if (locations->InAt(1).IsConstant()) {
-        HConstant* instruction = locations->InAt(1).GetConstant();
+      if (second.IsRegister()) {
+        __ addl(first.AsX86_64().AsCpuRegister(), second.AsX86_64().AsCpuRegister());
+      } else if (second.IsConstant()) {
+        HConstant* instruction = second.GetConstant();
         Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm);
+        __ addl(first.AsX86_64().AsCpuRegister(), imm);
       } else {
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-                Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+        __ addl(first.AsX86_64().AsCpuRegister(),
+                Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
+
     case Primitive::kPrimLong: {
-      __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(),
-              locations->InAt(1).AsX86_64().AsCpuRegister());
+      __ addq(first.AsX86_64().AsCpuRegister(), second.AsX86_64().AsCpuRegister());
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat: {  // Single-precision add (addss).
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsX86_64().AsXmmRegister(), second.AsX86_64().AsXmmRegister());
+      } else {  // Not an XMM register: read as an RSP-relative stack slot (not checked).
+        __ addss(first.AsX86_64().AsXmmRegister(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
+    }
+
+    case Primitive::kPrimDouble: {  // Double-precision add (addsd).
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsX86_64().AsXmmRegister(), second.AsX86_64().AsXmmRegister());
+      } else {  // Not an XMM register: read as an RSP-relative stack slot (not checked).
+        __ addsd(first.AsX86_64().AsXmmRegister(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index a299cf6..c81f785 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -28,13 +28,19 @@
 static constexpr size_t kX86_64WordSize = 8;
 
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
+static constexpr FloatRegister kParameterFloatRegisters[] =
+    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
 
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+  InvokeDexCallingConvention() : CallingConvention(
+      kParameterCoreRegisters,
+      kParameterCoreRegistersLength,
+      kParameterFloatRegisters,
+      kParameterFloatRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
@@ -42,13 +48,17 @@
 
 class InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
 
   Location GetNextLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
+  // The current index for cpu registers.
   uint32_t gp_index_;
+  // The current index for fpu registers.
+  uint32_t fp_index_;
+  // The current stack index.
   uint32_t stack_index_;
 
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
diff --git a/compiler/optimizing/constant_propagation_test.cc b/compiler/optimizing/constant_propagation_test.cc
index 5c8c709..d08d14d 100644
--- a/compiler/optimizing/constant_propagation_test.cc
+++ b/compiler/optimizing/constant_propagation_test.cc
@@ -27,10 +27,11 @@
 static void TestCode(const uint16_t* data,
                      const std::string& expected_before,
                      const std::string& expected_after_cp,
-                     const std::string& expected_after_dce) {
+                     const std::string& expected_after_dce,
+                     Primitive::Type return_type = Primitive::kPrimInt) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data);
+  HGraph* graph = CreateCFG(&allocator, data, return_type);
   ASSERT_NE(graph, nullptr);
 
   graph->BuildDominatorTree();
@@ -279,7 +280,7 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce, Primitive::kPrimLong);
 }
 
 /**
@@ -330,7 +331,7 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data, expected_before, expected_after_cp, expected_after_dce, Primitive::kPrimLong);
 }
 
 /**
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 0e77deb..5f85b6a 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -39,31 +39,39 @@
   enum Kind {
     kInvalid = 0,
     kConstant = 1,
-    kStackSlot = 2,  // Word size slot.
+    kStackSlot = 2,  // 32bit stack slot.
     kDoubleStackSlot = 3,  // 64bit stack slot.
-    kRegister = 4,
+
+    kRegister = 4,  // Core register.
+
+    // Do not use 5: (5 & kLocationConstantMask) == kConstant, so IsConstant() would be true.
+    kDoNotUse = 5,
+
+    kFpuRegister = 6,  // Floating point (FPU) register.
+
     // On 32bits architectures, quick can pass a long where the
     // low bits are in the last parameter register, and the high
     // bits are in a stack slot. The kQuickParameter kind is for
     // handling this special case.
-    kQuickParameter = 6,
+    kQuickParameter = 7,
 
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator.  Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 7,
+    kUnallocated = 8,
   };
 
   Location() : value_(kInvalid) {
-    // Verify that non-tagged location kinds do not interfere with kConstantTag.
-    COMPILE_ASSERT((kInvalid & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kUnallocated & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kStackSlot & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kDoubleStackSlot & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kRegister & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kQuickParameter & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kConstant & kLocationTagMask) == kConstant, TagError);
+    // Verify that, masked with kLocationConstantMask, only kConstant compares equal to kConstant.
+    COMPILE_ASSERT((kInvalid & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kUnallocated & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kStackSlot & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kDoubleStackSlot & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kRegister & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kQuickParameter & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kFpuRegister & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kConstant & kLocationConstantMask) == kConstant, TagError);
 
     DCHECK(!IsValid());
   }
@@ -76,7 +84,7 @@
   }
 
   bool IsConstant() const {
-    return (value_ & kLocationTagMask) == kConstant;
+    return (value_ & kLocationConstantMask) == kConstant;  // Only the masked low bits decide.
   }
 
   static Location ConstantLocation(HConstant* constant) {
@@ -86,7 +94,7 @@
 
   HConstant* GetConstant() const {
     DCHECK(IsConstant());
-    return reinterpret_cast<HConstant*>(value_ & ~kLocationTagMask);
+    return reinterpret_cast<HConstant*>(value_ & ~kLocationConstantMask);  // Clear the tag bits.
   }
 
   bool IsValid() const {
@@ -107,12 +115,20 @@
     return Location(kRegister, reg.RegId());
   }
 
+  static Location FpuRegisterLocation(ManagedRegister reg) {
+    return Location(kFpuRegister, reg.RegId());  // reg() later recovers the register from this id.
+  }
+
   bool IsRegister() const {
     return GetKind() == kRegister;
   }
 
+  bool IsFpuRegister() const {  // True for locations of kind kFpuRegister.
+    return GetKind() == kFpuRegister;
+  }
+
   ManagedRegister reg() const {
-    DCHECK(IsRegister());
+    DCHECK(IsRegister() || IsFpuRegister());  // Both kinds are built from a register id.
     return static_cast<ManagedRegister>(GetPayload());
   }
 
@@ -192,7 +208,11 @@
       case kQuickParameter: return "Q";
       case kUnallocated: return "U";
       case kConstant: return "C";
+      case kFpuRegister: return "F";
+      case kDoNotUse:
+        LOG(FATAL) << "Should not use this location kind";
     }
+    UNREACHABLE();
     return "?";
   }
 
@@ -200,6 +220,7 @@
   enum Policy {
     kAny,
     kRequiresRegister,
+    kRequiresFpuRegister,
     kSameAsFirstInput,
   };
 
@@ -220,6 +241,10 @@
     return UnallocatedLocation(kRequiresRegister);
   }
 
+  static Location RequiresFpuRegister() {
+    return UnallocatedLocation(kRequiresFpuRegister);  // Unallocated, with the FPU policy.
+  }
+
   static Location RegisterOrConstant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(ManagedRegister reg, HInstruction* instruction);
 
@@ -242,7 +267,7 @@
   // Number of bits required to encode Kind value.
   static constexpr uint32_t kBitsForKind = 4;
   static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
-  static constexpr uword kLocationTagMask = 0x3;
+  static constexpr uword kLocationConstantMask = 0x3;
 
   explicit Location(uword value) : value_(value) {}
 
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6dd53e5..5b693dd 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -68,8 +68,10 @@
 }
 
 // Create a control-flow graph from Dex instructions.
-inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data) {
-  HGraphBuilder builder(allocator);
+inline HGraph* CreateCFG(ArenaAllocator* allocator,
+                         const uint16_t* data,
+                         Primitive::Type return_type = Primitive::kPrimInt) {
+  HGraphBuilder builder(allocator, return_type);
   const DexFile::CodeItem* item =
     reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 4c7c4e9..3ff24b7 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -277,6 +277,14 @@
 }
 
 
+void X86Assembler::movaps(XmmRegister dst, XmmRegister src) {  // Register-to-register MOVAPS.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);  // Opcode bytes 0F 28: MOVAPS xmm1, xmm2/m128.
+  EmitUint8(0x28);
+  EmitXmmRegisterOperand(dst, src);  // Operand byte for the (dst, src) register pair.
+}
+
+
 void X86Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2113c85..1f6f7e6 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -260,6 +260,7 @@
 
   void setb(Condition condition, Register dst);
 
+  void movaps(XmmRegister dst, XmmRegister src);
   void movss(XmmRegister dst, const Address& src);
   void movss(const Address& dst, XmmRegister src);
   void movss(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 17339ae..705b639 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -312,6 +312,15 @@
 }
 
 
+void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);  // Optional REX prefix, needed when either register is xmm8-15.
+  EmitUint8(0x0F);
+  EmitUint8(0x28);  // 0F 28 /r = MOVAPS xmm1, xmm2/m128: the destination is the first operand.
+  EmitXmmRegisterOperand(dst.LowBits(), src);  // dst first, matching X86Assembler::movaps.
+}
+
+
 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index c3e6e3b..268f72b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -300,6 +300,8 @@
 
   void leaq(CpuRegister dst, const Address& src);
 
+  void movaps(XmmRegister dst, XmmRegister src);  // Register-to-register form (opcode 0F 28).
+
   void movss(XmmRegister dst, const Address& src);
   void movss(const Address& dst, XmmRegister src);
   void movss(XmmRegister dst, XmmRegister src);
diff --git a/test/410-floats/expected.txt b/test/410-floats/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/410-floats/expected.txt
diff --git a/test/410-floats/info.txt b/test/410-floats/info.txt
new file mode 100644
index 0000000..5332704
--- /dev/null
+++ b/test/410-floats/info.txt
@@ -0,0 +1 @@
+Small tests involving floats and doubles.
diff --git a/test/410-floats/src/Main.java b/test/410-floats/src/Main.java
new file mode 100644
index 0000000..d8d6fac
--- /dev/null
+++ b/test/410-floats/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {  // Float/double smoke test: any mismatch throws, success prints nothing.
+  public static void main(String[] args) {
+    assertEquals(4.2f, returnFloat());  // Constant return value.
+    float[] a = new float[1];
+    a[0] = 42.2f;
+    assertEquals(42.2f, returnFloat(a));  // Value read back from an array element.
+
+    assertEquals(4.4, returnDouble());  // Same two checks for double.
+    double[] b = new double[1];
+    b[0] = 42.4;
+    assertEquals(42.4, returnDouble(b));
+
+    assertEquals(4.2f, invokeReturnFloat());  // Result of a nested call, returned unchanged.
+    assertEquals(4.4, invokeReturnDouble());
+    assertEquals(4.2f, takeAFloat(4.2f));  // Single argument handed straight back.
+    assertEquals(3.1, takeADouble(3.1));
+    assertEquals(12.7, takeThreeDouble(3.1, 4.4, 5.2));  // Sum of three arguments.
+    assertEquals(12.7f, takeThreeFloat(3.1f, 4.4f, 5.2f));
+    assertEquals(4.2f, invokeTakeAFloat(4.2f));  // Arguments forwarded to a nested call.
+    assertEquals(3.1, invokeTakeADouble(3.1));
+    assertEquals(12.7, invokeTakeThreeDouble(3.1, 4.4, 5.2));
+    assertEquals(12.7f, invokeTakeThreeFloat(3.1f, 4.4f, 5.2f));
+  }
+
+  public static float invokeReturnFloat() {  // Returns the result of returnFloat() as is.
+    return returnFloat();
+  }
+
+  public static double invokeReturnDouble() {  // Returns the result of returnDouble() as is.
+    return returnDouble();
+  }
+
+  public static float returnFloat() {  // Returns a float constant.
+    return 4.2f;
+  }
+
+  public static float returnFloat(float[] a) {  // Returns element 0 of the given array.
+    return a[0];
+  }
+
+  public static double returnDouble() {  // Returns a double constant.
+    return 4.4;
+  }
+
+  public static double returnDouble(double[] a) {  // Returns element 0 of the given array.
+    return a[0];
+  }
+
+  public static float takeAFloat(float a) {  // Returns its float argument.
+    return a;
+  }
+
+  public static double takeADouble(double a) {  // Returns its double argument.
+    return a;
+  }
+
+  public static double takeThreeDouble(double a, double b, double c) {  // Returns the sum.
+    return a + b + c;
+  }
+
+  public static float takeThreeFloat(float a, float b, float c) {  // Returns the sum.
+    return a + b + c;
+  }
+
+  public static float invokeTakeAFloat(float a) {  // Forwards the argument to takeAFloat.
+    return takeAFloat(a);
+  }
+
+  public static double invokeTakeADouble(double a) {  // Forwards the argument to takeADouble.
+    return takeADouble(a);
+  }
+
+  public static double invokeTakeThreeDouble(double a, double b, double c) {  // Forwards all three.
+    return takeThreeDouble(a, b, c);
+  }
+
+  public static float invokeTakeThreeFloat(float a, float b, float c) {  // Forwards all three.
+    return takeThreeFloat(a, b, c);
+  }
+
+  public static void assertEquals(float expected, float actual) {
+    if (expected != actual) {  // Exact comparison, no tolerance.
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(double expected, double actual) {
+    if (expected != actual) {  // Exact comparison, no tolerance.
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+}