[optimizing compiler] Add REM_FLOAT and REM_DOUBLE

- for arm, x86, x86_64 backends
- reinstated fmod quick entry points for x86. This is a partial revert
of bd3682eada753de52975ae2b4a712bd87dc139a6 which added inline assembly
for floating point rem on x86. Note that Quick still uses the inline
version.
- fix rem tests for longs

Change-Id: I73be19a9f2f2bcf3f718d9ca636e67bdd72b5440
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ca72f3f..0a3f830 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1435,6 +1435,16 @@
       break;
     }
 
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
     case Instruction::AND_INT: {
       Binop_23x<HAnd>(instruction, Primitive::kPrimInt);
       break;
@@ -1574,6 +1584,16 @@
       break;
     }
 
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
     case Instruction::SHL_INT_2ADDR: {
       Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
       break;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 7f358ea..461409d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -499,19 +499,27 @@
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
-  if (instruction != nullptr && instruction->IsTypeConversion()) {
+  if (instruction != nullptr) {
     // The code generated for some type conversions may call the
     // runtime, thus normally requiring a subsequent call to this
     // method.  However, the method verifier does not produce PC
-    // information for Dex type conversion instructions, as it
-    // considers them as "atomic" (they cannot join a GC).
+    // information for certain instructions, which are considered "atomic"
+    // (they cannot join a GC).
     // Therefore we do not currently record PC information for such
     // instructions.  As this may change later, we added this special
     // case so that code generators may nevertheless call
     // CodeGenerator::RecordPcInfo without triggering an error in
     // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
     // thereafter.
-    return;
+    if (instruction->IsTypeConversion()) {
+      return;
+    }
+    if (instruction->IsRem()) {
+      Primitive::Type type = instruction->AsRem()->GetResultType();
+      if ((type == Primitive::kPrimFloat) || (type == Primitive::kPrimDouble)) {
+        return;
+      }
+    }
   }
 
   // Collect PC infos for the mapping table.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 36af393..cbe5f0c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -44,7 +44,7 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1 };
+static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
 
@@ -2132,12 +2132,13 @@
 }
 
 void LocationsBuilderARM::VisitRem(HRem* rem) {
-  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
-  switch (rem->GetResultType()) {
+  switch (type) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -2155,14 +2156,26 @@
       locations->SetOut(Location::RegisterPairLocation(R2, R3));
       break;
     }
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(Location::FpuRegisterLocation(S0));
+      break;
+    }
+
     case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterPairLocation(
+          calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
+      locations->SetInAt(1, Location::FpuRegisterPairLocation(
+          calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
+      locations->SetOut(Location::Location::FpuRegisterPairLocation(S0, S1));
       break;
     }
 
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
@@ -2172,7 +2185,8 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  switch (rem->GetResultType()) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
     case Primitive::kPrimInt: {
       Register reg1 = first.AsRegister<Register>();
       Register reg2 = second.AsRegister<Register>();
@@ -2188,26 +2202,22 @@
     }
 
     case Primitive::kPrimLong: {
-      InvokeRuntimeCallingConvention calling_convention;
-      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
-      DCHECK_EQ(R2, out.AsRegisterPairLow<Register>());
-      DCHECK_EQ(R3, out.AsRegisterPairHigh<Register>());
-
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc());
       break;
     }
 
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc());
+      break;
+    }
+
     case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc());
       break;
     }
 
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2fd712f..8a0c2de 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2206,12 +2206,13 @@
 }
 
 void LocationsBuilderX86::VisitRem(HRem* rem) {
-  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
 
-  switch (rem->GetResultType()) {
+  switch (type) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RegisterLocation(EAX));
       locations->SetInAt(1, Location::RequiresRegister());
@@ -2228,14 +2229,29 @@
       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
       break;
     }
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      InvokeRuntimeCallingConvention calling_convention;
+      // x86 floating-point parameters are passed through core registers (EAX, ECX).
+      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+      locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+      // The runtime helper puts the result in XMM0.
+      locations->SetOut(Location::FpuRegisterLocation(XMM0));
+      break;
+    }
     case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX).
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the result in XMM0.
+      locations->SetOut(Location::FpuRegisterLocation(XMM0));
       break;
     }
 
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
@@ -2247,9 +2263,14 @@
       GenerateDivRemIntegral(rem);
       break;
     }
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf)));
+      codegen_->RecordPcInfo(rem, rem->GetDexPc());
+      break;
+    }
     case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << type;
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod)));
+      codegen_->RecordPcInfo(rem, rem->GetDexPc());
       break;
     }
     default:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 39a9766..233f4a4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -43,8 +43,9 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
@@ -1999,16 +2000,16 @@
   // 0x80000000(00000000)/-1 triggers an arithmetic exception!
   // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
   // so it's safe to just use negl instead of more complex comparisons.
-
-  __ cmpl(second_reg, Immediate(-1));
-  __ j(kEqual, slow_path->GetEntryLabel());
-
   if (type == Primitive::kPrimInt) {
+    __ cmpl(second_reg, Immediate(-1));
+    __ j(kEqual, slow_path->GetEntryLabel());
     // edx:eax <- sign-extended of eax
     __ cdq();
     // eax = quotient, edx = remainder
     __ idivl(second_reg);
   } else {
+    __ cmpq(second_reg, Immediate(-1));
+    __ j(kEqual, slow_path->GetEntryLabel());
     // rdx:rax <- sign-extended of rax
     __ cqo();
     // rax = quotient, rdx = remainder
@@ -2075,9 +2076,14 @@
 }
 
 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
-  switch (rem->GetResultType()) {
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind =
+      (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
@@ -2089,12 +2095,16 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      // The runtime helper puts the result in XMM0.
+      locations->SetOut(Location::FpuRegisterLocation(XMM0));
       break;
     }
 
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
@@ -2106,13 +2116,16 @@
       GenerateDivRemIntegral(rem);
       break;
     }
-
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+    case Primitive::kPrimFloat: {
+      __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true));
+      codegen_->RecordPcInfo(rem, rem->GetDexPc());
       break;
     }
-
+    case Primitive::kPrimDouble: {
+      __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true));
+      codegen_->RecordPcInfo(rem, rem->GetDexPc());
+      break;
+    }
     default:
       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
   }
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 48d6c80..a121542 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -29,6 +29,10 @@
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
+// fmod entrypoints.
+extern "C" double art_quick_fmod(double, double);
+extern "C" float art_quick_fmodf(float, float);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -105,9 +109,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  // qpoints->pFmod = NULL;  // Not needed on x86.
+  qpoints->pFmod = art_quick_fmod;
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  // qpoints->pFmodf = NULL;  // Not needed on x86.
+  qpoints->pFmodf = art_quick_fmodf;
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 1ce01c4..0bfa1ce 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -795,6 +795,35 @@
 
 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
 
+DEFINE_FUNCTION art_quick_fmod
+    subl LITERAL(12), %esp        // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    PUSH ebx                      // pass arg4 b.hi
+    PUSH edx                      // pass arg3 b.lo
+    PUSH ecx                      // pass arg2 a.hi
+    PUSH eax                      // pass arg1 a.lo
+    SETUP_GOT_NOSAVE ebx          // clobbers EBX
+    call PLT_SYMBOL(fmod)         // (jdouble a, jdouble b)
+    fstpl (%esp)                  // pop return value off fp stack
+    movsd (%esp), %xmm0           // place into %xmm0
+    addl LITERAL(28), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-28)
+    ret
+END_FUNCTION art_quick_fmod
+
+DEFINE_FUNCTION art_quick_fmodf
+    PUSH eax                      // alignment padding
+    PUSH ecx                      // pass arg2 b
+    PUSH eax                      // pass arg1 a
+    SETUP_GOT_NOSAVE ebx          // clobbers EBX
+    call PLT_SYMBOL(fmodf)        // (jfloat a, jfloat b)
+    fstps (%esp)                  // pop return value off fp stack
+    movss (%esp), %xmm0           // place into %xmm0
+    addl LITERAL(12), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    ret
+END_FUNCTION art_quick_fmodf
+
 DEFINE_FUNCTION art_quick_d2l
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass arg2 a.hi
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index a2766f7..2cfcfed 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -110,9 +110,9 @@
   // points->pCmpgFloat = NULL;  // Not needed on x86.
   // points->pCmplDouble = NULL;  // Not needed on x86.
   // points->pCmplFloat = NULL;  // Not needed on x86.
-  // qpoints->pFmod = NULL;  // Not needed on x86.
+  qpoints->pFmod = fmod;
   // qpoints->pL2d = NULL;  // Not needed on x86.
-  // qpoints->pFmodf = NULL;  // Not needed on x86.
+  qpoints->pFmodf = fmodf;
   // qpoints->pL2f = NULL;  // Not needed on x86.
   // points->pD2iz = NULL;  // Not needed on x86.
   // points->pF2iz = NULL;  // Not needed on x86.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index a80e7d2..7f85ab7 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1101,6 +1101,8 @@
 UNIMPLEMENTED art_quick_lshl
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
+UNIMPLEMENTED art_quick_fmod
+UNIMPLEMENTED art_quick_fmodf
 
 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
diff --git a/test/428-optimizing-arith-rem/src/Main.java b/test/428-optimizing-arith-rem/src/Main.java
index 46bd3c6..3f77318 100644
--- a/test/428-optimizing-arith-rem/src/Main.java
+++ b/test/428-optimizing-arith-rem/src/Main.java
@@ -16,49 +16,7 @@
 
 public class Main {
 
-  public static void expectEquals(int expected, int result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  public static void expectEquals(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
-
-  public static void expectDivisionByZero(int value) {
-    try {
-      $opt$Rem(value, 0);
-      throw new Error("Expected RuntimeException when modulo by 0");
-    } catch (java.lang.RuntimeException e) {
-    }
-    try {
-      $opt$RemZero(value);
-      throw new Error("Expected RuntimeException when modulo by 0");
-    } catch (java.lang.RuntimeException e) {
-    }
-  }
-
-  public static void expectDivisionByZero(long value) {
-    try {
-      $opt$Rem(value, 0L);
-      throw new Error("Expected RuntimeException when modulo by 0");
-    } catch (java.lang.RuntimeException e) {
-    }
-    try {
-      $opt$RemZero(value);
-      throw new Error("Expected RuntimeException when modulo by 0");
-    } catch (java.lang.RuntimeException e) {
-    }
-  }
-
   public static void main(String[] args) {
-    rem();
-  }
-
-  public static void rem() {
     remInt();
     remLong();
   }
@@ -115,22 +73,22 @@
     expectEquals(-7L, $opt$Rem(-7L, 9L));
     expectEquals(-7L, $opt$Rem(-7L, -9L));
 
-    expectEquals(0L, $opt$Rem(Integer.MAX_VALUE, 1L));
-    expectEquals(0L, $opt$Rem(Integer.MAX_VALUE, -1L));
-    expectEquals(0L, $opt$Rem(Integer.MIN_VALUE, 1L));
-    expectEquals(0L, $opt$Rem(Integer.MIN_VALUE, -1L)); // no overflow
-    expectEquals(-1L, $opt$Rem(Integer.MIN_VALUE, Integer.MAX_VALUE));
-    expectEquals(Integer.MAX_VALUE, $opt$Rem(Integer.MAX_VALUE, Integer.MIN_VALUE));
+    expectEquals(0L, $opt$Rem(Long.MAX_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Long.MAX_VALUE, -1L));
+    expectEquals(0L, $opt$Rem(Long.MIN_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Long.MIN_VALUE, -1L)); // no overflow
+    expectEquals(-1L, $opt$Rem(Long.MIN_VALUE, Long.MAX_VALUE));
+    expectEquals(Long.MAX_VALUE, $opt$Rem(Long.MAX_VALUE, Long.MIN_VALUE));
 
     expectEquals(0L, $opt$Rem(0L, 7L));
-    expectEquals(0L, $opt$Rem(0L, Integer.MAX_VALUE));
-    expectEquals(0L, $opt$Rem(0L, Integer.MIN_VALUE));
+    expectEquals(0L, $opt$Rem(0L, Long.MAX_VALUE));
+    expectEquals(0L, $opt$Rem(0L, Long.MIN_VALUE));
 
     expectDivisionByZero(0L);
     expectDivisionByZero(1L);
     expectDivisionByZero(5L);
-    expectDivisionByZero(Integer.MAX_VALUE);
-    expectDivisionByZero(Integer.MIN_VALUE);
+    expectDivisionByZero(Long.MAX_VALUE);
+    expectDivisionByZero(Long.MIN_VALUE);
   }
 
   static int $opt$Rem(int a, int b) {
@@ -157,4 +115,43 @@
   static long $opt$RemZero(long a) {
     return a % 0L;
   }
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectDivisionByZero(int value) {
+    try {
+      $opt$Rem(value, 0);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Rem(value, 0L);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
 }
diff --git a/test/436-rem-float/expected.txt b/test/436-rem-float/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/436-rem-float/expected.txt
diff --git a/test/436-rem-float/info.txt b/test/436-rem-float/info.txt
new file mode 100644
index 0000000..b023f59
--- /dev/null
+++ b/test/436-rem-float/info.txt
@@ -0,0 +1 @@
+Tests for floating point modulo (rem) operation.
diff --git a/test/436-rem-float/src/Main.java b/test/436-rem-float/src/Main.java
new file mode 100644
index 0000000..e20c21f
--- /dev/null
+++ b/test/436-rem-float/src/Main.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    remFloat();
+    remDouble();
+  }
+
+  private static void remFloat() {
+    expectApproxEquals(2F, $opt$RemConst(6F));
+
+    expectApproxEquals(2F, $opt$Rem(5.1F, 3.1F));
+    expectApproxEquals(2.1F, $opt$Rem(5.1F, 3F));
+    expectApproxEquals(-2F, $opt$Rem(-5.1F, 3.1F));
+    expectApproxEquals(-2.1F, $opt$Rem(-5.1F, -3F));
+
+    expectApproxEquals(2F, $opt$Rem(6F, 4F));
+    expectApproxEquals(2F, $opt$Rem(6F, -4F));
+    expectApproxEquals(0F, $opt$Rem(6F, 3F));
+    expectApproxEquals(0F, $opt$Rem(6F, -3F));
+    expectApproxEquals(0F, $opt$Rem(6F, 1F));
+    expectApproxEquals(0F, $opt$Rem(6F, -1F));
+    expectApproxEquals(-1F, $opt$Rem(-7F, 3F));
+    expectApproxEquals(-1F, $opt$Rem(-7F, -3F));
+    expectApproxEquals(0F, $opt$Rem(6F, 6F));
+    expectApproxEquals(0F, $opt$Rem(-6F, -6F));
+    expectApproxEquals(7F, $opt$Rem(7F, 9F));
+    expectApproxEquals(7F, $opt$Rem(7F, -9F));
+    expectApproxEquals(-7F, $opt$Rem(-7F, 9F));
+    expectApproxEquals(-7F, $opt$Rem(-7F, -9F));
+
+    expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, 1F));
+    expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, -1F));
+    expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, 1F));
+    expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, -1F));
+
+    expectApproxEquals(0F, $opt$Rem(0F, 7F));
+    expectApproxEquals(0F, $opt$Rem(0F, Float.MAX_VALUE));
+    expectApproxEquals(0F, $opt$Rem(0F, Float.MIN_VALUE));
+
+    expectNaN($opt$Rem(Float.NaN, 3F));
+    expectNaN($opt$Rem(3F, Float.NaN));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
+    expectNaN($opt$Rem(3F, 0F));
+
+    expectApproxEquals(4F, $opt$Rem(4F, Float.POSITIVE_INFINITY));
+    expectApproxEquals(4F, $opt$Rem(4F, Float.NEGATIVE_INFINITY));
+  }
+
+  private static void remDouble() {
+    expectApproxEquals(2D, $opt$RemConst(6D));
+
+    expectApproxEquals(2D, $opt$Rem(5.1D, 3.1D));
+    expectApproxEquals(2.1D, $opt$Rem(5.1D, 3D));
+    expectApproxEquals(-2D, $opt$Rem(-5.1D, 3.1D));
+    expectApproxEquals(-2.1D, $opt$Rem(-5.1D, -3D));
+
+    expectApproxEquals(2D, $opt$Rem(6D, 4D));
+    expectApproxEquals(2D, $opt$Rem(6D, -4D));
+    expectApproxEquals(0D, $opt$Rem(6D, 3D));
+    expectApproxEquals(0D, $opt$Rem(6D, -3D));
+    expectApproxEquals(0D, $opt$Rem(6D, 1D));
+    expectApproxEquals(0D, $opt$Rem(6D, -1D));
+    expectApproxEquals(-1D, $opt$Rem(-7D, 3D));
+    expectApproxEquals(-1D, $opt$Rem(-7D, -3D));
+    expectApproxEquals(0D, $opt$Rem(6D, 6D));
+    expectApproxEquals(0D, $opt$Rem(-6D, -6D));
+    expectApproxEquals(7D, $opt$Rem(7D, 9D));
+    expectApproxEquals(7D, $opt$Rem(7D, -9D));
+    expectApproxEquals(-7D, $opt$Rem(-7D, 9D));
+    expectApproxEquals(-7D, $opt$Rem(-7D, -9D));
+
+    expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, 1D));
+    expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, -1D));
+    expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, 1D));
+    expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, -1D));
+
+    expectApproxEquals(0D, $opt$Rem(0D, 7D));
+    expectApproxEquals(0D, $opt$Rem(0D, Double.MAX_VALUE));
+    expectApproxEquals(0D, $opt$Rem(0D, Double.MIN_VALUE));
+
+    expectNaN($opt$Rem(Double.NaN, 3D));
+    expectNaN($opt$Rem(3D, Double.NaN));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+    expectNaN($opt$Rem(3D, 0D));
+
+    expectApproxEquals(4D, $opt$Rem(4D, Double.POSITIVE_INFINITY));
+    expectApproxEquals(4D, $opt$Rem(4D, Double.NEGATIVE_INFINITY));
+  }
+
+  static float $opt$Rem(float a, float b) {
+    return a % b;
+  }
+
+ static float $opt$RemConst(float a) {
+    return a % 4F;
+  }
+
+  static double $opt$Rem(double a, double b) {
+    return a % b;
+  }
+
+  static double $opt$RemConst(double a) {
+    return a % 4D;
+  }
+
+  public static void expectApproxEquals(float a, float b) {
+    float maxDelta = 0.00001F;
+    boolean aproxEquals = (a > b) ? ((a - b) < maxDelta) : ((b - a) < maxDelta);
+    if (!aproxEquals) {
+      throw new Error("Expected: " + a + ", found: " + b
+          + ", with delta: " + maxDelta + " " + (a - b));
+    }
+  }
+
+  public static void expectApproxEquals(double a, double b) {
+    double maxDelta = 0.00001D;
+    boolean aproxEquals = (a > b) ? ((a - b) < maxDelta) : ((b - a) < maxDelta);
+    if (!aproxEquals) {
+      throw new Error("Expected: " + a + ", found: "
+          + b + ", with delta: " + maxDelta + " " + (a - b));
+    }
+  }
+
+  public static void expectNaN(float a) {
+    if (a == a) {
+      throw new Error("Expected NaN: " + a);
+    }
+  }
+
+  public static void expectNaN(double a) {
+    if (a == a) {
+      throw new Error("Expected NaN: " + a);
+    }
+  }
+
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 69ba288..c3fec5d 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -322,6 +322,7 @@
   424-checkcast \
   427-bounds \
   430-live-register-slow-path \
+  436-rem-float \
   800-smali \
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))