[optimizing compiler] Add REM_INT, REM_LONG

- for arm, x86, x86_64
- minor cleanup/fix in div tests

Change-Id: I240874010206a5a9b3aaffbc81a885b94c248f93
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index b51b6e7..855730e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -568,12 +568,13 @@
   return true;
 }
 
-void HGraphBuilder::BuildCheckedDiv(uint16_t out_vreg,
-                                    uint16_t first_vreg,
-                                    int64_t second_vreg_or_constant,
-                                    uint32_t dex_pc,
-                                    Primitive::Type type,
-                                    bool second_is_constant) {
+void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                       uint16_t first_vreg,
+                                       int64_t second_vreg_or_constant,
+                                       uint32_t dex_pc,
+                                       Primitive::Type type,
+                                       bool second_is_constant,
+                                       bool is_div) {
   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   HInstruction* first = LoadLocal(first_vreg, type);
@@ -597,7 +598,11 @@
     temps.Add(current_block_->GetLastInstruction());
   }
 
-  current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  if (is_div) {
+    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
   UpdateLocal(out_vreg, current_block_->GetLastInstruction());
 }
 
@@ -1078,14 +1083,14 @@
     }
 
     case Instruction::DIV_INT: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_pc, Primitive::kPrimInt, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
       break;
     }
 
     case Instruction::DIV_LONG: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_pc, Primitive::kPrimLong, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
       break;
     }
 
@@ -1099,6 +1104,18 @@
       break;
     }
 
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
     case Instruction::AND_INT: {
       Binop_23x<HAnd>(instruction, Primitive::kPrimInt);
       break;
@@ -1185,14 +1202,26 @@
     }
 
     case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                      dex_pc, Primitive::kPrimInt, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
       break;
     }
 
     case Instruction::DIV_LONG_2ADDR: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                      dex_pc, Primitive::kPrimLong, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
       break;
     }
 
@@ -1298,8 +1327,15 @@
 
     case Instruction::DIV_INT_LIT16:
     case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_pc, Primitive::kPrimInt, true);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 799e628..897bcec 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -123,12 +123,13 @@
                       Primitive::Type input_type,
                       Primitive::Type result_type);
 
-  void BuildCheckedDiv(uint16_t out_reg,
-                       uint16_t first_reg,
-                       int64_t second_reg_or_constant,
-                       uint32_t dex_pc,
-                       Primitive::Type type,
-                       bool second_is_lit);
+  void BuildCheckedDivRem(uint16_t out_reg,
+                          uint16_t first_reg,
+                          int64_t second_reg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_lit,
+                          bool is_div);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 09e1b97..1c23170 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1842,6 +1842,86 @@
   }
 }
 
+// Register constraints for HRem: int needs core registers plus a temp; long calls the runtime.
+void LocationsBuilderARM::VisitRem(HRem* rem) {
+  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      locations->AddTemp(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R2,R3.
+      locations->SetOut(Location::RegisterPairLocation(R2, R3));
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+// Emits HRem: int as a - (a / b) * b using sdiv/mul/sub; long calls the pLmod entrypoint.
+void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
+  LocationSummary* locations = rem->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register reg1 = first.As<Register>();
+      Register reg2 = second.As<Register>();
+      Register temp = locations->GetTemp(0).As<Register>();
+
+      // temp = reg1 / reg2  (integer division)
+      // temp = temp * reg2
+      // dest = reg1 - temp
+      __ sdiv(temp, reg1, reg2);
+      __ mul(temp, temp, reg2);
+      __ sub(out.As<Register>(), reg1, ShifterOperand(temp));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R3, out.AsRegisterPairHigh<Register>());
+
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc());
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
 void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 887a4ef..5432882 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -643,11 +643,12 @@
   M(MonitorOperation)                                      \
   M(Or)                                                    \
   M(ParallelMove)                                          \
+  M(Rem)                                                   \
   M(StaticFieldGet)                                        \
   M(StaticFieldSet)                                        \
   M(Throw)                                                 \
   M(TypeConversion)                                        \
-  M(Xor)                                                    \
+  M(Xor)                                                   \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8a8fec2..f20ca01 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -100,19 +100,24 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
 };
 
-class DivMinusOneSlowPathX86 : public SlowPathCodeX86 {
+class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 {
  public:
-  explicit DivMinusOneSlowPathX86(Register reg) : reg_(reg) {}
+  explicit DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ negl(reg_);
+    if (is_div_) {
+      __ negl(reg_);
+    } else {
+      __ movl(reg_, Immediate(0));
+    }
     __ jmp(GetExitLabel());
   }
 
  private:
   Register reg_;
-  DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86);
+  bool is_div_;
+  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
 };
 
 class StackOverflowCheckSlowPathX86 : public SlowPathCodeX86 {
@@ -1753,6 +1758,68 @@
   }
 }
 
+// Shared integral code for HDiv and HRem: idivl for int, the pLdiv/pLmod entrypoints for long.
+void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  LocationSummary* locations = instruction->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  bool is_div = instruction->IsDiv();
+
+  switch (instruction->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register second_reg = second.As<Register>();
+      DCHECK_EQ(EAX, first.As<Register>());
+      DCHECK_EQ(is_div ? EAX : EDX, out.As<Register>());
+
+      SlowPathCodeX86* slow_path =
+          new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.As<Register>(), is_div);
+      codegen_->AddSlowPath(slow_path);
+
+      // 0x80000000/-1 triggers an arithmetic exception!
+      // Dividing by -1 is actually negation and -0x80000000 = 0x80000000, so the
+      // slow path just uses negl for div; the remainder of a division by -1 is 0.
+
+      __ cmpl(second_reg, Immediate(-1));
+      __ j(kEqual, slow_path->GetEntryLabel());
+
+      // edx:eax <- sign-extended of eax
+      __ cdq();
+      // eax = quotient, edx = remainder
+      __ idivl(second_reg);
+
+      __ Bind(slow_path->GetExitLabel());
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
+
+      if (is_div) {
+        __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv)));
+      } else {
+        __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLmod)));
+      }
+      uint32_t dex_pc = is_div
+          ? instruction->AsDiv()->GetDexPc()
+          : instruction->AsRem()->GetDexPc();
+      codegen_->RecordPcInfo(instruction, dex_pc);
+
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
+  }
+}
+
 void LocationsBuilderX86::VisitDiv(HDiv* div) {
   LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
       ? LocationSummary::kCall
@@ -1798,45 +1865,9 @@
   Location second = locations->InAt(1);
 
   switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
-      DCHECK(first.Equals(out));
-      Register first_reg = first.As<Register>();
-      Register second_reg = second.As<Register>();
-      DCHECK_EQ(EAX, first_reg);
-      DCHECK_EQ(EDX, locations->GetTemp(0).As<Register>());
-
-      SlowPathCodeX86* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86(first_reg);
-      codegen_->AddSlowPath(slow_path);
-
-      // 0x80000000/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
-      // it's safe to just use negl instead of more complex comparisons.
-
-      __ cmpl(second_reg, Immediate(-1));
-      __ j(kEqual, slow_path->GetEntryLabel());
-
-      // edx:eax <- sign-extended of eax
-      __ cdq();
-      // eax = quotient, edx = remainder
-      __ idivl(second_reg);
-
-      __ Bind(slow_path->GetExitLabel());
-      break;
-    }
-
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      InvokeRuntimeCallingConvention calling_convention;
-      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
-      DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
-      DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
-
-      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv)));
-      codegen_->RecordPcInfo(div, div->GetDexPc());
-
+      GenerateDivRemIntegral(div);
       break;
     }
 
@@ -1857,6 +1888,58 @@
   }
 }
 
+// Register constraints for HRem: int reads EAX and writes EDX; long is a runtime call.
+void LocationsBuilderX86::VisitRem(HRem* rem) {
+  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RegisterLocation(EAX));
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RegisterLocation(EDX));
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // Runtime helper puts the result in EAX, EDX.
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GenerateDivRemIntegral(rem);  // Integral code path shared with HDiv.
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << type;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 841b28b..8252f81 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -130,6 +130,7 @@
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5aa1c4a..910e73d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -106,26 +106,36 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
 };
 
-class DivMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
+class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  explicit DivMinusOneSlowPathX86_64(Register reg, Primitive::Type type)
-      : reg_(reg), type_(type) {}
+  explicit DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div)
+      : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
     if (type_ == Primitive::kPrimInt) {
-      __ negl(CpuRegister(reg_));
+      if (is_div_) {
+        __ negl(cpu_reg_);
+      } else {
+        __ movl(cpu_reg_, Immediate(0));
+      }
+
     } else {
       DCHECK_EQ(Primitive::kPrimLong, type_);
-      __ negq(CpuRegister(reg_));
+      if (is_div_) {
+        __ negq(cpu_reg_);
+      } else {
+        __ movq(cpu_reg_, Immediate(0));
+      }
     }
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Register reg_;
+  const CpuRegister cpu_reg_;
   const Primitive::Type type_;
-  DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86_64);
+  const bool is_div_;
+  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
 };
 
 class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
@@ -1701,6 +1711,47 @@
   }
 }
 
+// Emits the shared int/long code for HDiv and HRem: the dividend is in RAX and
+// idiv leaves the quotient in RAX and the remainder in RDX.
+void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  bool is_div = instruction->IsDiv();
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister out_reg = locations->Out().As<CpuRegister>();
+  CpuRegister second_reg = locations->InAt(1).As<CpuRegister>();
+
+  DCHECK_EQ(RAX, locations->InAt(0).As<CpuRegister>().AsRegister());
+  DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
+
+  SlowPathCodeX86_64* slow_path =
+      new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
+          out_reg.AsRegister(), type, is_div);
+  codegen_->AddSlowPath(slow_path);
+
+  // 0x80000000(00000000)/-1 triggers an arithmetic exception, so a divisor of -1 is
+  // handled by the slow path (negation for div, zero for rem). The comparison must
+  // use the operand width: a long divisor can have 0xFFFFFFFF in its low 32 bits.
+  if (type == Primitive::kPrimInt) {
+    __ cmpl(second_reg, Immediate(-1));
+    __ j(kEqual, slow_path->GetEntryLabel());
+    // edx:eax <- sign-extended of eax
+    __ cdq();
+    // eax = quotient, edx = remainder
+    __ idivl(second_reg);
+  } else {
+    __ cmpq(second_reg, Immediate(-1));
+    __ j(kEqual, slow_path->GetEntryLabel());
+    // rdx:rax <- sign-extended of rax
+    __ cqo();
+    // rax = quotient, rdx = remainder
+    __ idivq(second_reg);
+  }
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1738,35 +1789,7 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      CpuRegister first_reg = first.As<CpuRegister>();
-      CpuRegister second_reg = second.As<CpuRegister>();
-      DCHECK_EQ(RAX,  first_reg.AsRegister());
-      DCHECK_EQ(RDX, locations->GetTemp(0).As<CpuRegister>().AsRegister());
-
-      SlowPathCodeX86_64* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister(), type);
-      codegen_->AddSlowPath(slow_path);
-
-      // 0x80000000(00000000)/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
-      // so it's safe to just use negl instead of more complex comparisons.
-
-      __ cmpl(second_reg, Immediate(-1));
-      __ j(kEqual, slow_path->GetEntryLabel());
-
-      if (type == Primitive::kPrimInt) {
-        // edx:eax <- sign-extended of eax
-        __ cdq();
-        // eax = quotient, edx = remainder
-        __ idivl(second_reg);
-      } else {
-        // rdx:rax <- sign-extended of rax
-        __ cqo();
-        // rax = quotient, rdx = remainder
-        __ idivq(second_reg);
-      }
-
-      __ Bind(slow_path->GetExitLabel());
+      GenerateDivRemIntegral(div);
       break;
     }
 
@@ -1785,6 +1808,50 @@
   }
 }
 
+// Register constraints for HRem: the dividend is fixed in RAX and the remainder comes out in RDX.
+void LocationsBuilderX86_64::VisitRem(HRem* rem) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RegisterLocation(RAX));
+      locations->SetInAt(1, Location::RequiresRegister());
+      // Intel uses rdx:rax as the dividend and puts the remainder in rdx
+      locations->SetOut(Location::RegisterLocation(RDX));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+// Emits HRem: int and long go through GenerateDivRemIntegral; float/double are unimplemented.
+void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GenerateDivRemIntegral(rem);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4c6e475..86f3b4e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -134,6 +134,7 @@
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5af3cdd..383d8bc 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -521,6 +521,7 @@
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
+  M(Rem, BinaryOperation)                                             \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
   M(StaticFieldGet, Instruction)                                        \
@@ -1756,10 +1757,15 @@
   virtual int32_t Evaluate(int32_t x, int32_t y) const {
     // Our graph structure ensures we never have 0 for `y` during constant folding.
     DCHECK_NE(y, 0);
-    // Special case -1 to avoid getting a SIGFPE on x86.
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
     return (y == -1) ? -x : x / y;
   }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x / y; }
+
+  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+    DCHECK_NE(y, 0);
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? -x : x / y;
+  }
 
   uint32_t GetDexPc() const { return dex_pc_; }
 
@@ -1771,6 +1777,33 @@
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
+// Remainder of `left` by `right`; records the dex_pc passed at construction.
+class HRem : public HBinaryOperation {
+ public:
+  HRem(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc)
+      : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {}
+
+  virtual int32_t Evaluate(int32_t x, int32_t y) const {
+    DCHECK_NE(y, 0);
+    // x % -1 is always 0; special case it to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? 0 : x % y;
+  }
+
+  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+    DCHECK_NE(y, 0);
+    // x % -1 is always 0; special case it to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? 0 : x % y;
+  }
+
+  uint32_t GetDexPc() const { return dex_pc_; }
+
+  DECLARE_INSTRUCTION(Rem);
+
+ private:
+  const uint32_t dex_pc_;
+  DISALLOW_COPY_AND_ASSIGN(HRem);
+};
+
 class HDivZeroCheck : public HExpression<1> {
  public:
   HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
diff --git a/test/417-optimizing-arith-div/src/Main.java b/test/417-optimizing-arith-div/src/Main.java
index a5dea15..909ceb4 100644
--- a/test/417-optimizing-arith-div/src/Main.java
+++ b/test/417-optimizing-arith-div/src/Main.java
@@ -24,6 +24,12 @@
     }
   }
 
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void expectEquals(float expected, float result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
diff --git a/test/428-optimizing-arith-rem/expected.txt b/test/428-optimizing-arith-rem/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/428-optimizing-arith-rem/expected.txt
diff --git a/test/428-optimizing-arith-rem/info.txt b/test/428-optimizing-arith-rem/info.txt
new file mode 100644
index 0000000..3e37ffe
--- /dev/null
+++ b/test/428-optimizing-arith-rem/info.txt
@@ -0,0 +1 @@
+Tests for modulo (rem) operation.
diff --git a/test/428-optimizing-arith-rem/src/Main.java b/test/428-optimizing-arith-rem/src/Main.java
new file mode 100644
index 0000000..46bd3c6
--- /dev/null
+++ b/test/428-optimizing-arith-rem/src/Main.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectDivisionByZero(int value) {
+    try {
+      $opt$Rem(value, 0);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Rem(value, 0L);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
+  public static void main(String[] args) {
+    rem();
+  }
+
+  public static void rem() {
+    remInt();
+    remLong();
+  }
+
+  private static void remInt() {
+    expectEquals(2, $opt$RemConst(6));
+    expectEquals(2, $opt$Rem(6, 4));
+    expectEquals(2, $opt$Rem(6, -4));
+    expectEquals(0, $opt$Rem(6, 3));
+    expectEquals(0, $opt$Rem(6, -3));
+    expectEquals(0, $opt$Rem(6, 1));
+    expectEquals(0, $opt$Rem(6, -1));
+    expectEquals(-1, $opt$Rem(-7, 3));
+    expectEquals(-1, $opt$Rem(-7, -3));
+    expectEquals(0, $opt$Rem(6, 6));
+    expectEquals(0, $opt$Rem(-6, -6));
+    expectEquals(7, $opt$Rem(7, 9));
+    expectEquals(7, $opt$Rem(7, -9));
+    expectEquals(-7, $opt$Rem(-7, 9));
+    expectEquals(-7, $opt$Rem(-7, -9));
+
+    expectEquals(0, $opt$Rem(Integer.MAX_VALUE, 1));
+    expectEquals(0, $opt$Rem(Integer.MAX_VALUE, -1));
+    expectEquals(0, $opt$Rem(Integer.MIN_VALUE, 1));
+    expectEquals(0, $opt$Rem(Integer.MIN_VALUE, -1)); // no overflow
+    expectEquals(-1, $opt$Rem(Integer.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(Integer.MAX_VALUE, $opt$Rem(Integer.MAX_VALUE, Integer.MIN_VALUE));
+
+    expectEquals(0, $opt$Rem(0, 7));
+    expectEquals(0, $opt$Rem(0, Integer.MAX_VALUE));
+    expectEquals(0, $opt$Rem(0, Integer.MIN_VALUE));
+
+    expectDivisionByZero(0);
+    expectDivisionByZero(1);
+    expectDivisionByZero(5);
+    expectDivisionByZero(Integer.MAX_VALUE);
+    expectDivisionByZero(Integer.MIN_VALUE);
+  }
+
+  private static void remLong() {
+    expectEquals(2L, $opt$RemConst(6L));
+    expectEquals(2L, $opt$Rem(6L, 4L));
+    expectEquals(2L, $opt$Rem(6L, -4L));
+    expectEquals(0L, $opt$Rem(6L, 3L));
+    expectEquals(0L, $opt$Rem(6L, -3L));
+    expectEquals(0L, $opt$Rem(6L, 1L));
+    expectEquals(0L, $opt$Rem(6L, -1L));
+    expectEquals(-1L, $opt$Rem(-7L, 3L));
+    expectEquals(-1L, $opt$Rem(-7L, -3L));
+    expectEquals(0L, $opt$Rem(6L, 6L));
+    expectEquals(0L, $opt$Rem(-6L, -6L));
+    expectEquals(7L, $opt$Rem(7L, 9L));
+    expectEquals(7L, $opt$Rem(7L, -9L));
+    expectEquals(-7L, $opt$Rem(-7L, 9L));
+    expectEquals(-7L, $opt$Rem(-7L, -9L));
+    // Boundary values must be Long.*: Integer.* arguments would select the int overloads.
+    expectEquals(0L, $opt$Rem(Long.MAX_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Long.MAX_VALUE, -1L));
+    expectEquals(0L, $opt$Rem(Long.MIN_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Long.MIN_VALUE, -1L)); // no overflow
+    expectEquals(-1L, $opt$Rem(Long.MIN_VALUE, Long.MAX_VALUE));
+    expectEquals(Long.MAX_VALUE, $opt$Rem(Long.MAX_VALUE, Long.MIN_VALUE));
+
+    expectEquals(0L, $opt$Rem(0L, 7L));
+    expectEquals(0L, $opt$Rem(0L, Long.MAX_VALUE));
+    expectEquals(0L, $opt$Rem(0L, Long.MIN_VALUE));
+
+    expectDivisionByZero(0L);
+    expectDivisionByZero(1L);
+    expectDivisionByZero(5L);
+    expectDivisionByZero(Long.MAX_VALUE);
+    expectDivisionByZero(Long.MIN_VALUE);
+  }
+
+  static int $opt$Rem(int a, int b) {
+    return a % b;
+  }
+
+  static int $opt$RemZero(int a) {
+    return a % 0;
+  }
+
+  // Modulo by literals != 0 should not generate checks.
+  static int $opt$RemConst(int a) {
+    return a % 4;
+  }
+
+  static long $opt$RemConst(long a) {
+    return a % 4L;
+  }
+
+  static long $opt$Rem(long a, long b) {
+    return a % b;
+  }
+
+  static long $opt$RemZero(long a) {
+    return a % 0L;
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 3b949d6..01c5ae2 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -450,6 +450,7 @@
   426-monitor \
   427-bitwise \
   427-bounds \
+  428-optimizing-arith-rem \
   700-LoadArgRegs \
   701-easy-div-rem \
   702-LargeBranchOffset \