ARM: lower integer divide and remainder, with divide-by-0 checks.

With both the software and hardware divide implementations,
ARM normally just returns 0 when dividing by 0, which is
different from what x86 does. So, for NaCl, we've modified
LLVM to trap by inserting explicit checks for a zero divisor.

Uses the -mattr=hwdiv-arm attribute to decide whether the
32-bit sdiv/udiv instructions are supported.

Also lower the unreachable-inst to a trap-inst, since we
need a trap instruction for divide by 0 anyway.

Misc: fix the switch test under MINIMAL=1, since ARM requires
allow_dump for filetype=asm.
Also includes incidental clang-format changes.

TODO: check via cross tests

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1214693004.
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fad9bcf..c090075 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -141,21 +141,34 @@
   return Utils::applyAlignment(Value, typeAlignInBytes);
 }
 
+// Returns true only when Op is a ConstantInteger32 whose value is non-zero.
+// A null Op, or any other kind of operand, conservatively yields false.
+bool isGuaranteedNonzeroInt(const Operand *Op) {
+  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
+    return Const->getValue() != 0;
+  }
+  return false;
+}
+
 } // end of anonymous namespace
 
-TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) {
+TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
   static_assert(
       (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
           (TargetInstructionSet::ARM32InstructionSet_End -
            TargetInstructionSet::ARM32InstructionSet_Begin),
       "ARM32InstructionSet range different from TargetInstructionSet");
-  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
+  if (Flags.getTargetInstructionSet() !=
       TargetInstructionSet::BaseInstructionSet) {
     InstructionSet = static_cast<ARM32InstructionSet>(
-        (Func->getContext()->getFlags().getTargetInstructionSet() -
+        (Flags.getTargetInstructionSet() - // Offset within the ARM32 range.
          TargetInstructionSet::ARM32InstructionSet_Begin) +
         ARM32InstructionSet::Begin);
   }
+} // InstructionSet is left untouched for BaseInstructionSet.
+
+TargetARM32::TargetARM32(Cfg *Func)
+    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
   // TODO: Don't initialize IntegerRegisters and friends every time.
   // Instead, initialize in some sort of static initializer for the
   // class.
@@ -1009,6 +1022,75 @@
   _mov(Dest, SP);
 }
 
+void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
+  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
+    return; // A known non-zero constant half means no check is needed.
+  Variable *SrcLoReg = legalizeToVar(SrcLo);
+  switch (Ty) { // Set the flags so that NE means the divisor is non-zero.
+  default:
+    llvm_unreachable("Unexpected type");
+  case IceType_i8: {
+    Operand *Mask =
+        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
+    _tst(SrcLoReg, Mask); // Only the low 8 bits are tested.
+    break;
+  }
+  case IceType_i16: {
+    Operand *Mask =
+        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
+    _tst(SrcLoReg, Mask); // Only the low 16 bits are tested.
+    break;
+  }
+  case IceType_i32: {
+    _tst(SrcLoReg, SrcLoReg);
+    break;
+  }
+  case IceType_i64: {
+    Variable *ScratchReg = makeReg(IceType_i32);
+    _orrs(ScratchReg, SrcLoReg, SrcHi);
+    // ScratchReg is never read afterwards; the orrs is emitted only for
+    // its side effect of setting the flags from (lo | hi).
+    Context.insert(InstFakeUse::create(Func, ScratchReg));
+  }
+  }
+  InstARM32Label *Label = InstARM32Label::create(Func, this);
+  _br(Label, CondARM32::NE);
+  _trap(); // Executed only when the branch above is not taken.
+  Context.insert(Label); // Execution continues here after the check.
+}
+
+void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
+                               Operand *Src1, ExtInstr ExtFunc,
+                               DivInstr DivFunc, const char *DivHelperName,
+                               bool IsRemainder) {
+  div0Check(Dest->getType(), Src1, nullptr); // No high half to check.
+  Variable *Src1R = legalizeToVar(Src1);
+  Variable *T0R = Src0R;
+  Variable *T1R = Src1R;
+  if (Dest->getType() != IceType_i32) { // Extend both operands via ExtFunc.
+    T0R = makeReg(IceType_i32);
+    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
+    T1R = makeReg(IceType_i32);
+    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
+  }
+  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
+    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
+    if (IsRemainder) { // Presumably T2 = T0R - T * T1R (ARM mls); confirm.
+      Variable *T2 = makeReg(IceType_i32);
+      _mls(T2, T, T1R, T0R);
+      T = T2;
+    }
+    _mov(Dest, T);
+  } else { // No hardware divide: call the helper named DivHelperName.
+    constexpr SizeT MaxSrcs = 2;
+    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
+    Call->addArg(T0R);
+    Call->addArg(T1R);
+    lowerCall(Call);
+  }
+  return;
+}
+
 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
   // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
@@ -1182,9 +1264,47 @@
     case InstArithmetic::Udiv:
     case InstArithmetic::Sdiv:
     case InstArithmetic::Urem:
-    case InstArithmetic::Srem:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
+    case InstArithmetic::Srem: {
+      // Check for divide by 0 (ARM normally doesn't trap, but we want it
+      // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
+      // to a register, which will hide a constant source operand.
+      // Instead, check the not-yet-legalized Src1 to optimize-out a divide
+      // by 0 check.
+      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+        if (C64->getValue() == 0) {
+          div0Check(IceType_i64, Src1Lo, Src1Hi);
+        }
+      } else {
+        div0Check(IceType_i64, Src1Lo, Src1Hi);
+      }
+      // Technically, ARM has their own aeabi routines, but we can use the
+      // non-aeabi routine as well.  LLVM uses __aeabi_ldivmod for div,
+      // but uses the more standard __moddi3 for rem.
+      const char *HelperName = "";
+      switch (Inst->getOp()) {
+      case InstArithmetic::Udiv:
+        HelperName = H_udiv_i64;
+        break;
+      case InstArithmetic::Sdiv:
+        HelperName = H_sdiv_i64;
+        break;
+      case InstArithmetic::Urem:
+        HelperName = H_urem_i64;
+        break;
+      case InstArithmetic::Srem:
+        HelperName = H_srem_i64;
+        break;
+      default:
+        llvm_unreachable("Should have only matched div ops.");
+        break;
+      }
+      constexpr SizeT MaxSrcs = 2;
+      InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
+      Call->addArg(Inst->getSrc(0));
+      Call->addArg(Inst->getSrc(1));
+      lowerCall(Call);
+      return;
+    }
     case InstArithmetic::Fadd:
     case InstArithmetic::Fsub:
     case InstArithmetic::Fmul:
@@ -1197,61 +1317,73 @@
     UnimplementedError(Func->getContext()->getFlags());
   } else { // Dest->getType() is non-i64 scalar
     Variable *Src0R = legalizeToVar(Inst->getSrc(0));
-    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
+    Operand *Src1RF = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
     Variable *T = makeReg(Dest->getType());
     switch (Inst->getOp()) {
     case InstArithmetic::_num:
       llvm_unreachable("Unknown arithmetic operator");
       break;
     case InstArithmetic::Add: {
-      _add(T, Src0R, Src1);
+      _add(T, Src0R, Src1RF);
       _mov(Dest, T);
     } break;
     case InstArithmetic::And: {
-      _and(T, Src0R, Src1);
+      _and(T, Src0R, Src1RF);
       _mov(Dest, T);
     } break;
     case InstArithmetic::Or: {
-      _orr(T, Src0R, Src1);
+      _orr(T, Src0R, Src1RF);
       _mov(Dest, T);
     } break;
     case InstArithmetic::Xor: {
-      _eor(T, Src0R, Src1);
+      _eor(T, Src0R, Src1RF);
       _mov(Dest, T);
     } break;
     case InstArithmetic::Sub: {
-      _sub(T, Src0R, Src1);
+      _sub(T, Src0R, Src1RF);
       _mov(Dest, T);
     } break;
     case InstArithmetic::Mul: {
-      Variable *Src1R = legalizeToVar(Src1);
+      Variable *Src1R = legalizeToVar(Src1RF);
       _mul(T, Src0R, Src1R);
       _mov(Dest, T);
     } break;
     case InstArithmetic::Shl:
-      _lsl(T, Src0R, Src1);
+      _lsl(T, Src0R, Src1RF);
       _mov(Dest, T);
       break;
     case InstArithmetic::Lshr:
-      _lsr(T, Src0R, Src1);
+      _lsr(T, Src0R, Src1RF);
       _mov(Dest, T);
       break;
     case InstArithmetic::Ashr:
-      _asr(T, Src0R, Src1);
+      _asr(T, Src0R, Src1RF);
       _mov(Dest, T);
       break;
-    case InstArithmetic::Udiv:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Sdiv:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Urem:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
-    case InstArithmetic::Srem:
-      UnimplementedError(Func->getContext()->getFlags());
-      break;
+    case InstArithmetic::Udiv: {
+      constexpr bool IsRemainder = false;
+      lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
+                   &TargetARM32::_udiv, H_udiv_i32, IsRemainder);
+      return;
+    }
+    case InstArithmetic::Sdiv: {
+      constexpr bool IsRemainder = false;
+      lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
+                   &TargetARM32::_sdiv, H_sdiv_i32, IsRemainder);
+      return;
+    }
+    case InstArithmetic::Urem: {
+      constexpr bool IsRemainder = true;
+      lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt,
+                   &TargetARM32::_udiv, H_urem_i32, IsRemainder);
+      return;
+    }
+    case InstArithmetic::Srem: {
+      constexpr bool IsRemainder = true;
+      lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt,
+                   &TargetARM32::_sdiv, H_srem_i32, IsRemainder);
+      return;
+    }
     case InstArithmetic::Fadd:
       UnimplementedError(Func->getContext()->getFlags());
       break;
@@ -1322,7 +1454,7 @@
   Variable *Src0R = legalizeToVar(Cond);
   Constant *Zero = Ctx->getConstantZero(IceType_i32);
   _cmp(Src0R, Zero);
-  _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
+  _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
 }
 
 void TargetARM32::lowerCall(const InstCall *Instr) {
@@ -2113,7 +2245,7 @@
 }
 
 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
-  UnimplementedError(Func->getContext()->getFlags());
+  _trap(); // An unreachable instruction is lowered to a trap.
 }
 
 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
@@ -2417,7 +2549,7 @@
 }
 
 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
-    : TargetHeaderLowering(Ctx) {}
+    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
 
 void TargetHeaderARM32::lower() {
   OstreamLocker L(Ctx);
@@ -2431,12 +2563,18 @@
   // sub-subsection of the first public subsection of the attributes.
   Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
   // Chromebooks are at least A15, but do A9 for higher compat.
-  Str << ".cpu    cortex-a9\n"
-      << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
+  // For some reason, the LLVM ARM asm parser has the .cpu directive override
+  // the mattr specified on the commandline. So to test hwdiv, we need to set
+  // the .cpu directive higher (can't just rely on --mattr=...).
+  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
+    Str << ".cpu    cortex-a15\n";
+  } else {
+    Str << ".cpu    cortex-a9\n";
+  }
+  Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
       << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n";
   Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n"
       << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n";
-  // TODO(jvoung): check other CPU features like HW div.
   Str << ".fpu    neon\n"
       << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n"
       << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n"
@@ -2450,6 +2588,9 @@
       << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n"
       << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n"
       << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n";
+  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
+    Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n";
+  }
   // Technically R9 is used for TLS with Sandboxing, and we reserve it.
   // However, for compatibility with current NaCl LLVM, don't claim that.
   Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";