ARM32 vector ops - scalarize icmp, fcmp and cast.

This is part of a sequence of patches to quickly fill out vector
support by scalarizing the remaining operations. Later we can work to
generate better code.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1683153003 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index 9c5e4e5..4538aa9 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -1755,6 +1755,9 @@
   case IceType_v8i16:
   case IceType_v4i32:
   case IceType_v4f32:
+  case IceType_v16i1:
+  case IceType_v8i1:
+  case IceType_v4i1:
     Asm->vld1qr(getVecElmtBitsize(DestTy), Dest, getSrc(0), Func->getTarget());
     break;
   }
@@ -2094,6 +2097,9 @@
   case IceType_v8i16:
   case IceType_v4i32:
   case IceType_v4f32:
+  case IceType_v16i1:
+  case IceType_v8i1:
+  case IceType_v4i1:
     Asm->vst1qr(getVecElmtBitsize(Ty), Src0, Src1, Func->getTarget());
     break;
   }
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 882b4ea..2935df9 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -705,34 +705,11 @@
 void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                          Variable *Dest, Operand *Src0,
                                          Operand *Src1) {
-  assert(isVectorType(Dest->getType()));
-  Type Ty = Dest->getType();
-  Type ElementTy = typeElementType(Ty);
-  SizeT NumElements = typeNumElements(Ty);
-
-  Operand *T = Ctx->getConstantUndef(Ty);
-  for (SizeT I = 0; I < NumElements; ++I) {
-    Constant *Index = Ctx->getConstantInt32(I);
-
-    // Extract the next two inputs.
-    Variable *Op0 = Func->makeVariable(ElementTy);
-    Context.insert<InstExtractElement>(Op0, Src0, Index);
-    Variable *Op1 = Func->makeVariable(ElementTy);
-    Context.insert<InstExtractElement>(Op1, Src1, Index);
-
-    // Perform the arithmetic as a scalar operation.
-    Variable *Res = Func->makeVariable(ElementTy);
-    auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1);
-    // We might have created an operation that needed a helper call.
-    genTargetHelperCallFor(Arith);
-
-    // Insert the result into position.
-    Variable *DestT = Func->makeVariable(Ty);
-    Context.insert<InstInsertElement>(DestT, T, Res, Index);
-    T = DestT;
-  }
-
-  Context.insert<InstAssign>(Dest, T);
+  scalarizeInstruction(
+      Dest, Src0, Src1,
+      [this, Kind](Variable *Dest, Variable *Src0, Variable *Src1) {
+        return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
+      });
 }
 
 void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index a6b6c13..3477f70 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -467,6 +467,82 @@
   void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                            Operand *Src0, Operand *Src1);
 
+  /// Generalizes scalarizeArithmetic to other two-operand vector instructions:
+  /// per lane, extracts both inputs, calls MakeInstruction, inserts the result.
+  /// MakeInstruction is a function-like object with signature
+  /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Instr *.
+  template <typename F>
+  void scalarizeInstruction(Variable *Dest, Operand *Src0, Operand *Src1,
+                            F &&MakeInstruction) {
+    const Type DestTy = Dest->getType();
+    assert(isVectorType(DestTy));
+    const Type DestElementTy = typeElementType(DestTy);
+    const SizeT NumElements = typeNumElements(DestTy);
+    const Type Src0ElementTy = typeElementType(Src0->getType());
+    const Type Src1ElementTy = typeElementType(Src1->getType());
+
+    assert(NumElements == typeNumElements(Src0->getType()));
+    assert(NumElements == typeNumElements(Src1->getType()));
+
+    Variable *T = Func->makeVariable(DestTy);
+    Context.insert<InstFakeDef>(T);
+    for (SizeT I = 0; I < NumElements; ++I) {
+      Constant *Index = Ctx->getConstantInt32(I);
+
+      // Extract the next two inputs.
+      Variable *Op0 = Func->makeVariable(Src0ElementTy);
+      Context.insert<InstExtractElement>(Op0, Src0, Index);
+      Variable *Op1 = Func->makeVariable(Src1ElementTy);
+      Context.insert<InstExtractElement>(Op1, Src1, Index);
+
+      // Perform the operation as a scalar operation.
+      Variable *Res = Func->makeVariable(DestElementTy);
+      auto Arith = MakeInstruction(Res, Op0, Op1);
+      // We might have created an operation that needed a helper call.
+      genTargetHelperCallFor(Arith);
+
+      // Insert the result into position.
+      Variable *DestT = Func->makeVariable(DestTy);
+      Context.insert<InstInsertElement>(DestT, T, Res, Index);
+      T = DestT;
+    }
+    Context.insert<InstAssign>(Dest, T);
+  }
+
+  template <typename F>
+  void scalarizeUnaryInstruction(Variable *Dest, Operand *Src0,
+                                 F &&MakeInstruction) {
+    const Type DestTy = Dest->getType();
+    assert(isVectorType(DestTy));
+    const Type DestElementTy = typeElementType(DestTy);
+    const SizeT NumElements = typeNumElements(DestTy);
+    const Type Src0ElementTy = typeElementType(Src0->getType());
+
+    assert(NumElements == typeNumElements(Src0->getType()));
+
+    Variable *T = Func->makeVariable(DestTy);
+    Context.insert<InstFakeDef>(T);
+    for (SizeT I = 0; I < NumElements; ++I) {
+      Constant *Index = Ctx->getConstantInt32(I);
+
+      // Extract the next input (this variant has a single source operand).
+      Variable *Op0 = Func->makeVariable(Src0ElementTy);
+      Context.insert<InstExtractElement>(Op0, Src0, Index);
+
+      // Perform the operation as a scalar operation.
+      Variable *Res = Func->makeVariable(DestElementTy);
+      auto Arith = MakeInstruction(Res, Op0);
+      // We might have created an operation that needed a helper call.
+      genTargetHelperCallFor(Arith);
+
+      // Insert the result into position.
+      Variable *DestT = Func->makeVariable(DestTy);
+      Context.insert<InstInsertElement>(DestT, T, Res, Index);
+      T = DestT;
+    }
+    Context.insert<InstAssign>(Dest, T);
+  }
+
   /// SandboxType enumerates all possible sandboxing strategies that
   enum SandboxType {
     ST_None,
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 2da5542..2572f7c 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -577,8 +577,18 @@
     Variable *Dest = Instr->getDest();
     Operand *Src0 = Instr->getSrc(0);
     const Type DestTy = Dest->getType();
-    const InstCast::OpKind CastKind =
-        llvm::cast<InstCast>(Instr)->getCastKind();
+    auto *CastInstr = llvm::cast<InstCast>(Instr);
+    const InstCast::OpKind CastKind = CastInstr->getCastKind();
+
+    if (isVectorType(DestTy)) {
+      scalarizeUnaryInstruction(
+          Dest, Src0, [this, CastKind](Variable *Dest, Variable *Src) {
+            return Context.insert<InstCast>(CastKind, Dest, Src);
+          });
+      CastInstr->setDeleted();
+      return;
+    }
+
     switch (CastKind) {
     default:
       return;
@@ -723,6 +733,36 @@
     }
     llvm::report_fatal_error("Control flow should never have reached here.");
   }
+  case Inst::Icmp: {
+    Variable *Dest = Instr->getDest();
+    const Type DestTy = Dest->getType();
+    if (isVectorType(DestTy)) {
+      auto *CmpInstr = llvm::cast<InstIcmp>(Instr);
+      const auto Condition = CmpInstr->getCondition();
+      scalarizeInstruction(
+          Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1),
+          [this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
+            return Context.insert<InstIcmp>(Condition, Dest, Src0, Src1);
+          });
+      CmpInstr->setDeleted();
+    }
+    return;
+  }
+  case Inst::Fcmp: {
+    Variable *Dest = Instr->getDest();
+    const Type DestTy = Dest->getType();
+    if (isVectorType(DestTy)) {
+      auto *CmpInstr = llvm::cast<InstFcmp>(Instr);
+      const auto Condition = CmpInstr->getCondition();
+      scalarizeInstruction(
+          Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1),
+          [this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
+            return Context.insert<InstFcmp>(Condition, Dest, Src0, Src1);
+          });
+      CmpInstr->setDeleted();
+    }
+    return;
+  }
   }
 }
 
@@ -4194,9 +4234,6 @@
 }
 
 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
-  assert(Instr->getSrc(0)->getType() != IceType_i1);
-  assert(Instr->getSrc(1)->getType() != IceType_i1);
-
   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
 
@@ -4233,6 +4270,7 @@
   switch (Src0->getType()) {
   default:
     llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
+  case IceType_i1:
   case IceType_i8:
   case IceType_i16:
     return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);