ARM32 vector division lowering.
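Enables vector division (fdiv, udiv, sdiv) on ARM32 by scalarization: scalarizeArithmetic moves from the x86 backend into TargetLowering so ARM32 can reuse it.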
Also removes an assert, as suggested by Karl in a previous CL:
https://codereview.chromium.org/1646033002/diff/1/src/IceInstARM32.cpp#newcode717
BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1681003002 .
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp
index b1d052c..600658c 100644
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -804,7 +804,6 @@
Asm->vmuld(Dest, getSrc(0), getSrc(1), CondARM32::AL);
break;
}
- assert(!Asm->needsTextFixup());
}
InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index d53fbab..75886d6 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -700,6 +700,39 @@
Ctx->getFlags().getForceMemIntrinOpt();
}
+void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
+ Variable *Dest, Operand *Src0,
+ Operand *Src1) {
+ assert(isVectorType(Dest->getType()));
+ Type Ty = Dest->getType();
+ Type ElementTy = typeElementType(Ty);
+ SizeT NumElements = typeNumElements(Ty);
+
+ Operand *T = Ctx->getConstantUndef(Ty);
+ for (SizeT I = 0; I < NumElements; ++I) {
+ Constant *Index = Ctx->getConstantInt32(I);
+
+ // Extract the next two inputs.
+ Variable *Op0 = Func->makeVariable(ElementTy);
+ Context.insert<InstExtractElement>(Op0, Src0, Index);
+ Variable *Op1 = Func->makeVariable(ElementTy);
+ Context.insert<InstExtractElement>(Op1, Src1, Index);
+
+ // Perform the arithmetic as a scalar operation.
+ Variable *Res = Func->makeVariable(ElementTy);
+ auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1);
+ // We might have created an operation that needed a helper call.
+ genTargetHelperCallFor(Arith);
+
+ // Insert the result into position.
+ Variable *DestT = Func->makeVariable(Ty);
+ Context.insert<InstInsertElement>(DestT, T, Res, Index);
+ T = DestT;
+ }
+
+ Context.insert<InstAssign>(Dest, T);
+}
+
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
const char *Suffix) const {
if (!BuildDefs::dump())
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 1a0f0b7..55597ca 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -464,6 +464,9 @@
bool shouldOptimizeMemIntrins();
+ void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
+ Operand *Src0, Operand *Src1);
+
/// SandboxType enumerates all possible sandboxing strategies that
enum SandboxType {
ST_None,
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 9fa218b..1dde797 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -430,6 +430,18 @@
const Type DestTy = Dest->getType();
const InstArithmetic::OpKind Op =
llvm::cast<InstArithmetic>(Instr)->getOp();
+ if (isVectorType(DestTy)) {
+ switch (Op) {
+ default:
+ break;
+ case InstArithmetic::Fdiv:
+ case InstArithmetic::Udiv:
+ case InstArithmetic::Sdiv:
+ scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
+ Instr->setDeleted();
+ return;
+ }
+ }
switch (DestTy) {
default:
return;
@@ -2015,7 +2027,8 @@
Variable *SrcLoReg = legalizeToReg(SrcLo);
switch (Ty) {
default:
- llvm::report_fatal_error("Unexpected type");
+ llvm_unreachable(
+ ("Unexpected type in div0Check: " + typeIceString(Ty)).c_str());
case IceType_i8:
case IceType_i16: {
Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
@@ -5508,7 +5521,8 @@
Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum);
Context.insert<InstFakeDef>(Reg);
- UnimplementedError(Func->getContext()->getFlags());
+ assert(isVectorType(Ty));
+ _veor(Reg, Reg, Reg);
return Reg;
}
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index bda5077..ee2e5b0 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -343,9 +343,6 @@
void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);
- void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
- Operand *Src0, Operand *Src1);
-
void emitGetIP(CfgNode *Node) {
dispatchToConcrete(&Traits::ConcreteTarget::emitGetIP, std::move(Node));
}
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 8d838d4..6e338e6 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -6061,41 +6061,6 @@
_br(DefaultTarget);
}
-template <typename TraitsType>
-void TargetX86Base<TraitsType>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
- Variable *Dest,
- Operand *Src0,
- Operand *Src1) {
- assert(isVectorType(Dest->getType()));
- Type Ty = Dest->getType();
- Type ElementTy = typeElementType(Ty);
- SizeT NumElements = typeNumElements(Ty);
-
- Operand *T = Ctx->getConstantUndef(Ty);
- for (SizeT I = 0; I < NumElements; ++I) {
- Constant *Index = Ctx->getConstantInt32(I);
-
- // Extract the next two inputs.
- Variable *Op0 = Func->makeVariable(ElementTy);
- Context.insert<InstExtractElement>(Op0, Src0, Index);
- Variable *Op1 = Func->makeVariable(ElementTy);
- Context.insert<InstExtractElement>(Op1, Src1, Index);
-
- // Perform the arithmetic as a scalar operation.
- Variable *Res = Func->makeVariable(ElementTy);
- auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1);
- // We might have created an operation that needed a helper call.
- genTargetHelperCallFor(Arith);
-
- // Insert the result into position.
- Variable *DestT = Func->makeVariable(Ty);
- Context.insert<InstInsertElement>(DestT, T, Res, Index);
- T = DestT;
- }
-
- Context.insert<InstAssign>(Dest, T);
-}
-
/// The following pattern occurs often in lowered C and C++ code:
///
/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
diff --git a/src/IceTypes.def b/src/IceTypes.def
index a1a2552..c5f3386 100644
--- a/src/IceTypes.def
+++ b/src/IceTypes.def
@@ -45,7 +45,7 @@
X(f32, 2, 4, 1, f32, "float", "f32") \
X(f64, 3, 8, 1, f64, "double", "f64") \
X(v4i1, 4, 1, 4, i1, "<4 x i1>", "v4i1") \
- X(v8i1, 4, 1, 8, i1, "<8 x i1>", "v8ii") \
+ X(v8i1, 4, 1, 8, i1, "<8 x i1>", "v8i1") \
X(v16i1, 4, 1, 16, i1, "<16 x i1>", "v16i1") \
X(v16i8, 4, 1, 16, i8, "<16 x i8>", "v16i8") \
X(v8i16, 4, 2, 8, i16, "<8 x i16>", "v8i16") \