[SveEmitter] Implement zeroing of false lanes
This implements zeroing of false lanes for binary operations (_z):
instead of merging into the first operand vector (_m), a `select`
against a zero vector is placed on the first input vector. This approach
easily translates to the use of the `zeroing movprfx` instruction.
This patch also adds builtins for svabd, svadd, svdiv, svdivr,
svmax, svmin, svmul, svmulh, svsub and svsubr.
Reviewers: SjoerdMeijer, efriedma, rovka
Reviewed By: efriedma
Tags: #clang
Differential Revision: https://reviews.llvm.org/D77593
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index df45fef..b2fb263 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7657,6 +7657,14 @@
}
}
+ // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
+ if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
+ llvm::Type *OpndTy = Ops[1]->getType();
+ auto *SplatZero = Constant::getNullValue(OpndTy);
+ Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy);
+ Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero});
+ }
+
Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, OverloadedTy);
Value *Call = Builder.CreateCall(F, Ops);
return Call;