ARM: use vqdmull and vqadds/vqsubs to implement vqdmlal/vqdmlsl
The NEON intrinsics vqdmlal and vqdmlsl are really just combinations of a
saturating-doubling-multiply (vqdmull) and a saturating add/sub, so now that
LLVM can spot those patterns Clang should emit them instead of specialised
intrinsics.
Feature already tested by existing ARM NEON intrinsics tests.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189462 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index fc8d284..c9a2fcb 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -2561,12 +2561,28 @@
case ARM::BI__builtin_neon_vqaddq_v:
Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
- case ARM::BI__builtin_neon_vqdmlal_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
- Ops, "vqdmlal");
- case ARM::BI__builtin_neon_vqdmlsl_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
- Ops, "vqdmlsl");
+ case ARM::BI__builtin_neon_vqdmlal_v: {
+ SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
+ Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
+ MulOps, "vqdmlal");
+
+ SmallVector<Value *, 2> AddOps;
+ AddOps.push_back(Ops[0]);
+ AddOps.push_back(Mul);
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqadds, Ty),
+ AddOps, "vqdmlal");
+ }
+ case ARM::BI__builtin_neon_vqdmlsl_v: {
+ SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
+ Value *Mul = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
+ MulOps, "vqdmlsl");
+
+ SmallVector<Value *, 2> SubOps;
+ SubOps.push_back(Ops[0]);
+ SubOps.push_back(Mul);
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqsubs, Ty),
+ SubOps, "vqdmlsl");
+ }
case ARM::BI__builtin_neon_vqdmulh_v:
case ARM::BI__builtin_neon_vqdmulhq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),