[X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (llvm)
This patch removes the LLVM intrinsics for the (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) conversions and auto-upgrades existing uses to generic fptosi (FP_TO_SINT) conversions instead.
Note: I looked at updating CVTTPD2DQ as well, but it still requires a lot more work to lower correctly.
Differential Revision: http://reviews.llvm.org/D20860
llvm-svn: 271510
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index acc6e6a..689859d 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -185,6 +185,8 @@
Name == "x86.sse2.cvtps2pd" ||
Name == "x86.avx.cvtdq2.pd.256" ||
Name == "x86.avx.cvt.ps2.pd.256" ||
+ Name == "x86.sse2.cvttps2dq" ||
+ Name.startswith("x86.avx.cvtt.") ||
Name.startswith("x86.avx.vinsertf128.") ||
Name == "x86.avx2.vinserti128" ||
Name.startswith("x86.avx.vextractf128.") ||
@@ -498,6 +500,12 @@
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+ } else if (Name == "llvm.x86.sse2.cvttps2dq" ||
+ Name.startswith("llvm.x86.avx.cvtt.")) {
+ // Truncation (round to zero) float/double to i32 vector conversion.
+ Value *Src = CI->getArgOperand(0);
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
} else if (Name.startswith("llvm.x86.avx.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;