[X86] Remove the SSE/AVX unaligned store intrinsics, as clang no longer uses them. Auto-upgrade them to native unaligned store instructions.
llvm-svn: 271236
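
As a minimal sketch of the expected upgrade (value names here are illustrative, not taken from any test), a call such as

  call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %v)

should now be rewritten by AutoUpgrade.cpp into a plain unaligned store:

  %cast = bitcast i8* %p to <4 x float>*
  store <4 x float> %v, <4 x float>* %cast, align 1

With the intrinsics upgraded away, the dedicated isel patterns in X86InstrSSE.td and the storeu special cases in InstCombine and LoopStrengthReduce below are no longer needed.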
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b679bd6..ce0b10d 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -191,6 +191,9 @@
Name == "x86.avx2.vextracti128" ||
Name.startswith("x86.avx.movnt.") ||
Name == "x86.sse2.storel.dq" ||
+ Name.startswith("x86.sse.storeu.") ||
+ Name.startswith("x86.sse2.storeu.") ||
+ Name.startswith("x86.avx.storeu.") ||
Name == "x86.sse42.crc32.64.8" ||
Name.startswith("x86.avx.vbroadcast.s") ||
Name.startswith("x86.sse2.psll.dq") ||
@@ -442,6 +445,20 @@
// Remove intrinsic.
CI->eraseFromParent();
return;
+ } else if (Name.startswith("llvm.x86.sse.storeu.") ||
+ Name.startswith("llvm.x86.sse2.storeu.") ||
+ Name.startswith("llvm.x86.avx.storeu.")) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ Arg0 = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ Builder.CreateAlignedStore(Arg1, Arg0, 1);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ return;
} else if (Name.startswith("llvm.x86.xop.vpcom")) {
Intrinsic::ID intID;
if (Name.endswith("ub"))
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f5a845b..3c52d1d 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -905,11 +905,6 @@
IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
-def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
-def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
- (VMOVUPDYmr addr:$dst, VR256:$src)>;
-
// Aliases to help the assembler pick two byte VEX encodings by swapping the
// operands relative to the normal instructions to use VEX.R instead of VEX.B.
def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
@@ -965,20 +960,6 @@
IIC_SSE_MOVU_P_RR>;
}
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
- (VMOVUPDmr addr:$dst, VR128:$src)>;
-}
-
-let Predicates = [UseSSE1] in
- def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-let Predicates = [UseSSE2] in
- def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
- (MOVUPDmr addr:$dst, VR128:$src)>;
-
// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX, NoVLX] in {
// 128-bit load/store
@@ -3887,16 +3868,6 @@
} // ExeDomain = SSEPackedInt
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
- (VMOVDQUmr addr:$dst, VR128:$src)>;
- def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
- (VMOVDQUYmr addr:$dst, VR256:$src)>;
-}
-let Predicates = [UseSSE2] in
-def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
- (MOVDQUmr addr:$dst, VR128:$src)>;
-
// Aliases to help the assembler pick two byte VEX encodings by swapping the
// operands relative to the normal instructions to use VEX.R instead of VEX.B.
def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c87d046..e0ec74f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1397,32 +1397,6 @@
}
break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
- 16) {
- Type *OpPtrTy =
- PointerType::getUnqual(II->getArgOperand(1)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
- return new StoreInst(II->getArgOperand(1), Ptr);
- }
- break;
-
- case Intrinsic::x86_avx_storeu_ps_256:
- case Intrinsic::x86_avx_storeu_pd_256:
- case Intrinsic::x86_avx_storeu_dq_256:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
- 32) {
- Type *OpPtrTy =
- PointerType::getUnqual(II->getArgOperand(1)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
- return new StoreInst(II->getArgOperand(1), Ptr);
- }
- break;
-
case Intrinsic::x86_vcvtph2ps_128:
case Intrinsic::x86_vcvtph2ps_256: {
auto Arg = II->getArgOperand(0);
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ad70a70..125f2cb 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -684,12 +684,6 @@
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_avx_storeu_ps_256:
- case Intrinsic::x86_avx_storeu_pd_256:
- case Intrinsic::x86_avx_storeu_dq_256:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
@@ -706,20 +700,6 @@
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
- } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_avx_storeu_ps_256:
- case Intrinsic::x86_avx_storeu_pd_256:
- case Intrinsic::x86_avx_storeu_dq_256:
- AccessTy.MemTy = II->getArgOperand(0)->getType();
- break;
- }
}
// All pointers have the same requirements, so canonicalize them to an