[X86][FastISel] Teach how to select SSE4A nontemporal stores.
Add FastISel support for SSE4A scalar float / double non-temporal stores
Follow up to D13698
Differential Revision: http://reviews.llvm.org/D13773
llvm-svn: 250610
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0771cbeb4..263c133 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -434,6 +434,7 @@
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
@@ -461,12 +462,22 @@
Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSS;
+ else
+ Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ } else
+ Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = X86ScalarSSEf64 ?
- (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSD;
+ else
+ Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ } else
+ Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
if (Aligned) {