Replace the "movnt" intrinsics with a native store + nontemporal metadata bit.
<rdar://problem/8460511>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130791 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index e4e2d3a..417493f 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -10,6 +10,7 @@
; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
; RUN: llvm-as < %s | llvm-dis | \
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
declare i32 @llvm.ctpop.i28(i28 %val)
declare i32 @llvm.cttz.i29(i29 %val)
@@ -91,3 +92,20 @@
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
ret void
}
+
+declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone ; movnt intrinsics called from @f below. NOTE(review): readnone on intrinsics that @f's checks expect to become stores looks inaccurate -- confirm
+declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone ; NOTE(review): value type is <2 x double> here, same as movnt.pd -- confirm against the intrinsic's real signature
+declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone ; packed-double variant
+declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone ; scalar i32 variant
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) { ; one call per movnt intrinsic; the check before each call expects a nontemporal store in the llvm-as | llvm-dis output, in this order
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A) ; expected to be printed back as a store carrying nontemporal metadata
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C) ; same expectation for the movnt.dq call
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C) ; same expectation for the movnt.pd call
+; CHECK: store{{.*}}nontemporal
+ call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D) ; same expectation for the scalar movnt.i call
+ ret void
+}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
new file mode 100644
index 0000000..1d09535
--- /dev/null
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) { ; four stores tagged !nontemporal through %B; each preceding check names the instruction llc is expected to emit
+; CHECK: movntps
+ %cast = bitcast i8* %B to <4 x float>* ; reinterpret %B as a pointer to the stored vector type
+ store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0 ; <4 x float> store, expected to select movntps
+; CHECK: movntdq
+ %cast1 = bitcast i8* %B to <2 x i64>* ; same destination, viewed as <2 x i64>*
+ store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0 ; <2 x i64> store, expected to select movntdq
+; CHECK: movntpd
+ %cast2 = bitcast i8* %B to <2 x double>* ; same destination, viewed as <2 x double>*
+ store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0 ; <2 x double> store, expected to select movntpd
+; CHECK: movnti
+ %cast3 = bitcast i8* %B to i32* ; same destination, viewed as i32*
+ store i32 %D, i32* %cast3, align 16, !nontemporal !0 ; scalar i32 store, expected to select movnti. NOTE(review): align 16 matches the vector stores above -- confirm it is intended for i32
+ ret void
+}
+
+!0 = metadata !{i32 1} ; node attached as !nontemporal to every store in @f above