Cache the Function-dependent subtarget on the MachineFunction.

As preparation for removing the getSubtargetImpl() call from
TargetMachine, go ahead and flip the switch on caching the
function-dependent subtarget, and remove the bare getSubtargetImpl
call from the X86 port. As part of this, add a few tests showing
that we can generate code and assemble on X86 based on the
features/CPU attributes of the Function.

llvm-svn: 232879
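
For context, the shape of the change is roughly the following
standalone sketch. This is illustrative rather than the verbatim
patch: Function, TargetMachine, and MachineFunction are simplified
stand-ins, and the in-tree lookup keys off the "target-cpu" and
"target-features" attributes rather than plain strings.

    // Standalone sketch of the caching pattern (simplified stand-ins
    // for llvm::Function, llvm::TargetMachine, llvm::MachineFunction).
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    struct SubtargetInfo {
      std::string CPU;      // resolved from the function's "target-cpu"
      std::string Features; // resolved from "target-features"
    };

    struct Function {
      std::string CPU;
      std::string Features;
    };

    class TargetMachine {
      // One subtarget per distinct CPU+feature combination, created
      // lazily on first use.
      mutable std::map<std::string, std::unique_ptr<SubtargetInfo>> SubtargetMap;

    public:
      const SubtargetInfo &getSubtargetImpl(const Function &F) const {
        auto &Entry = SubtargetMap[F.CPU + "," + F.Features];
        if (!Entry)
          Entry.reset(new SubtargetInfo{F.CPU, F.Features});
        return *Entry;
      }
    };

    class MachineFunction {
      const SubtargetInfo &STI; // looked up once, at construction

    public:
      MachineFunction(const Function &F, const TargetMachine &TM)
          : STI(TM.getSubtargetImpl(F)) {}

      // Downstream code asks the MachineFunction for the subtarget
      // rather than calling a Function-free getSubtargetImpl() on
      // the TargetMachine.
      const SubtargetInfo &getSubtarget() const { return STI; }
    };

    int main() {
      TargetMachine TM;
      Function Foo{"x86-64", "+avx2,+aes"};
      Function Bar{"corei7", "+sse4.2"};
      MachineFunction MFFoo(Foo, TM), MFBar(Bar, TM);
      std::cout << MFFoo.getSubtarget().Features << "\n"; // +avx2,+aes
      std::cout << MFBar.getSubtarget().Features << "\n"; // +sse4.2
    }

The point of the pattern is that the subtarget is resolved once per
MachineFunction, so later passes never depend on a function-independent
subtarget from the TargetMachine.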
diff --git a/llvm/test/CodeGen/X86/function-subtarget-features-2.ll b/llvm/test/CodeGen/X86/function-subtarget-features-2.ll
new file mode 100644
index 0000000..d7c7c2f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/function-subtarget-features-2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -filetype=obj -o - | llvm-objdump -d - | FileCheck %s
+
+; This test verifies that we assemble code for different architectures
+; based on target-cpu and target-features attributes.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() #0 {
+entry:
+  call void asm sideeffect "aeskeygenassist  $$0x4, %xmm0, %xmm1", "~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
+
+; CHECK: foo
+; CHECK: aeskeygenassist
+
+define void @bar() #2 {
+entry:
+  call void asm sideeffect "crc32b 4(%rbx), %eax", "~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
+
+; CHECK: bar
+; CHECK: crc32b
+
+attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }
+attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
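
(Aside: per-function "target-cpu"/"target-features" strings like the
ones above are what Clang emits for function-level target attributes.
A hedged sketch of C++ source that should produce similar IR, assuming
Clang targeting x86-64:)

    // Each function is compiled for its own feature set; Clang lowers
    // the target attribute to per-function "target-features" in the IR.
    __attribute__((target("avx2,aes"))) void foo() {
      __asm__ volatile("aeskeygenassist $0x4, %xmm0, %xmm1");
    }

    __attribute__((target("arch=corei7,sse4.2"))) void bar() {
      __asm__ volatile("crc32b 4(%rbx), %eax");
    }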
diff --git a/llvm/test/CodeGen/X86/function-subtarget-features.ll b/llvm/test/CodeGen/X86/function-subtarget-features.ll
new file mode 100644
index 0000000..b1e2585
--- /dev/null
+++ b/llvm/test/CodeGen/X86/function-subtarget-features.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+
+; This test verifies that we produce different code for different architectures
+; based on target-cpu and target-features attributes.
+; In this case, AVX provides a vmovss instruction; otherwise we should be using
+; movss to materialize constants.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define float @_Z3barv() #0 {
+entry:
+  ret float 4.000000e+00
+}
+
+; CHECK: barv
+; CHECK: vmovss
+
+define float @_Z4testv() #1 {
+entry:
+  ret float 1.000000e+00
+}
+
+; CHECK: testv
+; CHECK: movss
+
+define float @_Z3foov() #2 {
+entry:
+  ret float 4.000000e+00
+}
+
+; CHECK: foov
+; CHECK: movss
+
+define float @_Z3bazv() #0 {
+entry:
+  ret float 4.000000e+00
+}
+
+; CHECK: bazv
+; CHECK: vmovss
+
+define <2 x i64> @foo(<2 x i64> %a) #3 {
+entry:
+  %a.addr = alloca <2 x i64>, align 16
+  store <2 x i64> %a, <2 x i64>* %a.addr, align 16
+  %0 = load <2 x i64>, <2 x i64>* %a.addr, align 16
+  %1 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %0, i8 4)
+  ret <2 x i64> %1
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
+
+; CHECK: foo
+; CHECK: aeskeygenassist
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %crc, i8* %a) #3 {
+entry:
+  %crc.addr = alloca i32, align 4
+  %a.addr = alloca i8*, align 8
+  store i32 %crc, i32* %crc.addr, align 4
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i32, i32* %crc.addr, align 4
+  %1 = load i8*, i8** %a.addr, align 8
+  %incdec.ptr = getelementptr inbounds i8, i8* %1, i32 1
+  store i8* %incdec.ptr, i8** %a.addr, align 8
+  %2 = load i8, i8* %1, align 1
+  %3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %0, i8 %2)
+  ret i32 %3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
+
+; CHECK: bar
+; CHECK: crc32b
+
+attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
+attributes #1 = { "target-cpu"="x86-64" }
+attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
+attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }
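
The vmovss/movss split exercised above falls out of per-function
subtarget queries during instruction selection. A toy C++ sketch of
that decision; the real X86 lowering is far more involved, and the
names here (selectFPLoadOpcode, the X86Subtarget field) are
illustrative only:

    #include <iostream>
    #include <string>

    // Toy model: which move materializes a float constant, given the
    // function's cached subtarget features.
    struct X86Subtarget {
      bool HasAVX;
    };

    std::string selectFPLoadOpcode(const X86Subtarget &STI) {
      // With AVX available the VEX-encoded form is used; otherwise
      // fall back to the legacy SSE encoding.
      return STI.HasAVX ? "vmovss" : "movss";
    }

    int main() {
      X86Subtarget AVXFn{true};  // e.g. "target-features"="+avx2"
      X86Subtarget SSEFn{false}; // e.g. "target-cpu"="corei7"
      std::cout << selectFPLoadOpcode(AVXFn) << "\n"; // vmovss
      std::cout << selectFPLoadOpcode(SSEFn) << "\n"; // movss
    }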