On recent Intel microarchitectures, folding loads into some unary SSE instructions can be suboptimal. To be precise, we should avoid folding a load if the instruction only updates part of the destination register and the non-updated part is not needed (e.g. cvtss2sd, sqrtss). Unfolding the load from these instructions breaks the partial register dependency, which can improve performance. For example, emit

    movss (%rdi), %xmm0
    cvtss2sd %xmm0, %xmm0

instead of

    cvtss2sd (%rdi), %xmm0

An alternative method to break the dependency is to clear the register first, e.g.

    xorps %xmm0, %xmm0
    cvtss2sd (%rdi), %xmm0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91672 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 400073d5467b79534d8c63b0d996a55e4252ff4b [log] [tgz]
author: Evan Cheng <evan.cheng@apple.com> Fri Dec 18 07:40:29 2009 +0000
committer: Evan Cheng <evan.cheng@apple.com> Fri Dec 18 07:40:29 2009 +0000
tree: f7204e84da8877e7b062f05bcb1878a05108b44e
parent: 3a5d409f3c2eccf1d1f0a4616023760829a4db67 [diff] [blame]
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index fb457dd..b2b48ed 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h

@@ -77,6 +77,14 @@
 
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
+
+  /// BreakSSEDep - True if codegen should unfold load or insert xorps / pxor
+  /// to break register dependency for a partial register update SSE
+  /// instruction. This is needed for instructions such as CVTSS2SD which
+  /// only update the lower part of the register, and the result of the updated
+  /// part does not depend on the contents of the destination before the
+  /// instruction, and the non-updated portion of the register is not used.
+  bool BreakSSEDep;
   
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -142,6 +150,7 @@
   bool hasFMA3() const { return HasFMA3; }
   bool hasFMA4() const { return HasFMA4; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
+  bool shouldBreakSSEDep() const { return BreakSSEDep; }
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }
commit	400073d5467b79534d8c63b0d996a55e4252ff4b	[log] [tgz]
author	Evan Cheng <evan.cheng@apple.com>	Fri Dec 18 07:40:29 2009 +0000
committer	Evan Cheng <evan.cheng@apple.com>	Fri Dec 18 07:40:29 2009 +0000
tree	f7204e84da8877e7b062f05bcb1878a05108b44e
parent	3a5d409f3c2eccf1d1f0a4616023760829a4db67 [diff] [blame]