Don't fold a load if the other operand is a TLS address.
With this change we generate:
movl %gs:0, %eax
leal i@NTPOFF(%eax), %eax
instead of:
movl $i@NTPOFF, %eax
addl %gs:0, %eax
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68778 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2cd6f74..1b9572c 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -322,6 +322,8 @@
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
// If the other operand is a 8-bit immediate we should fold the immediate
// instead. This reduces code size.
// e.g.
@@ -332,9 +334,25 @@
// addl 4(%esp), %eax
// The former is 2 bytes shorter. In case where the increment is 1, then
// the saving can be 4 bytes (by using incl %eax).
- if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(U->getOperand(1)))
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
if (Imm->getAPIntValue().isSignedIntN(8))
return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // If the block also accesses a second TLS address, this will save
+ // a load.
+ // FIXME: This is probably also true for non-TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
}
}
@@ -1170,13 +1188,16 @@
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp) {
X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
if (MatchAddress(N, AM))
return false;
-
- //Is it better to set AM.Segment before calling MatchAddress to
- //prevent it from adding a segment?
- if (AM.Segment.getNode())
- return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
MVT VT = N.getValueType();
unsigned Complexity = 0;
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
index 5f022e3..a4f2fb1 100644
--- a/test/CodeGen/X86/tls10.ll
+++ b/test/CodeGen/X86/tls10.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = external hidden thread_local global i32
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
new file mode 100644
index 0000000..5d3ee16
--- /dev/null
+++ b/test/CodeGen/X86/tls15.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl %gs:0, %eax} %t | count 1
+; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t
+; RUN: grep {leal j@NTPOFF(%eax), %eax} %t
+
+@i = thread_local global i32 0
+@j = thread_local global i32 0
+
+define void @f(i32** %a, i32** %b) {
+entry:
+ store i32* @i, i32** %a, align 8
+ store i32* @j, i32** %b, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index 8edc64f..fb57ae1 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = thread_local global i32 15
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index fc10a57..e0bcade 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = internal thread_local global i32 15
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
index fb570b0..4971fd2 100644
--- a/test/CodeGen/X86/tls8.ll
+++ b/test/CodeGen/X86/tls8.ll
@@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl \$i@NTPOFF, %eax} %t
-; RUN: grep {addl %gs:0, %eax} %t
+; RUN: grep {movl %gs:0, %eax} %t
+; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
@i = hidden thread_local global i32 15