X86 TLS: fix and optimize the implementation of the "initial exec" model.

llvm-svn: 36355
diff --git a/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp b/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
index f36de92..e595750a 100755
--- a/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
@@ -277,7 +277,7 @@
     GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
     bool isThreadLocal = GVar && GVar->isThreadLocal();
 
-    if (!isMemOp && !isCallOp && !isThreadLocal) O << '$';
+    if (!isMemOp && !isCallOp) O << '$';
 
     std::string Name = Mang->getValueName(GV);
     X86SharedAsmPrinter::decorateName(Name, GV);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ec86924..e63545d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2996,6 +2996,10 @@
                                              GA->getValueType(0),
                                              GA->getOffset());
   SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
+
+  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
+    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
+
   // The address of the thread local variable is the add of the thread
   // pointer with the offset of the variable.
   return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index fdc2bc9..1885547 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -2462,10 +2462,15 @@
 //
 
 def TLS_addr : I<0, Pseudo, (ops GR32:$dst, i32imm:$sym),
-               "leal $sym(,%ebx,1), $dst",
+               "leal ${sym:mem}(,%ebx,1), $dst",
                [(set GR32:$dst, (X86tlsaddr tglobaltlsaddr:$sym))]>,
                Imp<[EBX],[]>;
 
+let AddedComplexity = 10 in
+def TLS_gs : I<0, Pseudo, (ops GR32:$dst, GR32:$src),
+               "movl %gs:($src), $dst",
+               [(set GR32:$dst, (load (add X86TLStp, GR32:$src)))]>;
+
 def TLS_tp : I<0, Pseudo, (ops GR32:$dst),
                "movl %gs:0, $dst",
                [(set GR32:$dst, X86TLStp)]>;