Optimize loads of i8 and i16 TLS values: fold the thread-pointer add into a single zero-extending load through %gs.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66725 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 00ebce7..64d0021 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2937,6 +2937,24 @@
                     (load (add X86TLStp, (X86Wrapper tglobaltlsaddr:$src))))]>,
                   SegGS;
 
+let AddedComplexity = 15 in // Presumably so this folded form beats TLS_tp + load.
+def TLS_ext16_gs_ri : I<0x8B, Pseudo, (outs GR32:$dst), (ins i32imm:$src),
+                        "movzwl\t%gs:${src:mem}, $dst", // 16-bit load via %gs.
+                        [(set GR32:$dst, // Result is the i16 TLS value in a GR32.
+                          (extloadi32i16
+                            (add X86TLStp, // Address = X86TLStp + TLS symbol.
+                              (X86Wrapper tglobaltlsaddr:$src))))]>,
+                        SegGS; // NOTE(review): 0x8B is what TLS_tp's movl uses; presumably ignored for Pseudo -- confirm.
+
+let AddedComplexity = 15 in // Presumably so this folded form beats TLS_tp + load.
+def TLS_ext8_gs_ri : I<0x8B, Pseudo, (outs GR32:$dst), (ins i32imm:$src),
+                        "movzbl\t%gs:${src:mem}, $dst", // 8-bit load via %gs.
+                        [(set GR32:$dst, // Result is the i8 TLS value in a GR32.
+                          (extloadi32i8
+                            (add X86TLStp, // Address = X86TLStp + TLS symbol.
+                              (X86Wrapper tglobaltlsaddr:$src))))]>,
+                        SegGS; // NOTE(review): 0x8B is what TLS_tp's movl uses; presumably ignored for Pseudo -- confirm.
+
 def TLS_tp : I<0x8B, Pseudo, (outs GR32:$dst), (ins),
                "movl\t%gs:0, $dst",
                [(set GR32:$dst, X86TLStp)]>, SegGS;
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
new file mode 100644
index 0000000..32d0a12
--- /dev/null
+++ b/test/CodeGen/X86/tls11.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movzwl	%gs:i@NTPOFF, %eax} %t
+
+@i = thread_local global i16 15 ; 16-bit thread-local variable under test
+
+define i16 @f() { ; returns the current value of @i
+entry:
+	%tmp1 = load i16* @i ; the grep above expects this to become one movzwl through %gs
+	ret i16 %tmp1
+}
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
new file mode 100644
index 0000000..c6f766d
--- /dev/null
+++ b/test/CodeGen/X86/tls12.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movzbl	%gs:i@NTPOFF, %eax} %t
+
+@i = thread_local global i8 15 ; 8-bit thread-local variable under test
+
+define i8 @f() { ; returns the current value of @i
+entry:
+	%tmp1 = load i8* @i ; the grep above expects this to become one movzbl through %gs
+	ret i8 %tmp1
+}