First half of the long-overdue support for segment override prefixes,
LDTs and __NR_modify_ldt.

- Each thread has its own LDT.  It is usually NULL; only if the
  thread needs to change an entry do we allocate a table.  LDTs are
  inherited from parent threads, as one would expect.

- We intercept __NR_modify_ldt and update the calling thread's LDT
  accordingly.  This is done in coregrind/vg_ldt.c.  The kernel
  never sees these syscalls.  (A sketch of a typical client-side
  call appears after this list.)

- New architectural state for %cs, %ss, %ds, %es, %fs and %gs.
  Including %cs and %ss is probably overkill.  These are saved and
  restored in the usual way, _except_ at syscalls -- there's no
  point, since we now hide all LDT operations from the kernel.
  This does assume that no syscall implicitly looks at the
  segment registers, but I think that's safe.
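
For reference, a typical client-side call that is now routed through
vg_ldt.c looks roughly like the sketch below.  This is illustrative
only and not part of the patch; the header and struct names vary
between libc/kernel versions.

   #include <asm/ldt.h>      /* struct modify_ldt_ldt_s / user_desc */
   #include <sys/syscall.h>  /* SYS_modify_ldt                      */
   #include <string.h>
   #include <unistd.h>

   static char seg[4096];    /* memory the new segment will describe */

   int install_ldt_entry ( void )
   {
      struct modify_ldt_ldt_s d;
      memset(&d, 0, sizeof(d));
      d.entry_number = 1;
      d.base_addr    = (unsigned long)seg;
      d.limit        = sizeof(seg) - 1;
      d.seg_32bit    = 1;
      d.contents     = MODIFY_LDT_CONTENTS_DATA;
      d.useable      = 1;
      /* func==1: write one LDT entry.  Valgrind now services this
         itself; the kernel never sees it. */
      return syscall(SYS_modify_ldt, 1, &d, sizeof(d));
   }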

Still only halfway there: the JITter is still unaware of segment
registers and override prefixes.
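
For instance, once an entry like the one above is installed, client
code loads the corresponding selector into a segment register and then
addresses memory through it using a segment override prefix (0x64 for
%fs) -- exactly the two things the JITter cannot yet decode.  A
hypothetical sketch:

   /* Selector for LDT entry 1: index 1, TI=1 (LDT), RPL=3 -> 0x0f. */
   static int read_first_word_via_fs ( void )
   {
      unsigned short sel = (1 << 3) | 7;
      int val;
      __asm__ __volatile__ (
         "movw  %w1, %%fs   \n\t"  /* load the LDT selector into %fs     */
         "movl  %%fs:0, %0  \n\t"  /* 0x64 prefix: %fs override on a mov */
         : "=r" (val) : "r" (sel) : "memory" );
      return val;
   }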


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1133 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index e31ee15..fc442e1 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -54,7 +54,8 @@
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c
+	vg_transtab.c \
+	vg_ldt.c
 valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index aadc697..2a04b31 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -408,6 +408,53 @@
 
 
 /* ---------------------------------------------------------------------
+   Exports of vg_ldt.c
+   ------------------------------------------------------------------ */
+
+/* This is the hardware format for a segment descriptor, i.e. what the
+   x86 actually deals with.  It is 8 bytes long.  It's ugly.  */
+
+typedef struct _LDT_ENTRY {
+    union {
+       struct {
+          UShort      LimitLow;
+          UShort      BaseLow;
+          unsigned    BaseMid         : 8;
+          unsigned    Type            : 5;
+          unsigned    Dpl             : 2;
+          unsigned    Pres            : 1;
+          unsigned    LimitHi         : 4;
+          unsigned    Sys             : 1;
+          unsigned    Reserved_0      : 1;
+          unsigned    Default_Big     : 1;
+          unsigned    Granularity     : 1;
+          unsigned    BaseHi          : 8;
+       } Bits;
+       struct {
+          UInt word1;
+          UInt word2;
+       } Words;
+    } 
+    LdtEnt;
+} VgLdtEntry;
+
+/* Maximum number of LDT entries supported (by the x86). */
+#define VG_M_LDT_ENTRIES     8192
+/* The size of each LDT entry == sizeof(VgLdtEntry) */
+#define VG_LDT_ENTRY_SIZE  8
+
+/* Alloc & copy, and dealloc. */
+extern VgLdtEntry* 
+         VG_(allocate_LDT_for_thread) ( VgLdtEntry* parent_ldt );
+extern void       
+         VG_(deallocate_LDT_for_thread) ( VgLdtEntry* ldt );
+
+/* Simulate the modify_ldt syscall. */
+extern Int VG_(sys_modify_ldt) ( ThreadId tid,
+                                 Int func, void* ptr, UInt bytecount );
+
+
+/* ---------------------------------------------------------------------
    Exports of vg_scheduler.c
    ------------------------------------------------------------------ */
 
@@ -546,13 +593,33 @@
    */
    Addr stack_base;
 
-  /* Address of the highest legitimate word in this stack.  This is
-     used for error messages only -- not critical for execution
-     correctness.  Is is set for all stacks, specifically including
-     ThreadId == 0 (the main thread). */
+   /* Address of the highest legitimate word in this stack.  This is
+      used for error messages only -- not critical for execution
+      correctness.  It is set for all stacks, specifically including
+      ThreadId == 0 (the main thread). */
    Addr stack_highest_word;
 
-   /* Saved machine context. */
+   /* Pointer to this thread's Local (Segment) Descriptor Table.
+      Starts out as NULL, indicating there is no table, and we hope to
+      keep it that way.  If the thread does __NR_modify_ldt to create
+      entries, we allocate an 8192-entry table at that point.  This is
+      a straight copy of the Linux kernel's scheme.  Don't forget to
+      deallocate this at thread exit. */
+   VgLdtEntry* ldt;
+
+   /* Saved machine context.  Note the FPU state, %EIP and segment
+      registers are not shadowed.
+
+      Although the segment registers are 16 bits long, storage
+      management here, in VG_(baseBlock) and in VG_(m_state_static), is
+      simplified if we pretend they are 32 bits. */
+   UInt m_cs;
+   UInt m_ss;
+   UInt m_ds;
+   UInt m_es;
+   UInt m_fs;
+   UInt m_gs;
+
    UInt m_eax;
    UInt m_ebx;
    UInt m_ecx;
@@ -965,7 +1032,8 @@
    Stuff is copied from baseBlock to here, the assembly magic runs,
    and then the inverse copy is done. 
  */
-extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ 
+extern UInt VG_(m_state_static) [6 /* segment regs, Intel order */
+                                 + 8 /* int regs, in Intel order */ 
                                  + 1 /* %eflags */ 
                                  + 1 /* %eip */
                                  + VG_SIZE_OF_FPUSTATE_W /* FPU state */
@@ -1278,6 +1346,13 @@
 extern Int VGOFF_(m_fpustate);
 extern Int VGOFF_(m_eip);
 
+extern Int VGOFF_(m_cs);
+extern Int VGOFF_(m_ss);
+extern Int VGOFF_(m_ds);
+extern Int VGOFF_(m_es);
+extern Int VGOFF_(m_fs);
+extern Int VGOFF_(m_gs);
+
 /* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). */
 extern Int VGOFF_(spillslots);
 
@@ -1296,6 +1371,9 @@
    Read-only parts of baseBlock.
    -------------------------------------------------- */
 
+/* This thread's LDT pointer. */
+extern Int VGOFF_(ldt);
+
 /* Offsets of addresses of helper functions.  A "helper" function is
    one which is called from generated code. */
 
diff --git a/coregrind/vg_kerneliface.h b/coregrind/vg_kerneliface.h
index ede3049..25a3c85 100644
--- a/coregrind/vg_kerneliface.h
+++ b/coregrind/vg_kerneliface.h
@@ -182,6 +182,7 @@
 #define VKI_ENOMEM          12      /* Out of memory */
 #define	VKI_EFAULT          14      /* Bad address */
 #define VKI_ESRCH            3      /* No such process */
+#define VKI_ENOSYS          38      /* Function not implemented */
 
 #define VKI_EWOULDBLOCK     VKI_EAGAIN  /* Operation would block */
 #define VKI_EAGAIN          11      /* Try again */
@@ -381,6 +382,34 @@
 
 #define VKI_SIZEOF_STRUCT_MODULE 96
 
+
+/* This is the structure passed to the modify_ldt syscall.  Just so as
+   to confuse and annoy everyone, this is _not_ the same as a
+   VgLdtEntry and has to be translated into such.  The logic for doing
+   so, in vg_ldt.c, is copied from the kernel sources. */
+/*
+ * ldt.h
+ *
+ * Definitions of structures used with the modify_ldt system call.
+ */
+typedef struct vki_modify_ldt_ldt_s {
+        unsigned int  entry_number;
+        unsigned long base_addr;
+        unsigned int  limit;
+        unsigned int  seg_32bit:1;
+        unsigned int  contents:2;
+        unsigned int  read_exec_only:1;
+        unsigned int  limit_in_pages:1;
+        unsigned int  seg_not_present:1;
+        unsigned int  useable:1;
+} vki_modify_ldt_t;
+
+#define VKI_MODIFY_LDT_CONTENTS_DATA        0
+#define VKI_MODIFY_LDT_CONTENTS_STACK       1
+#define VKI_MODIFY_LDT_CONTENTS_CODE        2
+
+
+
 #endif /* ndef __VG_KERNELIFACE_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_ldt.c b/coregrind/vg_ldt.c
new file mode 100644
index 0000000..a431ac9
--- /dev/null
+++ b/coregrind/vg_ldt.c
@@ -0,0 +1,249 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Simulation of Local Descriptor Tables               vg_ldt.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+/* Allocate and deallocate LDTs for threads. */
+
+/* Create an LDT.  If the parent_ldt is NULL, zero out the
+   new one.  If non-NULL, copy the parent. */
+VgLdtEntry* VG_(allocate_LDT_for_thread) ( VgLdtEntry* parent_ldt )
+{
+   UInt        nbytes, i;
+   VgLdtEntry* ldt;
+
+   VG_(printf)("allocate_LDT_for_thread: parent = %p\n", parent_ldt );
+   vg_assert(VG_LDT_ENTRY_SIZE == sizeof(VgLdtEntry));
+   nbytes = VG_M_LDT_ENTRIES * VG_LDT_ENTRY_SIZE;
+ 
+   if (parent_ldt == NULL) {
+      /* Allocate a new zeroed-out one. */
+      ldt = (VgLdtEntry*)VG_(arena_calloc)(VG_AR_CORE, nbytes, 1);
+   } else {
+     ldt = (VgLdtEntry*)VG_(arena_malloc)(VG_AR_CORE, nbytes);
+     for (i = 0; i < VG_M_LDT_ENTRIES; i++)
+        ldt[i] = parent_ldt[i];
+   }
+
+   return ldt;
+}
+
+/* Free an LDT created by the above function. */
+void VG_(deallocate_LDT_for_thread) ( VgLdtEntry* ldt )
+{
+   VG_(printf)("deallocate_LDT_for_thread: ldt = %p\n", ldt );
+   if (ldt != NULL)
+      VG_(arena_free)(VG_AR_CORE, ldt);
+}
+
+
+
+/* Fish the base field out of a VgLdtEntry.  This is the only part we
+   are particularly interested in. */
+
+static 
+void *wine_ldt_get_base( const VgLdtEntry *ent )
+{
+    return (void *)(ent->LdtEnt.Bits.BaseLow |
+                    ((unsigned long)ent->LdtEnt.Bits.BaseMid) << 16 |
+                    ((unsigned long)ent->LdtEnt.Bits.BaseHi) << 24);
+}
+
+#if 0
+inline static unsigned int wine_ldt_get_limit( const VgLdtEntry *ent )
+{
+    unsigned int limit = ent->LimitLow | (ent->HighWord.Bits.LimitHi << 16);
+    if (ent->HighWord.Bits.Granularity) limit = (limit << 12) | 0xfff;
+    return limit;
+}
+#endif
+
+
+
+/* Translate a struct modify_ldt_ldt_s to a VgLdtEntry, using the
+   Linux kernel's logic (cut-n-paste of code in linux/kernel/ldt.c).  */
+
+static
+void translate_to_hw_format ( /* IN  */ struct vki_modify_ldt_ldt_s* inn,
+			      /* OUT */ VgLdtEntry* out,
+                                        Int oldmode )
+{
+   UInt entry_1, entry_2;
+
+   /* Allow LDTs to be cleared by the user. */
+   if (inn->base_addr == 0 && inn->limit == 0) {
+      if (oldmode ||
+          (inn->contents == 0      &&
+           inn->read_exec_only == 1   &&
+           inn->seg_32bit == 0      &&
+           inn->limit_in_pages == 0   &&
+           inn->seg_not_present == 1   &&
+           inn->useable == 0 )) {
+         entry_1 = 0;
+         entry_2 = 0;
+         goto install;
+      }
+   }
+
+   entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
+             (inn->limit & 0x0ffff);
+   entry_2 = (inn->base_addr & 0xff000000) |
+             ((inn->base_addr & 0x00ff0000) >> 16) |
+             (inn->limit & 0xf0000) |
+             ((inn->read_exec_only ^ 1) << 9) |
+             (inn->contents << 10) |
+             ((inn->seg_not_present ^ 1) << 15) |
+             (inn->seg_32bit << 22) |
+             (inn->limit_in_pages << 23) |
+             0x7000;
+   if (!oldmode)
+      entry_2 |= (inn->useable << 20);
+
+   /* Install the new entry ...  */
+  install:
+   out->LdtEnt.Words.word1 = entry_1;
+   out->LdtEnt.Words.word2 = entry_2;
+}
+
+
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+/*
+ * read_ldt() is not really atomic - this is not a problem since
+ * synchronization of reads and writes done to the LDT has to be
+ * assured by user-space anyway. Writes are atomic, to protect
+ * the security checks done on new descriptors.
+ */
+static
+Int read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
+{
+   Int err;
+   UInt i, size;
+   Char* ldt;
+
+   VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
+               tid, ptr, bytecount );
+
+   ldt = (Char*)(VG_(threads)[tid].ldt);
+   err = 0;
+   if (ldt == NULL)
+      /* LDT not allocated, meaning all entries are null */
+      goto out;
+
+   size = VG_M_LDT_ENTRIES * VG_LDT_ENTRY_SIZE;
+   if (size > bytecount)
+      size = bytecount;
+
+   err = size;
+   for (i = 0; i < size; i++)
+      ptr[i] = ldt[i];
+
+  out:
+   return err;
+}
+
+
+static
+Int write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
+{
+   Int error;
+   VgLdtEntry* ldt;
+   struct vki_modify_ldt_ldt_s* ldt_info; 
+
+   VG_(printf)("write_ldt: tid = %d, ptr = %p, "
+               "bytecount = %d, oldmode = %d\n",
+               tid, ptr, bytecount, oldmode );
+
+   ldt      = VG_(threads)[tid].ldt;
+   ldt_info = (struct vki_modify_ldt_ldt_s*)ptr;
+
+   error = -VKI_EINVAL;
+   if (bytecount != sizeof(struct vki_modify_ldt_ldt_s))
+      goto out;
+
+   error = -VKI_EINVAL;
+   if (ldt_info->entry_number >= VG_M_LDT_ENTRIES)
+      goto out;
+   if (ldt_info->contents == 3) {
+      if (oldmode)
+         goto out;
+      if (ldt_info->seg_not_present == 0)
+         goto out;
+   }
+
+   /* If this thread doesn't have an LDT, we'd better allocate it
+      now. */
+   if (ldt == NULL) {
+      ldt = VG_(allocate_LDT_for_thread)( NULL );
+      VG_(threads)[tid].ldt = ldt;
+   }
+
+   /* Install the new entry ...  */
+   translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
+   error = 0;
+
+  out:
+   return error;
+}
+
+
+Int VG_(sys_modify_ldt) ( ThreadId tid,
+                          Int func, void* ptr, UInt bytecount )
+{
+   Int ret = -VKI_ENOSYS;
+
+   switch (func) {
+   case 0:
+      ret = read_ldt(tid, ptr, bytecount);
+      break;
+   case 1:
+      ret = write_ldt(tid, ptr, bytecount, 1);
+      break;
+   case 2:
+      VG_(unimplemented)("sys_modify_ldt: func == 2");
+      /* god knows what this is about */
+      /* ret = read_default_ldt(ptr, bytecount); */
+      /*UNREACHED*/
+      break;
+   case 0x11:
+      ret = write_ldt(tid, ptr, bytecount, 0);
+      break;
+   }
+   return ret;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                 vg_ldt.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 582b652..80725ed 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -49,6 +49,13 @@
 Int VGOFF_(m_edi) = INVALID_OFFSET;
 Int VGOFF_(m_eflags) = INVALID_OFFSET;
 Int VGOFF_(m_fpustate) = INVALID_OFFSET;
+Int VGOFF_(ldt)   = INVALID_OFFSET;
+Int VGOFF_(m_cs)  = INVALID_OFFSET;
+Int VGOFF_(m_ss)  = INVALID_OFFSET;
+Int VGOFF_(m_ds)  = INVALID_OFFSET;
+Int VGOFF_(m_es)  = INVALID_OFFSET;
+Int VGOFF_(m_fs)  = INVALID_OFFSET;
+Int VGOFF_(m_gs)  = INVALID_OFFSET;
 Int VGOFF_(m_eip) = INVALID_OFFSET;
 Int VGOFF_(spillslots) = INVALID_OFFSET;
 Int VGOFF_(sh_eax) = INVALID_OFFSET;
@@ -230,6 +237,16 @@
 
    VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
 
+   /* This thread's LDT pointer, and segment registers. */
+   VGOFF_(ldt)   = alloc_BaB(1);
+   VGOFF_(m_cs)  = alloc_BaB(1);
+   VGOFF_(m_ss)  = alloc_BaB(1);
+   VGOFF_(m_ds)  = alloc_BaB(1);
+   VGOFF_(m_es)  = alloc_BaB(1);
+   VGOFF_(m_fs)  = alloc_BaB(1);
+   VGOFF_(m_gs)  = alloc_BaB(1);
+
+   /* Helper functions. */
    VGOFF_(helper_idiv_64_32)
       = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) );
    VGOFF_(helper_div_64_32)
@@ -296,7 +313,7 @@
    VGOFF_(helper_DAA)
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAA) );
 
-   /* Allocate slots for compact helpers */
+   /* Allocate slots for noncompact helpers */
    assign_helpers_in_baseBlock(VG_(n_noncompact_helpers), 
                                VG_(noncompact_helper_offsets), 
                                VG_(noncompact_helper_addrs));
@@ -1000,7 +1017,8 @@
    Copying to/from m_state_static.
    ------------------------------------------------------------------ */
 
-UInt VG_(m_state_static) [8 /* int regs, in Intel order */ 
+UInt VG_(m_state_static) [6 /* segment regs, Intel order */
+                          + 8 /* int regs, in Intel order */ 
                           + 1 /* %eflags */ 
                           + 1 /* %eip */
                           + VG_SIZE_OF_FPUSTATE_W /* FPU state */
@@ -1009,20 +1027,27 @@
 void VG_(copy_baseBlock_to_m_state_static) ( void )
 {
    Int i;
-   VG_(m_state_static)[ 0/4] = VG_(baseBlock)[VGOFF_(m_eax)];
-   VG_(m_state_static)[ 4/4] = VG_(baseBlock)[VGOFF_(m_ecx)];
-   VG_(m_state_static)[ 8/4] = VG_(baseBlock)[VGOFF_(m_edx)];
-   VG_(m_state_static)[12/4] = VG_(baseBlock)[VGOFF_(m_ebx)];
-   VG_(m_state_static)[16/4] = VG_(baseBlock)[VGOFF_(m_esp)];
-   VG_(m_state_static)[20/4] = VG_(baseBlock)[VGOFF_(m_ebp)];
-   VG_(m_state_static)[24/4] = VG_(baseBlock)[VGOFF_(m_esi)];
-   VG_(m_state_static)[28/4] = VG_(baseBlock)[VGOFF_(m_edi)];
+   VG_(m_state_static)[ 0/4] = VG_(baseBlock)[VGOFF_(m_cs)];
+   VG_(m_state_static)[ 4/4] = VG_(baseBlock)[VGOFF_(m_ss)];
+   VG_(m_state_static)[ 8/4] = VG_(baseBlock)[VGOFF_(m_ds)];
+   VG_(m_state_static)[12/4] = VG_(baseBlock)[VGOFF_(m_es)];
+   VG_(m_state_static)[16/4] = VG_(baseBlock)[VGOFF_(m_fs)];
+   VG_(m_state_static)[20/4] = VG_(baseBlock)[VGOFF_(m_gs)];
 
-   VG_(m_state_static)[32/4] = VG_(baseBlock)[VGOFF_(m_eflags)];
-   VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)];
+   VG_(m_state_static)[24/4] = VG_(baseBlock)[VGOFF_(m_eax)];
+   VG_(m_state_static)[28/4] = VG_(baseBlock)[VGOFF_(m_ecx)];
+   VG_(m_state_static)[32/4] = VG_(baseBlock)[VGOFF_(m_edx)];
+   VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_ebx)];
+   VG_(m_state_static)[40/4] = VG_(baseBlock)[VGOFF_(m_esp)];
+   VG_(m_state_static)[44/4] = VG_(baseBlock)[VGOFF_(m_ebp)];
+   VG_(m_state_static)[48/4] = VG_(baseBlock)[VGOFF_(m_esi)];
+   VG_(m_state_static)[52/4] = VG_(baseBlock)[VGOFF_(m_edi)];
+
+   VG_(m_state_static)[56/4] = VG_(baseBlock)[VGOFF_(m_eflags)];
+   VG_(m_state_static)[60/4] = VG_(baseBlock)[VGOFF_(m_eip)];
 
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
-      VG_(m_state_static)[40/4 + i] 
+      VG_(m_state_static)[64/4 + i] 
          = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
 }
 
@@ -1030,21 +1055,28 @@
 void VG_(copy_m_state_static_to_baseBlock) ( void )
 {
    Int i;
-   VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4];
-   VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4];
-   VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4];
-   VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(m_state_static)[12/4];
-   VG_(baseBlock)[VGOFF_(m_esp)] = VG_(m_state_static)[16/4];
-   VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(m_state_static)[20/4];
-   VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[24/4];
-   VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[28/4];
+   VG_(baseBlock)[VGOFF_(m_cs)] = VG_(m_state_static)[ 0/4];
+   VG_(baseBlock)[VGOFF_(m_ss)] = VG_(m_state_static)[ 4/4];
+   VG_(baseBlock)[VGOFF_(m_ds)] = VG_(m_state_static)[ 8/4];
+   VG_(baseBlock)[VGOFF_(m_es)] = VG_(m_state_static)[12/4];
+   VG_(baseBlock)[VGOFF_(m_fs)] = VG_(m_state_static)[16/4];
+   VG_(baseBlock)[VGOFF_(m_gs)] = VG_(m_state_static)[20/4];
 
-   VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[32/4];
-   VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4];
+   VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[24/4];
+   VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[28/4];
+   VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[32/4];
+   VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(m_state_static)[36/4];
+   VG_(baseBlock)[VGOFF_(m_esp)] = VG_(m_state_static)[40/4];
+   VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(m_state_static)[44/4];
+   VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[48/4];
+   VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[52/4];
+
+   VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[56/4];
+   VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[60/4];
 
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(baseBlock)[VGOFF_(m_fpustate) + i]
-         = VG_(m_state_static)[40/4 + i];
+         = VG_(m_state_static)[64/4 + i];
 }
 
 Addr VG_(get_stack_pointer) ( void )
@@ -1150,6 +1182,9 @@
    /* Set up baseBlock offsets and copy the saved machine's state into it. */
    vg_init_baseBlock();
    VG_(copy_m_state_static_to_baseBlock)();
+   /* Pretend that the root thread has a completely empty LDT to start
+      with. */
+   VG_(baseBlock)[VGOFF_(ldt)] = (UInt)NULL;
    vg_init_shadow_regs();
 
    /* Process Valgrind's command-line opts (from env var VG_OPTS). */
@@ -1157,13 +1192,11 @@
 
    /* Hook to delay things long enough so we can get the pid and
       attach GDB in another shell. */
-#if 0
-   { 
+   if (0) { 
       Int p, q;
       for (p = 0; p < 50000; p++)
          for (q = 0; q < 50000; q++) ;
    }
-#endif
 
    /* Initialise the scheduler, and copy the client's state from
       baseBlock into VG_(threads)[1].  This has to come before signal
@@ -1217,7 +1250,6 @@
 
    VG_(bbs_to_go) = VG_(clo_stop_after);
 
-
    /* Run! */
    VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index b65426b..e6b8eea 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -398,6 +398,14 @@
    Int i;
    vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
 
+   VG_(baseBlock)[VGOFF_(ldt)]  = (UInt)VG_(threads)[tid].ldt;
+   VG_(baseBlock)[VGOFF_(m_cs)] = VG_(threads)[tid].m_cs;
+   VG_(baseBlock)[VGOFF_(m_ss)] = VG_(threads)[tid].m_ss;
+   VG_(baseBlock)[VGOFF_(m_ds)] = VG_(threads)[tid].m_ds;
+   VG_(baseBlock)[VGOFF_(m_es)] = VG_(threads)[tid].m_es;
+   VG_(baseBlock)[VGOFF_(m_fs)] = VG_(threads)[tid].m_fs;
+   VG_(baseBlock)[VGOFF_(m_gs)] = VG_(threads)[tid].m_gs;
+
    VG_(baseBlock)[VGOFF_(m_eax)] = VG_(threads)[tid].m_eax;
    VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(threads)[tid].m_ebx;
    VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(threads)[tid].m_ecx;
@@ -456,6 +464,21 @@
 
    vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
 
+
+   /* We don't copy out the LDT pointer, because it can never be changed
+      by the normal actions of the thread, only by the modify_ldt
+      syscall, in which case we will correctly be updating
+      VG_(threads)[tid].ldt. */
+   vg_assert((void*)VG_(threads)[tid].ldt 
+             == (void*)VG_(baseBlock)[VGOFF_(ldt)]);
+
+   VG_(threads)[tid].m_cs = VG_(baseBlock)[VGOFF_(m_cs)];
+   VG_(threads)[tid].m_ss = VG_(baseBlock)[VGOFF_(m_ss)];
+   VG_(threads)[tid].m_ds = VG_(baseBlock)[VGOFF_(m_ds)];
+   VG_(threads)[tid].m_es = VG_(baseBlock)[VGOFF_(m_es)];
+   VG_(threads)[tid].m_fs = VG_(baseBlock)[VGOFF_(m_fs)];
+   VG_(threads)[tid].m_gs = VG_(baseBlock)[VGOFF_(m_gs)];
+
    VG_(threads)[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
    VG_(threads)[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
    VG_(threads)[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
@@ -494,6 +517,14 @@
    }
 
    /* Fill it up with junk. */
+   VG_(baseBlock)[VGOFF_(ldt)] = junk;
+   VG_(baseBlock)[VGOFF_(m_cs)] = junk;
+   VG_(baseBlock)[VGOFF_(m_ss)] = junk;
+   VG_(baseBlock)[VGOFF_(m_ds)] = junk;
+   VG_(baseBlock)[VGOFF_(m_es)] = junk;
+   VG_(baseBlock)[VGOFF_(m_fs)] = junk;
+   VG_(baseBlock)[VGOFF_(m_gs)] = junk;
+
    VG_(baseBlock)[VGOFF_(m_eax)] = junk;
    VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
    VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
@@ -581,6 +612,7 @@
 {
    Int j;
    vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   VG_(threads)[tid].ldt                  = NULL;
    VG_(threads)[tid].tid                  = tid;
    VG_(threads)[tid].status               = VgTs_Empty;
    VG_(threads)[tid].associated_mx        = NULL;
@@ -1659,6 +1691,10 @@
          vg_waiting_fds[i].fd = -1; /* not in use */
       }
    }
+
+   /* Deallocate its LDT, if it ever had one. */
+   VG_(deallocate_LDT_for_thread)( VG_(threads)[tid].ldt );
+   VG_(threads)[tid].ldt = NULL;
 }
 
 
@@ -2186,6 +2222,16 @@
    VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
    VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
 
+   /* We inherit our parent's LDT. */
+   if (VG_(threads)[parent_tid].ldt == NULL) {
+      /* We hope this is the common case. */
+      VG_(threads)[tid].ldt = NULL;
+   } else {
+      /* No luck .. we have to take a copy of the parent's. */
+      VG_(threads)[tid].ldt 
+        = VG_(allocate_LDT_for_thread)( VG_(threads)[parent_tid].ldt );
+   }
+
    /* return child's tid to parent */
    SET_EDX(parent_tid, tid); /* success */
 }
diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S
index d6c202e..ae6bf94 100644
--- a/coregrind/vg_startup.S
+++ b/coregrind/vg_startup.S
@@ -85,21 +85,27 @@
 	# have not yet been set up.  Instead, they are copied to a
 	# temporary place (m_state_static).  In vg_main.c, once the
 	# baseBlock offsets are set up, values are copied into baseBlock.
-	movl	%eax, VG_(m_state_static)+0
-	movl	%ecx, VG_(m_state_static)+4
-	movl	%edx, VG_(m_state_static)+8
-	movl	%ebx, VG_(m_state_static)+12
-	movl	%esp, VG_(m_state_static)+16
-	movl	%ebp, VG_(m_state_static)+20
-	movl	%esi, VG_(m_state_static)+24
-	movl	%edi, VG_(m_state_static)+28
+	movw	%cs, VG_(m_state_static)+0
+	movw	%ss, VG_(m_state_static)+4
+	movw	%ds, VG_(m_state_static)+8
+	movw	%es, VG_(m_state_static)+12
+	movw	%fs, VG_(m_state_static)+16
+	movw	%gs, VG_(m_state_static)+20
+	movl	%eax, VG_(m_state_static)+24
+	movl	%ecx, VG_(m_state_static)+28
+	movl	%edx, VG_(m_state_static)+32
+	movl	%ebx, VG_(m_state_static)+36
+	movl	%esp, VG_(m_state_static)+40
+	movl	%ebp, VG_(m_state_static)+44
+	movl	%esi, VG_(m_state_static)+48
+	movl	%edi, VG_(m_state_static)+52
 	pushfl
 	popl	%eax
-	movl	%eax, VG_(m_state_static)+32
+	movl	%eax, VG_(m_state_static)+56
 	fwait
-	fnsave	VG_(m_state_static)+40
-	frstor	VG_(m_state_static)+40
-
+	fnsave	VG_(m_state_static)+64
+	frstor	VG_(m_state_static)+64
+	
 	# keep the first and last 10 words free to check for overruns	
 	movl	$VG_(stack)+39996 -40, %esp
 
@@ -108,7 +114,7 @@
 	# simulator.  So vg_main starts the simulator running at
 	# the insn labelled first_insn_to_simulate.
 
-	movl	$first_insn_to_simulate, VG_(m_state_static)+36
+	movl	$first_insn_to_simulate, VG_(m_state_static)+60
 	jmp	VG_(main)
 first_insn_to_simulate:
 	# Nothing else to do -- just return in the "normal" way.
@@ -136,18 +142,24 @@
 	# of the rest of the program continues on the real CPU,
 	# and there is no way for the simulator to regain control
 	# after this point.
-	frstor	VG_(m_state_static)+40
-	movl	VG_(m_state_static)+32, %eax
+	frstor	VG_(m_state_static)+64
+	movl	VG_(m_state_static)+56, %eax
 	pushl	%eax
 	popfl
-	movl	VG_(m_state_static)+0, %eax
-	movl	VG_(m_state_static)+4, %ecx
-	movl	VG_(m_state_static)+8, %edx
-	movl	VG_(m_state_static)+12, %ebx
-	movl	VG_(m_state_static)+16, %esp
-	movl	VG_(m_state_static)+20, %ebp
-	movl	VG_(m_state_static)+24, %esi
-	movl	VG_(m_state_static)+28, %edi
+	movw	VG_(m_state_static)+0, %cs
+	movw	VG_(m_state_static)+4, %ss
+	movw	VG_(m_state_static)+8, %ds
+	movw	VG_(m_state_static)+12, %es
+	movw	VG_(m_state_static)+16, %fs
+	movw	VG_(m_state_static)+20, %gs
+	movl	VG_(m_state_static)+24, %eax
+	movl	VG_(m_state_static)+28, %ecx
+	movl	VG_(m_state_static)+32, %edx
+	movl	VG_(m_state_static)+36, %ebx
+	movl	VG_(m_state_static)+40, %esp
+	movl	VG_(m_state_static)+44, %ebp
+	movl	VG_(m_state_static)+48, %esi
+	movl	VG_(m_state_static)+52, %edi
 
 	pushal
 	pushfl
@@ -157,8 +169,8 @@
 	popfl
 	popal
 	# re-restore the FPU state anyway ...
-	frstor	VG_(m_state_static)+40	
-	jmp	*VG_(m_state_static)+36
+	frstor	VG_(m_state_static)+64
+	jmp	*VG_(m_state_static)+60
 
 
 
diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S
index 52d6091..c10af41 100644
--- a/coregrind/vg_syscall.S
+++ b/coregrind/vg_syscall.S
@@ -54,18 +54,31 @@
 	
 	# Now copy the simulated machines state into the real one
 	# esp still refers to the simulators stack
-	frstor	VG_(m_state_static)+40
-	movl	VG_(m_state_static)+32, %eax
+	frstor	VG_(m_state_static)+64
+	movl	VG_(m_state_static)+56, %eax
 	pushl	%eax
 	popfl
-	movl	VG_(m_state_static)+0, %eax
-	movl	VG_(m_state_static)+4, %ecx
-	movl	VG_(m_state_static)+8, %edx
-	movl	VG_(m_state_static)+12, %ebx
-	movl	VG_(m_state_static)+16, %esp
-	movl	VG_(m_state_static)+20, %ebp
-	movl	VG_(m_state_static)+24, %esi
-	movl	VG_(m_state_static)+28, %edi
+#if 0
+	/* don't bother to save/restore seg regs across the kernel iface.  
+	   Once we have our hands on them, our simulation of it is 
+	   completely internal, and the kernel sees nothing.  
+	   What's more, loading new values into %cs seems 
+	   to be impossible anyway. */
+	movw	VG_(m_state_static)+0, %cs
+	movw	VG_(m_state_static)+4, %ss
+	movw	VG_(m_state_static)+8, %ds
+	movw	VG_(m_state_static)+12, %es
+	movw	VG_(m_state_static)+16, %fs
+	movw	VG_(m_state_static)+20, %gs
+#endif
+	movl	VG_(m_state_static)+24, %eax
+	movl	VG_(m_state_static)+28, %ecx
+	movl	VG_(m_state_static)+32, %edx
+	movl	VG_(m_state_static)+36, %ebx
+	movl	VG_(m_state_static)+40, %esp
+	movl	VG_(m_state_static)+44, %ebp
+	movl	VG_(m_state_static)+48, %esi
+	movl	VG_(m_state_static)+52, %edi
 
 	# esp now refers to the simulatees stack
 	# Do the actual system call
@@ -73,25 +86,33 @@
 
 	# restore stack as soon as possible
 	# esp refers to simulatees stack
-	movl	%esp, VG_(m_state_static)+16
+	movl	%esp, VG_(m_state_static)+40
 	movl	VG_(esp_saved_over_syscall), %esp
 	# esp refers to simulators stack
 
 	# ... and undo everything else.  
-	# Copy real state back to simulated state.	
-	movl	%eax, VG_(m_state_static)+0
-	movl	%ecx, VG_(m_state_static)+4
-	movl	%edx, VG_(m_state_static)+8
-	movl	%ebx, VG_(m_state_static)+12
-	movl	%ebp, VG_(m_state_static)+20
-	movl	%esi, VG_(m_state_static)+24
-	movl	%edi, VG_(m_state_static)+28
+	# Copy real state back to simulated state.
+#if 0
+	movw	%cs, VG_(m_state_static)+0
+	movw	%ss, VG_(m_state_static)+4
+	movw	%ds, VG_(m_state_static)+8
+	movw	%es, VG_(m_state_static)+12
+	movw	%fs, VG_(m_state_static)+16
+	movw	%gs, VG_(m_state_static)+20
+#endif
+	movl	%eax, VG_(m_state_static)+24
+	movl	%ecx, VG_(m_state_static)+28
+	movl	%edx, VG_(m_state_static)+32
+	movl	%ebx, VG_(m_state_static)+36
+	movl	%ebp, VG_(m_state_static)+44
+	movl	%esi, VG_(m_state_static)+48
+	movl	%edi, VG_(m_state_static)+52
 	pushfl
 	popl	%eax
-	movl	%eax, VG_(m_state_static)+32
+	movl	%eax, VG_(m_state_static)+56
 	fwait
-	fnsave	VG_(m_state_static)+40
-	frstor	VG_(m_state_static)+40
+	fnsave	VG_(m_state_static)+64
+	frstor	VG_(m_state_static)+64
 
 	# Restore the state of the simulator
 	frstor	VG_(real_fpu_state_saved_over_syscall)
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index a500deb..9a01d70 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -473,16 +473,33 @@
              "you are picking up Valgrind's implementation of libpthread.so.");
          break;
 
+      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
+
 #     if defined(__NR_modify_ldt)
-      case __NR_modify_ldt:
-         VG_(nvidia_moan)();
-         VG_(unimplemented)
-            ("modify_ldt(): I (JRS) haven't investigated this yet; sorry.");
+      case __NR_modify_ldt: /* syscall 123 */
+         /* int modify_ldt(int func, void *ptr, 
+                           unsigned long bytecount); */
+         MAYBE_PRINTF("modify_ldt ( %d, %p, %d )\n", arg1,arg2,arg3);
+         if (arg1 == 0) {
+            /* read the LDT into ptr */
+            SYSCALL_TRACK( pre_mem_write, tst, 
+                           "modify_ldt(ptr)(func=0)", arg2, arg3 );
+         }
+         if (arg1 == 1) {
+            /* write the LDT with the entry pointed at by ptr */
+            SYSCALL_TRACK( pre_mem_read, tst, 
+                           "modify_ldt(ptr)(func=1)", arg2, 
+                           sizeof(struct vki_modify_ldt_ldt_s) );
+         }
+         /* "do" the syscall ourselves; the kernel never sees it */
+         res = VG_(sys_modify_ldt)( tid, arg1, (void*)arg2, arg3 );
+         SET_EAX(tid, res);
+         if (arg1 == 0 && !VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
          break;
 #     endif
 
-      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
-
 #     if defined(__NR_vhangup)
       case __NR_vhangup: /* syscall 111 */
          /* int vhangup(void); */