First phase of TLS work.  At this point SPARC assembly dispatch is
broken, but it will be fixed "shortly."  This is pretty much the same
as the patch I sent to the dri-devel list on 22-Jun-2004.
diff --git a/src/mesa/glapi/gl_apitemp.py b/src/mesa/glapi/gl_apitemp.py
index b210c70..b8b32ac 100644
--- a/src/mesa/glapi/gl_apitemp.py
+++ b/src/mesa/glapi/gl_apitemp.py
@@ -112,6 +112,7 @@
  */
 
 
+#if defined( NAME )
 #ifndef KEYWORD1
 #define KEYWORD1
 #endif
@@ -120,10 +121,6 @@
 #define KEYWORD2
 #endif
 
-#ifndef NAME
-#error NAME must be defined
-#endif
-
 #ifndef DISPATCH
 #error DISPATCH must be defined
 #endif
@@ -140,6 +137,7 @@
 
 	def printInitDispatch(self):
 		print """
+#endif /* defined( NAME ) */
 
 /*
  * This is how a dispatch table can be initialized with all the functions
diff --git a/src/mesa/glapi/gl_x86_asm.py b/src/mesa/glapi/gl_x86_asm.py
index a0147b2..9bca436 100644
--- a/src/mesa/glapi/gl_x86_asm.py
+++ b/src/mesa/glapi/gl_x86_asm.py
@@ -77,15 +77,65 @@
 		print '#define GLOBL_FN(x) GLOBL x'
 		print '#endif'
 		print ''
-		print '#define GL_STUB(fn,off,stack)\t\t\t\t\\'
+		print '#if defined(PTHREADS)'
+		print '#  define GL_STUB(fn,off,fn_alt)\t\t\t\\'
 		print 'ALIGNTEXT16;\t\t\t\t\t\t\\'
-		print 'GLOBL_FN(GL_PREFIX(fn, fn ## @ ## stack));\t\t\\'
-		print 'GL_PREFIX(fn, fn ## @ ## stack):\t\t\t\\'
-		print '\tMOV_L(CONTENT(GLNAME(_glapi_Dispatch)), EAX) ;\t\\'
+		print 'GLOBL_FN(GL_PREFIX(fn, fn_alt));\t\t\t\\'
+		print 'GL_PREFIX(fn, fn_alt):\t\t\t\t\t\\'
+		print '\tMOV_L(CONTENT(GLNAME(_glapi_DispatchTSD)), EAX) ;\t\\'
+		print '\tTEST_L(EAX, EAX) ;\t\t\t\t\\'
+		print '\tJE(1f) ;\t\t\t\t\t\\'
+		print '\tJMP(GL_OFFSET(off)) ;\t\t\t\t\\'
+		print '1:\tCALL(get_dispatch) ;\t\t\t\t\\'
 		print '\tJMP(GL_OFFSET(off))'
+		print '#elif defined(THREADS)'
+		print '#  define GL_STUB(fn,off,fn_alt)\t\t\t\\'
+		print 'ALIGNTEXT16;\t\t\t\t\t\t\\'
+		print 'GLOBL_FN(GL_PREFIX(fn, fn_alt));\t\t\t\\'
+		print 'GL_PREFIX(fn, fn_alt):\t\t\t\t\t\\'
+		print '\tMOV_L(CONTENT(GLNAME(_glapi_DispatchTSD)), EAX) ;\t\\'
+		print '\tTEST_L(EAX, EAX) ;\t\t\t\t\\'
+		print '\tJE(1f) ;\t\t\t\t\t\\'
+		print '\tJMP(GL_OFFSET(off)) ;\t\t\t\t\\'
+		print '1:\tCALL(_glapi_get_dispatch) ;\t\t\t\\'
+		print '\tJMP(GL_OFFSET(off))'
+		print '#else /* Non-threaded version. */'
+		print '#  define GL_STUB(fn,off,fn_alt)\t\t\t\\'
+		print 'ALIGNTEXT16;\t\t\t\t\t\t\\'
+		print 'GLOBL_FN(GL_PREFIX(fn, fn_alt));\t\t\t\\'
+		print 'GL_PREFIX(fn, fn_alt):\t\t\t\t\t\\'
+		print '\tMOV_L(CONTENT(GLNAME(_glapi_DispatchTSD)), EAX) ;\t\\'
+		print '\tJMP(GL_OFFSET(off))'
+		print '#endif'
 		print ''
 		print 'SEG_TEXT'
+		print ''
+		print '#ifdef PTHREADS'
 		print 'EXTERN GLNAME(_glapi_Dispatch)'
+		print 'EXTERN GLNAME(_gl_DispatchTSD)'
+		print '#ifdef __PIC__'
+		print 'EXTERN GLNAME(pthread_getspecific@PLT)'
+		print '#else'
+		print 'EXTERN GLNAME(pthread_getspecific)'
+		print '#endif'
+		print ''
+		print 'ALIGNTEXT16'
+		print 'GLNAME(get_dispatch):'
+		print '\tSUB_L(CONST(24), ESP)'
+		print '\tPUSH_L(GLNAME(_gl_DispatchTSD))'
+		print '#ifdef __PIC__'
+		print '\tCALL(GLNAME(pthread_getspecific@PLT))'
+		print '#else'
+		print '\tCALL(GLNAME(pthread_getspecific))'
+		print '#endif'
+		print '\tADD_L(CONST(28), ESP)'
+		print '\tRET'
+		print '#elif defined(THREADS)'
+		print 'EXTERN GLNAME(_glapi_get_dispatch)'
+		print '#endif'
+		print ''
+		print '\t\tALIGNTEXT16 ; GLOBL gl_dispatch_functions_start'
+		print 'gl_dispatch_functions_start:'
 		print ''
 		return
 
@@ -95,11 +145,10 @@
 		return
 
 	def printFunction(self, f):
-		if f.fn_offset == -1: return
-
 		stack = self.get_stack_size(f)
 
-		print '\tGL_STUB(%s, _gloffset_%s, %u)' % (f.name, f.real_name, stack)
+		alt = "%s@%u" % (f.name, stack)
+		print '\tGL_STUB(%s, _gloffset_%s, %s)' % (f.name, f.real_name, alt)
 		return
 
 def show_usage():
diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c
index d28e7c4..bba4b22 100644
--- a/src/mesa/glapi/glapi.c
+++ b/src/mesa/glapi/glapi.c
@@ -133,44 +133,44 @@
 
 
 /***** BEGIN THREAD-SAFE DISPATCH *****/
-/* if we support thread-safety, build a special dispatch table for use
- * in thread-safety mode (ThreadSafe == GL_TRUE).  Each entry in the
- * dispatch table will call _glthread_GetTSD() to get the actual dispatch
- * table bound to the current thread, then jump through that table.
- */
 
 #if defined(THREADS)
 
-static GLboolean ThreadSafe = GL_FALSE;  /* In thread-safe mode? */
-static _glthread_TSD DispatchTSD;        /* Per-thread dispatch pointer */
-static _glthread_TSD RealDispatchTSD;    /* only when using override */
-static _glthread_TSD ContextTSD;         /* Per-thread context pointer */
+/**
+ * \name Multi-threaded control support variables
+ *
+ * If thread-safety is supported, there are two potential mechanisms that can
+ * be used.  The old-style mechanism would set \c _glapi_Dispatch to a special
+ * thread-safe dispatch table.  These dispatch routines would call
+ * \c _glapi_get_dispatch to get the actual dispatch pointer.  In this
+ * setup \c _glapi_Dispatch could never be \c NULL.  This dual layered
+ * dispatch setup performed great for single-threaded apps, but didn't
+ * perform well for multithreaded apps.
+ *
+ * In the new mechanism, there are two variables.  The first is
+ * \c _glapi_DispatchTSD.  In the single-threaded case, this variable points
+ * to the dispatch table.  In the multi-threaded case, this variable is
+ * \c NULL, and thread-specific variable \c _gl_DispatchTSD points to the
+ * actual dispatch table.  \c _glapi_DispatchTSD is used to signal to the
+ * static dispatch functions to call \c _glapi_get_dispatch to get the real
+ * dispatch table.
+ * 
+ * Throughout the code \c _glapi_DispatchTSD == \c NULL is used to determine
+ * whether or not the application is multi-threaded.
+ */
+/*@{*/
+_glthread_TSD _gl_DispatchTSD;           /**< Per-thread dispatch pointer */
+static _glthread_TSD RealDispatchTSD;    /**< only when using override */
+static _glthread_TSD ContextTSD;         /**< Per-thread context pointer */
+/*@}*/
 
 
-#define KEYWORD1 static
-#define KEYWORD2 GLAPIENTRY
-#define NAME(func)  _ts_##func
-
-#define DISPATCH(FUNC, ARGS, MESSAGE)					\
-   struct _glapi_table *dispatch;					\
-   dispatch = (struct _glapi_table *) _glthread_GetTSD(&DispatchTSD);	\
-   if (!dispatch)							\
-      dispatch = (struct _glapi_table *) __glapi_noop_table;		\
-   (dispatch->FUNC) ARGS
-
-#define RETURN_DISPATCH(FUNC, ARGS, MESSAGE) 				\
-   struct _glapi_table *dispatch;					\
-   dispatch = (struct _glapi_table *) _glthread_GetTSD(&DispatchTSD);	\
-   if (!dispatch)							\
-      dispatch = (struct _glapi_table *) __glapi_noop_table;		\
-   return (dispatch->FUNC) ARGS
-
 #define DISPATCH_TABLE_NAME __glapi_threadsafe_table
 #define UNUSED_TABLE_NAME __usused_threadsafe_functions
 
-#define TABLE_ENTRY(name) (void *) _ts_##name
+#define TABLE_ENTRY(name) (void *) gl##name
 
-static int _ts_Unused(void)
+static int glUnused(void)
 {
    return 0;
 }
@@ -184,6 +184,7 @@
 
 
 struct _glapi_table *_glapi_Dispatch = (struct _glapi_table *) __glapi_noop_table;
+struct _glapi_table *_glapi_DispatchTSD = (struct _glapi_table *) __glapi_noop_table;
 struct _glapi_table *_glapi_RealDispatch = (struct _glapi_table *) __glapi_noop_table;
 
 /* Used when thread safety disabled */
@@ -218,7 +219,7 @@
 _glapi_check_multithread(void)
 {
 #if defined(THREADS)
-   if (!ThreadSafe) {
+   if ( _glapi_DispatchTSD != NULL ) {
       static unsigned long knownID;
       static GLboolean firstCall = GL_TRUE;
       if (firstCall) {
@@ -226,15 +227,13 @@
          firstCall = GL_FALSE;
       }
       else if (knownID != _glthread_GetID()) {
-         ThreadSafe = GL_TRUE;
-      }
-   }
-   if (ThreadSafe) {
-      /* make sure that this thread's dispatch pointer isn't null */
-      if (!_glapi_get_dispatch()) {
          _glapi_set_dispatch(NULL);
       }
    }
+   else if (!_glapi_get_dispatch()) {
+      /* make sure that this thread's dispatch pointer isn't null */
+      _glapi_set_dispatch(NULL);
+   }
 #endif
 }
 
@@ -250,10 +249,7 @@
 {
 #if defined(THREADS)
    _glthread_SetTSD(&ContextTSD, context);
-   if (ThreadSafe)
-      _glapi_Context = NULL;
-   else
-      _glapi_Context = context;
+   _glapi_Context = (_glapi_DispatchTSD == NULL) ? NULL : context;
 #else
    _glapi_Context = context;
 #endif
@@ -270,7 +266,7 @@
 _glapi_get_context(void)
 {
 #if defined(THREADS)
-   if (ThreadSafe) {
+   if ( _glapi_DispatchTSD == NULL ) {
       return _glthread_GetTSD(&ContextTSD);
    }
    else {
@@ -289,31 +285,37 @@
 void
 _glapi_set_dispatch(struct _glapi_table *dispatch)
 {
-   if (!dispatch) {
-      /* use the no-op functions */
-      dispatch = (struct _glapi_table *) __glapi_noop_table;
-   }
+   struct _glapi_table * old_style_dispatch;
+
+
+   /* Use the no-op functions if a NULL dispatch table was requested.
+    */
+
+   old_style_dispatch = (struct _glapi_table *) (dispatch == NULL)
+     ? __glapi_noop_table : dispatch;
+
 #ifdef DEBUG
-   else {
+   if (dispatch != NULL) {
       _glapi_check_table(dispatch);
    }
 #endif
 
 #if defined(THREADS)
    if (DispatchOverride) {
-      _glthread_SetTSD(&RealDispatchTSD, (void *) dispatch);
-      if (ThreadSafe)
+      _glthread_SetTSD(&RealDispatchTSD, (void *) old_style_dispatch);
+      if ( dispatch == NULL )
          _glapi_RealDispatch = (struct _glapi_table*) __glapi_threadsafe_table;
       else
          _glapi_RealDispatch = dispatch;
    }
    else {
       /* normal operation */
-      _glthread_SetTSD(&DispatchTSD, (void *) dispatch);
-      if (ThreadSafe)
-         _glapi_Dispatch = (struct _glapi_table *) __glapi_threadsafe_table;
-      else
-         _glapi_Dispatch = dispatch;
+      _glthread_SetTSD(&_gl_DispatchTSD, (void *) old_style_dispatch);
+      _glapi_DispatchTSD = dispatch;
+
+      _glapi_Dispatch = (dispatch == NULL)
+	  ? (struct _glapi_table *) __glapi_threadsafe_table
+	  : old_style_dispatch;
    }
 #else /*THREADS*/
    if (DispatchOverride) {
@@ -334,12 +336,12 @@
 _glapi_get_dispatch(void)
 {
 #if defined(THREADS)
-   if (ThreadSafe) {
+   if ( _glapi_DispatchTSD == NULL ) {
       if (DispatchOverride) {
          return (struct _glapi_table *) _glthread_GetTSD(&RealDispatchTSD);
       }
       else {
-         return (struct _glapi_table *) _glthread_GetTSD(&DispatchTSD);
+         return (struct _glapi_table *) _glthread_GetTSD(&_gl_DispatchTSD);
       }
    }
    else {
@@ -348,8 +350,8 @@
          return _glapi_RealDispatch;
       }
       else {
-         assert(_glapi_Dispatch);
-         return _glapi_Dispatch;
+         assert(_glapi_DispatchTSD);
+         return _glapi_DispatchTSD;
       }
    }
 #else
@@ -391,11 +393,14 @@
    _glapi_set_dispatch(real);
 
 #if defined(THREADS)
-   _glthread_SetTSD(&DispatchTSD, (void *) override);
-   if (ThreadSafe)
+   _glthread_SetTSD(&_gl_DispatchTSD, (void *) override);
+   if ( _glapi_DispatchTSD == NULL ) {
       _glapi_Dispatch = (struct _glapi_table *) __glapi_threadsafe_table;
-   else
+   }
+   else {
       _glapi_Dispatch = override;
+      _glapi_DispatchTSD = override;
+   }
 #else
    _glapi_Dispatch = override;
 #endif
@@ -427,7 +432,7 @@
    else {
       if (DispatchOverride) {
 #if defined(THREADS)
-         return (struct _glapi_table *) _glthread_GetTSD(&DispatchTSD);
+         return (struct _glapi_table *) _glthread_GetTSD(&_gl_DispatchTSD);
 #else
          return _glapi_Dispatch;
 #endif
@@ -446,7 +451,9 @@
 };
 
 
+#if !defined( USE_X86_ASM )
 #define NEED_FUNCTION_POINTER
+#endif
 
 /* The code in this file is auto-generated with Python */
 #include "glprocs.h"
@@ -468,7 +475,6 @@
    return NULL;
 }
 
-
 /*
  * Return dispatch table offset of the named static (built-in) function.
  * Return -1 if function not found.
@@ -485,6 +491,37 @@
 }
 
 
+#ifdef USE_X86_ASM
+extern const GLubyte gl_dispatch_functions_start[];
+
+# if defined(PTHREADS)
+#  define X86_DISPATCH_FUNCTION_SIZE  32
+# else
+#  define X86_DISPATCH_FUNCTION_SIZE  16
+# endif
+
+
+/*
+ * Return the dispatch function address of the named static (built-in) function.
+ * Return NULL if function not found.
+ */
+static const GLvoid *
+get_static_proc_address(const char *funcName)
+{
+   const glprocs_table_t * const f = find_entry( funcName );
+
+   if ( f != NULL ) {
+      return gl_dispatch_functions_start 
+	   + (X86_DISPATCH_FUNCTION_SIZE * f->Offset);
+   }
+   else {
+      return NULL;
+   }
+}
+
+#else
+
+
 /*
  * Return dispatch function address the named static (built-in) function.
  * Return NULL if function not found.
@@ -496,6 +533,8 @@
    return ( f != NULL ) ? f->Address : NULL;
 }
 
+#endif /* USE_X86_ASM */
+
 
 static const char *
 get_static_proc_name( GLuint offset )
@@ -576,7 +615,7 @@
    if (code) {
       memcpy(code, insn_template, sizeof(insn_template));
 
-      *(unsigned int *)(code + 0x01) = (unsigned int)&_glapi_Dispatch;
+      *(unsigned int *)(code + 0x01) = (unsigned int)&_glapi_DispatchTSD;
       *(unsigned int *)(code + 0x0b) = (unsigned int)functionOffset * 4;
       next_insn = (unsigned int)(code + 0x14);
       *(unsigned int *)(code + 0x10) = (unsigned int)_glapi_get_dispatch - next_insn;
diff --git a/src/mesa/glapi/glapitemp.h b/src/mesa/glapi/glapitemp.h
index 21523f2..62bb930 100644
--- a/src/mesa/glapi/glapitemp.h
+++ b/src/mesa/glapi/glapitemp.h
@@ -26,6 +26,7 @@
  */
 
 
+#if defined( NAME )
 #ifndef KEYWORD1
 #define KEYWORD1
 #endif
@@ -34,10 +35,6 @@
 #define KEYWORD2
 #endif
 
-#ifndef NAME
-#error NAME must be defined
-#endif
-
 #ifndef DISPATCH
 #error DISPATCH must be defined
 #endif
@@ -4778,6 +4775,7 @@
 }
 
 
+#endif /* defined( NAME ) */
 
 /*
  * This is how a dispatch table can be initialized with all the functions
diff --git a/src/mesa/glapi/glapitemp.py b/src/mesa/glapi/glapitemp.py
index cb24b04..8ce3092 100644
--- a/src/mesa/glapi/glapitemp.py
+++ b/src/mesa/glapi/glapitemp.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# $Id: glapitemp.py,v 1.5 2002/11/30 17:18:46 brianp Exp $
+# $Id: glapitemp.py,v 1.6 2004/06/29 19:08:20 idr Exp $
 
 # Mesa 3-D graphics library
 # Version:  4.1
@@ -67,6 +67,7 @@
  */
 
 
+#if defined( NAME )
 #ifndef KEYWORD1
 #define KEYWORD1
 #endif
@@ -75,10 +76,6 @@
 #define KEYWORD2
 #endif
 
-#ifndef NAME
-#error NAME must be defined
-#endif
-
 #ifndef DISPATCH
 #error DISPATCH must be defined
 #endif
@@ -231,6 +228,7 @@
 
 def PrintInitDispatch():
 	print """
+#endif /* defined( NAME ) */
 
 /*
  * This is how a dispatch table can be initialized with all the functions
diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h
index 4391ed4..283d549 100644
--- a/src/mesa/glapi/glthread.h
+++ b/src/mesa/glapi/glthread.h
@@ -109,6 +109,20 @@
 #define _glthread_UNLOCK_MUTEX(name) \
    (void) pthread_mutex_unlock(&(name))
 
+/* This is temporarily disabled because driver binaries cannot count on
+ * the existence of _gl_DispatchTSD in libGL.  It only exists in "new"
+ * libGL.  We may be able to resurrect this optimization at some point
+ * for DRI drivers or for software Mesa.
+ */
+#if 0
+extern struct _glapi_table * _glapi_DispatchTSD;
+extern _glthread_TSD _gl_DispatchTSD;
+
+#define GL_CALL(name) \
+   (((__builtin_expect( _glapi_DispatchTSD != NULL, 1 )) \
+	? _glapi_DispatchTSD : (struct _glapi_table *) pthread_getspecific(_gl_DispatchTSD.key))-> name)
+#endif
+
 #endif /* PTHREADS */
 
 
@@ -291,8 +305,15 @@
 _glthread_SetTSD(_glthread_TSD *, void *);
 
 #ifndef GL_CALL
-# define GL_CALL(name) (*(_glapi_Dispatch-> name))
-#endif
+# if defined(THREADS)
+extern struct _glapi_table * _glapi_DispatchTSD;
+#  define GL_CALL(name) \
+   (((__builtin_expect( _glapi_DispatchTSD != NULL, 1 )) \
+	? _glapi_DispatchTSD : _glapi_get_dispatch())-> name)
+# else
+#  define GL_CALL(name) (*(_glapi_Dispatch-> name))
+# endif /* defined(THREADS) */
+#endif  /* ndef GL_CALL */
 
 
 #endif /* THREADS_H */