Abandon ifuncs and go with the traditional global function pointers.

In addition to the failing testcase, there were a couple of
regressions in piglit's attribs test: one from glBegin_unwrapped vs
glBegin confusion in the __asm__ directives we were generating, and
one where the function pointers apparently were just getting mixed up
at application runtime.
diff --git a/README.md b/README.md
index d02d304..6de507d 100644
--- a/README.md
+++ b/README.md
@@ -108,21 +108,3 @@
 meant replacing every single piece of GLEW, so we built
 piglit-dispatch from scratch.  And since we wanted to reuse it in
 other GL-related projects, this is the result.
-
-Caveats
--------
-
-* libepoxy isn't intended to be dlopen()ed.  On ifunc platforms,
-  RTLD_NOW would cause early resolution of all GL symbols, and the
-  following error from reentering the linker:
-
-```
-Inconsistency detected by ld.so: dl-open.c: 235: dl_open_worker:
-Assertion `_dl_debug_initialize (0, args->nsid)->r_state ==
-RT_CONSISTENT' failed!
-```
-
-* libepoxy isn't intended to be built statically.  It usess ifuncs on
-  linux, since it can avoid its dispatch table entirely in that case.
-  Building statically disables ifuncs, dropping you to a
-  higher-overhead path!
diff --git a/src/dispatch_common.c b/src/dispatch_common.c
index 03d08fc..11498a7 100644
--- a/src/dispatch_common.c
+++ b/src/dispatch_common.c
@@ -476,8 +476,16 @@
     }
 }
 
-PUBLIC void
-epoxy_glBegin(GLenum primtype)
+#ifdef _WIN32
+#define WRAPPER_VISIBILITY PUBLIC
+#define WRAPPER(x) x
+#else
+#define WRAPPER_VISIBILITY static
+#define WRAPPER(x) x ## _wrapped
+#endif
+
+WRAPPER_VISIBILITY void
+WRAPPER(epoxy_glBegin)(GLenum primtype)
 {
 #ifdef _WIN32
     InterlockedIncrement(&api.begin_count);
@@ -490,8 +498,8 @@
     epoxy_glBegin_unwrapped(primtype);
 }
 
-PUBLIC void
-epoxy_glEnd(void)
+WRAPPER_VISIBILITY void
+WRAPPER(epoxy_glEnd)(void)
 {
     epoxy_glEnd_unwrapped();
 
@@ -503,3 +511,8 @@
     pthread_mutex_unlock(&api.mutex);
 #endif
 }
+
+#ifndef _WIN32
+PUBLIC PFNGLBEGINPROC epoxy_glBegin = epoxy_glBegin_wrapped;
+PUBLIC PFNGLENDPROC epoxy_glEnd = epoxy_glEnd_wrapped;
+#endif
diff --git a/src/dispatch_common.h b/src/dispatch_common.h
index 45b6c45..73bbb1d 100644
--- a/src/dispatch_common.h
+++ b/src/dispatch_common.h
@@ -58,6 +58,18 @@
 #  endif
 #endif
 
+/* On win32, we're going to need to keep a per-thread dispatch table,
+ * since the function pointers depend on the device and pixel format
+ * of the current context.
+ */
+#if defined(_WIN32)
+#define USING_DISPATCH_TABLE 1
+#define UNWRAPPED_PROTO(x) x
+#else
+#define USING_DISPATCH_TABLE 0
+#define UNWRAPPED_PROTO(x) (*x)
+#endif
+
 void *epoxy_egl_dlsym(const char *name);
 void *epoxy_glx_dlsym(const char *name);
 void *epoxy_gl_dlsym(const char *name);
@@ -79,5 +91,5 @@
 
 bool epoxy_extension_in_string(const char *extension_list, const char *ext);
 
-void epoxy_glBegin_unwrapped(GLenum primtype);
-void epoxy_glEnd_unwrapped(void);
+void UNWRAPPED_PROTO(epoxy_glBegin_unwrapped)(GLenum primtype);
+void UNWRAPPED_PROTO(epoxy_glEnd_unwrapped)(void);
diff --git a/src/gen_dispatch.py b/src/gen_dispatch.py
index 7fbb83f..0466a06 100755
--- a/src/gen_dispatch.py
+++ b/src/gen_dispatch.py
@@ -501,9 +501,22 @@
         self.outln('')
         self.write_function_ptr_typedefs()
 
+        self.outln('/* The library ABI is a set of functions on win32 (where')
+        self.outln(' * we have to use per-thread dispatch tables) and a set')
+        self.outln(' * of function pointers, otherwise.')
+        self.outln(' */')
+        self.outln('#ifdef _WIN32')
+        self.outln('#define EPOXY_FPTR(x) x')
+        self.outln('#define EPOXY_FPTR_EXTERN')
+        self.outln('#else')
+        self.outln('#define EPOXY_FPTR(x) (*x)')
+        self.outln('#define EPOXY_FPTR_EXTERN extern')
+        self.outln('#endif')
+
         for func in self.sorted_functions:
-            self.outln('{0} epoxy_{1}({2});'.format(func.ret_type, func.name,
-                                                    func.args_decl))
+            self.outln('EPOXY_FPTR_EXTERN {0} EPOXY_FPTR(epoxy_{1})({2});'.format(func.ret_type,
+                                                                                  func.name,
+                                                                                  func.args_decl))
             self.outln('')
 
     def write_proto_define_header(self, file, style):
@@ -580,23 +593,28 @@
         self.outln('}')
         self.outln('')
 
-    def write_ifunc(self, func):
-        # Writes out the ifunc symbol that will have its PLT entry
-        # replaced with the resolved GL symbol when it's called.
-
-        # Tell the linker that even if we're in a static build, we need a PLT.
-        self.outln('__asm__(".type epoxy_{0}, @gnu_indirect_function");'.format(func.name));
-
-        # Tell the linker that epoxy_glWhatever() needs to be resolved
-        # by epoxy_ifunc_glWhatever().
-        self.outln('void *epoxy_ifunc_{0}(void) __asm__("epoxy_{0}");'.format(func.wrapped_name))
-
-        # Write out the actual epoxy_ifunc_glWhatever().
-        self.outln('{0} void *epoxy_ifunc_{1}(void)'.format(func.public, func.wrapped_name))
+    def write_function_pointer(self, func):
+        self.outln('static {0}'.format(func.ret_type))
+        self.outln('epoxy_{0}_rewrite_ptr({1})'.format(func.wrapped_name,
+                                                       func.args_decl))
         self.outln('{')
-        self.outln('    return epoxy_{0}_resolver();'.format(func.alias_name))
+        self.outln('    epoxy_{0} = (void *)epoxy_{1}_resolver();'.format(func.wrapped_name,
+                                                                          func.alias_name))
+
+        if func.ret_type == 'void':
+            self.outln('    epoxy_{0}({1});'.format(func.wrapped_name,
+                                                    func.args_list))
+        else:
+            self.outln('    return epoxy_{0}({1});'.format(func.wrapped_name,
+                                                           func.args_list))
+
         self.outln('}')
 
+        self.outln('{0}{1} epoxy_{2} = epoxy_{2}_rewrite_ptr;'.format(func.public,
+                                                                      func.ptr_type,
+                                                                      func.wrapped_name))
+        self.outln('')
+
     def write_provider_enums(self):
         self.outln('enum {0}_provider {{'.format(self.target))
 
@@ -671,22 +689,6 @@
         self.outln('#include "epoxy/{0}.h"'.format(self.target))
         self.outln('')
 
-        # ifuncs are a newer-gcc and newer-dynamic-linker feature that
-        # let the library rewrite the application's PLT entry with our
-        # resolved function pointer.
-        #
-        # Note that dlsym() when we're called from the dynamic linker
-        # while it's loading libraries will break. So, libepoxy can't
-        # be loaded with RTLD_NOW when ifuncs are in use.  We also
-        # can't use ifuncs for static builds, since static builds
-        # resolve just like RTLD_NOW.
-        self.outln('#if defined(__PIC__) && defined(__linux__)')
-        self.outln('#define USING_IFUNCS 1')
-        self.outln('#else')
-        self.outln('#define USING_IFUNCS 0')
-        self.outln('#endif')
-        self.outln('')
-
         self.outln('struct dispatch_table {')
         for func in self.sorted_functions:
             # Aliases don't get their own slot, since they use a shared resolver.
@@ -699,7 +701,7 @@
         # bottom. (I want the function_ptr_resolver as the first
         # per-GL-call code, since it's the most interesting to see
         # when you search for the implementation of a call)
-        self.outln('#if !USING_IFUNCS')
+        self.outln('#if USING_DISPATCH_TABLE')
         self.outln('static inline struct dispatch_table *')
         self.outln('get_dispatch_table(void);')
         self.outln('')
@@ -713,7 +715,7 @@
             if not func.alias_func:
                 self.write_function_ptr_resolver(func)
 
-        self.outln('#if !USING_IFUNCS')
+        self.outln('#if USING_DISPATCH_TABLE')
 
         for func in self.sorted_functions:
             if not func.alias_func:
@@ -738,12 +740,12 @@
         self.outln('}')
         self.outln('')
 
-        self.outln('#else /* !USING_IFUNCS */')
+        self.outln('#else /* !USING_DISPATCH_TABLE */')
 
         for func in self.sorted_functions:
-            self.write_ifunc(func)
+            self.write_function_pointer(func)
 
-        self.outln('#endif /* !USING_IFUNCS */')
+        self.outln('#endif /* !USING_DISPATCH_TABLE */')
 
 argparser = argparse.ArgumentParser(description='Generate GL dispatch wrappers.')
 argparser.add_argument('files', metavar='file.xml', nargs='+', help='GL API XML files to be parsed')
diff --git a/test/Makefile.am b/test/Makefile.am
index 1b774a1..83cfa44 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -87,8 +87,6 @@
 WGL_LIBS = libwgl_common.la
 endif
 
-XFAIL_TESTS = glx_shared_znow
-
 egl_has_extension_nocontext_LDFLAGS = $(X11_LIBS) $(EPOXY) libegl_common.la
 egl_has_extension_nocontext_DEPENDENCIES = $(EPOXY) libegl_common.la