Can compile "loader" and "layers" on Windows and Linux ...

These directories build and are partially enabled on Windows, using the "tri"
demo (follow-on commit) and a "NULL driver" that was created out of the
sample/Intel driver.  GetProcAddress() is not yet finding symbols in the
NULL driver.

For now:

- "C:\Windows\System32" is the default XGL driver directory.  The getenv()
  isn't yet working.  I suggest creating your own #define in order to point to
  where a driver is.

- In order to recognize a Windows driver, we must look at both its prefix and
  suffix (i.e. it is named "XGL_*.dll", e.g. "XGL_i965.dll").

- We autogenerate Windows ".def" files for the layers.  Additional info is:

  - This is necessary in order for a DLL to export symbols that can be queried
    using GetProcAddress().  We can't use the normal Windows approach of
    declaring these functions using "__declspec(dllexport)", because these
    functions are declared in "xgl.h".

  - This involves adding and running the new "xgl-win-def-file-generate.py"
    file.

  - NOTE: Layers don't have the xglInitAndEnumerateGpus() entrypoint, just the
    xglGetProcAddr() entrypoint (and now the xglEnumerateLayers() entrypoint).
    Generating them is pretty simple.

NOTE: In order to build on a 64-bit Windows 7/8 system, I did the following:

- Install VisualStudio 2013 Professional

- Install CMake from: http://www.cmake.org/cmake/resources/software.html

  - I let it add itself to the system PATH environment variable.

- Install Python 3 from: https://www.python.org/downloads

  - I let it add itself to the system PATH environment variable.

- Obtain the Git repository, checkout the "ian-150127-WinBuild" branch.

- Using a Cygwin shell, I did the following:

  - "cd" to the top-level directory (i.e. the one that contains the ".git"
    directory).

  - "mkdir _out64"

  - "cd _out64"

  - "cmake -G "Visual Studio 12 Win64" .."

- At this point, I used WindowsExplorer to open the "XGL.sln" file.  I can
  build.  CMake causes the build shortcut to be "Ctrl-Shift-B" instead of the
  normal "F7".  I had to right-click the "ALL_BUILD" project, go to
  Properties->Debugging and change the debug Command and Working Directory to
  point to "tri.exe" and where the executables are.  At this point, I can debug
  (using the normal "F5" shortcut).
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b14daee..4e11532 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,24 +22,30 @@
     endif()
 endif()
 
-# Hard code our LunarGLASS and glslang paths for now
-get_filename_component(GLSLANG_PREFIX ../glslang ABSOLUTE)
-get_filename_component(LUNARGLASS_PREFIX ../LunarGLASS ABSOLUTE)
+if (NOT WIN32)
+    # Hard code our LunarGLASS and glslang paths for now
+    get_filename_component(GLSLANG_PREFIX ../glslang ABSOLUTE)
+    get_filename_component(LUNARGLASS_PREFIX ../LunarGLASS ABSOLUTE)
 
-if(NOT EXISTS ${GLSLANG_PREFIX})
-    message(FATAL_ERROR "Necessary glslang components do not exist: " ${GLSLANG_PREFIX})
-endif()
+    if(NOT EXISTS ${GLSLANG_PREFIX})
+        message(FATAL_ERROR "Necessary glslang components do not exist: " ${GLSLANG_PREFIX})
+    endif()
 
-if(NOT EXISTS ${LUNARGLASS_PREFIX})
-    message(FATAL_ERROR "Necessary LunarGLASS components do not exist: " ${GLSLANG_PREFIX})
+    if(NOT EXISTS "${LUNARGLASS_PREFIX}")
+        message(FATAL_ERROR "Necessary LunarGLASS components do not exist: " ${LUNARGLASS_PREFIX})
+    endif()
 endif()
 
 # loader: Generic XGL ICD loader
 # icd: Device dependent (DD) XGL components
 # tests: XGL tests
 add_subdirectory(loader)
-add_subdirectory(icd)
-add_subdirectory(tests)
-add_subdirectory(layers)
-add_subdirectory(demos)
-add_subdirectory(tools/glave)
+if (NOT WIN32)
+    add_subdirectory(icd)
+    add_subdirectory(tests)
+endif()
+add_subdirectory(layers)
+if (NOT WIN32)
+    add_subdirectory(demos)
+endif()
+add_subdirectory(tools/glave)
diff --git a/include/xglLayer.h b/include/xglLayer.h
index 97a2d4f..fc19257 100644
--- a/include/xglLayer.h
+++ b/include/xglLayer.h
@@ -6,7 +6,11 @@
 
 #include "xgl.h"
 #include "xglDbg.h"
+#if defined(_WIN32)
+#else // WIN32
+// FIXME: NEED WINDOWS EQUIVALENT
 #include "xglWsiX11Ext.h"
+#endif // WIN32
 #if defined(__GNUC__) && __GNUC__ >= 4
 #  define XGL_LAYER_EXPORT __attribute__((visibility("default")))
 #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
@@ -147,10 +151,14 @@
     xglDbgSetDeviceOptionType DbgSetDeviceOption;
     xglCmdDbgMarkerBeginType CmdDbgMarkerBegin;
     xglCmdDbgMarkerEndType CmdDbgMarkerEnd;
+#if defined(_WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+#else // WIN32
     xglWsiX11AssociateConnectionType WsiX11AssociateConnection;
     xglWsiX11GetMSCType WsiX11GetMSC;
     xglWsiX11CreatePresentableImageType WsiX11CreatePresentableImage;
     xglWsiX11QueuePresentType WsiX11QueuePresent;
+#endif // WIN32
 } XGL_LAYER_DISPATCH_TABLE;
 
 // LL node for tree of dbg callback functions
diff --git a/layers/CMakeLists.txt b/layers/CMakeLists.txt
index 3971ce5..4693d4e 100644
--- a/layers/CMakeLists.txt
+++ b/layers/CMakeLists.txt
@@ -14,13 +14,27 @@
 	)
 endmacro()
 
-macro(add_xgl_layer target)
+if (WIN32)
+    macro(add_xgl_layer target)
+	add_custom_command(OUTPUT XGLLayer${target}.def
+		COMMAND ${PROJECT_SOURCE_DIR}/xgl-win-def-file-generate.py ${target} > XGLLayer${target}.def
+		DEPENDS ${PROJECT_SOURCE_DIR}/xgl-win-def-file-generate.py
+	)
+	# The .def file is a source of the library, which is what ties it to the custom command above.
+	add_library(XGLLayer${target} SHARED ${ARGN} XGLLayer${target}.def)
+	add_dependencies(XGLLayer${target} generate_xgl_layer_helpers)
+	set_target_properties(XGLLayer${target} PROPERTIES LINK_FLAGS "/DEF:${CMAKE_CURRENT_BINARY_DIR}/XGLLayer${target}.def")
+    endmacro()
+else()
+    macro(add_xgl_layer target)
 	add_library(XGLLayer${target} SHARED ${ARGN})
 	add_dependencies(XGLLayer${target} generate_xgl_layer_helpers)
-endmacro()
+    endmacro()
+endif()
 
 include_directories(
 	${CMAKE_CURRENT_SOURCE_DIR}
+	${CMAKE_CURRENT_SOURCE_DIR}/../loader
 	${CMAKE_CURRENT_BINARY_DIR}
 )
 
diff --git a/layers/basic.cpp b/layers/basic.cpp
index ebbc45b..ac8cf80 100644
--- a/layers/basic.cpp
+++ b/layers/basic.cpp
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <unordered_map>
+#include "loader_platform.h"
 #include "xgl_dispatch_table_helper.h"
 #include "xglLayer.h"
 
diff --git a/layers/draw_state.c b/layers/draw_state.c
index 53e3c46..5e6e6fe 100644
--- a/layers/draw_state.c
+++ b/layers/draw_state.c
@@ -25,9 +25,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>
-#include <pthread.h>
-#include <unistd.h>
+#include "loader_platform.h"
 #include "xgl_dispatch_table_helper.h"
 #include "xgl_generic_intercept_proc_helper.h"
 #include "xgl_struct_string_helper.h"
@@ -37,9 +35,9 @@
 
 static XGL_LAYER_DISPATCH_TABLE nextTable;
 static XGL_BASE_LAYER_OBJECT *pCurObj;
-static pthread_once_t g_initOnce = PTHREAD_ONCE_INIT;
-// Could be smarter about locking with unique locks for various tasks, but just using one for now
-pthread_mutex_t globalLock = PTHREAD_MUTEX_INITIALIZER;
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_initOnce);
+static int globalLockInitialized = 0;
+static loader_platform_thread_mutex globalLock;
 
 // Ptr to LL of dbg functions
 static XGL_LAYER_DBG_FUNCTION_NODE *g_pDbgFunctionHead = NULL;
@@ -267,7 +265,7 @@
 
 static void insertDynamicState(const XGL_DYNAMIC_STATE_OBJECT state, const GENERIC_HEADER* pCreateInfo, XGL_STATE_BIND_POINT bindPoint)
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     // Insert new node at head of appropriate LL
     DYNAMIC_STATE_NODE* pStateNode = (DYNAMIC_STATE_NODE*)malloc(sizeof(DYNAMIC_STATE_NODE));
     pStateNode->pNext = g_pDynamicStateHead[bindPoint];
@@ -275,13 +273,13 @@
     pStateNode->stateObj = state;
     pStateNode->pCreateInfo = (GENERIC_HEADER*)malloc(dynStateCreateInfoSize(pCreateInfo->sType));
     memcpy(pStateNode->pCreateInfo, pCreateInfo, dynStateCreateInfoSize(pCreateInfo->sType));
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
 }
 // Set the last bound dynamic state of given type
 // TODO : Need to track this per cmdBuffer and correlate cmdBuffer for Draw w/ last bound for that cmdBuffer?
 static void setLastBoundDynamicState(const XGL_DYNAMIC_STATE_OBJECT state, const XGL_STATE_BIND_POINT sType)
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     DYNAMIC_STATE_NODE* pTrav = g_pDynamicStateHead[sType];
     while (pTrav && (state != pTrav->stateObj)) {
         pTrav = pTrav->pNext;
@@ -292,12 +290,12 @@
         layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, state, 0, DRAWSTATE_INVALID_DYNAMIC_STATE_OBJECT, "DS", str);
     }
     g_pLastBoundDynamicState[sType] = pTrav;
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
 }
 // Print the last bound dynamic state
 static void printDynamicState()
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     char str[1024];
     for (uint32_t i = 0; i < XGL_NUM_STATE_BIND_POINT; i++) {
         if (g_pLastBoundDynamicState[i]) {
@@ -318,37 +316,37 @@
             layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, NULL, 0, DRAWSTATE_NONE, "DS", str);
         }
     }
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
 }
 // Retrieve pipeline node ptr for given pipeline object
 static PIPELINE_NODE *getPipeline(XGL_PIPELINE pipeline)
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     PIPELINE_NODE *pTrav = g_pPipelineHead;
     while (pTrav) {
         if (pTrav->pipeline == pipeline) {
-            pthread_mutex_unlock(&globalLock);
+            loader_platform_thread_unlock_mutex(&globalLock);
             return pTrav;
         }
         pTrav = pTrav->pNext;
     }
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
     return NULL;
 }
 
 // For given sampler, return a ptr to its Create Info struct, or NULL if sampler not found
 static XGL_SAMPLER_CREATE_INFO* getSamplerCreateInfo(const XGL_SAMPLER sampler)
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     SAMPLER_NODE *pTrav = g_pSamplerHead;
     while (pTrav) {
         if (sampler == pTrav->sampler) {
-            pthread_mutex_unlock(&globalLock);
+            loader_platform_thread_unlock_mutex(&globalLock);
             return &pTrav->createInfo;
         }
         pTrav = pTrav->pNext;
     }
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
     return NULL;
 }
 
@@ -704,16 +702,16 @@
 /*
 static DS_LL_HEAD* getDS(XGL_DESCRIPTOR_SET ds)
 {
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     DS_LL_HEAD *pTrav = pDSHead;
     while (pTrav) {
         if (pTrav->dsID == ds) {
-            pthread_mutex_unlock(&globalLock);
+            loader_platform_thread_unlock_mutex(&globalLock);
             return pTrav;
         }
         pTrav = pTrav->pNextDS;
     }
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
     return NULL;
 }
 
@@ -733,16 +731,16 @@
 static bool32_t clearDS(XGL_DESCRIPTOR_SET descriptorSet, uint32_t startSlot, uint32_t slotCount)
 {
     DS_LL_HEAD *pTrav = getDS(descriptorSet);
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     if (!pTrav || ((startSlot + slotCount) > pTrav->numSlots)) {
         // TODO : Log more meaningful error here
-        pthread_mutex_unlock(&globalLock);
+        loader_platform_thread_unlock_mutex(&globalLock);
         return XGL_FALSE;
     }
     for (uint32_t i = startSlot; i < slotCount; i++) {
         memset((void*)&pTrav->dsSlot[i], 0, sizeof(DS_SLOT));
     }
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
     return XGL_TRUE;
 }
 
@@ -1004,9 +1002,13 @@
         autoDumpOnce = 0;
         dumpDotFile("pipeline_dump.dot");
         // Convert dot to png if dot available
+#if defined(_WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+#else // WIN32
         if(access( "/usr/bin/dot", X_OK) != -1) {
             system("/usr/bin/dot pipeline_dump.dot -Tpng -o pipeline_dump.png");
         }
+#endif // WIN32
     }
 }
 
@@ -1042,6 +1044,17 @@
 
     xglGetProcAddrType fpGetProcAddr = fpNextGPA((XGL_PHYSICAL_GPU) pCurObj->nextObject, (char *) "xglGetProcAddr");
     nextTable.GetProcAddr = fpGetProcAddr;
+
+    if (!globalLockInitialized)
+    {
+        // TODO/TBD: Need to delete this mutex sometime.  How???  One
+        // suggestion is to call this during xglCreateInstance(), and then we
+        // can clean it up during xglDestroyInstance().  However, that requires
+        // that the layer have per-instance locks.  We need to come back and
+        // address this soon.
+        loader_platform_thread_create_mutex(&globalLock);
+        globalLockInitialized = 1;
+    }
 }
 
 XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglCreateInstance(const XGL_APPLICATION_INFO* pAppInfo, const XGL_ALLOC_CALLBACKS* pAllocCb, XGL_INSTANCE* pInstance)
@@ -1066,7 +1079,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
     XGL_RESULT result = nextTable.GetGpuInfo((XGL_PHYSICAL_GPU)gpuw->nextObject, infoType, pDataSize, pData);
     return result;
 }
@@ -1075,7 +1088,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
     XGL_RESULT result = nextTable.CreateDevice((XGL_PHYSICAL_GPU)gpuw->nextObject, pCreateInfo, pDevice);
     return result;
 }
@@ -1090,7 +1103,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
     XGL_RESULT result = nextTable.GetExtensionSupport((XGL_PHYSICAL_GPU)gpuw->nextObject, pExtName);
     return result;
 }
@@ -1101,7 +1114,7 @@
     {
         XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
         pCurObj = gpuw;
-        pthread_once(&g_initOnce, initDrawState);
+        loader_platform_thread_once(&g_initOnce, initDrawState);
         XGL_RESULT result = nextTable.EnumerateLayers((XGL_PHYSICAL_GPU)gpuw->nextObject, maxLayerCount, maxStringSize, pOutLayerCount, pOutLayers, pReserved);
         return result;
     } else
@@ -1185,7 +1198,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu0;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
     XGL_RESULT result = nextTable.GetMultiGpuCompatibility((XGL_PHYSICAL_GPU)gpuw->nextObject, gpu1, pInfo);
     return result;
 }
@@ -1407,7 +1420,7 @@
     char str[1024];
     sprintf(str, "Created Gfx Pipeline %p", (void*)*pPipeline);
     layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, pPipeline, 0, DRAWSTATE_NONE, "DS", str);
-    pthread_mutex_lock(&globalLock);
+    loader_platform_thread_lock_mutex(&globalLock);
     PIPELINE_NODE *pTrav = g_pPipelineHead;
     if (pTrav) {
         while (pTrav->pNext)
@@ -1422,7 +1435,7 @@
     memset((void*)pTrav, 0, sizeof(PIPELINE_NODE));
     pTrav->pipeline = *pPipeline;
     initPipeline(pTrav, pCreateInfo);
-    pthread_mutex_unlock(&globalLock);
+    loader_platform_thread_unlock_mutex(&globalLock);
     return result;
 }
 
@@ -1454,13 +1467,13 @@
 {
     XGL_RESULT result = nextTable.CreateSampler(device, pCreateInfo, pSampler);
     if (XGL_SUCCESS == result) {
-        pthread_mutex_lock(&globalLock);
+        loader_platform_thread_lock_mutex(&globalLock);
         SAMPLER_NODE *pNewNode = (SAMPLER_NODE*)malloc(sizeof(SAMPLER_NODE));
         pNewNode->sampler = *pSampler;
         memcpy(&pNewNode->createInfo, pCreateInfo, sizeof(XGL_SAMPLER_CREATE_INFO));
         pNewNode->pNext = g_pSamplerHead;
         g_pSamplerHead = pNewNode;
-        pthread_mutex_unlock(&globalLock);
+        loader_platform_thread_unlock_mutex(&globalLock);
     }
     return result;
 }
@@ -1743,10 +1756,10 @@
             sprintf(str, "You must call xglEndDescriptorSetUpdate(%p) before this call to xglCmdBindDescriptorSet()!", (void*)descriptorSet);
             layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, descriptorSet, 0, DRAWSTATE_BINDING_DS_NO_END_UPDATE, "DS", str);
         }
-        pthread_mutex_lock(&globalLock);
+        loader_platform_thread_lock_mutex(&globalLock);
         lastBoundDS[index] = descriptorSet;
         lastBoundSlotOffset[index] = slotOffset;
-        pthread_mutex_unlock(&globalLock);
+        loader_platform_thread_unlock_mutex(&globalLock);
         char str[1024];
         sprintf(str, "DS %p bound to DS index %u on pipeline %s", (void*)descriptorSet, index, string_XGL_PIPELINE_BIND_POINT(pipelineBindPoint));
         layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, descriptorSet, 0, DRAWSTATE_NONE, "DS", str);
@@ -2037,11 +2050,14 @@
     nextTable.CmdDbgMarkerEnd(cmdBuffer);
 }
 
+#if defined(_WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+#else // WIN32
 XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglWsiX11AssociateConnection(XGL_PHYSICAL_GPU gpu, const XGL_WSI_X11_CONNECTION_INFO* pConnectionInfo)
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
     XGL_RESULT result = nextTable.WsiX11AssociateConnection((XGL_PHYSICAL_GPU)gpuw->nextObject, pConnectionInfo);
     return result;
 }
@@ -2063,6 +2079,7 @@
     XGL_RESULT result = nextTable.WsiX11QueuePresent(queue, pPresentInfo, fence);
     return result;
 }
+#endif // WIN32
 
 void drawStateDumpDotFile(char* outFileName)
 {
@@ -2071,6 +2088,12 @@
 
 void drawStateDumpPngFile(char* outFileName)
 {
+#if defined(_WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+        char str[1024];
+        sprintf(str, "Cannot execute dot program yet on Windows.");
+        layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, NULL, 0, DRAWSTATE_MISSING_DOT_PROGRAM, "DS", str);
+#else // WIN32
     char dotExe[32] = "/usr/bin/dot";
     if( access(dotExe, X_OK) != -1) {
         dumpDotFile("/tmp/tmp.dot");
@@ -2084,6 +2107,7 @@
         sprintf(str, "Cannot execute dot program at (%s) to dump requested %s file.", dotExe, outFileName);
         layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, NULL, 0, DRAWSTATE_MISSING_DOT_PROGRAM, "DS", str);
     }
+#endif // WIN32
 }
 
 XGL_LAYER_EXPORT void* XGLAPI xglGetProcAddr(XGL_PHYSICAL_GPU gpu, const char* funcName)
@@ -2094,7 +2118,7 @@
     if (gpu == NULL)
         return NULL;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_initOnce, initDrawState);
 
     addr = layer_intercept_proc(funcName);
     if (addr)
diff --git a/layers/mem_tracker.c b/layers/mem_tracker.c
index a1ad749..0165cb0 100644
--- a/layers/mem_tracker.c
+++ b/layers/mem_tracker.c
@@ -26,7 +26,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
-#include <pthread.h>
+#include "loader_platform.h"
 #include "xgl_dispatch_table_helper.h"
 #include "xgl_generic_intercept_proc_helper.h"
 #include "xgl_struct_string_helper.h"
@@ -35,7 +35,7 @@
 
 static XGL_LAYER_DISPATCH_TABLE nextTable;
 static XGL_BASE_LAYER_OBJECT *pCurObj;
-static pthread_once_t g_initOnce = PTHREAD_ONCE_INIT;
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_initOnce);
 
 // Ptr to LL of dbg functions
 static XGL_LAYER_DBG_FUNCTION_NODE *g_pDbgFunctionHead = NULL;
@@ -414,8 +414,9 @@
     }
 
     XGL_RESULT result = insertMiniNode(&pMemTrav->pCmdBufferBindings, cb, &pMemTrav->refCount);
-    if (XGL_SUCCESS != result)
+    if (XGL_SUCCESS != result) {
         return result;
+    }
 
     // Now update Global CB's Mini Mem binding list
     GLOBAL_CB_NODE* pCBTrav = getGlobalCBNode(cb);
@@ -483,8 +484,9 @@
 // TODO : When should this be called?  There's no Destroy of CBs that I see
 static bool32_t deleteGlobalCBNode(const XGL_CMD_BUFFER cb)
 {
-    if (XGL_FALSE == freeCBBindings(cb))
+    if (XGL_FALSE == freeCBBindings(cb)) {
         return XGL_FALSE;
+    }
     // Delete the Global CB node
     GLOBAL_CB_NODE* pCBTrav = getGlobalCBNode(cb);
     pCBTrav = pGlobalCBHead;
@@ -990,7 +992,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
     XGL_RESULT result = nextTable.GetGpuInfo((XGL_PHYSICAL_GPU)gpuw->nextObject, infoType, pDataSize, pData);
     return result;
 }
@@ -999,7 +1001,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
     XGL_RESULT result = nextTable.CreateDevice((XGL_PHYSICAL_GPU)gpuw->nextObject, pCreateInfo, pDevice);
     // Save off device in case we need it to create Fences
     globalDevice = *pDevice;
@@ -1033,7 +1035,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
     XGL_RESULT result = nextTable.GetExtensionSupport((XGL_PHYSICAL_GPU)gpuw->nextObject, pExtName);
     return result;
 }
@@ -1044,7 +1046,7 @@
     {
         XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
         pCurObj = gpuw;
-        pthread_once(&g_initOnce, initMemTracker);
+        loader_platform_thread_once(&g_initOnce, initMemTracker);
         XGL_RESULT result = nextTable.EnumerateLayers((XGL_PHYSICAL_GPU)gpuw->nextObject, maxLayerCount, maxStringSize, pOutLayerCount, pOutLayers, pReserved);
         return result;
     } else
@@ -1171,7 +1173,7 @@
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu0;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
     XGL_RESULT result = nextTable.GetMultiGpuCompatibility((XGL_PHYSICAL_GPU)gpuw->nextObject, gpu1, pInfo);
     return result;
 }
@@ -2085,11 +2087,14 @@
     nextTable.CmdDbgMarkerEnd(cmdBuffer);
 }
 
+#if defined(_WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+#else // WIN32
 XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglWsiX11AssociateConnection(XGL_PHYSICAL_GPU gpu, const XGL_WSI_X11_CONNECTION_INFO* pConnectionInfo)
 {
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
     XGL_RESULT result = nextTable.WsiX11AssociateConnection((XGL_PHYSICAL_GPU)gpuw->nextObject, pConnectionInfo);
     return result;
 }
@@ -2123,6 +2128,7 @@
     XGL_RESULT result = nextTable.WsiX11QueuePresent(queue, pPresentInfo, fence);
     return result;
 }
+#endif // WIN32
 
 XGL_LAYER_EXPORT void* XGLAPI xglGetProcAddr(XGL_PHYSICAL_GPU gpu, const char* funcName)
 {
@@ -2132,7 +2138,7 @@
     if (gpu == NULL)
         return NULL;
     pCurObj = gpuw;
-    pthread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
 
     addr = layer_intercept_proc(funcName);
     if (addr)
diff --git a/layers/multi.cpp b/layers/multi.cpp
index 400ef5d..9551492 100644
--- a/layers/multi.cpp
+++ b/layers/multi.cpp
@@ -27,6 +27,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <unordered_map>
+#include "loader_platform.h"
 #include "xgl_dispatch_table_helper.h"
 #include "xglLayer.h"
 
diff --git a/loader/CMakeLists.txt b/loader/CMakeLists.txt
index b2250ad..13a4b5b 100644
--- a/loader/CMakeLists.txt
+++ b/loader/CMakeLists.txt
@@ -13,6 +13,11 @@
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DXGL_PROTOTYPES")
 
-add_library(XGL SHARED loader.c dispatch.c table_ops.h)
+# On Windows the loader compiles in its own POSIX dirent implementation.
+if (WIN32)
+    add_library(XGL SHARED loader.c dirent_on_windows.c dispatch.c table_ops.h)
+else()
+    add_library(XGL SHARED loader.c dispatch.c table_ops.h)
+endif()
 set_target_properties(XGL PROPERTIES SOVERSION 0)
 target_link_libraries(XGL -ldl -lpthread)
diff --git a/loader/dirent_on_windows.c b/loader/dirent_on_windows.c
new file mode 100644
index 0000000..3564a26
--- /dev/null
+++ b/loader/dirent_on_windows.c
@@ -0,0 +1,148 @@
+/*
+
+    Implementation of POSIX directory browsing functions and types for Win32.
+
+    Author:  Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
+    History: Created March 1997. Updated June 2003 and July 2012.
+    Rights:  See end of file.
+
+*/
+
+#include <dirent_on_windows.h>
+#include <errno.h>
+#include <io.h> /* _findfirst and _findnext set errno iff they return -1 */
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+typedef ptrdiff_t handle_type; /* C99's intptr_t not sufficiently portable */
+
+struct DIR
+{
+    handle_type         handle; /* -1 for failed rewind */
+    struct _finddata_t  info;
+    struct dirent       result; /* d_name null iff first time */
+    char                *name;  /* null-terminated char string */
+};
+
+DIR *opendir(const char *name)
+{
+    DIR *dir = 0;
+
+    if(name && name[0])
+    {
+        size_t base_length = strlen(name);
+        const char *all = /* search pattern must end with suitable wildcard */
+            strchr("/\\", name[base_length - 1]) ? "*" : "/*";
+
+        if((dir = (DIR *) malloc(sizeof *dir)) != 0 &&
+           (dir->name = (char *) malloc(base_length + strlen(all) + 1)) != 0)
+        {
+            strcat(strcpy(dir->name, name), all);
+
+            if((dir->handle =
+                (handle_type) _findfirst(dir->name, &dir->info)) != -1)
+            {
+                dir->result.d_name = 0;
+            }
+            else /* rollback */
+            {
+                free(dir->name);
+                free(dir);
+                dir = 0;
+            }
+        }
+        else /* rollback */
+        {
+            free(dir);
+            dir   = 0;
+            errno = ENOMEM;
+        }
+    }
+    else
+    {
+        errno = EINVAL;
+    }
+
+    return dir;
+}
+
+int closedir(DIR *dir)
+{
+    int result = -1;
+
+    if(dir)
+    {
+        if(dir->handle != -1)
+        {
+            result = _findclose(dir->handle);
+        }
+
+        free(dir->name);
+        free(dir);
+    }
+
+    if(result == -1) /* map all errors to EBADF */
+    {
+        errno = EBADF;
+    }
+
+    return result;
+}
+
+struct dirent *readdir(DIR *dir)
+{
+    struct dirent *result = 0;
+
+    if(dir && dir->handle != -1)
+    {
+        if(!dir->result.d_name || _findnext(dir->handle, &dir->info) != -1)
+        {
+            result         = &dir->result;
+            result->d_name = dir->info.name;
+        }
+    }
+    else
+    {
+        errno = EBADF;
+    }
+
+    return result;
+}
+
+void rewinddir(DIR *dir)
+{
+    if(dir && dir->handle != -1)
+    {
+        _findclose(dir->handle);
+        dir->handle = (handle_type) _findfirst(dir->name, &dir->info);
+        dir->result.d_name = 0;
+    }
+    else
+    {
+        errno = EBADF;
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+
+    Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved.
+
+    Permission to use, copy, modify, and distribute this software and its
+    documentation for any purpose is hereby granted without fee, provided
+    that this copyright and permissions notice appear in all copies and
+    derivatives.
+    
+    This software is supplied "as is" without express or implied warranty.
+
+    But that said, if there are any problems please get in touch.
+
+*/
diff --git a/loader/dirent_on_windows.h b/loader/dirent_on_windows.h
new file mode 100644
index 0000000..a02a0d8
--- /dev/null
+++ b/loader/dirent_on_windows.h
@@ -0,0 +1,50 @@
+#ifndef DIRENT_INCLUDED
+#define DIRENT_INCLUDED
+
+/*
+
+    Declaration of POSIX directory browsing functions and types for Win32.
+
+    Author:  Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
+    History: Created March 1997. Updated June 2003.
+    Rights:  See end of file.
+    
+*/
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+typedef struct DIR DIR;
+
+struct dirent
+{
+    char *d_name;
+};
+
+DIR           *opendir(const char *);
+int           closedir(DIR *);
+struct dirent *readdir(DIR *);
+void          rewinddir(DIR *);
+
+/*
+
+    Copyright Kevlin Henney, 1997, 2003. All rights reserved.
+
+    Permission to use, copy, modify, and distribute this software and its
+    documentation for any purpose is hereby granted without fee, provided
+    that this copyright and permissions notice appear in all copies and
+    derivatives.
+    
+    This software is supplied "as is" without express or implied warranty.
+
+    But that said, if there are any problems please get in touch.
+
+*/
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/loader/loader.c b/loader/loader.c
index 0aa2940..c4cdce0 100644
--- a/loader/loader.c
+++ b/loader/loader.c
@@ -34,11 +34,12 @@
 #include <string.h>
 
 #include <sys/types.h>
+#if defined(_WIN32)
+#include "dirent_on_windows.h"
+#else // WIN32
 #include <dirent.h>
-#include <unistd.h>
-#include <dlfcn.h>
-#include <pthread.h>
-#include <assert.h>
+#endif // WIN32
+#include "loader_platform.h"
 #include "table_ops.h"
 #include "loader.h"
 
@@ -48,7 +49,7 @@
 };
 
 struct loader_layers {
-    void *lib_handle;
+    loader_platform_dl_handle lib_handle;
     char name[256];
 };
 
@@ -79,7 +80,7 @@
 };
 
 struct loader_scanned_icds {
-    void *handle;
+    loader_platform_dl_handle handle;
     xglGetProcAddrType GetProcAddr;
     xglCreateInstanceType CreateInstance;
     xglDestroyInstanceType DestroyInstance;
@@ -105,6 +106,7 @@
     bool break_on_warning;
 } loader;
 
+
 static XGL_RESULT loader_msg_callback_add(XGL_DBG_MSG_CALLBACK_FUNCTION func,
                                           void *data)
 {
@@ -206,7 +208,7 @@
 static void
 loader_icd_destroy(struct loader_icd *icd)
 {
-    dlclose(icd->scanned_icds->handle);
+    loader_platform_close_library(icd->scanned_icds->handle);
     free(icd);
 }
 
@@ -296,20 +298,21 @@
 
 static void loader_scanned_icd_add(const char *filename)
 {
-    void *handle;
+    loader_platform_dl_handle handle;
     void *fp_gpa, *fp_enumerate, *fp_create_inst, *fp_destroy_inst;
     struct loader_scanned_icds *new_node;
 
-    handle = dlopen(filename, RTLD_LAZY);
+    // Used to call: dlopen(filename, RTLD_LAZY);
+    handle = loader_platform_open_library(filename);
     if (!handle) {
-        loader_log(XGL_DBG_MSG_WARNING, 0, dlerror());
+        loader_log(XGL_DBG_MSG_WARNING, 0, loader_platform_open_library_error(filename));
         return;
     }
 
 #define LOOKUP(func_ptr, func) do {                            \
-    func_ptr = (xgl ##func## Type) dlsym(handle, "xgl" #func); \
+    func_ptr = (xgl ##func## Type) loader_platform_get_proc_address(handle, "xgl" #func); \
     if (!func_ptr) {                                           \
-        loader_log(XGL_DBG_MSG_WARNING, 0, dlerror());         \
+        loader_log(XGL_DBG_MSG_WARNING, 0, loader_platform_get_proc_address_error("xgl" #func)); \
         return;                                                \
     }                                                          \
 } while (0)
@@ -335,12 +338,50 @@
     loader.scanned_icd_list = new_node;
 }
 
+#if defined(WIN32)
+// NOTE: The library prefix/suffix macros are defined outside of the "#ifndef DEFAULT_XGL_DRIVERS_PATH" block, so that overriding that path does not leave them undefined.
+#define PATH_SEPERATOR ';'
+#define DIRECTORY_SYMBOL "\\"
+#ifndef DEFAULT_XGL_DRIVERS_PATH
+// TODO: Is this a good default location?
+// Need to search for both 32bit and 64bit ICDs
+#define DEFAULT_XGL_DRIVERS_PATH "C:\\Windows\\System32"
+#endif //  DEFAULT_XGL_DRIVERS_PATH
+// TODO/TBD: Is this an appropriate prefix for Windows?
+#define XGL_DRIVER_LIBRARY_PREFIX "XGL_"
+#define XGL_DRIVER_LIBRARY_PREFIX_LEN 4
+// TODO/TBD: Are these an appropriate layer prefix and library suffix for Windows?
+#define XGL_LAYER_LIBRARY_PREFIX "XGLLayer"
+#define XGL_LAYER_LIBRARY_PREFIX_LEN 8
+#define XGL_LIBRARY_SUFFIX ".dll"
+#define XGL_LIBRARY_SUFFIX_LEN 4
+#ifndef DEFAULT_XGL_LAYERS_PATH
+// TODO: Is this a good default location?
+#define DEFAULT_XGL_LAYERS_PATH "C:\\Windows\\System32"
+#endif //  DEFAULT_XGL_LAYERS_PATH
+
+#else // WIN32
+// NOTE: As above, only the default paths are overridable; the prefix/suffix macros are always defined.
+#define PATH_SEPERATOR ':'
+#define DIRECTORY_SYMBOL "/"
 #ifndef DEFAULT_XGL_DRIVERS_PATH
 // TODO: Is this a good default location?
 // Need to search for both 32bit and 64bit ICDs
 #define DEFAULT_XGL_DRIVERS_PATH "/usr/lib/i386-linux-gnu/xgl:/usr/lib/x86_64-linux-gnu/xgl"
+#endif //  DEFAULT_XGL_DRIVERS_PATH
+#define XGL_DRIVER_LIBRARY_PREFIX "libXGL_"
+#define XGL_DRIVER_LIBRARY_PREFIX_LEN 7
+#define XGL_LAYER_LIBRARY_PREFIX "libXGLLayer"
+#define XGL_LAYER_LIBRARY_PREFIX_LEN 11
+#define XGL_LIBRARY_SUFFIX ".so"
+#define XGL_LIBRARY_SUFFIX_LEN 3
+#ifndef DEFAULT_XGL_LAYERS_PATH
+// TODO: Are these good default locations?
+#define DEFAULT_XGL_LAYERS_PATH ".:/usr/lib/i386-linux-gnu/xgl:/usr/lib/x86_64-linux-gnu/xgl"
 #endif
 
+#endif // WIN32
+
 /**
  * Try to \c loader_icd_scan XGL driver(s).
  *
@@ -361,15 +402,19 @@
     int len;
 
     libPaths = NULL;
+#if !defined(WIN32)
     if (geteuid() == getuid()) {
-       /* don't allow setuid apps to use LIBXGL_DRIVERS_PATH */
+       /* Don't allow setuid apps to use LIBXGL_DRIVERS_PATH */
+#endif // WIN32
        libPaths = getenv("LIBXGL_DRIVERS_PATH");
+#if !defined(WIN32)
     }
+#endif // WIN32
     if (libPaths == NULL)
        libPaths = DEFAULT_XGL_DRIVERS_PATH;
 
     for (p = libPaths; *p; p = next) {
-       next = strchr(p, ':');
+       next = strchr(p, PATH_SEPERATOR);
        if (next == NULL) {
           len = strlen(p);
           next = p + len;
@@ -381,16 +426,28 @@
           next++;
        }
 
+       // TODO/TBD: Do we want to do this on Windows, or just let Windows take
+       // care of its own search path (which it apparently has)?
        sysdir = opendir(p);
        if (sysdir) {
           dent = readdir(sysdir);
           while (dent) {
-             /* look for ICDs starting with "libXGL_" */
-             if (!strncmp(dent->d_name, "libXGL_", 7)) {
-                snprintf(icd_library, 1024, "%s/%s",p,dent->d_name);
-
-                loader_scanned_icd_add(icd_library);
-             }
+             /* Look for ICDs starting with XGL_DRIVER_LIBRARY_PREFIX and
+              * ending with XGL_LIBRARY_SUFFIX
+              */
+              if (!strncmp(dent->d_name,
+                          XGL_DRIVER_LIBRARY_PREFIX,
+                          XGL_DRIVER_LIBRARY_PREFIX_LEN)) {
+                 int nlen = strlen(dent->d_name);
+                 const char *suf = dent->d_name + nlen - XGL_LIBRARY_SUFFIX_LEN;
+                 if ((nlen > XGL_LIBRARY_SUFFIX_LEN) &&
+                     !strncmp(suf,
+                              XGL_LIBRARY_SUFFIX,
+                              XGL_LIBRARY_SUFFIX_LEN)) {
+                    snprintf(icd_library, 1024, "%s" DIRECTORY_SYMBOL "%s", p,dent->d_name);
+                    loader_scanned_icd_add(icd_library);
+                 }
+              }
 
              dent = readdir(sysdir);
           }
@@ -398,14 +455,9 @@
        }
     }
 
-
     loader.icds_scanned = true;
 }
 
-#ifndef DEFAULT_XGL_LAYERS_PATH
-// TODO: Are these good default locations?
-#define DEFAULT_XGL_LAYERS_PATH ".:/usr/lib/i386-linux-gnu/xgl:/usr/lib/x86_64-linux-gnu/xgl"
-#endif
 
 static void layer_lib_scan_path(const char * libInPaths)
 {
@@ -423,11 +475,15 @@
         p = libInPaths;
     }
     else {
+#if !defined(WIN32)
         if (geteuid() == getuid()) {
+#endif // WIN32
             p = getenv("LIBXGL_LAYERS_PATH");
             if (p != NULL)
                 len = strlen(p);
+#if !defined(WIN32)
         }
+#endif // WIN32
     }
 
     if (len == 0) {
@@ -456,7 +512,7 @@
     loader.scanned_layer_count = 0;
 
     for (p = libPaths; *p; p = next) {
-       next = strchr(p, ':');
+       next = strchr(p, PATH_SEPERATOR);
        if (next == NULL) {
           len = strlen(p);
           next = p + len;
@@ -471,25 +527,37 @@
        if (curdir) {
           dent = readdir(curdir);
           while (dent) {
-             /* look for wrappers starting with "libXGLlayer" */
-             if (!strncmp(dent->d_name, "libXGLLayer", strlen("libXGLLayer"))) {
-                void * handle;
-                snprintf(temp_str, sizeof(temp_str), "%s/%s",p,dent->d_name);
-                if ((handle = dlopen(temp_str, RTLD_LAZY)) == NULL) {
-                    dent = readdir(curdir);
-                    continue;
-                }
-                if (loader.scanned_layer_count == MAX_LAYER_LIBRARIES) {
-                    loader_log(XGL_DBG_MSG_ERROR, 0, "%s ignored: max layer libraries exceed", temp_str);
-                    break;
-                }
-                if ((loader.scanned_layer_names[loader.scanned_layer_count] = malloc(strlen(temp_str) + 1)) == NULL) {
-                     loader_log(XGL_DBG_MSG_ERROR, 0, "%s ignored: out of memory", temp_str);
-                     break;
-                }
-                strcpy(loader.scanned_layer_names[loader.scanned_layer_count], temp_str);
-                loader.scanned_layer_count++;
-                dlclose(handle);
+             /* Look for layers starting with XGL_LAYER_LIBRARY_PREFIX and
+              * ending with XGL_LIBRARY_SUFFIX
+              */
+              if (!strncmp(dent->d_name,
+                          XGL_LAYER_LIBRARY_PREFIX,
+                          XGL_LAYER_LIBRARY_PREFIX_LEN)) {
+                 int nlen = strlen(dent->d_name);
+                 const char *suf = dent->d_name + nlen - XGL_LIBRARY_SUFFIX_LEN;
+                 if ((nlen > XGL_LIBRARY_SUFFIX_LEN) &&
+                     !strncmp(suf,
+                              XGL_LIBRARY_SUFFIX,
+                              XGL_LIBRARY_SUFFIX_LEN)) {
+                     loader_platform_dl_handle handle;
+                     snprintf(temp_str, sizeof(temp_str), "%s" DIRECTORY_SYMBOL "%s",p,dent->d_name);
+                     // Used to call: dlopen(temp_str, RTLD_LAZY)
+                     if ((handle = loader_platform_open_library(temp_str)) == NULL) {
+                         dent = readdir(curdir);
+                         continue;
+                     }
+                     loader_platform_close_library(handle); // The open is only a loadability probe; close now so that the "break"s below cannot leak the handle
+                     if (loader.scanned_layer_count == MAX_LAYER_LIBRARIES) {
+                         loader_log(XGL_DBG_MSG_ERROR, 0, "%s ignored: max layer libraries exceed", temp_str);
+                         break;
+                     }
+                     if ((loader.scanned_layer_names[loader.scanned_layer_count] = malloc(strlen(temp_str) + 1)) == NULL) {
+                         loader_log(XGL_DBG_MSG_ERROR, 0, "%s ignored: out of memory", temp_str);
+                         break;
+                     }
+                     strcpy(loader.scanned_layer_names[loader.scanned_layer_count], temp_str);
+                     loader.scanned_layer_count++;
+                 }
              }
 
              dent = readdir(curdir);
@@ -556,8 +624,9 @@
             obj = &(icd->layer_libs[gpu_index][i]);
             strncpy(obj->name, (char *) pLayerNames[i].layer_name, sizeof(obj->name) - 1);
             obj->name[sizeof(obj->name) - 1] = '\0';
-            if ((obj->lib_handle = dlopen(pLayerNames[i].lib_name, RTLD_LAZY | RTLD_DEEPBIND)) == NULL) {
-                loader_log(XGL_DBG_MSG_ERROR, 0, "Failed to open layer library %s got error %d", pLayerNames[i].lib_name, dlerror());
+            // Used to call: dlopen(pLayerNames[i].lib_name, RTLD_LAZY | RTLD_DEEPBIND)
+            if ((obj->lib_handle = loader_platform_open_library(pLayerNames[i].lib_name)) == NULL) {
+                loader_log(XGL_DBG_MSG_ERROR, 0, "%s", loader_platform_open_library_error(pLayerNames[i].lib_name)); // "%s": the message may embed the library path, so never use it as the format
                 continue;
             } else {
                 loader_log(XGL_DBG_MSG_UNKNOWN, 0, "Inserting layer %s from library %s", pLayerNames[i].layer_name, pLayerNames[i].lib_name);
@@ -570,7 +639,7 @@
 
 static bool find_layer_name(struct loader_icd *icd, uint32_t gpu_index, const char * layer_name, const char **lib_name)
 {
-    void *handle;
+    loader_platform_dl_handle handle;
     xglEnumerateLayersType fpEnumerateLayers;
     char layer_buf[16][256];
     char * layers[16];
@@ -580,14 +649,17 @@
 
     for (unsigned int j = 0; j < loader.scanned_layer_count; j++) {
         *lib_name = loader.scanned_layer_names[j];
-        if ((handle = dlopen(*lib_name, RTLD_LAZY)) == NULL)
+        // Used to call: dlopen(*lib_name, RTLD_LAZY)
+        if ((handle = loader_platform_open_library(*lib_name)) == NULL)
             continue;
-        if ((fpEnumerateLayers = dlsym(handle, "xglEnumerateLayers")) == NULL) {
-            //use default layer name based on library name libXGLLayer<name>.so
+        if ((fpEnumerateLayers = (xglEnumerateLayersType) loader_platform_get_proc_address(handle, "xglEnumerateLayers")) == NULL) {
             char * lib_str = malloc(strlen(*lib_name) + 1 + strlen(layer_name));
-            snprintf(lib_str, strlen(*lib_name) + strlen(layer_name), "libXGLLayer%s.so", layer_name);
-            dlclose(handle);
-            if (!strcmp(basename(*lib_name), lib_str)) {
+            //use default layer name based on library name XGL_LAYER_LIBRARY_PREFIX<name>XGL_LIBRARY_SUFFIX
+            snprintf(lib_str, strlen(*lib_name) + strlen(layer_name),
+                     XGL_LAYER_LIBRARY_PREFIX "%s" XGL_LIBRARY_SUFFIX,
+                     layer_name);
+            loader_platform_close_library(handle);
+            if (!strcmp(basename((char *) *lib_name), lib_str)) { // scanned names are full paths, so compare only the file name
                 free(lib_str);
                 return true;
             }
@@ -598,16 +670,16 @@
         }
         else {
             size_t cnt;
-            fpEnumerateLayers(NULL, 16, 256, &cnt, layers, (void *) icd->gpus + gpu_index);
+            fpEnumerateLayers(NULL, 16, 256, &cnt, layers, (char *) icd->gpus + gpu_index);
             for (unsigned int i = 0; i < cnt; i++) {
                 if (!strcmp((char *) layers[i], layer_name)) {
-                    dlclose(handle);
+                    loader_platform_close_library(handle);
                     return true;
                 }
             }
         }
 
-        dlclose(handle);
+        loader_platform_close_library(handle);
     }
 
     return false;
@@ -630,7 +702,7 @@
 
     while (p && *p && count < MAX_LAYER_LIBRARIES) {
         const char *lib_name = NULL;
-        next = strchr(p, ':');
+        next = strchr(p, PATH_SEPERATOR);
         if (next == NULL) {
             len = strlen(p);
             next = p + len;
@@ -717,7 +789,7 @@
                 for (uint32_t i = 0; i < icd->layer_count[j]; i++) {
                     libs = &(icd->layer_libs[j][i]);
                     if (libs->lib_handle)
-                        dlclose(libs->lib_handle);
+                        loader_platform_close_library(libs->lib_handle);
                     libs->lib_handle = NULL;
                 }
                 if (icd->wrappedGpus[j])
@@ -763,8 +835,8 @@
 
             char funcStr[256];
             snprintf(funcStr, 256, "%sGetProcAddr",icd->layer_libs[gpu_index][i].name);
-            if ((nextGPA = dlsym(icd->layer_libs[gpu_index][i].lib_handle, funcStr)) == NULL)
-                nextGPA = dlsym(icd->layer_libs[gpu_index][i].lib_handle, "xglGetProcAddr");
+            if ((nextGPA = (xglGetProcAddrType) loader_platform_get_proc_address(icd->layer_libs[gpu_index][i].lib_handle, funcStr)) == NULL)
+                nextGPA = (xglGetProcAddrType) loader_platform_get_proc_address(icd->layer_libs[gpu_index][i].lib_handle, "xglGetProcAddr");
             if (!nextGPA) {
                 loader_log(XGL_DBG_MSG_ERROR, 0, "Failed to find xglGetProcAddr in layer %s", icd->layer_libs[gpu_index][i].name);
                 continue;
@@ -803,17 +875,18 @@
         const XGL_ALLOC_CALLBACKS*                  pAllocCb,
         XGL_INSTANCE*                               pInstance)
 {
-    static pthread_once_t once_icd = PTHREAD_ONCE_INIT;
-    static pthread_once_t once_layer = PTHREAD_ONCE_INIT;
+    static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(once_icd);
+    static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(once_layer);
     struct loader_instance *ptr_instance = NULL;
     struct loader_scanned_icds *scanned_icds;
     struct loader_icd *icd;
     XGL_RESULT res;
 
-    pthread_once(&once_icd, loader_icd_scan);
+    /* Scan/discover all ICD libraries in a single-threaded manner */
+    loader_platform_thread_once(&once_icd, loader_icd_scan);
 
-    /* get layer libraries */
-    pthread_once(&once_layer, layer_lib_scan);
+    /* get layer libraries in a single-threaded manner */
+    loader_platform_thread_once(&once_layer, layer_lib_scan);
 
     ptr_instance = (struct loader_instance*) malloc(sizeof(struct loader_instance));
     if (ptr_instance == NULL) {
@@ -967,8 +1040,9 @@
 
 LOADER_EXPORT void * XGLAPI xglGetProcAddr(XGL_PHYSICAL_GPU gpu, const char * pName)
 {
-    if (gpu == NULL)
+    if (gpu == NULL) {
         return NULL;
+    }
     XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) gpu;
     XGL_LAYER_DISPATCH_TABLE * disp_table = * (XGL_LAYER_DISPATCH_TABLE **) gpuw->baseObject;
     void *addr;
@@ -992,7 +1066,7 @@
     uint32_t count = 0;
     char *lib_name;
     struct loader_icd *icd = loader_get_icd((const XGL_BASE_LAYER_OBJECT *) gpu, &gpu_index);
-    void *handle;
+    loader_platform_dl_handle handle;
     xglEnumerateLayersType fpEnumerateLayers;
     char layer_buf[16][256];
     char * layers[16];
@@ -1008,16 +1082,17 @@
 
     for (unsigned int j = 0; j < loader.scanned_layer_count && count < maxLayerCount; j++) {
         lib_name = loader.scanned_layer_names[j];
-        if ((handle = dlopen(lib_name, RTLD_LAZY)) == NULL)
+        // Used to call: dlopen(lib_name, RTLD_LAZY)
+        if ((handle = loader_platform_open_library(lib_name)) == NULL)
             continue;
-        if ((fpEnumerateLayers = dlsym(handle, "xglEnumerateLayers")) == NULL) {
-            //use default layer name based on library name libXGLLayer<name>.so
+        if ((fpEnumerateLayers = (xglEnumerateLayersType) loader_platform_get_proc_address(handle, "xglEnumerateLayers")) == NULL) {
+            //use default layer name based on library name XGL_LAYER_LIBRARY_PREFIX<name>XGL_LIBRARY_SUFFIX
             char *pEnd, *cpyStr;
             int siz;
-            dlclose(handle);
+            loader_platform_close_library(handle);
             lib_name = basename(lib_name);
             pEnd = strrchr(lib_name, '.');
-            siz = pEnd - lib_name - strlen("libXGLLayer") + 1;
+            siz = pEnd - lib_name - strlen(XGL_LAYER_LIBRARY_PREFIX) + 1;
             if (pEnd == NULL || siz <= 0)
                 continue;
             cpyStr = malloc(siz);
@@ -1025,7 +1100,7 @@
                 free(cpyStr);
                 continue;
             }
-            strncpy(cpyStr, lib_name + strlen("libXGLLayer"), siz);
+            strncpy(cpyStr, lib_name + strlen(XGL_LAYER_LIBRARY_PREFIX), siz);
             cpyStr[siz - 1] = '\0';
             if (siz > maxStringSize)
                 siz = maxStringSize;
@@ -1039,8 +1114,8 @@
             uint32_t n;
             XGL_RESULT res;
             n = (maxStringSize < 256) ? maxStringSize : 256;
-            res = fpEnumerateLayers(NULL, 16, n, &cnt, layers, (void *) icd->gpus + gpu_index);
-            dlclose(handle);
+            res = fpEnumerateLayers(NULL, 16, n, &cnt, layers, (char *) icd->gpus + gpu_index);
+            loader_platform_close_library(handle);
             if (res != XGL_SUCCESS)
                 continue;
             if (cnt + count > maxLayerCount)
diff --git a/loader/loader.h b/loader/loader.h
index 18c05b7..397c5d3 100644
--- a/loader/loader.h
+++ b/loader/loader.h
@@ -30,7 +30,11 @@
 
 #include <xgl.h>
 #include <xglDbg.h>
+#if defined(WIN32)
+// FIXME: NEED WINDOWS EQUIVALENT
+#else // WIN32
 #include <xglWsiX11Ext.h>
+#endif // WIN32
 #include <xglLayer.h>
 #if defined(__GNUC__) && __GNUC__ >= 4
 #  define LOADER_EXPORT __attribute__((visibility("default")))
diff --git a/loader/loader_platform.h b/loader/loader_platform.h
new file mode 100644
index 0000000..a9cd4df
--- /dev/null
+++ b/loader/loader_platform.h
@@ -0,0 +1,238 @@
+/*
+ * XGL
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ * Copyright 2014 Valve Software
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *   Ian Elliott <ian@lunarg.com>
+ */
+
+#ifndef LOADER_PLATFORM_H
+#define LOADER_PLATFORM_H
+
+#if defined(__linux__)
+/* Linux-specific common code: */
+
+// Headers:
+//#define _GNU_SOURCE 1
+// TBD: Are the contents of the following file used?
+#include <unistd.h>
+// Note: The following file is for dynamic loading:
+#include <dlfcn.h>
+#include <pthread.h>
+#include <assert.h>
+
+// C99:
+#define STATIC_INLINE static inline
+
+// Dynamic Loading (thin wrappers around dlopen()/dlerror()/dlclose()/dlsym()):
+typedef void * loader_platform_dl_handle;
+static inline loader_platform_dl_handle loader_platform_open_library(const char* libPath)
+{
+    // NOTE: The prior (Linux only) loader code always used RTLD_LAZY.  In one
+    // place, it used RTLD_DEEPBIND.  It probably doesn't hurt to always use
+    // RTLD_DEEPBIND, and so that is what is being done.
+    return dlopen(libPath, RTLD_LAZY | RTLD_DEEPBIND | RTLD_LOCAL); // callers treat a NULL result as failure
+}
+static inline char * loader_platform_open_library_error(const char* libPath)
+{
+    return dlerror(); // libPath is not used here; the parameter mirrors the Windows version of this function
+}
+static inline void loader_platform_close_library(loader_platform_dl_handle library)
+{
+    dlclose(library); // the dlclose() result is ignored
+}
+static inline void * loader_platform_get_proc_address(loader_platform_dl_handle library,
+                                                      const char *name)
+{
+    assert(library); // both arguments are required
+    assert(name);
+    return dlsym(library, name);
+}
+static inline char * loader_platform_get_proc_address_error(const char *name)
+{
+    return dlerror(); // name is not used here; the parameter mirrors the Windows version of this function
+}
+
+// Threads (NOTE: LOADER_PLATFORM_THREAD_ONCE_DECLARATION expands to a complete declaration, including its trailing ';'):
+typedef pthread_t loader_platform_thread;
+#define LOADER_PLATFORM_THREAD_ONCE_DECLARATION(var) \
+    pthread_once_t var = PTHREAD_ONCE_INIT;
+static inline void loader_platform_thread_once(void *ctl, void (* func) (void))
+{
+    assert(func != NULL);
+    assert(ctl != NULL);
+    pthread_once((pthread_once_t *) ctl, func); // ctl must point to a variable declared with LOADER_PLATFORM_THREAD_ONCE_DECLARATION
+}
+
+// Thread IDs:
+typedef pthread_t loader_platform_thread_id;
+static inline loader_platform_thread_id loader_platform_get_thread_id()
+{
+    return pthread_self(); // ID of the calling thread
+}
+
+// Thread mutex (thin wrappers around pthread_mutex_*(); none of the return codes are checked):
+typedef pthread_mutex_t loader_platform_thread_mutex;
+static inline void loader_platform_thread_create_mutex(loader_platform_thread_mutex* pMutex)
+{
+    pthread_mutex_init(pMutex, NULL); // NULL requests the default mutex attributes
+}
+static inline void loader_platform_thread_lock_mutex(loader_platform_thread_mutex* pMutex)
+{
+    pthread_mutex_lock(pMutex);
+}
+static inline void loader_platform_thread_unlock_mutex(loader_platform_thread_mutex* pMutex)
+{
+    pthread_mutex_unlock(pMutex);
+}
+static inline void loader_platform_thread_delete_mutex(loader_platform_thread_mutex* pMutex)
+{
+    pthread_mutex_destroy(pMutex);
+}
+
+
+#elif defined(_WIN32) // defined(__linux__)
+/* Windows-specific common code: */
+
+// Headers:
+#include <windows.h>
+#include <assert.h>
+#ifdef __cplusplus
+#include <iostream>
+#include <string>
+using namespace std;
+#endif // __cplusplus
+
+// C99:
+// Microsoft didn't implement C99 in Visual Studio; but started adding it with
+// VS2013.  However, VS2013 still didn't have snprintf().  The following is a
+// work-around.
+#define snprintf _snprintf
+#define STATIC_INLINE static
+// Microsoft also doesn't have basename().  Paths are different on Windows, and
+// so this is just a temporary solution in order to get us compiling, so that we
+// can test some scenarios, and develop the correct solution for Windows.
+// TODO: Develop a better, permanent solution for Windows, to replace this
+// temporary code.
+// TODO/TBD: Do we need to deal with the Windows's ":" character?
+//
+// Returns a pointer (into pathname) to the text that follows the last '\\', or
+// to the terminating NUL (i.e. an empty string) when pathname is empty or ends
+// with '\\'.  pathname must not be NULL, and it is never modified.
+static char *basename(char *pathname)
+{
+    const char separator = '\\';
+    char *current = pathname;
+    char *next;
+
+    // Step past each separator until none remain:
+    while ((next = strchr(current, separator)) != NULL) {
+        current = next + 1;
+    }
+    // Also reached for "" and for paths that end with a separator, so that
+    // every path through this function returns a valid pointer:
+    return current;
+}
+
+// Dynamic Loading (thin wrappers around LoadLibraryA()/FreeLibrary()/GetProcAddress()):
+typedef HMODULE loader_platform_dl_handle;
+static loader_platform_dl_handle loader_platform_open_library(const char* libPath)
+{
+    return LoadLibraryA(libPath); // libPath is a char string, so call the ANSI entry point explicitly (LoadLibrary maps to LoadLibraryW when UNICODE is defined)
+}
+static char * loader_platform_open_library_error(const char* libPath)
+{
+    static char errorMsg[120]; // static: returned to the caller, and overwritten by the next call
+    snprintf(errorMsg, 119, "Failed to open dynamic library \"%s\"", libPath);
+    return errorMsg;
+}
+static void loader_platform_close_library(loader_platform_dl_handle library)
+{
+    FreeLibrary(library); // the FreeLibrary() result is ignored
+}
+static void * loader_platform_get_proc_address(loader_platform_dl_handle library,
+                                               const char *name)
+{
+    assert(library); // both arguments are required
+    assert(name);
+    return (void *) GetProcAddress(library, name); // explicit FARPROC -> void * conversion
+}
+static char * loader_platform_get_proc_address_error(const char *name)
+{
+    static char errorMsg[120]; // static: returned to the caller, and overwritten by the next call
+    snprintf(errorMsg, 119, "Failed to find function \"%s\" in dynamic library", name);
+    return errorMsg;
+}
+
+// Threads:
+typedef HANDLE loader_platform_thread;
+#define LOADER_PLATFORM_THREAD_ONCE_DECLARATION(var) INIT_ONCE var = INIT_ONCE_STATIC_INIT;
+// InitOnceExecuteOnce() calls a "BOOL CALLBACK fn(PINIT_ONCE, PVOID, PVOID *)"; this adapter runs the void(void) function passed as "param" and reports success:
+static BOOL CALLBACK loader_platform_thread_once_adapter(PINIT_ONCE once, PVOID param, PVOID *context) { ((void (*)(void)) param)(); return TRUE; }
+static void loader_platform_thread_once(void *ctl, void (* func) (void))
+{
+    assert(func != NULL && ctl != NULL);
+    InitOnceExecuteOnce((PINIT_ONCE) ctl, loader_platform_thread_once_adapter, (PVOID) func, NULL); // func is never cast to PINIT_ONCE_FN: its signature and calling convention differ
+}
+
+// Thread IDs:
+typedef DWORD loader_platform_thread_id;
+static loader_platform_thread_id loader_platform_get_thread_id()
+{
+    return GetCurrentThreadId(); // ID of the calling thread
+}
+
+// Thread mutex (implemented with a Windows CRITICAL_SECTION):
+typedef CRITICAL_SECTION loader_platform_thread_mutex;
+static void loader_platform_thread_create_mutex(loader_platform_thread_mutex* pMutex)
+{
+    InitializeCriticalSection(pMutex); // must be called before the mutex is locked
+}
+static void loader_platform_thread_lock_mutex(loader_platform_thread_mutex* pMutex)
+{
+    EnterCriticalSection(pMutex);
+}
+static void loader_platform_thread_unlock_mutex(loader_platform_thread_mutex* pMutex)
+{
+    LeaveCriticalSection(pMutex);
+}
+static void loader_platform_thread_delete_mutex(loader_platform_thread_mutex* pMutex)
+{
+    DeleteCriticalSection(pMutex);
+}
+
+#else // defined(_WIN32)
+
+#error The "loader_platform.h" file must be modified for this OS.
+
+// NOTE: In order to support another OS, an #elif needs to be added (above the
+// "#else // defined(_WIN32)") for that OS, and OS-specific versions of the
+// contents of this file must be created.
+
+// NOTE: Other OS-specific changes are also needed for this OS.  Search for
+// files with "WIN32" in them, as a quick way to find files that must be changed.
+
+#endif // defined(_WIN32)
+
+#endif /* LOADER_PLATFORM_H */
diff --git a/xgl-generate.py b/xgl-generate.py
index 2c5b7db..2fe51b0 100755
--- a/xgl-generate.py
+++ b/xgl-generate.py
@@ -119,6 +119,8 @@
         for proto in self.protos:
             if not self._is_dispatchable(proto):
                 continue
+            if 'WsiX11AssociateConnection' == proto.name:
+                funcs.append("#if !defined(_WIN32)")
             decl = proto.c_func(prefix="xgl", attr="XGLAPI")
             stmt = "(*disp)->%s" % proto.c_call()
             if proto.name == "CreateDevice":
@@ -196,6 +198,7 @@
                          "    %s;\n"
                              "}" % (qual, decl, proto.params[0].name, proto.params[0].name, stmt))
 
+        funcs.append("#endif")
         return "\n\n".join(funcs)
 
     def generate_body(self):
@@ -215,20 +218,24 @@
     def generate_header(self):
         return "\n".join(["#include <xgl.h>",
                           "#include <xglLayer.h>",
-                          "#include <string.h>"])
+                          "#include <string.h>",
+                          "#include \"loader_platform.h\""])
 
     def _generate_init(self):
         stmts = []
         for proto in self.protos:
+            if 'WsiX11AssociateConnection' == proto.name:
+                stmts.append("#if !defined(_WIN32)")
             if proto.name == "GetProcAddr":
                 stmts.append("table->%s = gpa; /* direct assignment */" %
                         proto.name)
             else:
                 stmts.append("table->%s = (xgl%sType) gpa(gpu, \"xgl%s\");" %
                         (proto.name, proto.name, proto.name))
+        stmts.append("#endif")
 
         func = []
-        func.append("static inline void %s_initialize_dispatch_table(XGL_LAYER_DISPATCH_TABLE *table,"
+        func.append("STATIC_INLINE void %s_initialize_dispatch_table(XGL_LAYER_DISPATCH_TABLE *table,"
                 % self.prefix)
         func.append("%s                                              xglGetProcAddrType gpa,"
                 % (" " * len(self.prefix)))
@@ -243,12 +250,15 @@
     def _generate_lookup(self):
         lookups = []
         for proto in self.protos:
+            if 'WsiX11AssociateConnection' == proto.name:
+                lookups.append("#if !defined(_WIN32)")
             lookups.append("if (!strcmp(name, \"%s\"))" % (proto.name))
             lookups.append("    return (void *) table->%s;"
                     % (proto.name))
+        lookups.append("#endif")
 
         func = []
-        func.append("static inline void *%s_lookup_dispatch_table(const XGL_LAYER_DISPATCH_TABLE *table,"
+        func.append("STATIC_INLINE void *%s_lookup_dispatch_table(const XGL_LAYER_DISPATCH_TABLE *table,"
                 % self.prefix)
         func.append("%s                                           const char *name)"
                 % (" " * len(self.prefix)))
@@ -316,10 +326,13 @@
 
         lookups = []
         for proto in self.protos:
+            if 'WsiX11AssociateConnection' == proto.name:
+                lookups.append("#if !defined(_WIN32)")
             lookups.append("if (!strcmp(%s, \"%s\"))" %
                     (gpa_pname, proto.name))
             lookups.append("    return (%s) %s%s;" %
                     (gpa_proto.ret, self.prefix, proto.name))
+        lookups.append("#endif")
 
         body = []
         body.append("%s %s" % (self.qual, gpa_decl))
@@ -358,12 +371,15 @@
                 lookups.append("/* no %s%s */" % (self.prefix, proto.name))
                 continue
 
+            if 'WsiX11AssociateConnection' == proto.name:
+                lookups.append("#if !defined(_WIN32)")
             lookups.append("if (!strcmp(name, \"%s\"))" % proto.name)
             lookups.append("    return (%s) %s%s;" %
                     (self.gpa.ret, self.prefix, proto.name))
+        lookups.append("#endif")
 
         body = []
-        body.append("static inline %s layer_intercept_proc(const char *name)" %
+        body.append("STATIC_INLINE %s layer_intercept_proc(const char *name)" %
                 self.gpa.ret)
         body.append("{")
         body.append(generate_get_proc_addr_check("name"))
diff --git a/xgl-layer-generate.py b/xgl-layer-generate.py
index 73bf0e9..10f7489 100755
--- a/xgl-layer-generate.py
+++ b/xgl-layer-generate.py
@@ -232,6 +232,8 @@
                     if proto.ret != "void":
                         ret_val = "XGL_RESULT result = "
                         stmt = "    return result;\n"
+                    if 'WsiX11AssociateConnection' == proto.name:
+                        funcs.append("#if !defined(_WIN32)")
                     if proto.name == "EnumerateLayers":
                         c_call = proto.c_call().replace("(" + proto.params[0].name, "((XGL_PHYSICAL_GPU)gpuw->nextObject", 1)
                         funcs.append('%s%s\n'
@@ -242,7 +244,7 @@
                                  '        sprintf(str, "At start of layered %s\\n");\n'
                                  '        layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpu, 0, 0, (char *) "GENERIC", (char *) str);\n'
                                  '        pCurObj = gpuw;\n'
-                                 '        pthread_once(&tabOnce, initLayerTable);\n'
+                                 '        loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '        %snextTable.%s;\n'
                                  '        sprintf(str, "Completed layered %s\\n");\n'
                                  '        layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpu, 0, 0, (char *) "GENERIC", (char *) str);\n'
@@ -276,13 +278,15 @@
                                  '    sprintf(str, "At start of layered %s\\n");\n'
                                  '    layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpuw, 0, 0, (char *) "GENERIC", (char *) str);\n'
                                  '    pCurObj = gpuw;\n'
-                                 '    pthread_once(&tabOnce, initLayerTable);\n'
+                                 '    loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '    %snextTable.%s;\n'
                                  '    sprintf(str, "Completed layered %s\\n");\n'
                                  '    layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpuw, 0, 0, (char *) "GENERIC", (char *) str);\n'
                                  '    fflush(stdout);\n'
                                  '%s'
                                  '}' % (qual, decl, proto.params[0].name, proto.name, ret_val, c_call, proto.name, stmt))
+                    if 'WsiX11QueuePresent' == proto.name:
+                        funcs.append("#endif")
                 elif "APIDumpCpp" in layer:
                     decl = proto.c_func(prefix="xgl", attr="XGLAPI")
                     param0_name = proto.params[0].name
@@ -303,13 +307,13 @@
                         file_mode = "a"
                         if 'CreateDevice' in proto.name:
                             file_mode = "w"
-                        f_open = 'pthread_mutex_lock( &file_lock );\n    pOutFile = fopen(outFileName, "%s");\n    ' % (file_mode)
+                        f_open = 'loader_platform_thread_lock_mutex(&printLock);\n    pOutFile = fopen(outFileName, "%s");\n    ' % (file_mode)
                         log_func = 'fprintf(pOutFile, "t{%%u} xgl%s(' % proto.name
-                        f_close = '\n    fclose(pOutFile);\n    pthread_mutex_unlock( &file_lock );'
+                        f_close = '\n    fclose(pOutFile);\n    loader_platform_thread_unlock_mutex(&printLock);'
                     else:
-                        f_open = 'pthread_mutex_lock( &print_lock );\n    '
+                        f_open = 'loader_platform_thread_lock_mutex(&printLock);\n    '
                         log_func = 'cout << "t{" << getTIDIndex() << "} xgl%s(' % proto.name
-                        f_close = '\n    pthread_mutex_unlock( &print_lock );'
+                        f_close = '\n    loader_platform_thread_unlock_mutex(&printLock);'
                     pindex = 0
                     prev_count_name = ''
                     for p in proto.params:
@@ -405,6 +409,8 @@
                                         log_func += '\n        cout << "   %s[" << i << "] (" << %s%s[i] << ")" << endl << tmp_str << endl;' % (proto.params[sp_index].name, print_cast, proto.params[sp_index].name)
                                     #log_func += '\n        fflush(stdout);'
                                 log_func += '\n    }'
+                    if 'WsiX11AssociateConnection' == proto.name:
+                        funcs.append("#if !defined(_WIN32)")
                     if proto.name == "EnumerateLayers":
                         c_call = proto.c_call().replace("(" + proto.params[0].name, "((XGL_PHYSICAL_GPU)gpuw->nextObject", 1)
                         funcs.append('%s%s\n'
@@ -412,7 +418,7 @@
                                  '    if (gpu != NULL) {\n'
                                  '        XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '        pCurObj = gpuw;\n'
-                                 '        pthread_once(&tabOnce, initLayerTable);\n'
+                                 '        loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '        %snextTable.%s;\n'
                                  '        %s    %s    %s\n'
                                  '    %s'
@@ -438,11 +444,13 @@
                                  '{\n'
                                  '    XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '    pCurObj = gpuw;\n'
-                                 '    pthread_once(&tabOnce, initLayerTable);\n'
+                                 '    loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '    %snextTable.%s;\n'
                                  '    %s%s%s\n'
                                  '%s'
                                  '}' % (qual, decl, proto.params[0].name, ret_val, c_call, f_open, log_func, f_close, stmt))
+                    if 'WsiX11QueuePresent' == proto.name:
+                        funcs.append("#endif")
                 elif "APIDump" in layer:
                     decl = proto.c_func(prefix="xgl", attr="XGLAPI")
                     param0_name = proto.params[0].name
@@ -463,13 +471,13 @@
                         file_mode = "a"
                         if 'CreateDevice' in proto.name:
                             file_mode = "w"
-                        f_open = 'pthread_mutex_lock( &file_lock );\n    pOutFile = fopen(outFileName, "%s");\n    ' % (file_mode)
+                        f_open = 'loader_platform_thread_lock_mutex(&printLock);\n    pOutFile = fopen(outFileName, "%s");\n    ' % (file_mode)
                         log_func = 'fprintf(pOutFile, "t{%%u} xgl%s(' % proto.name
-                        f_close = '\n    fclose(pOutFile);\n    pthread_mutex_unlock( &file_lock );'
+                        f_close = '\n    fclose(pOutFile);\n    loader_platform_thread_unlock_mutex(&printLock);'
                     else:
-                        f_open = 'pthread_mutex_lock( &print_lock );\n    '
+                        f_open = 'loader_platform_thread_lock_mutex(&printLock);\n    '
                         log_func = 'printf("t{%%u} xgl%s(' % proto.name
-                        f_close = '\n    pthread_mutex_unlock( &print_lock );'
+                        f_close = '\n    loader_platform_thread_unlock_mutex(&printLock);'
                     print_vals = ', getTIDIndex()'
                     pindex = 0
                     prev_count_name = ''
@@ -550,6 +558,8 @@
                                         log_func += '\n        printf("   %s[%%i] (%%p)\\n%%s\\n", i, (void*)%s, pTmpStr);' % (proto.params[sp_index].name, proto.params[sp_index].name)
                                     log_func += '\n        fflush(stdout);'
                                 log_func += '\n        free(pTmpStr);\n    }'
+                    if 'WsiX11AssociateConnection' == proto.name:
+                        funcs.append("#if !defined(_WIN32)")
                     if proto.name == "EnumerateLayers":
                         c_call = proto.c_call().replace("(" + proto.params[0].name, "((XGL_PHYSICAL_GPU)gpuw->nextObject", 1)
                         funcs.append('%s%s\n'
@@ -557,7 +567,7 @@
                                  '    if (gpu != NULL) {\n'
                                  '        XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '        pCurObj = gpuw;\n'
-                                 '        pthread_once(&tabOnce, initLayerTable);\n'
+                                 '        loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '        %snextTable.%s;\n'
                                  '        %s    %s    %s\n'
                                  '    %s'
@@ -583,11 +593,13 @@
                                  '{\n'
                                  '    XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '    pCurObj = gpuw;\n'
-                                 '    pthread_once(&tabOnce, initLayerTable);\n'
+                                 '    loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '    %snextTable.%s;\n'
                                  '    %s%s%s\n'
                                  '%s'
                                  '}' % (qual, decl, proto.params[0].name, ret_val, c_call, f_open, log_func, f_close, stmt))
+                    if 'WsiX11QueuePresent' == proto.name:
+                        funcs.append("#endif")
                 elif "ObjectTracker" == layer:
                     obj_type_mapping = {base_t : base_t.replace("XGL_", "XGL_OBJECT_TYPE_") for base_t in xgl.object_type_list}
                     # For the various "super-types" we have to use function to distinguish sub type
@@ -665,6 +677,8 @@
                     if proto.ret != "void":
                         ret_val = "XGL_RESULT result = "
                         stmt = "    return result;\n"
+                    if 'WsiX11AssociateConnection' == proto.name:
+                        funcs.append("#if !defined(_WIN32)")
                     if proto.name == "EnumerateLayers":
                         c_call = proto.c_call().replace("(" + proto.params[0].name, "((XGL_PHYSICAL_GPU)gpuw->nextObject", 1)
                         funcs.append('%s%s\n'
@@ -673,7 +687,7 @@
                                  '        XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '    %s'
                                  '        pCurObj = gpuw;\n'
-                                 '        pthread_once(&tabOnce, initLayerTable);\n'
+                                 '        loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '        %snextTable.%s;\n'
                                  '    %s%s'
                                  '    %s'
@@ -701,11 +715,13 @@
                                  '    XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '%s'
                                  '    pCurObj = gpuw;\n'
-                                 '    pthread_once(&tabOnce, initLayerTable);\n'
+                                 '    loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '    %snextTable.%s;\n'
                                  '%s%s'
                                  '%s'
                                  '}' % (qual, decl, proto.params[0].name, using_line, ret_val, c_call, create_line, destroy_line, stmt))
+                    if 'WsiX11QueuePresent' == proto.name:
+                        funcs.append("#endif")
                 elif "ParamChecker" == layer:
                     # TODO : Need to fix up the non-else cases below to do param checking as well
                     decl = proto.c_func(prefix="xgl", attr="XGLAPI")
@@ -762,6 +778,8 @@
                     if proto.ret != "void":
                         ret_val = "XGL_RESULT result = "
                         stmt = "    return result;\n"
+                    if 'WsiX11AssociateConnection' == proto.name:
+                        funcs.append("#if !defined(_WIN32)")
                     if proto.name == "EnumerateLayers":
                         c_call = proto.c_call().replace("(" + proto.params[0].name, "((XGL_PHYSICAL_GPU)gpuw->nextObject", 1)
                         funcs.append('%s%s\n'
@@ -772,7 +790,7 @@
                                  '        sprintf(str, "At start of layered %s\\n");\n'
                                  '        layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpu, 0, 0, "PARAMCHECK", str);\n'
                                  '        pCurObj = gpuw;\n'
-                                 '        pthread_once(&tabOnce, initLayerTable);\n'
+                                 '        loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '        %snextTable.%s;\n'
                                  '        sprintf(str, "Completed layered %s\\n");\n'
                                  '        layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, gpu, 0, 0, "PARAMCHECK", str);\n'
@@ -804,11 +822,13 @@
                                  '{\n'
                                  '    XGL_BASE_LAYER_OBJECT* gpuw = (XGL_BASE_LAYER_OBJECT *) %s;\n'
                                  '    pCurObj = gpuw;\n'
-                                 '    pthread_once(&tabOnce, initLayerTable);\n'
+                                 '    loader_platform_thread_once(&tabOnce, initLayerTable);\n'
                                  '%s\n'
                                  '    %snextTable.%s;\n'
                                  '%s'
                                  '}' % (qual, decl, proto.params[0].name, "\n".join(param_checks), ret_val, c_call, stmt))
+                    if 'WsiX11QueuePresent' == proto.name:
+                        funcs.append("#endif")
 
         return "\n\n".join(funcs)
 
@@ -856,7 +876,7 @@
                          "    if (gpu == NULL)\n"
                          "        return NULL;\n"
                          "    pCurObj = gpuw;\n"
-                         "    pthread_once(&tabOnce, initLayerTable);\n\n"
+                         "    loader_platform_thread_once(&tabOnce, initLayerTable);\n\n"
                          "    addr = layer_intercept_proc(funcName);\n"
                          "    if (addr)\n"
                          "        return addr;")
@@ -885,6 +905,24 @@
         func_body.append("}\n")
         return "\n".join(func_body)
 
+    def _generate_layer_dispatch_table_with_lock(self, prefix='xgl'):
+        """Return C text for initLayerTable(): fills nextTable, then creates printLock if printLockInitialized is 0."""
+        return "\n".join([
+            "#include \"xgl_dispatch_table_helper.h\"",
+            'static void initLayerTable()\n'
+            '{\n'
+            '    xglGetProcAddrType fpNextGPA;\n'
+            '    fpNextGPA = pCurObj->pGPA;\n'
+            '    assert(fpNextGPA);\n',
+            "    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (XGL_PHYSICAL_GPU) pCurObj->nextObject);\n",
+            "    if (!printLockInitialized)",
+            "    {",
+            "        // TODO/TBD: Need to delete this mutex sometime.  How???",
+            "        loader_platform_thread_create_mutex(&printLock);",
+            "        printLockInitialized = 1;",
+            "    }",
+            "}\n"])
+
 class LayerFuncsSubcommand(Subcommand):
     def generate_header(self):
         return '#include <xglLayer.h>\n#include "loader.h"'
@@ -901,7 +939,7 @@
 
 class GenericLayerSubcommand(Subcommand):
     def generate_header(self):
-        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\n'
+        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include "loader_platform.h"\n#include "xglLayer.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\n\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);'  # C preamble of the generated layer: includes plus the file-scope nextTable, pCurObj and tabOnce declarations
 
     def generate_body(self):
         body = [self._gen_layer_dbg_callback_header(),
@@ -914,20 +952,27 @@
 class ApiDumpSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_struct_string_helper.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\npthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;\n')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>')
+        header_txt.append('#include "loader_platform.h"')
+        header_txt.append('#include "xglLayer.h"\n#include "xgl_struct_string_helper.h"\n')
+        header_txt.append('static XGL_LAYER_DISPATCH_TABLE nextTable;')
+        header_txt.append('static XGL_BASE_LAYER_OBJECT *pCurObj;\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static int printLockInitialized = 0;')
+        header_txt.append('static loader_platform_thread_mutex printLock;\n')
         header_txt.append('#define MAX_TID 513')
-        header_txt.append('static pthread_t tidMapping[MAX_TID] = {0};')
+        header_txt.append('static loader_platform_thread_id tidMapping[MAX_TID] = {0};')
         header_txt.append('static uint32_t maxTID = 0;')
         header_txt.append('// Map actual TID to an index value and return that index')
         header_txt.append('//  This keeps TIDs in range from 0-MAX_TID and simplifies compares between runs')
         header_txt.append('static uint32_t getTIDIndex() {')
-        header_txt.append('    pthread_t tid = pthread_self();')
+        header_txt.append('    loader_platform_thread_id tid = loader_platform_get_thread_id();')
         header_txt.append('    for (uint32_t i = 0; i < maxTID; i++) {')
         header_txt.append('        if (tid == tidMapping[i])')
         header_txt.append('            return i;')
         header_txt.append('    }')
         header_txt.append("    // Don't yet have mapping, set it and return newly set index")
-        header_txt.append('    uint32_t retVal = (uint32_t)maxTID;')
+        header_txt.append('    uint32_t retVal = (uint32_t) maxTID;')
         header_txt.append('    tidMapping[maxTID++] = tid;')
         header_txt.append('    assert(maxTID < MAX_TID);')
         header_txt.append('    return retVal;')
@@ -935,7 +980,7 @@
         return "\n".join(header_txt)
 
     def generate_body(self):
-        body = [self._generate_layer_dispatch_table(),
+        body = [self._generate_layer_dispatch_table_with_lock(),
                 self._generate_dispatch_entrypoints("XGL_LAYER_EXPORT", "APIDump"),
                 self._generate_layer_gpa_function()]
 
@@ -944,20 +989,27 @@
 class ApiDumpCppSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_struct_string_helper_cpp.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\npthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;\n')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>')
+        header_txt.append('#include "loader_platform.h"')
+        header_txt.append('#include "xglLayer.h"\n#include "xgl_struct_string_helper_cpp.h"\n')
+        header_txt.append('static XGL_LAYER_DISPATCH_TABLE nextTable;')
+        header_txt.append('static XGL_BASE_LAYER_OBJECT *pCurObj;\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static int printLockInitialized = 0;')
+        header_txt.append('static loader_platform_thread_mutex printLock;\n')
         header_txt.append('#define MAX_TID 513')
-        header_txt.append('static pthread_t tidMapping[MAX_TID] = {0};')
+        header_txt.append('static loader_platform_thread_id tidMapping[MAX_TID] = {0};')
         header_txt.append('static uint32_t maxTID = 0;')
         header_txt.append('// Map actual TID to an index value and return that index')
         header_txt.append('//  This keeps TIDs in range from 0-MAX_TID and simplifies compares between runs')
         header_txt.append('static uint32_t getTIDIndex() {')
-        header_txt.append('    pthread_t tid = pthread_self();')
+        header_txt.append('    loader_platform_thread_id tid = loader_platform_get_thread_id();')
         header_txt.append('    for (uint32_t i = 0; i < maxTID; i++) {')
         header_txt.append('        if (tid == tidMapping[i])')
         header_txt.append('            return i;')
         header_txt.append('    }')
         header_txt.append("    // Don't yet have mapping, set it and return newly set index")
-        header_txt.append('    uint32_t retVal = (uint32_t)maxTID;')
+        header_txt.append('    uint32_t retVal = (uint32_t) maxTID;')
         header_txt.append('    tidMapping[maxTID++] = tid;')
         header_txt.append('    assert(maxTID < MAX_TID);')
         header_txt.append('    return retVal;')
@@ -965,7 +1017,7 @@
         return "\n".join(header_txt)
 
     def generate_body(self):
-        body = [self._generate_layer_dispatch_table(),
+        body = [self._generate_layer_dispatch_table_with_lock(),
                 self._generate_dispatch_entrypoints("XGL_LAYER_EXPORT", "APIDumpCpp"),
                 self._generate_layer_gpa_function()]
 
@@ -974,28 +1026,36 @@
 class ApiDumpFileSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_struct_string_helper.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\n\nstatic FILE* pOutFile;\nstatic char* outFileName = "xgl_apidump.txt";\npthread_mutex_t file_lock = PTHREAD_MUTEX_INITIALIZER;\n')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>')  # header_txt collects the C preamble of the generated layer; the pieces are joined with newlines on return
+        header_txt.append('#include "loader_platform.h"')  # NOTE(review): the emitted getTIDIndex() below still calls assert(); presumably loader_platform.h pulls in <assert.h> -- confirm
+        header_txt.append('#include "xglLayer.h"\n#include "xgl_struct_string_helper.h"\n')
+        header_txt.append('static XGL_LAYER_DISPATCH_TABLE nextTable;')
+        header_txt.append('static XGL_BASE_LAYER_OBJECT *pCurObj;\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static int printLockInitialized = 0;')  # set to 1 by the initLayerTable() text from _generate_layer_dispatch_table_with_lock() once printLock is created
+        header_txt.append('static loader_platform_thread_mutex printLock;\n')  # locked/unlocked by the f_open/f_close snippets emitted in _generate_dispatch_entrypoints()
         header_txt.append('#define MAX_TID 513')
-        header_txt.append('static pthread_t tidMapping[MAX_TID] = {0};')
+        header_txt.append('static loader_platform_thread_id tidMapping[MAX_TID] = {0};')
         header_txt.append('static uint32_t maxTID = 0;')
         header_txt.append('// Map actual TID to an index value and return that index')
         header_txt.append('//  This keeps TIDs in range from 0-MAX_TID and simplifies compares between runs')
         header_txt.append('static uint32_t getTIDIndex() {')
-        header_txt.append('    pthread_t tid = pthread_self();')
+        header_txt.append('    loader_platform_thread_id tid = loader_platform_get_thread_id();')
         header_txt.append('    for (uint32_t i = 0; i < maxTID; i++) {')
         header_txt.append('        if (tid == tidMapping[i])')
         header_txt.append('            return i;')
         header_txt.append('    }')
         header_txt.append("    // Don't yet have mapping, set it and return newly set index")
-        header_txt.append('    uint32_t retVal = (uint32_t)maxTID;')
+        header_txt.append('    uint32_t retVal = (uint32_t) maxTID;')
         header_txt.append('    tidMapping[maxTID++] = tid;')
         header_txt.append('    assert(maxTID < MAX_TID);')
         header_txt.append('    return retVal;')
-        header_txt.append('}')
+        header_txt.append('}\n')
+        header_txt.append('static FILE* pOutFile;\nstatic char* outFileName = "xgl_apidump.txt";')  # output-file state referenced by the emitted fopen(outFileName, ...)/fclose(pOutFile) calls
         return "\n".join(header_txt)
 
     def generate_body(self):
-        body = [self._generate_layer_dispatch_table(),
+        body = [self._generate_layer_dispatch_table_with_lock(),
                 self._generate_dispatch_entrypoints("XGL_LAYER_EXPORT", "APIDumpFile"),
                 self._generate_layer_gpa_function()]
 
@@ -1004,20 +1064,27 @@
 class ApiDumpNoAddrSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_struct_string_helper_no_addr.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\npthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;\n')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>')
+        header_txt.append('#include "loader_platform.h"')
+        header_txt.append('#include "xglLayer.h"\n#include "xgl_struct_string_helper_no_addr.h"\n')
+        header_txt.append('static XGL_LAYER_DISPATCH_TABLE nextTable;')
+        header_txt.append('static XGL_BASE_LAYER_OBJECT *pCurObj;\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static int printLockInitialized = 0;')
+        header_txt.append('static loader_platform_thread_mutex printLock;\n')
         header_txt.append('#define MAX_TID 513')
-        header_txt.append('static pthread_t tidMapping[MAX_TID] = {0};')
+        header_txt.append('static loader_platform_thread_id tidMapping[MAX_TID] = {0};')
         header_txt.append('static uint32_t maxTID = 0;')
         header_txt.append('// Map actual TID to an index value and return that index')
         header_txt.append('//  This keeps TIDs in range from 0-MAX_TID and simplifies compares between runs')
         header_txt.append('static uint32_t getTIDIndex() {')
-        header_txt.append('    pthread_t tid = pthread_self();')
+        header_txt.append('    loader_platform_thread_id tid = loader_platform_get_thread_id();')
         header_txt.append('    for (uint32_t i = 0; i < maxTID; i++) {')
         header_txt.append('        if (tid == tidMapping[i])')
         header_txt.append('            return i;')
         header_txt.append('    }')
         header_txt.append("    // Don't yet have mapping, set it and return newly set index")
-        header_txt.append('    uint32_t retVal = (uint32_t)maxTID;')
+        header_txt.append('    uint32_t retVal = (uint32_t) maxTID;')
         header_txt.append('    tidMapping[maxTID++] = tid;')
         header_txt.append('    assert(maxTID < MAX_TID);')
         header_txt.append('    return retVal;')
@@ -1025,7 +1092,7 @@
         return "\n".join(header_txt)
 
     def generate_body(self):
-        body = [self._generate_layer_dispatch_table(),
+        body = [self._generate_layer_dispatch_table_with_lock(),
                 self._generate_dispatch_entrypoints("XGL_LAYER_EXPORT", "APIDump", True),
                 self._generate_layer_gpa_function()]
 
@@ -1034,20 +1101,27 @@
 class ApiDumpNoAddrCppSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_struct_string_helper_no_addr_cpp.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\npthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;\n')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>')
+        header_txt.append('#include "loader_platform.h"')
+        header_txt.append('#include "xglLayer.h"\n#include "xgl_struct_string_helper_no_addr_cpp.h"\n')
+        header_txt.append('static XGL_LAYER_DISPATCH_TABLE nextTable;')
+        header_txt.append('static XGL_BASE_LAYER_OBJECT *pCurObj;\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static int printLockInitialized = 0;')
+        header_txt.append('static loader_platform_thread_mutex printLock;\n')
         header_txt.append('#define MAX_TID 513')
-        header_txt.append('static pthread_t tidMapping[MAX_TID] = {0};')
+        header_txt.append('static loader_platform_thread_id tidMapping[MAX_TID] = {0};')
         header_txt.append('static uint32_t maxTID = 0;')
         header_txt.append('// Map actual TID to an index value and return that index')
         header_txt.append('//  This keeps TIDs in range from 0-MAX_TID and simplifies compares between runs')
         header_txt.append('static uint32_t getTIDIndex() {')
-        header_txt.append('    pthread_t tid = pthread_self();')
+        header_txt.append('    loader_platform_thread_id tid = loader_platform_get_thread_id();')
         header_txt.append('    for (uint32_t i = 0; i < maxTID; i++) {')
         header_txt.append('        if (tid == tidMapping[i])')
         header_txt.append('            return i;')
         header_txt.append('    }')
         header_txt.append("    // Don't yet have mapping, set it and return newly set index")
-        header_txt.append('    uint32_t retVal = (uint32_t)maxTID;')
+        header_txt.append('    uint32_t retVal = (uint32_t) maxTID;')
         header_txt.append('    tidMapping[maxTID++] = tid;')
         header_txt.append('    assert(maxTID < MAX_TID);')
         header_txt.append('    return retVal;')
@@ -1055,7 +1129,7 @@
         return "\n".join(header_txt)
 
     def generate_body(self):
-        body = [self._generate_layer_dispatch_table(),
+        body = [self._generate_layer_dispatch_table_with_lock(),
                 self._generate_dispatch_entrypoints("XGL_LAYER_EXPORT", "APIDumpCpp", True),
                 self._generate_layer_gpa_function()]
 
@@ -1064,9 +1138,10 @@
 class ObjectTrackerSubcommand(Subcommand):
     def generate_header(self):
         header_txt = []
-        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>')
+        header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include "loader_platform.h"')
         header_txt.append('#include "object_track.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;')
-        header_txt.append('static pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\nstatic long long unsigned int object_track_index = 0;')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static long long unsigned int object_track_index = 0;')
         header_txt.append('// Ptr to LL of dbg functions')
         header_txt.append('static XGL_LAYER_DBG_FUNCTION_NODE *pDbgFunctionHead = NULL;')
         header_txt.append('// Utility function to handle reporting')
@@ -1349,7 +1424,7 @@
 
 class ParamCheckerSubcommand(Subcommand):
     def generate_header(self):
-        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <assert.h>\n#include <pthread.h>\n#include "xglLayer.h"\n#include "xgl_enum_validate_helper.h"\n#include "xgl_struct_validate_helper.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic pthread_once_t tabOnce = PTHREAD_ONCE_INIT;\n'
+        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include "loader_platform.h"\n#include "xglLayer.h"\n#include "xgl_enum_validate_helper.h"\n#include "xgl_struct_validate_helper.h"\n\nstatic XGL_LAYER_DISPATCH_TABLE nextTable;\nstatic XGL_BASE_LAYER_OBJECT *pCurObj;\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);\n\n'  # C preamble of the generated layer: includes (with the enum/struct validate helpers) plus the nextTable, pCurObj and tabOnce declarations
 
     def generate_body(self):
         body = [self._gen_layer_dbg_callback_header(),
diff --git a/xgl-win-def-file-generate.py b/xgl-win-def-file-generate.py
new file mode 100755
index 0000000..56016ee
--- /dev/null
+++ b/xgl-win-def-file-generate.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+#
+# XGL
+#
+# Copyright (C) 2015 LunarG, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+# Authors:
+#   Chia-I Wu <olv@lunarg.com>
+#   Ian Elliott <ian@lunarg.com>
+
+import sys
+
+import xgl
+
+def generate_get_proc_addr_check(name):
+    # Builds a C guard that returns NULL unless `name` begins with "xgl".
+    return "    if (!{0} || {0}[0] != 'x' || {0}[1] != 'g' || {0}[2] != 'l')\n        return NULL;".format(name)
+
+class Subcommand(object):
+    """Builds the text of a Windows module-definition (.def) file for one DLL."""
+
+    def __init__(self, argv):
+        # argv excludes the script name; argv[0] is the name put on the LIBRARY line.
+        self.argv = argv
+        self.headers = xgl.headers
+        self.protos = xgl.protos
+
+    def run(self):
+        """Print the generated file contents to stdout."""
+        print(self.generate())
+
+    def generate(self):
+        """Return the non-empty sections, in order, separated by blank lines."""
+        sections = (
+            self.generate_copyright(),
+            self.generate_header(),
+            self.generate_body(),
+            self.generate_footer(),
+        )
+        return "\n\n".join(part for part in sections if part)
+
+    def generate_copyright(self):
+        """Return the ';'-commented banner and license placed at the top of the file."""
+        return """; THIS FILE IS GENERATED.  DO NOT EDIT.
+
+;;;; Begin Copyright Notice ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; XGL
+;
+; Copyright (C) 2015 LunarG, Inc.
+;
+; Permission is hereby granted, free of charge, to any person obtaining a
+; copy of this software and associated documentation files (the "Software"),
+; to deal in the Software without restriction, including without limitation
+; the rights to use, copy, modify, merge, publish, distribute, sublicense,
+; and/or sell copies of the Software, and to permit persons to whom the
+; Software is furnished to do so, subject to the following conditions:
+;
+; The above copyright notice and this permission notice shall be included
+; in all copies or substantial portions of the Software.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+; DEALINGS IN THE SOFTWARE.
+;;;;  End Copyright Notice ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; The following is required on Windows, for exporting symbols from the DLL"""
+
+    def generate_header(self):
+        """No separate header section is produced; generate() skips it."""
+        return None
+
+    def generate_body(self):
+        """Return the LIBRARY statement naming the DLL given as the first argument."""
+        return "LIBRARY " + self.argv[0]
+
+    def generate_footer(self):
+        """Return the EXPORTS section listing the two exported entrypoints."""
+        return "EXPORTS\n   xglGetProcAddr\n   xglEnumerateLayers"
+
+def main():
+    """Emit a .def file for the library named by the first command-line argument."""
+    if len(sys.argv) < 2:
+        # stdout carries the generated file, so the usage text goes to stderr.
+        print("Usage: %s <file>" % sys.argv[0], file=sys.stderr)
+        sys.exit(1)
+
+    Subcommand(sys.argv[1:]).run()
+
+if __name__ == "__main__":
+    main()
diff --git a/xgl_helper.py b/xgl_helper.py
index b509731..c8d294a 100755
--- a/xgl_helper.py
+++ b/xgl_helper.py
@@ -528,6 +528,12 @@
         for s in sorted(self.struct_dict):
             sh_funcs.append('char* %s(const %s* pStruct, const char* prefix);\n' % (self._get_sh_func_name(s), typedef_fwd_dict[s]))
         sh_funcs.append('\n')
+        sh_funcs.append('#if defined(_WIN32)\n')
+        sh_funcs.append('// Microsoft did not implement C99 in Visual Studio; but started adding it with\n')
+        sh_funcs.append('// VS2013.  However, VS2013 still did not have snprintf().  The following is a\n')
+        sh_funcs.append('// work-around.\n')
+        sh_funcs.append('#define snprintf _snprintf\n')
+        sh_funcs.append('#endif // _WIN32\n\n')
         for s in self.struct_dict:
             p_out = ""
             p_args = ""
@@ -1007,7 +1013,7 @@
         body = []
         for bet in self.et_dict:
             fet = self.tf_dict[bet]
-            body.append("static inline uint32_t validate_%s(%s input_value)\n{\n    switch ((%s)input_value)\n    {" % (fet, fet, fet))
+            body.append("STATIC_INLINE uint32_t validate_%s(%s input_value)\n{\n    switch ((%s)input_value)\n    {" % (fet, fet, fet))
             for e in sorted(self.et_dict[bet]):
                 if (self.ev_dict[e]['unique']):
                     body.append('        case %s:' % (e))
@@ -1020,7 +1026,7 @@
             # bet == base_enum_type, fet == final_enum_type
         for bet in self.et_dict:
             fet = self.tf_dict[bet]
-            body.append("static inline const char* string_%s(%s input_value)\n{\n    switch ((%s)input_value)\n    {" % (fet, fet, fet))
+            body.append("STATIC_INLINE const char* string_%s(%s input_value)\n{\n    switch ((%s)input_value)\n    {" % (fet, fet, fet))
             for e in sorted(self.et_dict[bet]):
                 if (self.ev_dict[e]['unique']):
                     body.append('        case %s:\n            return "%s";' % (e, e))
@@ -1028,7 +1034,15 @@
         return "\n".join(body)
     
     def _generateSHHeader(self):
-        return "#pragma once\n\n#include <%s>\n\n" % self.in_file
+        header = []
+        header.append('#pragma once\n')
+        header.append('#include <%s>\n' % self.in_file)
+        header.append('#if defined(_WIN32)')
+        header.append('#define STATIC_INLINE static')
+        header.append('#else  // defined(_WIN32)')
+        header.append('#define STATIC_INLINE static inline')
+        header.append('#endif // defined(_WIN32)\n\n\n')
+        return "\n".join(header)
         
 
 class CMakeGen: