Add function to set an icd's dispatch table for subsequently created objects.

With layers, the loader's dispatch table is used instead of the driver's
dispatch table.  The GPU objects reflected this change, but subsequently
created objects did not.  Tell the ICD driver about the loader's dispatch
table so that it uses it on object creation rather than the default driver
dispatch table.
diff --git a/loader/loader.c b/loader/loader.c
index 952367f..f994d40 100644
--- a/loader/loader.c
+++ b/loader/loader.c
@@ -40,15 +40,19 @@
 #include <assert.h>
 #include "loader.h"
 
+typedef XGL_VOID (* SetDispatchType)(XGL_LAYER_DISPATCH_TABLE * disp, XGL_BOOL debug);
 
 struct loader_icd {
     void *handle;
 
+   XGL_LAYER_DISPATCH_TABLE *loader_dispatch;
+
     GetProcAddrType GetProcAddr;
     InitAndEnumerateGpusType InitAndEnumerateGpus;
     DbgRegisterMsgCallbackType DbgRegisterMsgCallback;
     DbgUnregisterMsgCallbackType DbgUnregisterMsgCallback;
     DbgSetGlobalOptionType DbgSetGlobalOption;
+    SetDispatchType SetDispatch;
 
     struct loader_icd *next;
 };
@@ -69,7 +73,6 @@
 static struct {
     bool scanned;
     struct loader_icd *icds;
-    XGL_LAYER_DISPATCH_TABLE *loader_dispatch;
     XGL_UINT layer_count;
     bool layer_scaned;
     char layer_dirs[4096];
@@ -217,6 +220,7 @@
     LOOKUP(icd, DbgRegisterMsgCallback);
     LOOKUP(icd, DbgUnregisterMsgCallback);
     LOOKUP(icd, DbgSetGlobalOption);
+    LOOKUP(icd, SetDispatch);
 #undef LOOKUP
 
     return icd;
@@ -485,6 +489,8 @@
 
 static void init_dispatch_table(XGL_LAYER_DISPATCH_TABLE *tab, GetProcAddrType fpGPA, XGL_PHYSICAL_GPU gpu)
 {
+    XGL_BASE_LAYER_OBJECT* wrapped_obj = (XGL_BASE_LAYER_OBJECT*)gpu;
+    gpu = wrapped_obj->nextObject;
     tab->GetProcAddr = fpGPA;
     tab->InitAndEnumerateGpus = fpGPA(gpu, (const XGL_CHAR *) "xglInitAndEnumerateGpus");
     tab->GetGpuInfo = fpGPA(gpu, (const XGL_CHAR *) "xglGetGpuInfo");
@@ -612,14 +618,10 @@
 extern XGL_UINT ActivateLayers(XGL_PHYSICAL_GPU *gpu)
 {
     static bool layer_installed = false;
-    //const struct loader_icd *icd;
+
     /* activate any layer libraries */
     if (loader.layer_count > 0 && !layer_installed) {
 
-        //todo get icd from gpu
-        //icd = loader.icds;  // We are only going to configure the first driver
-        //SetDispatchType IcdSetDispatch = dlsym(icd->handle, "xglSetDispatch");
-
         // TODO For now just assume  all layers scanned will be  activated in the order they were scanned
         XGL_BASE_LAYER_OBJECT *gpuObj = (XGL_BASE_LAYER_OBJECT *) *gpu;
         XGL_BASE_LAYER_OBJECT *nextGpuObj;
@@ -649,9 +651,9 @@
             }
 
             if (i == 0) {
-                //TODO handle multiple icd case
-                init_dispatch_table(loader.loader_dispatch, nextGPA, gpuObj);
-                //IcdSetDispatch(&new_table, true);
+                //TODO handle multiple icd/gpus case, which dispatch table??
+                init_dispatch_table(loader.icds->loader_dispatch, nextGPA, gpuObj);
+                //loader.icds->SetDispatch(loader.icds->loader_dispatch, true);
             }
         }
         *gpu = ((XGL_PHYSICAL_GPU *) gpuObj);
@@ -964,10 +966,19 @@
 LOADER_EXPORT XGL_RESULT XGLAPI xglInitAndEnumerateGpus(const XGL_APPLICATION_INFO* pAppInfo, const XGL_ALLOC_CALLBACKS* pAllocCb, XGL_UINT maxGpus, XGL_UINT* pGpuCount, XGL_PHYSICAL_GPU* pGpus)
 {
     static pthread_once_t once = PTHREAD_ONCE_INIT;
-    const struct loader_icd *icd;
+    struct loader_icd *icd;
     XGL_UINT count = 0;
     XGL_RESULT res;
 
+    // cleanup any prior layer initializations
+    for (icd = loader.icds; icd; icd = icd->next) {
+        //TODO clean up the wrapped gpu structs from here and during layer activation
+        if (icd->loader_dispatch)
+            free(icd->loader_dispatch);
+        icd->loader_dispatch = NULL;
+        icd->SetDispatch(NULL, true);
+    }
+
     pthread_once(&once, loader_icd_scan);
 
     if (!loader.icds)
@@ -987,15 +998,16 @@
         res = icd->InitAndEnumerateGpus(pAppInfo, pAllocCb, max, &n, gpus);
         if (res == XGL_SUCCESS && n) {
             wrappedGpus = (XGL_BASE_LAYER_OBJECT*) malloc(n * sizeof(XGL_BASE_LAYER_OBJECT));
-            loader.loader_dispatch = (XGL_LAYER_DISPATCH_TABLE *) malloc(n * sizeof(XGL_LAYER_DISPATCH_TABLE));
+            icd->loader_dispatch = (XGL_LAYER_DISPATCH_TABLE *) malloc(n * sizeof(XGL_LAYER_DISPATCH_TABLE));
             for (int i = 0; i < n; i++) {
                 (wrappedGpus + i)->baseObject = gpus[i];
-                (wrappedGpus + i)->pGPA = getProcAddr; //loader.loader_dispatch + i; //getProcAddr;
+                (wrappedGpus + i)->pGPA = getProcAddr;
                 (wrappedGpus + i)->nextObject = gpus[i];
                 memcpy(pGpus + count, &wrappedGpus, sizeof(*pGpus));
-                init_dispatch_table(loader.loader_dispatch + i, getProcAddr, wrappedGpus + i);
+                init_dispatch_table(icd->loader_dispatch + i, getProcAddr, wrappedGpus + i);
                 const XGL_LAYER_DISPATCH_TABLE * *disp = (const XGL_LAYER_DISPATCH_TABLE *  *) gpus[i];
-                *disp = loader.loader_dispatch + i;
+                *disp = icd->loader_dispatch + i;
+                icd->SetDispatch(icd->loader_dispatch + i, true);
             }
 
             count += n;
diff --git a/xgl-generate.py b/xgl-generate.py
index 552c84d..2f155d8 100755
--- a/xgl-generate.py
+++ b/xgl-generate.py
@@ -122,7 +122,6 @@
                 if proto.ret != "XGL_VOID":
                     stmt = "return " + stmt
                 if proto.name == "CreateDevice" and qual == "LOADER_EXPORT ":
-                    stmt_cd = "XGL_RESULT res = " + "(*disp)->%s" % proto.c_call()
                     funcs.append("%s%s\n"
                              "{\n"
                              "    ActivateLayers(&%s);\n"
@@ -131,10 +130,7 @@
                              "            (const XGL_LAYER_DISPATCH_TABLE * const *) wrapped_obj->baseObject;\n"
                              "    %s = wrapped_obj->nextObject;\n"
                              "    %s;\n"
-                             "    const XGL_LAYER_DISPATCH_TABLE * *disp_dev = (const XGL_LAYER_DISPATCH_TABLE *  *) *%s;\n"
-                             "    *disp_dev = (const XGL_LAYER_DISPATCH_TABLE *) *disp;\n"
-                             "    return res;\n"
-                             "}" % (qual, decl, proto.params[0].name, proto.params[0].name, proto.params[0].name, stmt_cd, proto.params[2].name))
+                             "}" % (qual, decl, proto.params[0].name, proto.params[0].name, proto.params[0].name, stmt))
                 elif proto.params[0].ty != "XGL_PHYSICAL_GPU":
                     funcs.append("%s%s\n"
                              "{\n"