layers: Add initialization of instance dispatch table

Make the layer init separate from both the device and the instance
dispatch table init, since these are done at different times.
diff --git a/layers/basic.cpp b/layers/basic.cpp
index e889e31..9f43c6d 100644
--- a/layers/basic.cpp
+++ b/layers/basic.cpp
@@ -33,6 +33,27 @@
 #include "loader_platform.h"
 
 static std::unordered_map<void *, VkLayerDispatchTable *> tableMap;
+static std::unordered_map<void *, VkLayerInstanceDispatchTable *> tableInstanceMap;
+
+static VkLayerInstanceDispatchTable * initLayerInstanceTable(const VkBaseLayerObject *instancew)
+{ // Return the instance dispatch table cached for instancew->baseObject, allocating and filling it on first use.
+    VkLayerInstanceDispatchTable *pTable;
+
+    assert(instancew);
+    std::unordered_map<void *, VkLayerInstanceDispatchTable *>::const_iterator it = tableInstanceMap.find((void *) instancew->baseObject);
+    if (it == tableInstanceMap.end())
+    {
+        pTable =  new VkLayerInstanceDispatchTable; // no entry for this base object yet: allocate one and cache it
+        tableInstanceMap[(void *) instancew->baseObject] = pTable;
+    } else
+    {
+        return it->second; // already cached; returned as-is without re-initializing
+    }
+
+    layer_init_instance_dispatch_table(pTable, (PFN_vkGetInstanceProcAddr) instancew->pGPA, (VkInstance) instancew->nextObject); // only reached for a newly allocated table
+
+    return pTable;
+}
 
 static VkLayerDispatchTable * initLayerTable(const VkBaseLayerObject *gpuw)
 {
@@ -198,7 +219,7 @@
     if (instance == NULL)
         return NULL;
 
-    // TODO initInstanceLayerTable((const VkBaseLayerObject *) instance);
+    initLayerInstanceTable((const VkBaseLayerObject *) instance);
 
     if (!strcmp("vkGetInstanceProcAddr", pName))
         return (void *) vkGetInstanceProcAddr;
diff --git a/layers/draw_state.cpp b/layers/draw_state.cpp
index 95fa1f5..cedf652 100755
--- a/layers/draw_state.cpp
+++ b/layers/draw_state.cpp
@@ -59,8 +59,12 @@
 unordered_map<VkFramebuffer, VkFramebufferCreateInfo*> frameBufferMap;
 
 static VkLayerDispatchTable nextTable;
+static VkLayerInstanceDispatchTable nextInstanceTable;
 static VkBaseLayerObject *pCurObj;
 static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_initOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_tabDeviceOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_tabInstanceOnce);
+
 // TODO : This can be much smarter, using separate locks for separate global data
 static int globalLockInitialized = 0;
 static loader_platform_thread_mutex globalLock;
@@ -1438,6 +1442,23 @@
     }
 }
 
+// TODO handle multiple GPUs/instances for both instance and device dispatch tables
+static void initDeviceTable(void)
+{ // Fill the file-scope device dispatch table nextTable from the layer object currently stored in pCurObj.
+    PFN_vkGetProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA; // pCurObj must already be set by the caller; it is dereferenced unchecked
+    assert(fpNextGPA);
+    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
+}
+
+static void initInstanceTable(void)
+{ // Fill the file-scope instance dispatch table nextInstanceTable from the layer object currently stored in pCurObj.
+    PFN_vkGetInstanceProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetInstanceProcAddr) pCurObj->pGPA; // pCurObj must already be set by the caller; it is dereferenced unchecked
+    assert(fpNextGPA);
+    layer_init_instance_dispatch_table(&nextInstanceTable, fpNextGPA, (VkInstance) pCurObj->nextObject);
+}
+
 static void initDrawState(void)
 {
     const char *strOpt;
@@ -1455,13 +1476,6 @@
         if (g_logFile == NULL)
             g_logFile = stdout;
     }
-    // initialize Layer dispatch table
-    // TODO handle multiple GPUs
-    PFN_vkGetProcAddr fpNextGPA;
-    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;
-    assert(fpNextGPA);
-
-    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
 
     if (!globalLockInitialized)
     {
@@ -1477,8 +1491,6 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
-    pCurObj = (VkBaseLayerObject *) gpu;
-    loader_platform_thread_once(&g_initOnce, initDrawState);
     VkResult result = nextTable.CreateDevice(gpu, pCreateInfo, pDevice);
     return result;
 }
@@ -2732,6 +2744,7 @@
         return NULL;
     pCurObj = gpuw;
     loader_platform_thread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_tabDeviceOnce, initDeviceTable);
 
     if (!strcmp(funcName, "vkGetProcAddr"))
         return (void *) vkGetProcAddr;
@@ -2882,9 +2895,9 @@
     if (instance == NULL)
         return NULL;
 
-    //TODO
-    //pCurObj = gpuw;
-    //loader_platform_thread_once(&g_initInstanceOnce, initInstanceDrawState);
+    pCurObj = instw;
+    loader_platform_thread_once(&g_initOnce, initDrawState);
+    loader_platform_thread_once(&g_tabInstanceOnce, initInstanceTable);
 
     if (!strcmp(funcName, "vkGetInstanceProcAddr"))
         return (void *) vkGetInstanceProcAddr;
diff --git a/layers/mem_tracker.cpp b/layers/mem_tracker.cpp
index 2e9307d..20a69c3 100644
--- a/layers/mem_tracker.cpp
+++ b/layers/mem_tracker.cpp
@@ -43,8 +43,12 @@
 #include "layers_msg.h"
 
 static VkLayerDispatchTable nextTable;
+static VkLayerInstanceDispatchTable nextInstanceTable;
 static VkBaseLayerObject *pCurObj;
 static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_initOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_tabDeviceOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_tabInstanceOnce);
+
 // TODO : This can be much smarter, using separate locks for separate global data
 static int globalLockInitialized = 0;
 static loader_platform_thread_mutex globalLock;
@@ -771,6 +775,23 @@
     }
 }
 
+// TODO handle multiple GPUs/instances for both instance and device dispatch tables
+static void initDeviceTable(void)
+{ // Populate mem_tracker's device dispatch table (nextTable) using the pGPA and nextObject of pCurObj.
+    PFN_vkGetProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA; // relies on the caller having assigned pCurObj beforehand
+    assert(fpNextGPA);
+    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
+}
+
+static void initInstanceTable(void)
+{ // Populate mem_tracker's instance dispatch table (nextInstanceTable) using the pGPA and nextObject of pCurObj.
+    PFN_vkGetInstanceProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetInstanceProcAddr) pCurObj->pGPA; // relies on the caller having assigned pCurObj beforehand
+    assert(fpNextGPA);
+    layer_init_instance_dispatch_table(&nextInstanceTable, fpNextGPA, (VkInstance) pCurObj->nextObject);
+}
+
 static void initMemTracker(
     void)
 {
@@ -790,14 +811,6 @@
         }
     }
 
-    // initialize Layer dispatch table
-    // TODO handle multiple GPUs
-    PFN_vkGetProcAddr fpNextGPA;
-    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;
-    assert(fpNextGPA);
-
-    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
-
     if (!globalLockInitialized)
     {
         // TODO/TBD: Need to delete this mutex sometime.  How???  One
@@ -815,8 +828,6 @@
     const VkDeviceCreateInfo *pCreateInfo,
     VkDevice                 *pDevice)
 {
-    pCurObj = (VkBaseLayerObject *) gpu;
-    loader_platform_thread_once(&g_initOnce, initMemTracker);
     VkResult result = nextTable.CreateDevice(gpu, pCreateInfo, pDevice);
     // Save off device in case we need it to create Fences
     globalDevice = *pDevice;
@@ -925,6 +936,7 @@
     {
         pCurObj = (VkBaseLayerObject *)  gpu;
         loader_platform_thread_once(&g_initOnce, initMemTracker);
+        loader_platform_thread_once(&g_tabDeviceOnce, initDeviceTable);
         VkResult result = nextTable.EnumerateLayers(gpu,
             maxStringSize, pLayerCount, pOutLayers, pReserved);
         return result;
@@ -2149,6 +2161,7 @@
     }
     pCurObj = gpuw;
     loader_platform_thread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_tabDeviceOnce, initDeviceTable);
 
     if (!strcmp(funcName, "vkGetProcAddr"))
         return (void *) vkGetProcAddr;
@@ -2311,9 +2324,10 @@
     if (instance == NULL) {
         return NULL;
     }
-    //TODO
-    //pCurObj = instw;
-    //loader_platform_thread_once(&g_initInstanceOnce, initInstanceMemTracker);
+
+    pCurObj = instw;
+    loader_platform_thread_once(&g_initOnce, initMemTracker);
+    loader_platform_thread_once(&g_tabInstanceOnce, initInstanceTable);
 
     if (!strcmp(funcName, "vkGetProcAddr"))
         return (void *) vkGetProcAddr;
diff --git a/layers/multi.cpp b/layers/multi.cpp
index 78ee302..9732654 100644
--- a/layers/multi.cpp
+++ b/layers/multi.cpp
@@ -35,9 +35,11 @@
 #include "loader_platform.h"
 
 static void initLayerTable(const VkBaseLayerObject *gpuw, VkLayerDispatchTable *pTable, const unsigned int layerNum);
+static void initLayerInstanceTable(const VkBaseLayerObject *instw, VkLayerInstanceDispatchTable *pTable, const unsigned int layerNum);
 
 /******************************** Layer multi1 functions **************************/
 static std::unordered_map<void *, VkLayerDispatchTable *> tableMap1;
+static std::unordered_map<void *, VkLayerInstanceDispatchTable *> tableInstanceMap1;
 static bool layer1_first_activated = false;
 
 static VkLayerDispatchTable * getLayer1Table(const VkBaseLayerObject *gpuw)
@@ -57,6 +59,23 @@
         return it->second;
     }
 }
+static VkLayerInstanceDispatchTable * getLayer1InstanceTable(const VkBaseLayerObject *instw)
+{ // Return layer multi1's instance dispatch table for instw->baseObject, creating it on first request.
+    VkLayerInstanceDispatchTable *pTable;
+
+    assert(instw);
+    std::unordered_map<void *, VkLayerInstanceDispatchTable *>::const_iterator it = tableInstanceMap1.find((void *) instw->baseObject);
+    if (it == tableInstanceMap1.end())
+    {
+        pTable =  new VkLayerInstanceDispatchTable; // not cached yet: allocate, store in tableInstanceMap1, then initialize
+        tableInstanceMap1[(void *) instw->baseObject] = pTable;
+        initLayerInstanceTable(instw, pTable, 1); // layerNum 1 identifies this layer to the shared initializer
+        return pTable;
+    } else
+    {
+        return it->second; // cached table is returned without re-initialization
+    }
+}
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -137,14 +156,14 @@
     }
 }
 
-VK_LAYER_EXPORT void * VKAPI multi1InstanceGetProcAddr(VkInstance inst, const char* pName)
+VK_LAYER_EXPORT void * VKAPI multi1GetInstanceProcAddr(VkInstance inst, const char* pName)
 {
     VkBaseLayerObject* instw = (VkBaseLayerObject *) inst;
 
     if (inst == NULL)
         return NULL;
 
-    //TODO getLayer1InstanceTable(instw);
+    getLayer1InstanceTable(instw);
 
     if (!strcmp("vkCreateDevice", pName))
         return (void *) multi1CreateDevice;
@@ -161,8 +180,27 @@
 
 /******************************** Layer multi2 functions **************************/
 static std::unordered_map<void *, VkLayerDispatchTable *> tableMap2;
+static std::unordered_map<void *, VkLayerInstanceDispatchTable *> tableInstanceMap2;
 static bool layer2_first_activated = false;
 
+static VkLayerInstanceDispatchTable * getLayer2InstanceTable(const VkBaseLayerObject *instw)
+{ // Return layer multi2's instance dispatch table for instw->baseObject, creating it on first request.
+    VkLayerInstanceDispatchTable *pTable;
+
+    assert(instw);
+    std::unordered_map<void *, VkLayerInstanceDispatchTable *>::const_iterator it = tableInstanceMap2.find((void *) instw->baseObject);
+    if (it == tableInstanceMap2.end())
+    {
+        pTable =  new VkLayerInstanceDispatchTable; // not cached yet: allocate, store in tableInstanceMap2, then initialize
+        tableInstanceMap2[(void *) instw->baseObject] = pTable;
+        initLayerInstanceTable(instw, pTable, 2); // layerNum 2 identifies this layer to the shared initializer
+        return pTable;
+    } else
+    {
+        return it->second; // cached table is returned without re-initialization
+    }
+}
+
 static VkLayerDispatchTable * getLayer2Table(const VkBaseLayerObject *gpuw)
 {
     VkLayerDispatchTable *pTable;
@@ -259,14 +297,14 @@
     }
 }
 
-VK_LAYER_EXPORT void * VKAPI multi2InstanceGetProcAddr(VkInstance inst, const char* pName)
+VK_LAYER_EXPORT void * VKAPI multi2GetInstanceProcAddr(VkInstance inst, const char* pName)
 {
     VkBaseLayerObject* instw = (VkBaseLayerObject *) inst;
 
     if (inst == NULL)
         return NULL;
 
-    //TODO getLayer2InstanceTable(instw);
+    getLayer2InstanceTable(instw);
 
     if (!strcmp("vkCreateDevice", pName))
         return (void *) multi2CreateDevice;
@@ -379,17 +417,17 @@
     else if (!strcmp("vkGetProcAddr", pName))
         return (void *) vkGetProcAddr;
     else if (!strcmp("multi1GetInstanceProcAddr", pName))
-        return (void *) multi1GetProcAddr;
+        return (void *) multi1GetInstanceProcAddr;
     else if (!strcmp("multi2GetInstanceProcAddr", pName))
-        return (void *) multi2GetProcAddr;
+        return (void *) multi2GetInstanceProcAddr;
     else if (!strcmp("vkGetInstanceProcAddr", pName))
         return (void *) vkGetProcAddr;
 
     // use first layer activated as GPA dispatch table activation happens in order
     else if (layer1_first_activated)
-        return multi1InstanceGetProcAddr(inst, pName);
+        return multi1GetInstanceProcAddr(inst, pName);
     else if (layer2_first_activated)
-        return multi2InstanceGetProcAddr(inst, pName);
+        return multi2GetInstanceProcAddr(inst, pName);
     else
         return NULL;
 
@@ -407,3 +445,13 @@
 
     layer_initialize_dispatch_table(pTable, (PFN_vkGetProcAddr) gpuw->pGPA, (VkPhysicalDevice) gpuw->nextObject);
 }
+
+static void initLayerInstanceTable(const VkBaseLayerObject *instw, VkLayerInstanceDispatchTable *pTable, const unsigned int layerNum)
+{ // Update the "first activated" flags for layer layerNum (1 or 2), then fill pTable from instw.
+    if (layerNum == 2 && layer1_first_activated == false)
+        layer2_first_activated = true; // layer 2 is marked first only while layer 1 has not been
+    if (layerNum == 1 && layer2_first_activated == false)
+        layer1_first_activated = true; // and vice versa, so the two flags are never both set here
+
+    layer_init_instance_dispatch_table(pTable, (PFN_vkGetInstanceProcAddr) instw->pGPA, (VkInstance) instw->nextObject);
+}
diff --git a/layers/param_checker.cpp b/layers/param_checker.cpp
index a63e4a9..8ed6d28 100644
--- a/layers/param_checker.cpp
+++ b/layers/param_checker.cpp
@@ -41,10 +41,31 @@
 #include "layers_msg.h"
 
 static VkLayerDispatchTable nextTable;
+static VkLayerInstanceDispatchTable nextInstanceTable;
 static VkBaseLayerObject *pCurObj;
-static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(initOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabDeviceOnce);
+static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabInstanceOnce);
 
 #include "vk_dispatch_table_helper.h"
+
+// TODO handle multiple GPUs/instances for both instance and device dispatch tables
+static void initDeviceTable(void)
+{ // Initialize param_checker's device dispatch table (nextTable) from the object held in pCurObj.
+    PFN_vkGetProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA; // pCurObj is read without a null check; callers assign it first
+    assert(fpNextGPA);
+    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
+}
+
+static void initInstanceTable(void)
+{ // Initialize param_checker's instance dispatch table (nextInstanceTable) from the object held in pCurObj.
+    PFN_vkGetInstanceProcAddr fpNextGPA;
+    fpNextGPA = (PFN_vkGetInstanceProcAddr) pCurObj->pGPA; // pCurObj is read without a null check; callers assign it first
+    assert(fpNextGPA);
+    layer_init_instance_dispatch_table(&nextInstanceTable, fpNextGPA, (VkInstance) pCurObj->nextObject);
+}
+
 static void initParamChecker(void)
 {
 
@@ -64,11 +85,6 @@
             g_logFile = stdout;
     }
 
-    PFN_vkGetProcAddr fpNextGPA;
-    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;
-    assert(fpNextGPA);
-
-    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);
 }
 
 void PreCreateInstance(const VkApplicationInfo* pAppInfo, const VkAllocCallbacks* pAllocCb)
@@ -141,8 +157,6 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkGetPhysicalDeviceInfo(VkPhysicalDevice gpu, VkPhysicalDeviceInfoType infoType, size_t* pDataSize, void* pData)
 {
-    pCurObj = (VkBaseLayerObject *) gpu;
-    loader_platform_thread_once(&tabOnce, initParamChecker);
     char str[1024];
     if (!validate_VkPhysicalDeviceInfoType(infoType)) {
         sprintf(str, "Parameter infoType to function GetPhysicalDeviceInfo has invalid value of %i.", (int)infoType);
@@ -228,8 +242,6 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
-    pCurObj = (VkBaseLayerObject *) gpu;
-    loader_platform_thread_once(&tabOnce, initParamChecker);
     PreCreateDevice(gpu, pCreateInfo);
     VkResult result = nextTable.CreateDevice(gpu, pCreateInfo, pDevice);
     PostCreateDevice(result, pDevice);
@@ -313,7 +325,8 @@
         sprintf(str, "At start of layered EnumerateLayers\n");
         layerCbMsg(VK_DBG_MSG_UNKNOWN, VK_VALIDATION_LEVEL_0, nullptr, 0, 0, "PARAMCHECK", str);
         pCurObj = (VkBaseLayerObject *) gpu;
-        loader_platform_thread_once(&tabOnce, initParamChecker);
+        loader_platform_thread_once(&initOnce, initParamChecker);
+        loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);
         VkResult result = nextTable.EnumerateLayers(gpu, maxStringSize, pLayerCount, pOutLayers, pReserved);
         sprintf(str, "Completed layered EnumerateLayers\n");
         layerCbMsg(VK_DBG_MSG_UNKNOWN, VK_VALIDATION_LEVEL_0, nullptr, 0, 0, "PARAMCHECK", str);
@@ -431,8 +444,6 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkGetMultiDeviceCompatibility(VkPhysicalDevice gpu0, VkPhysicalDevice gpu1, VkPhysicalDeviceCompatibilityInfo* pInfo)
 {
-    pCurObj = (VkBaseLayerObject *) gpu0;
-    loader_platform_thread_once(&tabOnce, initParamChecker);
 
     VkResult result = nextTable.GetMultiDeviceCompatibility(gpu0, gpu1, pInfo);
     return result;
@@ -2250,7 +2261,8 @@
     if (gpu == NULL)
         return NULL;
     pCurObj = gpuw;
-    loader_platform_thread_once(&tabOnce, initParamChecker);
+    loader_platform_thread_once(&initOnce, initParamChecker);
+    loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);
 
     addr = layer_intercept_proc(funcName);
     if (addr)
@@ -2268,8 +2280,9 @@
     void* addr;
     if (inst == NULL)
         return NULL;
-    //TODO pCurObj = instw;
-    //TODO loader_platform_thread_once(&tabOnce, initParamChecker);
+    pCurObj = instw;
+    loader_platform_thread_once(&initOnce, initParamChecker);
+    loader_platform_thread_once(&tabInstanceOnce, initInstanceTable);
 
     addr = layer_intercept_instance_proc(funcName);
     if (addr)
diff --git a/layers/shader_checker.cpp b/layers/shader_checker.cpp
index f36fee3..8de9239 100644
--- a/layers/shader_checker.cpp
+++ b/layers/shader_checker.cpp
@@ -44,6 +44,7 @@
 
 static std::unordered_map<void *, VkLayerDispatchTable *> tableMap;
 static VkBaseLayerObject *pCurObj;
+static std::unordered_map<void *, VkLayerInstanceDispatchTable *> tableInstanceMap;
 static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(g_initOnce);
 // TODO : This can be much smarter, using separate locks for separate global data
 static int globalLockInitialized = 0;
@@ -164,6 +165,25 @@
     return pTable;
 }
 
+static VkLayerInstanceDispatchTable * initLayerInstanceTable(const VkBaseLayerObject *instw)
+{ // Return the instance dispatch table cached for instw->baseObject, allocating and filling it on first use.
+    VkLayerInstanceDispatchTable *pTable;
+
+    assert(instw);
+    std::unordered_map<void *, VkLayerInstanceDispatchTable *>::const_iterator it = tableInstanceMap.find((void *) instw->baseObject);
+    if (it == tableInstanceMap.end())
+    {
+        pTable =  new VkLayerInstanceDispatchTable; // no entry for this base object yet: allocate one and cache it
+        tableInstanceMap[(void *) instw->baseObject] = pTable;
+    } else
+    {
+        return it->second; // already cached; returned as-is without re-initializing
+    }
+
+    layer_init_instance_dispatch_table(pTable, (PFN_vkGetInstanceProcAddr) instw->pGPA, (VkInstance) instw->nextObject); // only reached for a newly allocated table
+
+    return pTable;
+}
 
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
@@ -1029,9 +1049,9 @@
     if (inst == NULL)
         return NULL;
 
-    //TODO initLayerTable((const VkBaseLayerObject *) inst);
+    initLayerInstanceTable((const VkBaseLayerObject *) inst);
 
-    // TODO loader_platform_thread_once(&g_initOnce, initInstanceLayer);
+    loader_platform_thread_once(&g_initOnce, initLayer);
 
 #define ADD_HOOK(fn)    \
     if (!strncmp(#fn, pName, sizeof(#fn))) \
diff --git a/vk-generate.py b/vk-generate.py
index aff0d59..c6f70f6 100755
--- a/vk-generate.py
+++ b/vk-generate.py
@@ -110,22 +110,30 @@
                           "#include <vkLayer.h>",
                           "#include <string.h>"])
 
-    def _generate_init(self):
+    def _generate_init(self, type):
         stmts = []
-        for proto in self.protos:
-            if proto.name != "GetGlobalExtensionInfo":
+        func = []
+        if type == "device":
+            for proto in self.protos:
                 stmts.append("table->%s = (PFN_vk%s) gpa(gpu, \"vk%s\");" %
                         (proto.name, proto.name, proto.name))
-            else:
-                stmts.append("table->%s = vk%s; /* non-dispatchable */" %
-                             (proto.name, proto.name))
-
-        func = []
-        func.append("static inline void %s_initialize_dispatch_table(VkLayerDispatchTable *table,"
+            func.append("static inline void %s_initialize_dispatch_table(VkLayerDispatchTable *table,"
                 % self.prefix)
-        func.append("%s                                              PFN_vkGetProcAddr gpa,"
+            func.append("%s                                              PFN_vkGetProcAddr gpa,"
                 % (" " * len(self.prefix)))
-        func.append("%s                                              VkPhysicalDevice gpu)"
+            func.append("%s                                              VkPhysicalDevice gpu)"
+                % (" " * len(self.prefix)))
+        else:
+            for proto in self.protos:
+                if proto.params[0].ty != "VkInstance" and proto.params[0].ty != "VkPhysicalDevice":
+                    continue
+                stmts.append("table->%s = (PFN_vk%s) gpa(instance, \"vk%s\");" %
+                          (proto.name, proto.name, proto.name))
+            func.append("static inline void %s_init_instance_dispatch_table(VkLayerInstanceDispatchTable *table,"
+                % self.prefix)
+            func.append("%s                                              PFN_vkGetInstanceProcAddr gpa,"
+                % (" " * len(self.prefix)))
+            func.append("%s                                              VkInstance instance)"
                 % (" " * len(self.prefix)))
         func.append("{")
         func.append("    %s" % "\n    ".join(stmts))
@@ -134,7 +142,8 @@
         return "\n".join(func)
 
     def generate_body(self):
-        body = [self._generate_init()]
+        body = [self._generate_init("device"),
+                self._generate_init("instance")]
 
         return "\n\n".join(body)
 
diff --git a/vk-layer-generate.py b/vk-layer-generate.py
index 0311209..0064259 100755
--- a/vk-layer-generate.py
+++ b/vk-layer-generate.py
@@ -411,7 +411,8 @@
                          "    if (gpu == VK_NULL_HANDLE)\n"
                          "        return NULL;\n"
                          "    pCurObj = gpuw;\n"
-                         "    loader_platform_thread_once(&tabOnce, init%s);\n\n"
+                         "    loader_platform_thread_once(&initOnce, init%s);\n\n"
+                         "    loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n\n"
                          "    addr = layer_intercept_proc(funcName);\n"
                          "    if (addr)\n"
                          "        return addr;" % self.layer_name)
@@ -423,13 +424,8 @@
                 cpp_prefix = "reinterpret_cast<void*>("
                 cpp_postfix = ")"
             for ext_name in extensions:
-#<<<<<<< HEAD
                 func_body.append('    else if (!strncmp("%s", funcName, sizeof("%s")))\n'
                                  '        return %s%s%s;' % (ext_name, ext_name, cpp_prefix, ext_name, cpp_postfix))
-#=======
-#                func_body.append('    else if (!strcmp("%s", funcName))\n'
-#                                 '        return %s;' % (ext_name, ext_name))
-#>>>>>>> layers: Add GetInstanceProcAddr() to all layers
         func_body.append("    else {\n"
                          "        if (gpuw->pGPA == NULL)\n"
                          "            return NULL;\n"
@@ -442,8 +438,9 @@
                          "    void* addr;\n"
                          "    if (inst == VK_NULL_HANDLE)\n"
                          "        return NULL;\n"
-                         "    // TODO pCurObj = instw;\n"
-                         "    // TODO loader_platform_thread_once(&tabInstanceOnce, initInstance%s);\n\n"
+                         "    pCurObj = instw;\n"
+                         "    loader_platform_thread_once(&initOnce, init%s);\n\n"
+                         "    loader_platform_thread_once(&tabInstanceOnce, initInstanceTable);\n\n"
                          "    addr = layer_intercept_instance_proc(funcName);\n"
                          "    if (addr)\n"
                          "        return addr;" % self.layer_name)
@@ -481,11 +478,7 @@
             func_body.append('            g_logFile = stdout;')
             func_body.append('    }')
             func_body.append('')
-        func_body.append('    PFN_vkGetProcAddr fpNextGPA;\n'
-                         '    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;\n'
-                         '    assert(fpNextGPA);\n')
 
-        func_body.append("    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);")
         if lockname is not None:
             func_body.append("    if (!%sLockInitialized)" % lockname)
             func_body.append("    {")
@@ -496,6 +489,22 @@
             func_body.append("        %sLockInitialized = 1;" % lockname)
             func_body.append("    }")
         func_body.append("}\n")
+        func_body.append('')
+        func_body.append('static void initDeviceTable(void)')
+        func_body.append('{')
+        func_body.append('    PFN_vkGetProcAddr fpNextGPA;')
+        func_body.append('    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;')
+        func_body.append('    assert(fpNextGPA);')
+        func_body.append('    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);')
+        func_body.append('}')
+        func_body.append('')
+        func_body.append('static void initInstanceTable(void)')
+        func_body.append('{')
+        func_body.append('    PFN_vkGetInstanceProcAddr fpNextGPA;')
+        func_body.append('    fpNextGPA = (PFN_vkGetInstanceProcAddr) pCurObj->pGPA;')
+        func_body.append('    assert(fpNextGPA);')
+        func_body.append('    layer_init_instance_dispatch_table(&nextInstanceTable, fpNextGPA, (VkInstance) pCurObj->nextObject);')
+        func_body.append('}')
         return "\n".join(func_body)
 
 class LayerFuncsSubcommand(Subcommand):
@@ -514,7 +523,7 @@
 
 class GenericLayerSubcommand(Subcommand):
     def generate_header(self):
-        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include "loader_platform.h"\n#include "vkLayer.h"\n//The following is #included again to catch certain OS-specific functions being used:\n#include "loader_platform.h"\n\n#include "layers_config.h"\n#include "layers_msg.h"\n\nstatic VkLayerDispatchTable nextTable;\nstatic VkBaseLayerObject *pCurObj;\n\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);'
+        return '#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include "loader_platform.h"\n#include "vkLayer.h"\n//The following is #included again to catch certain OS-specific functions being used:\n#include "loader_platform.h"\n\n#include "layers_config.h"\n#include "layers_msg.h"\n\nstatic VkLayerDispatchTable nextTable;\nstatic VkLayerInstanceDispatchTable nextInstanceTable;\nstatic VkBaseLayerObject *pCurObj;\n\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabDeviceOnce);\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabInstanceOnce);\nstatic LOADER_PLATFORM_THREAD_ONCE_DECLARATION(initOnce);'
 
     def generate_intercept(self, proto, qual):
         if proto.name in [ 'DbgRegisterMsgCallback', 'DbgUnregisterMsgCallback' , 'GetGlobalExtensionInfo']:
@@ -535,7 +544,8 @@
                      '        sprintf(str, "At start of layered %s\\n");\n'
                      '        layerCbMsg(VK_DBG_MSG_UNKNOWN, VK_VALIDATION_LEVEL_0, gpu, 0, 0, (char *) "GENERIC", (char *) str);\n'
                      '        pCurObj = (VkBaseLayerObject *) gpu;\n'
-                     '        loader_platform_thread_once(&tabOnce, init%s);\n'
+                     '        loader_platform_thread_once(&initOnce, init%s);\n'
+                     '        loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n'
                      '        %snextTable.%s;\n'
                      '        sprintf(str, "Completed layered %s\\n");\n'
                      '        layerCbMsg(VK_DBG_MSG_UNKNOWN, VK_VALIDATION_LEVEL_0, gpu, 0, 0, (char *) "GENERIC", (char *) str);\n'
@@ -606,10 +616,13 @@
         header_txt.append('#include "loader_platform.h"')
         header_txt.append('')
         header_txt.append('static VkLayerDispatchTable nextTable;')
+        header_txt.append('static VkLayerInstanceDispatchTable nextInstanceTable;')
         header_txt.append('static VkBaseLayerObject *pCurObj;')
         header_txt.append('static bool g_APIDumpDetailed = true;')
         header_txt.append('')
-        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabInstanceOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabDeviceOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(initOnce);')
         header_txt.append('static int printLockInitialized = 0;')
         header_txt.append('static loader_platform_thread_mutex printLock;')
         header_txt.append('')
@@ -697,11 +710,6 @@
         func_body.append('')
         func_body.append('    ConfigureOutputStream(writeToFile, flushAfterWrite);')
         func_body.append('')
-        func_body.append('    PFN_vkGetProcAddr fpNextGPA;')
-        func_body.append('    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;')
-        func_body.append('    assert(fpNextGPA);')
-        func_body.append('    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);')
-        func_body.append('')
         func_body.append('    if (!printLockInitialized)')
         func_body.append('    {')
         func_body.append('        // TODO/TBD: Need to delete this mutex sometime.  How???')
@@ -710,6 +718,22 @@
         func_body.append('    }')
         func_body.append('}')
         func_body.append('')
+        func_body.append('static void initDeviceTable(void)')
+        func_body.append('{')
+        func_body.append('    PFN_vkGetProcAddr fpNextGPA;')
+        func_body.append('    fpNextGPA = (PFN_vkGetProcAddr) pCurObj->pGPA;')
+        func_body.append('    assert(fpNextGPA);')
+        func_body.append('    layer_initialize_dispatch_table(&nextTable, fpNextGPA, (VkPhysicalDevice) pCurObj->nextObject);')
+        func_body.append('}')
+        func_body.append('')
+        func_body.append('static void initInstanceTable(void)')
+        func_body.append('{')
+        func_body.append('    PFN_vkGetInstanceProcAddr fpNextGPA;')
+        func_body.append('    fpNextGPA = (PFN_vkGetInstanceProcAddr) pCurObj->pGPA;')
+        func_body.append('    assert(fpNextGPA);')
+        func_body.append('    layer_init_instance_dispatch_table(&nextInstanceTable, fpNextGPA, (VkInstance) pCurObj->nextObject);')
+        func_body.append('}')
+        func_body.append('')
         return "\n".join(func_body)
 
     def generate_intercept(self, proto, qual):
@@ -831,7 +855,8 @@
                      '    using namespace StreamControl;\n'
                      '    if (gpu != NULL) {\n'
                      '        pCurObj = (VkBaseLayerObject *) gpu;\n'
-                     '        loader_platform_thread_once(&tabOnce, init%s);\n'
+                     '        loader_platform_thread_once(&initOnce, init%s);\n'
+                     '        loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n'
                      '        %snextTable.%s;\n'
                      '        %s    %s    %s\n'
                      '    %s'
@@ -865,14 +890,18 @@
     def generate_header(self):
         header_txt = []
         header_txt.append('#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <inttypes.h>\n#include "loader_platform.h"')
-        header_txt.append('#include "object_track.h"\n\nstatic VkLayerDispatchTable nextTable;\nstatic VkBaseLayerObject *pCurObj;')
+        header_txt.append('#include "object_track.h"\n\n')
         header_txt.append('#include <unordered_map>')
         header_txt.append('using namespace std;')
         header_txt.append('// The following is #included again to catch certain OS-specific functions being used:')
         header_txt.append('#include "loader_platform.h"')
         header_txt.append('#include "layers_config.h"')
         header_txt.append('#include "layers_msg.h"')
-        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);')
+        header_txt.append('static VkLayerDispatchTable nextTable;\nstatic VkLayerInstanceDispatchTable nextInstanceTable;\n')
+        header_txt.append('static VkBaseLayerObject *pCurObj;')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(initOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabDeviceOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabInstanceOnce);')
         header_txt.append('static long long unsigned int object_track_index = 0;')
         header_txt.append('static int objLockInitialized = 0;')
         header_txt.append('static loader_platform_thread_mutex objLock;')
@@ -1226,7 +1255,8 @@
                      '{\n'
                      '    if (gpu != VK_NULL_HANDLE) {\n'
                      '        pCurObj = (VkBaseLayerObject *) gpu;\n'
-                     '        loader_platform_thread_once(&tabOnce, init%s);\n'
+                     '        loader_platform_thread_once(&initOnce, init%s);\n'
+                     '        loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n'
                      '        %snextTable.%s;\n'
                      '    %s%s'
                      '    %s'
@@ -1238,7 +1268,7 @@
                      '        strncpy((char *) pOutLayers[0], "%s", maxStringSize);\n'
                      '        return VK_SUCCESS;\n'
                      '    }\n'
-                         '}' % (qual, decl, self.layer_name, ret_val, proto.c_call(), create_line, destroy_line, stmt, self.layer_name))
+                     '}' % (qual, decl, self.layer_name, ret_val, proto.c_call(), create_line, destroy_line, stmt, self.layer_name))
         elif 'GetPhysicalDeviceInfo' in proto.name:
             gpu_state  = '    if (infoType == VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES) {\n'
             gpu_state += '        if (pData != NULL) {\n'
@@ -1249,13 +1279,11 @@
             gpu_state += '    }\n'
             funcs.append('%s%s\n'
                      '{\n'
-                     '    pCurObj = (VkBaseLayerObject *) gpu;\n'
-                     '    loader_platform_thread_once(&tabOnce, init%s);\n'
                      '    %snextTable.%s;\n'
                      '%s%s'
                      '%s'
                      '%s'
-                     '}' % (qual, decl, self.layer_name, ret_val, proto.c_call(), create_line, destroy_line, gpu_state, stmt))
+                     '}' % (qual, decl, ret_val, proto.c_call(), create_line, destroy_line, gpu_state, stmt))
         else:
             funcs.append('%s%s\n'
                      '{\n'
@@ -1291,8 +1319,11 @@
         header_txt.append('#include "loader_platform.h"\n')
         header_txt.append('#include "layers_msg.h"\n')
         header_txt.append('static VkLayerDispatchTable nextTable;')
+        header_txt.append('static VkLayerInstanceDispatchTable nextInstanceTable;')
         header_txt.append('static VkBaseLayerObject *pCurObj;')
-        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabOnce);\n')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabDeviceOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(tabInstanceOnce);')
+        header_txt.append('static LOADER_PLATFORM_THREAD_ONCE_DECLARATION(initOnce);\n')
         header_txt.append('using namespace std;')
         header_txt.append('static unordered_map<int, void*> proxy_objectsInUse;\n')
         header_txt.append('static unordered_map<VkObject, loader_platform_thread_id> objectsInUse;\n')
@@ -1363,7 +1394,8 @@
                      '{\n'
                      '    if (gpu != NULL) {\n'
                      '        pCurObj = (VkBaseLayerObject *) %s;\n'
-                     '        loader_platform_thread_once(&tabOnce, init%s);\n'
+                     '        loader_platform_thread_once(&initOnce, init%s);\n'
+                     '        loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n'
                      '        %snextTable.%s;\n'
                      '        fflush(stdout);\n'
                      '    %s'
@@ -1402,7 +1434,9 @@
             funcs.append('%s%s\n'
                      '{\n'
                      '    pCurObj = (VkBaseLayerObject *) %s;\n'
-                     '    loader_platform_thread_once(&tabOnce, init%s);\n'
+                     # Same once-controls as the gpu-parameter path: initOnce guards the layer's init routine, tabDeviceOnce guards initDeviceTable.
+                     '    loader_platform_thread_once(&initOnce, init%s);\n'
+                     '    loader_platform_thread_once(&tabDeviceOnce, initDeviceTable);\n'
                      '    %snextTable.%s;\n'
                      '%s'
                      '}' % (qual, decl, proto.params[0].name, self.layer_name, ret_val, proto.c_call(), stmt))