loader: Move CreateDevice to device table

Discovered an issue where a layer was doing cleanup
in its DestroyDevice function but its CreateDevice
was never called.
This happened because the extension was only enabled
on the device chain and the device chain doesn't actually
call CreateDevice. That happens on the Instance chain.
This change makes it possible to call down the device
chain, which is terminated by the ICD.
We need to know the real device object to construct the
device chain hierarchy, and when calling down the device
chain it should end with the ICD doing the actual device
object creation.

This patch fixes the issue by using the
same process as CreateInstance. The loader will call
the ICD's CreateDevice and pass that in the *pDevice
argument. The layers then ignore the PhysicalDevice parameter
and use the *pDevice to access the device chain.
To prevent the ICD from being called twice, we needed to
add a special loader_GetDeviceChainProcAddr that provides
a stub for only CreateDevice as the end of the chain.

Integrate review feedback.
diff --git a/layers/basic.cpp b/layers/basic.cpp
index 990f669..de2755b 100644
--- a/layers/basic.cpp
+++ b/layers/basic.cpp
@@ -95,7 +95,7 @@
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
     printf("At start of wrapped vkCreateDevice() call w/ gpu: %p\n", (void*)gpu);
-    VkResult result = instance_dispatch_table(gpu)->CreateDevice(gpu, pCreateInfo, pDevice);
+    VkResult result = device_dispatch_table(*pDevice)->CreateDevice(gpu, pCreateInfo, pDevice);
     printf("Completed wrapped vkCreateDevice() call w/ pDevice, Device %p: %p\n", (void*)pDevice, (void *) *pDevice);
     return result;
 }
@@ -137,6 +137,8 @@
         return (void *) vkGetDeviceProcAddr;
     }
 
+    if (!strcmp("vkCreateDevice", pName))
+        return (void *) vkCreateDevice;
     if (!strcmp("vkDestroyDevice", pName))
         return (void *) vkDestroyDevice;
     if (!strcmp("vkLayerExtension1", pName))
@@ -170,8 +172,6 @@
         return (void*) vkGetGlobalExtensionCount;
     if (!strcmp("vkGetGlobalExtensionProperties", pName))
         return (void*) vkGetGlobalExtensionProperties;
-    if (!strcmp("vkCreateDevice", pName))
-        return (void *) vkCreateDevice;
     else
     {
         if (instance_dispatch_table(instance)->GetInstanceProcAddr == NULL)
diff --git a/layers/draw_state.cpp b/layers/draw_state.cpp
index 0424416..146c641 100644
--- a/layers/draw_state.cpp
+++ b/layers/draw_state.cpp
@@ -1581,8 +1581,8 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
-    VkLayerInstanceDispatchTable *pInstanceTable = get_dispatch_table(draw_state_instance_table_map, gpu);
-    VkResult result = pInstanceTable->CreateDevice(gpu, pCreateInfo, pDevice);
+    VkLayerDispatchTable *pDeviceTable = get_dispatch_table(draw_state_device_table_map, *pDevice);
+    VkResult result = pDeviceTable->CreateDevice(gpu, pCreateInfo, pDevice);
     if (result == VK_SUCCESS) {
         layer_data *my_instance_data = get_my_data_ptr(get_dispatch_key(gpu), layer_data_map);
         VkLayerDispatchTable *pTable = get_dispatch_table(draw_state_device_table_map, *pDevice);
@@ -2912,6 +2912,8 @@
         initDeviceTable(draw_state_device_table_map, (const VkBaseLayerObject *) dev);
         return (void *) vkGetDeviceProcAddr;
     }
+    if (!strcmp(funcName, "vkCreateDevice"))
+        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkDestroyDevice"))
         return (void*) vkDestroyDevice;
     if (!strcmp(funcName, "vkQueueSubmit"))
@@ -3071,8 +3073,6 @@
         return (void *) vkCreateInstance;
     if (!strcmp(funcName, "vkDestroyInstance"))
         return (void *) vkDestroyInstance;
-    if (!strcmp(funcName, "vkCreateDevice"))
-        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionCount"))
         return (void*) vkGetPhysicalDeviceExtensionCount;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionProperties"))
diff --git a/layers/image.cpp b/layers/image.cpp
index dab89e9..0282795 100644
--- a/layers/image.cpp
+++ b/layers/image.cpp
@@ -169,7 +169,7 @@
 
 VK_LAYER_EXPORT VkResult VKAPI vkCreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, VkDevice* pDevice)
 {
-    VkLayerInstanceDispatchTable *pTable = get_dispatch_table(image_instance_table_map, physicalDevice);
+    VkLayerDispatchTable *pTable = get_dispatch_table(image_device_table_map, *pDevice);
     VkResult result = pTable->CreateDevice(physicalDevice, pCreateInfo, pDevice);
     if(result == VK_SUCCESS)
     {
@@ -384,6 +384,8 @@
         return (void*) vkGetDeviceProcAddr;
     }
 
+    if (!strcmp(funcName, "vkCreateDevice"))
+        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkDestroyDevice"))
         return (void*) vkDestroyDevice;
     if (!strcmp(funcName, "vkCreateImage"))
@@ -418,8 +420,6 @@
         return (void*) vkCreateInstance;
     if (!strcmp(funcName, "vkDestroyInstance"))
         return (void *) vkDestroyInstance;
-    if (!strcmp(funcName, "vkCreateDevice"))
-        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionProperties"))
         return (void*) vkGetPhysicalDeviceExtensionProperties;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionCount"))
diff --git a/layers/layers_table.cpp b/layers/layers_table.cpp
index b2edb5c..51baf2e 100644
--- a/layers/layers_table.cpp
+++ b/layers/layers_table.cpp
@@ -34,7 +34,6 @@
 // Map lookup must be thread safe
 VkLayerDispatchTable *device_dispatch_table(VkObject object)
 {
-//    VkLayerDispatchTable *pDisp  = *(VkLayerDispatchTable **) object;
     dispatch_key key = get_dispatch_key(object);
     device_table_map::const_iterator it = tableMap.find((void *) key);
     assert(it != tableMap.end() && "Not able to find device dispatch entry");
@@ -43,7 +42,6 @@
 
 VkLayerInstanceDispatchTable *instance_dispatch_table(VkObject object)
 {
-//    VkLayerInstanceDispatchTable *pDisp = *(VkLayerInstanceDispatchTable **) object;
     dispatch_key key = get_dispatch_key(object);
     instance_table_map::const_iterator it = tableInstanceMap.find((void *) key);
 #if DISPATCH_MAP_DEBUG
diff --git a/layers/mem_tracker.cpp b/layers/mem_tracker.cpp
index 817cb81..789899a 100644
--- a/layers/mem_tracker.cpp
+++ b/layers/mem_tracker.cpp
@@ -889,8 +889,8 @@
     const VkDeviceCreateInfo *pCreateInfo,
     VkDevice                 *pDevice)
 {
-    VkLayerInstanceDispatchTable *pInstanceTable = get_dispatch_table(mem_tracker_instance_table_map, gpu);
-    VkResult result = pInstanceTable->CreateDevice(gpu, pCreateInfo, pDevice);
+    VkLayerDispatchTable *pDeviceTable = get_dispatch_table(mem_tracker_device_table_map, *pDevice);
+    VkResult result = pDeviceTable->CreateDevice(gpu, pCreateInfo, pDevice);
     if (result == VK_SUCCESS) {
         layer_data *my_instance_data = get_my_data_ptr(get_dispatch_key(gpu), layer_data_map);
         layer_data *my_device_data = get_my_data_ptr(get_dispatch_key(*pDevice), layer_data_map);
@@ -2146,6 +2146,8 @@
         initDeviceTable(mem_tracker_device_table_map, (const VkBaseLayerObject *) dev);
         return (void *) vkGetDeviceProcAddr;
     }
+    if (!strcmp(funcName, "vkCreateDevice"))
+        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkDestroyDevice"))
         return (void*) vkDestroyDevice;
     if (!strcmp(funcName, "vkQueueSubmit"))
@@ -2304,8 +2306,6 @@
         return (void *) vkDestroyInstance;
     if (!strcmp(funcName, "vkCreateInstance"))
         return (void*) vkCreateInstance;
-    if (!strcmp(funcName, "vkCreateDevice"))
-        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionCount"))
         return (void*) vkGetGlobalExtensionCount;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionProperties"))
diff --git a/layers/multi.cpp b/layers/multi.cpp
index 5f86b88..28dacf9 100644
--- a/layers/multi.cpp
+++ b/layers/multi.cpp
@@ -306,9 +306,8 @@
 VK_LAYER_EXPORT VkResult VKAPI multi2CreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo,
                                                       VkDevice* pDevice)
 {
-    VkLayerInstanceDispatchTable **ppDisp = (VkLayerInstanceDispatchTable **) gpu;
     printf("At start of multi2 vkCreateDevice()\n");
-    VkResult result = instance_dispatch_table2(gpu)->CreateDevice(gpu, pCreateInfo, pDevice);
+    VkResult result = device_dispatch_table2(*pDevice)->CreateDevice(gpu, pCreateInfo, pDevice);
     printf("Completed multi2 layer vkCreateDevice()\n");
     return result;
 }
@@ -346,6 +345,8 @@
         getLayer2Table(devw);
         return (void *) multi2GetDeviceProcAddr;
     }
+    if (!strcmp("vkCreateDevice", pName))
+        return (void *) multi2CreateDevice;
     if (!strcmp("vkDestroyDevice", pName))
         return (void *) multi2DestroyDevice;
     if (!strcmp("vkCreateCommandBuffer", pName))
@@ -376,8 +377,6 @@
         return (void *) multi2EnumeratePhysicalDevices;
     if (!strcmp("vkDestroyInstance", pName))
         return (void *) multi2DestroyInstance;
-    if (!strcmp("vkCreateDevice", pName))
-        return (void *) multi2CreateDevice;
     else if (!strcmp("GetGlobalExtensionProperties", pName))
         return (void*) vkGetGlobalExtensionProperties;
     else if (!strcmp("GetGlobalExtensionCount", pName))
diff --git a/layers/object_track.h b/layers/object_track.h
index 643c3b5..329ca10 100644
--- a/layers/object_track.h
+++ b/layers/object_track.h
@@ -528,14 +528,15 @@
     VkDevice                 *pDevice)
 {
     loader_platform_thread_lock_mutex(&objLock);
-    VkLayerInstanceDispatchTable *pInstanceTable = get_dispatch_table(ObjectTracker_instance_table_map, gpu);
-    VkResult result = pInstanceTable->CreateDevice(gpu, pCreateInfo, pDevice);
+//    VkLayerInstanceDispatchTable *pInstanceTable = get_dispatch_table(ObjectTracker_instance_table_map, gpu);
+    VkLayerDispatchTable *pDeviceTable = get_dispatch_table(ObjectTracker_device_table_map, *pDevice);
+    VkResult result = pDeviceTable->CreateDevice(gpu, pCreateInfo, pDevice);
     if (result == VK_SUCCESS) {
         layer_data *my_instance_data = get_my_data_ptr(get_dispatch_key(gpu), layer_data_map);
         //// VkLayerDispatchTable *pTable = get_dispatch_table(ObjectTracker_device_table_map, *pDevice);
         layer_data *my_device_data = get_my_data_ptr(get_dispatch_key(*pDevice), layer_data_map);
         my_device_data->report_data = layer_debug_report_create_device(my_instance_data->report_data, *pDevice);
-        create_obj(gpu, *pDevice, VK_OBJECT_TYPE_DEVICE);
+        create_obj(*pDevice, *pDevice, VK_OBJECT_TYPE_DEVICE);
     }
 
     loader_platform_thread_unlock_mutex(&objLock);
diff --git a/layers/param_checker.cpp b/layers/param_checker.cpp
index 1645102..70ced01 100644
--- a/layers/param_checker.cpp
+++ b/layers/param_checker.cpp
@@ -1801,7 +1801,7 @@
     const VkDeviceCreateInfo* pCreateInfo,
     VkDevice* pDevice)
 {
-    VkLayerInstanceDispatchTable *pTable = get_dispatch_table(pc_instance_table_map, physicalDevice);
+    VkLayerDispatchTable *pTable = get_dispatch_table(pc_device_table_map, *pDevice);
     VkResult result = pTable->CreateDevice(physicalDevice, pCreateInfo, pDevice);
     if(result == VK_SUCCESS)
     {
@@ -9672,6 +9672,8 @@
         return (void*) vkGetDeviceProcAddr;
     }
 
+    if (!strcmp(funcName, "vkCreateDevice"))
+        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkDestroyDevice"))
         return (void*) vkDestroyDevice;
     if (!strcmp(funcName, "vkGetDeviceQueue"))
@@ -9882,8 +9884,6 @@
         return (void*) vkCreateInstance;
     if (!strcmp(funcName, "vkDestroyInstance"))
         return (void*) vkDestroyInstance;
-    if (!strcmp(funcName, "vkCreateDevice"))
-        return (void*) vkCreateDevice;
     if (!strcmp(funcName, "vkEnumeratePhysicalDevices"))
         return (void*) vkEnumeratePhysicalDevices;
     if (!strcmp(funcName, "vkGetPhysicalDeviceExtensionCount"))
diff --git a/layers/screenshot.cpp b/layers/screenshot.cpp
index d46205b..f7927df 100644
--- a/layers/screenshot.cpp
+++ b/layers/screenshot.cpp
@@ -311,8 +311,8 @@
     const VkDeviceCreateInfo *pCreateInfo,
     VkDevice                 *pDevice)
 {
-    VkLayerInstanceDispatchTable *pInstanceTable = get_dispatch_table(screenshot_instance_table_map, gpu);
-    VkResult result = pInstanceTable->CreateDevice(gpu, pCreateInfo, pDevice);
+    VkLayerDispatchTable *pDisp  = get_dispatch_table(screenshot_device_table_map, *pDevice);
+    VkResult result = pDisp->CreateDevice(gpu, pCreateInfo, pDevice);
 
     if (result == VK_SUCCESS) {
         createDeviceRegisterExtensions(pCreateInfo, *pDevice);
@@ -321,6 +321,8 @@
     return result;
 }
 
+/* TODO: Probably need a DestroyDevice as well */
+
 #define SCREENSHOT_LAYER_EXT_ARRAY_SIZE 2
 static const VkExtensionProperties ssExts[SCREENSHOT_LAYER_EXT_ARRAY_SIZE] = {
     {
@@ -561,6 +563,9 @@
         initDeviceTable(screenshot_device_table_map, (const VkBaseLayerObject *) dev);
         return (void *) vkGetDeviceProcAddr;
     }
+    if (!strcmp(funcName, "vkCreateDevice"))
+        return (void*) vkCreateDevice;
+
     if (!strcmp(funcName, "vkGetDeviceQueue"))
         return (void*) vkGetDeviceQueue;
 
@@ -610,8 +615,6 @@
         return (void *) vkDestroyInstance;
     if (!strcmp(funcName, "vkCreateInstance"))
         return (void*) vkCreateInstance;
-    if (!strcmp(funcName, "vkCreateDevice"))
-        return (void*) vkCreateDevice;
 
     if (get_dispatch_table(screenshot_instance_table_map, instance)->GetInstanceProcAddr == NULL)
         return NULL;