loader: Fixes in trampoline code to support layers which wrap objects

Loader trampoline code may see wrapped objects.  Don't do value comparisons
of dispatchable objects but instead compare dispatch tables to find objects.
PhysicalDevice objects, where multiple GPUs may share the same instance
dispatch table, will be fixed in a later patch.
diff --git a/loader/debug_report.c b/loader/debug_report.c
index c8686dc..fd66ab5 100644
--- a/loader/debug_report.c
+++ b/loader/debug_report.c
@@ -78,7 +78,7 @@
     if (!pNewDbgFuncNode)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
 
-    struct loader_instance *inst = loader_instance(instance);
+    struct loader_instance *inst = loader_get_instance(instance);
     loader_platform_thread_lock_mutex(&loader_lock);
     VkResult result = inst->disp->DbgCreateMsgCallback(instance, msgFlags, pfnMsgCallback, pUserData, pMsgCallback);
     if (result == VK_SUCCESS) {
@@ -99,7 +99,7 @@
         VkInstance instance,
         VkDbgMsgCallback msg_callback)
 {
-    struct loader_instance *inst = loader_instance(instance);
+    struct loader_instance *inst = loader_get_instance(instance);
     loader_platform_thread_lock_mutex(&loader_lock);
     VkLayerDbgFunctionNode *pTrav = inst->DbgFunctionHead;
     VkLayerDbgFunctionNode *pPrev = pTrav;
diff --git a/loader/loader.c b/loader/loader.c
index 4b132c8..544d6e4 100644
--- a/loader/loader.c
+++ b/loader/loader.c
@@ -890,7 +890,8 @@
     for (struct loader_instance *inst = loader.instances; inst; inst = inst->next) {
         for (struct loader_icd *icd = inst->icds; icd; icd = icd->next) {
             for (struct loader_device *dev = icd->logical_device_list; dev; dev = dev->next)
-                if (dev->device == device) {
+                /* Value comparison of device prevents object wrapping by layers */
+                if (loader_get_dispatch(dev->device) == loader_get_dispatch(device)) {
                     *found_dev = dev;
                     return icd;
                 }
@@ -1984,6 +1985,24 @@
     return disp_table->GetInstanceProcAddr(inst, pName);
 }
 
+struct loader_instance *loader_get_instance(const VkInstance instance)
+{
+    /* Find the loader_instance in loader.instances whose dispatch table equals
+     * loader_get_instance_dispatch(instance).  A layer may have wrapped the
+     * handle, so it cannot be assumed to still be a loader_instance*.
+     * Returns NULL when no instance in the list matches. */
+    const VkLayerInstanceDispatchTable *disp;
+    struct loader_instance *ptr_instance = NULL;
+    disp = loader_get_instance_dispatch(instance);
+    for (struct loader_instance *inst = loader.instances; inst; inst = inst->next) {
+        if (inst->disp == disp) {
+            ptr_instance = inst;
+            break;
+        }
+    }
+    return ptr_instance;
+}
+
 struct loader_icd * loader_get_icd(const VkPhysicalDevice gpu, uint32_t *gpu_index)
 {
 
@@ -1991,7 +2010,11 @@
     for (struct loader_instance *inst = loader.instances; inst; inst = inst->next) {
         for (struct loader_icd *icd = inst->icds; icd; icd = icd->next) {
             for (uint32_t i = 0; i < icd->gpu_count; i++)
-                if (icd->gpus[i] == gpu) {
+                /* Value comparison of VkPhysicalDevice prevents wrapping, use
+                 * instance device table instead (TODO this aliases GPUs within
+                 * an instance, since they have identical dispatch tables)
+                 */
+                if (loader_get_instance_dispatch(icd->gpus[i]) == loader_get_instance_dispatch(gpu)) {
                     *gpu_index = i;
                     return icd;
                 }
@@ -2973,7 +2996,9 @@
         }
     }
 
-    res = icd->CreateDevice(gpu, pCreateInfo, pDevice);
+    // The gpu object may be wrapped by a layer, and we have not yet called
+    // down the chain for a layer to unwrap it, so use the unwrapped version.
+    res = icd->CreateDevice(icd->gpus[gpu_index], pCreateInfo, pDevice);
     if (res != VK_SUCCESS) {
         return res;
     }
@@ -3022,6 +3047,7 @@
     if (instance == VK_NULL_HANDLE) {
         /* get entrypoint addresses that are global (in the loader),
            doesn't include any instance extensions since they may not be enabled yet*/
+
         addr = globalGetProcAddr(pName);
 
         return addr;
@@ -3037,7 +3063,7 @@
     /* debug_report is a special case; need to return loader trampoline entrypoints
      * unless the extension is not enabled; also need to handle debug_report
      * utility functions */
-    struct loader_instance *ptr_instance = (struct loader_instance *) instance;
+    struct loader_instance *ptr_instance = loader_get_instance(instance);
     if (debug_report_instance_gpa(ptr_instance, pName, &addr)) {
         return addr;
     }
diff --git a/loader/loader.h b/loader/loader.h
index cd795c5..e27f18c 100644
--- a/loader/loader.h
+++ b/loader/loader.h
@@ -373,6 +373,8 @@
 struct loader_icd *loader_get_icd_and_device(
         const VkDevice device,
         struct loader_device **found_dev);
+struct loader_instance *loader_get_instance(
+        const VkInstance instance);
 struct loader_icd * loader_get_icd(
         const VkPhysicalDevice gpu,
         uint32_t *gpu_index);
diff --git a/loader/trampoline.c b/loader/trampoline.c
index 8ae8135..2ac48ee 100644
--- a/loader/trampoline.c
+++ b/loader/trampoline.c
@@ -178,13 +178,14 @@
                                             VkInstance instance)
 {
     const VkLayerInstanceDispatchTable *disp;
+    struct loader_instance *ptr_instance = NULL;
     disp = loader_get_instance_dispatch(instance);
 
     loader_platform_thread_lock_mutex(&loader_lock);
 
+    ptr_instance = loader_get_instance(instance);
     disp->DestroyInstance(instance);
 
-    struct loader_instance *ptr_instance = loader_instance(instance);
     loader_deactivate_instance_layers(ptr_instance);
     loader_heap_free(ptr_instance, ptr_instance->disp);
     loader_heap_free(ptr_instance, ptr_instance);