layers: Added fence tracking for command buffer completion

Cleaned up MemTracker fence tracking for command buffer completions and
fixed several issues with local fences.
diff --git a/layers/mem_tracker.c b/layers/mem_tracker.c
index 228dc45..8084e8f 100644
--- a/layers/mem_tracker.c
+++ b/layers/mem_tracker.c
@@ -22,6 +22,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -46,13 +47,17 @@
 
 #define MAX_BINDING 0xFFFFFFFF
 
-static GLOBAL_CB_NODE* pGlobalCBHead = NULL;
+static GLOBAL_CB_NODE*      pGlobalCBHead     = NULL;
 static GLOBAL_MEM_OBJ_NODE* pGlobalMemObjHead = NULL;
-static GLOBAL_OBJECT_NODE* pGlobalObjectHead = NULL;
-static XGL_DEVICE globalDevice = NULL;
-static uint64_t numCBNodes = 0;
-static uint64_t numMemObjNodes = 0;
-static uint64_t numObjectNodes = 0;
+static GLOBAL_OBJECT_NODE*  pGlobalObjectHead = NULL;
+static GLOBAL_FENCE_NODE*   pGlobalFenceList  = NULL;
+// TODO : Add support for per-queue and per-device fence completion
+static uint64_t             g_currentFenceId  = 1;
+static uint64_t             g_lastRetiredId   = 0;
+static XGL_DEVICE           globalDevice      = NULL;
+static uint64_t             numCBNodes        = 0;
+static uint64_t             numMemObjNodes    = 0;
+static uint64_t             numObjectNodes    = 0;
 
 // Check list for data and if it's not included insert new node
 //    into HEAD of list pointed to by pHEAD & update pHEAD
@@ -130,20 +135,140 @@
     }
     return pTrav;
 }
-// Set fence for given cb in global cb node
-static bool32_t setCBFence(const XGL_CMD_BUFFER cb, const XGL_FENCE fence, bool32_t localFlag)
-{
 
-    GLOBAL_CB_NODE* pTrav = getGlobalCBNode(cb);
-    if (!pTrav) {
-        char str[1024];
-        sprintf(str, "Unable to find node for CB %p in order to set fence to %p", (void*)cb, (void*)fence);
-        layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, cb, 0, MEMTRACK_INVALID_CB, "MEM", str);
-        return XGL_FALSE;
+// Add a fence to our list of fences/fenceIds, creating an internal fence if none was supplied; returns the new fenceId
+// Linked list is FIFO: head = oldest, tail = newest
+static uint64_t addFenceNode(XGL_FENCE fence)
+{
+    // Create fence node
+    GLOBAL_FENCE_NODE* pFenceNode = (GLOBAL_FENCE_NODE*)malloc(sizeof(GLOBAL_FENCE_NODE));
+    memset(pFenceNode, 0, sizeof(GLOBAL_FENCE_NODE));
+    pFenceNode->fenceId = g_currentFenceId++;
+    // If no fence, create an internal fence to track the submissions
+    if (fence == NULL) {
+        XGL_FENCE_CREATE_INFO fci;
+        fci.sType = XGL_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+        fci.pNext = NULL;
+        fci.flags = 0;
+        nextTable.CreateFence(globalDevice, &fci, &pFenceNode->fence);
+        pFenceNode->localFence = XGL_TRUE;
+    } else {
+        pFenceNode->localFence = XGL_FALSE;
+        pFenceNode->fence = fence;
     }
-    pTrav->fence = fence;
-    pTrav->localFlag = localFlag;
-    return XGL_TRUE;
+
+    // Add to END of fence list
+    if (pGlobalFenceList == NULL) {
+        pGlobalFenceList = pFenceNode;
+    } else {
+        GLOBAL_FENCE_NODE* pCurFenceNode = pGlobalFenceList;
+        while (pCurFenceNode && pCurFenceNode->pNextGlobalFenceNode != NULL) {
+            pCurFenceNode = pCurFenceNode->pNextGlobalFenceNode;
+        }
+        pCurFenceNode->pNextGlobalFenceNode = pFenceNode;
+    }
+
+    return pFenceNode->fenceId;
+}
+
+// Remove a node from our list of fences/fenceIds
+static void deleteFenceNode(uint64_t fenceId)
+{
+    if (fenceId != 0) {
+        // Search for a node with this fenceId
+        GLOBAL_FENCE_NODE* pCurFenceNode  = pGlobalFenceList;
+        GLOBAL_FENCE_NODE* pPrevFenceNode = pCurFenceNode;
+        while ((pCurFenceNode != NULL) && (pCurFenceNode->fenceId != fenceId)) {
+            pPrevFenceNode = pCurFenceNode;
+            pCurFenceNode  = pCurFenceNode->pNextGlobalFenceNode;
+        }
+        if (pCurFenceNode != NULL) {
+            // TODO:  Wait on this fence?
+            if (pCurFenceNode->localFence == XGL_TRUE) {
+                nextTable.DestroyObject(pCurFenceNode->fence);
+            }
+            // Remove links to this node
+            pPrevFenceNode->pNextGlobalFenceNode = pCurFenceNode->pNextGlobalFenceNode;
+            // Update head pointer if necessary
+            if (pCurFenceNode == pGlobalFenceList) {
+                pGlobalFenceList = pCurFenceNode->pNextGlobalFenceNode;
+            }
+            free(pCurFenceNode);
+        } else {
+            char str[1024];
+            sprintf(str, "FenceId %"PRIx64" node is missing from global fence list", fenceId);
+            layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, NULL, 0, MEMTRACK_CB_MISSING_FENCE, "MEM", str);
+        }
+    }
+}
+
+// Search through list for this fence, deleting all nodes before it (with lower IDs) and updating lastRetiredId
+static void updateFenceTracking(XGL_FENCE fence)
+{
+    // Technically, we could delete nodes as we walk toward this fence.  But for now, first make sure the fence is actually in the list.
+    GLOBAL_FENCE_NODE* pCurFenceNode  = pGlobalFenceList;
+    while ((pCurFenceNode != NULL) && (pCurFenceNode->fence != fence)) {
+        pCurFenceNode  = pCurFenceNode->pNextGlobalFenceNode;
+    }
+    if (pCurFenceNode != NULL) {
+        // Delete all nodes in front of this one and update the global last retired value
+        GLOBAL_FENCE_NODE* pDelNode = NULL;
+        g_lastRetiredId             = pCurFenceNode->fenceId;
+        pCurFenceNode               = pGlobalFenceList;
+        while ((pCurFenceNode != NULL) && (pCurFenceNode->fence != fence)) {
+            pDelNode = pCurFenceNode;
+            pCurFenceNode  = pCurFenceNode->pNextGlobalFenceNode;
+            deleteFenceNode(pDelNode->fenceId);
+        }
+    }
+}
+
+// Utility function that determines if a fenceId has been retired yet
+static bool32_t fenceRetired(uint64_t fenceId)
+{
+    bool32_t result = XGL_FALSE;
+    if (fenceId <= g_lastRetiredId) {
+        result = XGL_TRUE;
+    }
+    return result;
+}
+
+// Return the fence associated with a fenceId
+static XGL_FENCE getFenceFromId(uint64_t fenceId)
+{
+    XGL_FENCE fence = NULL;
+    if (fenceId != 0) {
+        // Search for a node with this fenceId
+        if (fenceId > g_lastRetiredId) {
+            GLOBAL_FENCE_NODE* pCurFenceNode  = pGlobalFenceList;
+            while ((pCurFenceNode != NULL) && (pCurFenceNode->fenceId != fenceId)) {
+                pCurFenceNode  = pCurFenceNode->pNextGlobalFenceNode;
+            }
+            if (pCurFenceNode != NULL) {
+                fence = pCurFenceNode->fence;
+            } else {
+                char str[1024];
+                sprintf(str, "Dangit, couldn't find fenceId %"PRIx64" in the list", fenceId);
+                layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, NULL, 0, MEMTRACK_CB_MISSING_FENCE, "MEM", str);
+            }
+        }
+    }
+    return fence;
+}
+
+// Helper routine that updates the fence list to all-retired, as for Queue/DeviceWaitIdle
+static void retireAllFences(void)
+{
+    // In this case, we go through the whole list, retiring each node and updating the global retired ID until the list is empty
+    GLOBAL_FENCE_NODE* pCurFenceNode  = pGlobalFenceList;
+    GLOBAL_FENCE_NODE* pDelNode       = NULL;
+
+    while (pCurFenceNode != NULL) {
+        pDelNode        = pCurFenceNode;
+        pCurFenceNode   = pCurFenceNode->pNextGlobalFenceNode;
+        g_lastRetiredId = pDelNode->fenceId;
+        deleteFenceNode(pDelNode->fenceId);
+    }
 }
 
 static bool32_t validateCBMemRef(const XGL_CMD_BUFFER cb, uint32_t memRefCount, const XGL_MEMORY_REF* pMemRefs)
@@ -304,11 +429,9 @@
         layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, cb, 0, MEMTRACK_INVALID_CB, "MEM", str);
         result = XGL_FALSE;
     } else {
-        if ((pCBTrav->fence != NULL) && (pCBTrav->localFlag == XGL_TRUE)) {
-            nextTable.DestroyObject(pCBTrav->fence);
-            pCBTrav->localFlag = XGL_FALSE;
+        if (!fenceRetired(pCBTrav->fenceId)) {
+            deleteFenceNode(pCBTrav->fenceId);
         }
-        pCBTrav->fence = NULL;
         MINI_NODE* pMemTrav = pCBTrav->pMemObjList;
         MINI_NODE* pDeleteMe = NULL;
         // We traverse LL in order and free nodes as they're cleared
@@ -413,6 +536,7 @@
         layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, mem, 0, MEMTRACK_INVALID_MEM_OBJ, "MEM", str);
     }
 }
+
 // Check if fence for given CB is completed
 static bool32_t checkCBCompleted(const XGL_CMD_BUFFER cb)
 {
@@ -424,10 +548,11 @@
         layerCbMsg(XGL_DBG_MSG_ERROR, XGL_VALIDATION_LEVEL_0, cb, 0, MEMTRACK_INVALID_CB, "MEM", str);
         result = XGL_FALSE;
     } else {
-        if (pCBTrav->fence) {
-            if (XGL_SUCCESS != nextTable.GetFenceStatus(pCBTrav->fence)) {
+        if (!fenceRetired(pCBTrav->fenceId)) {
+            // Explicitly call the internal xglGetFenceStatus routine
+            if (XGL_SUCCESS != xglGetFenceStatus(getFenceFromId(pCBTrav->fenceId))) {
                 char str[1024];
-                sprintf(str, "Fence %p for CB %p has not completed", pCBTrav->fence, cb);
+                sprintf(str, "FenceId %"PRIx64", fence %p for CB %p has not completed", pCBTrav->fenceId, getFenceFromId(pCBTrav->fenceId), cb);
                 layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, cb, 0, MEMTRACK_NONE, "MEM", str);
                 result = XGL_FALSE;
             }
@@ -436,20 +561,6 @@
     return result;
 }
 
-static void clearCBFence(const XGL_FENCE fence)
-{
-    // TODO : This is slow and stupid
-    //  Ultimately would like a quick fence lookup w/ all of the CBs using that fence
-    //  We have to loop every CB for now b/c multiple CBs may use same fence
-    GLOBAL_CB_NODE* pCBTrav = pGlobalCBHead;
-    while (pCBTrav) {
-        if (pCBTrav->fence == fence) {
-            pCBTrav->fence = NULL;
-        }
-        pCBTrav = pCBTrav->pNextGlobalCBNode;
-    }
-}
-
 static bool32_t freeMemNode(XGL_GPU_MEMORY mem)
 {
     bool32_t result = XGL_TRUE;
@@ -752,7 +863,7 @@
         sprintf(str, "Details of Global CB list w/ HEAD at %p:", (void*)pTrav);
         layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, NULL, 0, MEMTRACK_NONE, "MEM", str);
         while (pTrav) {
-            sprintf(str, "    Global CB Node (%p) w/ pNextGlobalCBNode (%p) has CB %p, fence %p, and pMemObjList %p", (void*)pTrav, (void*)pTrav->pNextGlobalCBNode, (void*)pTrav->cmdBuffer, (void*)pTrav->fence, (void*)pTrav->pMemObjList);
+            sprintf(str, "    Global CB Node (%p) w/ pNextGlobalCBNode (%p) has CB %p, fenceId %"PRIx64", fence %p, and pMemObjList %p", (void*)pTrav, (void*)pTrav->pNextGlobalCBNode, (void*)pTrav->cmdBuffer, pTrav->fenceId, (void*)getFenceFromId(pTrav->fenceId), (void*)pTrav->pMemObjList);
             layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, NULL, 0, MEMTRACK_NONE, "MEM", str);
             MINI_NODE* pMemObjTrav = pTrav->pMemObjList;
             while (pMemObjTrav) {
@@ -765,17 +876,6 @@
     }
 }
 
-static XGL_FENCE createLocalFence()
-{
-    XGL_FENCE_CREATE_INFO fci;
-    fci.sType = XGL_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    fci.pNext = NULL;
-    fci.flags = 0;
-    XGL_FENCE fence;
-    nextTable.CreateFence(globalDevice, &fci, &fence);
-    return fence;
-}
-
 static void initMemTracker(void)
 {
     const char *strOpt;
@@ -878,20 +978,17 @@
 XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglQueueSubmit(XGL_QUEUE queue, uint32_t cmdBufferCount, const XGL_CMD_BUFFER* pCmdBuffers, uint32_t memRefCount, const XGL_MEMORY_REF* pMemRefs, XGL_FENCE fence)
 {
     loader_platform_thread_lock_mutex(&globalLock);
-    bool32_t localFlag = XGL_FALSE;
     // TODO : Need to track fence and clear mem references when fence clears
-    XGL_FENCE localFence = fence;
-    if (XGL_NULL_HANDLE == fence) { // allocate our own fence to track cmd buffer
-        localFence = createLocalFence();
-        localFlag = XGL_TRUE;
-    }
-    char str[1024];
+    GLOBAL_CB_NODE* pCBNode = NULL;
+    uint64_t        fenceId = addFenceNode(fence);
+    char            str[1024];
     sprintf(str, "In xglQueueSubmit(), checking %u cmdBuffers with %u memRefs", cmdBufferCount, memRefCount);
     layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, queue, 0, MEMTRACK_NONE, "MEM", str);
     printMemList();
     printGlobalCB();
     for (uint32_t i = 0; i < cmdBufferCount; i++) {
-        setCBFence(pCmdBuffers[i], localFence, localFlag);
+        pCBNode = getGlobalCBNode(pCmdBuffers[i]);
+        pCBNode->fenceId = fenceId;
         sprintf(str, "Verifying mem refs for CB %p", pCmdBuffers[i]);
         layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, pCmdBuffers[i], 0, MEMTRACK_NONE, "MEM", str);
         if (XGL_FALSE == validateCBMemRef(pCmdBuffers[i], memRefCount, pMemRefs)) {
@@ -901,7 +998,7 @@
     }
     printGlobalCB();
     loader_platform_thread_unlock_mutex(&globalLock);
-    XGL_RESULT result = nextTable.QueueSubmit(queue, cmdBufferCount, pCmdBuffers, memRefCount, pMemRefs, localFence);
+    XGL_RESULT result = nextTable.QueueSubmit(queue, cmdBufferCount, pCmdBuffers, memRefCount, pMemRefs, getFenceFromId(fenceId));
     return result;
 }
 
@@ -1035,9 +1132,6 @@
                 clearObjectBinding(object);
             }
         }
-        if (XGL_STRUCTURE_TYPE_FENCE_CREATE_INFO == pTrav->sType) {
-            clearCBFence((XGL_FENCE)object);
-        }
         if (pGlobalObjectHead == pTrav) // update HEAD if needed
             pGlobalObjectHead = pTrav->pNext;
         // Delete the obj node from global list
@@ -1089,9 +1183,9 @@
 {
     XGL_RESULT result = nextTable.GetFenceStatus(fence);
     if (XGL_SUCCESS == result) {
-        // TODO : Properly we should add validation to make sure app is checking fence
-        //  on CB before Reset/Begin CB call is made
-        clearCBFence(fence);
+        loader_platform_thread_lock_mutex(&globalLock);
+        updateFenceTracking(fence);
+        loader_platform_thread_unlock_mutex(&globalLock);
     }
     return result;
 }
@@ -1099,22 +1193,44 @@
 XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglWaitForFences(XGL_DEVICE device, uint32_t fenceCount, const XGL_FENCE* pFences, bool32_t waitAll, uint64_t timeout)
 {
     XGL_RESULT result = nextTable.WaitForFences(device, fenceCount, pFences, waitAll, timeout);
+    loader_platform_thread_lock_mutex(&globalLock);
     if (XGL_SUCCESS == result) {
-        // TODO : Properly we should add validation to make sure app is checking fence
-        //  on CB before Reset/Begin CB call is made
         if (waitAll) { // Clear all the fences
             for(uint32_t i = 0; i < fenceCount; i++) {
-                clearCBFence(pFences[i]);
+                updateFenceTracking(pFences[i]);
             }
         }
         else { // Clear only completed fences
             for(uint32_t i = 0; i < fenceCount; i++) {
                 if (XGL_SUCCESS == nextTable.GetFenceStatus(pFences[i])) {
-                    clearCBFence(pFences[i]);
+                    updateFenceTracking(pFences[i]);
                 }
             }
         }
     }
+    loader_platform_thread_unlock_mutex(&globalLock);
+    return result;
+}
+
+XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglQueueWaitIdle(XGL_QUEUE queue)
+{
+    XGL_RESULT result = nextTable.QueueWaitIdle(queue);
+    if (XGL_SUCCESS == result) {
+        loader_platform_thread_lock_mutex(&globalLock);
+        retireAllFences();
+        loader_platform_thread_unlock_mutex(&globalLock);
+    }
+    return result;
+}
+
+XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglDeviceWaitIdle(XGL_DEVICE device)
+{
+    XGL_RESULT result = nextTable.DeviceWaitIdle(device);
+    if (XGL_SUCCESS == result) {
+        loader_platform_thread_lock_mutex(&globalLock);
+        retireAllFences();
+        loader_platform_thread_unlock_mutex(&globalLock);
+    }
     return result;
 }
 
@@ -1305,7 +1421,7 @@
 {
     // This implicitly resets the Cmd Buffer so make sure any fence is done and then clear memory references
     GLOBAL_CB_NODE* pCBTrav = getGlobalCBNode(cmdBuffer);
-    if (pCBTrav && pCBTrav->fence) {
+    if (pCBTrav && (!fenceRetired(pCBTrav->fenceId))) {
         bool32_t cbDone = checkCBCompleted(cmdBuffer);
         if (XGL_FALSE == cbDone) {
             char str[1024];
@@ -1331,7 +1447,7 @@
 {
     // Verify that CB is complete (not in-flight)
     GLOBAL_CB_NODE* pCBTrav = getGlobalCBNode(cmdBuffer);
-    if (pCBTrav && pCBTrav->fence) {
+    if (pCBTrav && (!fenceRetired(pCBTrav->fenceId))) {
         bool32_t cbDone = checkCBCompleted(cmdBuffer);
         if (XGL_FALSE == cbDone) {
             char str[1024];
@@ -1779,6 +1895,18 @@
     loader_platform_thread_unlock_mutex(&globalLock);
     return result;
 }
+
+XGL_LAYER_EXPORT XGL_RESULT XGLAPI xglWsiX11QueuePresent(XGL_QUEUE queue, const XGL_WSI_X11_PRESENT_INFO*  pPresentInfo, XGL_FENCE fence)
+{
+    loader_platform_thread_lock_mutex(&globalLock);
+    addFenceNode(fence);
+    char            str[1024];
+    sprintf(str, "In xglWsiX11QueuePresent(), checking queue %p for fence %p", queue, fence);
+    layerCbMsg(XGL_DBG_MSG_UNKNOWN, XGL_VALIDATION_LEVEL_0, queue, 0, MEMTRACK_NONE, "MEM", str);
+    loader_platform_thread_unlock_mutex(&globalLock);
+    XGL_RESULT result = nextTable.WsiX11QueuePresent(queue, pPresentInfo, fence);
+    return result;
+}
 #endif // WIN32
 
 XGL_LAYER_EXPORT void* XGLAPI xglGetProcAddr(XGL_PHYSICAL_GPU gpu, const char* funcName)
@@ -1832,6 +1960,10 @@
         return (void*) xglGetFenceStatus;
     if (!strcmp(funcName, "xglWaitForFences"))
         return (void*) xglWaitForFences;
+    if (!strcmp(funcName, "xglQueueWaitIdle"))
+        return (void*) xglQueueWaitIdle;
+    if (!strcmp(funcName, "xglDeviceWaitIdle"))
+        return (void*) xglDeviceWaitIdle;
     if (!strcmp(funcName, "xglCreateEvent"))
         return (void*) xglCreateEvent;
     if (!strcmp(funcName, "xglCreateQueryPool"))
@@ -1923,6 +2055,8 @@
 #if !defined(WIN32)
     if (!strcmp(funcName, "xglWsiX11CreatePresentableImage"))
         return (void*) xglWsiX11CreatePresentableImage;
+    if (!strcmp(funcName, "xglWsiX11QueuePresent"))
+        return (void*) xglWsiX11QueuePresent;
 #endif
     else {
         if (gpuw->pGPA == NULL)