[vulkan] Maintain order of begin/end command buffer in deferred mode

bug: 135464985

If we record command buffers in async mode (no return from
vkBegin/EndCommandBuffer) in several guest threads,
it's possible the commands end up out of order when they arrive on the
host, leading to cool bugs.

To ensure the order when async mode is active, we use a special command

vkCommandBufferHostSyncGOOGLE

when the guest thread used to record to a command buffer switches.

vkCommandBufferHostSyncGOOGLE is issued twice (steps 1 and 2); step 3 covers encoder destruction:

1. The previous encoder issues vkCommandBufferHostSyncGOOGLE with the
previous sequence number.
2. The current encoder issues vkCommandBufferHostSyncGOOGLE with the
incremented sequence number along with a flag to wait on the host for
the vkCommandBufferHostSyncGOOGLE with the previous sequence number.
3. In case the previous encoder is destroyed, we register a cleanup
callback that updates the pointer accordingly, so that we don't perform
a stale access.

By introducing this new command, we can then record command buffers
completely async and avoid having to add sequence numbers
to every command buffer recording API.

Note: We previously explored a solution where all command buffer
recording would go through one pipe, to implicitly order the
command buffer APIs. However, that turned out to be extremely difficult
to synchronize with other Vulkan APIs that were not concerned with
command buffer recording, as they could and sometimes need to run in
their separate threads.

Change-Id: I093be0a1600f22e0a355536d1671acdce3852541
diff --git a/system/vulkan_enc/VkEncoder.cpp b/system/vulkan_enc/VkEncoder.cpp
index 2299823..732bd21 100644
--- a/system/vulkan_enc/VkEncoder.cpp
+++ b/system/vulkan_enc/VkEncoder.cpp
@@ -43,6 +43,9 @@
 #include "goldfish_vk_private_defs.h"
 #include "goldfish_vk_transform_guest.h"
 
+#include <unordered_map>
+
+
 
 namespace goldfish_vk {
 
@@ -65,6 +68,14 @@
             m_logEncodes = atoi(encodeProp) > 0;
         }
     }
+
+    ~Impl() {  // Runs every cleanup callback that is still registered when this Impl is destroyed.
+        for (auto& entry : mCleanupCallbacks) {  // by reference: avoids copying each (handle, callback) entry
+            fprintf(stderr, "%s: run cleanup callback for %p\n", __func__, entry.first);
+            entry.second();
+        }
+    }
+
     VulkanCountingStream* countingStream() { return &m_countingStream; }
     VulkanStreamGuest* stream() { return &m_stream; }
     Pool* pool() { return &m_pool; }
@@ -81,6 +92,15 @@
         m_stream.flush();
     }
 
+    // Caller is assumed to hold this encoder's lock. A handle that is already registered keeps its existing callback.
+    void registerCleanupCallback(void* handle, VkEncoder::CleanupCallback cb) {
+        mCleanupCallbacks.emplace(handle, cb);
+    }
+
+    void unregisterCleanupCallback(void* handle) {  // Removes the callback stored for handle; no effect if none is stored.
+        mCleanupCallbacks.erase(handle);
+    }
+
     Lock lock;
 
 private:
@@ -90,6 +110,8 @@
 
     Validation m_validation;
     bool m_logEncodes;
+
+    std::unordered_map<void*, VkEncoder::CleanupCallback> mCleanupCallbacks;
 };
 
 VkEncoder::VkEncoder(IOStream *stream) :
@@ -99,6 +121,14 @@
     mImpl->flush();
 }
 
+void VkEncoder::registerCleanupCallback(void* handle, VkEncoder::CleanupCallback cb) {  // Forwards to Impl; mImpl->lock is not taken here.
+    mImpl->registerCleanupCallback(handle, cb);
+}
+
+void VkEncoder::unregisterCleanupCallback(void* handle) {  // Forwards to Impl; mImpl->lock is not taken here.
+    mImpl->unregisterCleanupCallback(handle);
+}
+
 #define VALIDATE_RET(retType, success, validate) \
     retType goldfish_vk_validateResult = validate; \
     if (goldfish_vk_validateResult != success) return goldfish_vk_validateResult; \
@@ -23023,6 +23053,48 @@
     mImpl->log("finish vkResetCommandBufferAsyncGOOGLE");;
 }
 
+void VkEncoder::vkCommandBufferHostSyncGOOGLE(  // Encodes one vkCommandBufferHostSyncGOOGLE packet onto this encoder's stream.
+    VkCommandBuffer commandBuffer,  // written to the stream after being mapped to a uint64_t
+    uint32_t needHostSync,  // written to the stream unchanged
+    uint32_t sequenceNumber)  // written to the stream unchanged
+{
+    AutoLock encoderLock(mImpl->lock);  // takes mImpl->lock (presumably released at scope exit -- confirm AutoLock)
+    AEMU_SCOPED_TRACE("vkCommandBufferHostSyncGOOGLE encode");
+    mImpl->log("start vkCommandBufferHostSyncGOOGLE");
+    auto stream = mImpl->stream();
+    auto countingStream = mImpl->countingStream();
+    auto resources = mImpl->resources();
+    auto pool = mImpl->pool();  // NOTE(review): not used anywhere below in this function
+    stream->setHandleMapping(resources->unwrapMapping());
+    VkCommandBuffer local_commandBuffer;
+    uint32_t local_needHostSync;
+    uint32_t local_sequenceNumber;
+    local_commandBuffer = commandBuffer;
+    local_needHostSync = needHostSync;
+    local_sequenceNumber = sequenceNumber;
+    countingStream->rewind();  // pass 1: write the payload to countingStream only to measure its size
+    {
+        uint64_t cgen_var_1518;
+        countingStream->handleMapping()->mapHandles_VkCommandBuffer_u64(&local_commandBuffer, &cgen_var_1518, 1);
+        countingStream->write((uint64_t*)&cgen_var_1518, 1 * 8);
+        countingStream->write((uint32_t*)&local_needHostSync, sizeof(uint32_t));
+        countingStream->write((uint32_t*)&local_sequenceNumber, sizeof(uint32_t));
+    }
+    uint32_t packetSize_vkCommandBufferHostSyncGOOGLE = 4 + 4 + (uint32_t)countingStream->bytesWritten();  // 4 + 4: the two uint32_t header fields written below
+    countingStream->rewind();
+    uint32_t opcode_vkCommandBufferHostSyncGOOGLE = OP_vkCommandBufferHostSyncGOOGLE;
+    stream->write(&opcode_vkCommandBufferHostSyncGOOGLE, sizeof(uint32_t));  // pass 2: header (opcode, packet size) ...
+    stream->write(&packetSize_vkCommandBufferHostSyncGOOGLE, sizeof(uint32_t));
+    uint64_t cgen_var_1519;  // ... followed by the same three payload fields, in the same order as pass 1
+    stream->handleMapping()->mapHandles_VkCommandBuffer_u64(&local_commandBuffer, &cgen_var_1519, 1);
+    stream->write((uint64_t*)&cgen_var_1519, 1 * 8);
+    stream->write((uint32_t*)&local_needHostSync, sizeof(uint32_t));
+    stream->write((uint32_t*)&local_sequenceNumber, sizeof(uint32_t));
+    AEMU_SCOPED_TRACE("vkCommandBufferHostSyncGOOGLE readParams");  // nothing is read back from the stream in this function
+    AEMU_SCOPED_TRACE("vkCommandBufferHostSyncGOOGLE returnUnmarshal");
+    mImpl->log("finish vkCommandBufferHostSyncGOOGLE");;
+}
+
 #endif
 
 } // namespace goldfish_vk