Some Vulkan memory fixes and cleanup

* Switch back to not setting transfer_dst on all buffers
* Add some missing unit tests
* Add tracking of heap usage for debugging purposes
* Fall back to non-device-local memory if device-local allocation fails

BUG=skia:5031
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2356343003

Committed: https://skia.googlesource.com/skia/+/c5850e9fdb62cc4ae5ed2b6af51aea92cac07455
Review-Url: https://codereview.chromium.org/2356343003
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index 98b2f89..f517b98 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -10,16 +10,24 @@
 #include "GrVkGpu.h"
 #include "GrVkUtil.h"
 
+#ifdef SK_DEBUG
+// for simple tracking of how much we're using in each heap
+// last counter is for non-subheap allocations
+VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
+#endif
+
 static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
                                         uint32_t typeBits,
                                         VkMemoryPropertyFlags requestedMemFlags,
-                                        uint32_t* typeIndex) {
+                                        uint32_t* typeIndex,
+                                        uint32_t* heapIndex) {
     for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
         if (typeBits & (1 << i)) {
             uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
                                       requestedMemFlags;
             if (supportedFlags == requestedMemFlags) {
                 *typeIndex = i;
+                *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
                 return true;
             }
         }
@@ -56,6 +64,7 @@
     GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
 
     uint32_t typeIndex = 0;
+    uint32_t heapIndex = 0;
     const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
     if (dynamic) {
         // try to get cached and ideally non-coherent memory first
@@ -63,12 +72,14 @@
                                          memReqs.memoryTypeBits,
                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-                                         &typeIndex)) {
+                                         &typeIndex,
+                                         &heapIndex)) {
             // some sort of host-visible memory type should always be available for dynamic buffers
             SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                          memReqs.memoryTypeBits,
                                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-                                                         &typeIndex));
+                                                         &typeIndex,
+                                                         &heapIndex));
         }
 
         VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
@@ -79,15 +90,22 @@
         SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
                                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                                                     &typeIndex));
+                                                     &typeIndex,
+                                                     &heapIndex));
         alloc->fFlags = 0x0;
     }
 
     GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
 
-    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
-        SkDebugf("Failed to alloc buffer\n");
-        return false;
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+        // if static, retry with no required property flags (first type allowed by typeBits)
+        if (dynamic ||
+            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+                                         0, &typeIndex, &heapIndex) ||
+            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+            SkDebugf("Failed to alloc buffer\n");
+            return false;
+        }
     }
 
     // Bind buffer
@@ -130,6 +148,7 @@
     GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
 
     uint32_t typeIndex = 0;
+    uint32_t heapIndex = 0;
     GrVkHeap* heap;
     const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
     if (linearTiling) {
@@ -138,12 +157,14 @@
         if (!get_valid_memory_type_index(phDevMemProps,
                                          memReqs.memoryTypeBits,
                                          desiredMemProps,
-                                         &typeIndex)) {
+                                         &typeIndex,
+                                         &heapIndex)) {
             // some sort of host-visible memory type should always be available
             SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                          memReqs.memoryTypeBits,
                                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-                                                         &typeIndex));
+                                                         &typeIndex,
+                                                         &heapIndex));
         }
         heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
         VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
@@ -154,7 +175,8 @@
         SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
                                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                                                     &typeIndex));
+                                                     &typeIndex,
+                                                     &heapIndex));
         if (memReqs.size <= kMaxSmallImageSize) {
             heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
         } else {
@@ -163,9 +185,15 @@
         alloc->fFlags = 0x0;
     }
 
-    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
-        SkDebugf("Failed to alloc image\n");
-        return false;
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+        // if optimal, retry with no required property flags (first type allowed by typeBits)
+        if (linearTiling ||
+            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+                                         0, &typeIndex, &heapIndex) ||
+            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+            SkDebugf("Failed to alloc image\n");
+            return false;
+        }
     }
 
     // Bind image
@@ -431,10 +459,13 @@
 #endif
 }
 
-GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
                          VkDeviceSize size, VkDeviceSize alignment)
     : INHERITED(size, alignment)
     , fGpu(gpu)
+#ifdef SK_DEBUG
+    , fHeapIndex(heapIndex)
+#endif
     , fMemoryTypeIndex(memoryTypeIndex) {
 
     VkMemoryAllocateInfo allocInfo = {
@@ -450,12 +481,20 @@
                                                                  &fAlloc));
     if (VK_SUCCESS != err) {
         this->reset();
+    } 
+#ifdef SK_DEBUG
+    else {
+        gHeapUsage[heapIndex] += size;
     }
+#endif
 }
 
 GrVkSubHeap::~GrVkSubHeap() {
     const GrVkInterface* iface = fGpu->vkInterface();
     GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+#ifdef SK_DEBUG
+    gHeapUsage[fHeapIndex] -= fSize;
+#endif
 }
 
 bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
@@ -470,7 +509,7 @@
 }
 
 bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
-                        uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+                        uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
     VkDeviceSize alignedSize = align_size(size, alignment);
 
     // if requested is larger than our subheap allocation, just alloc directly
@@ -491,6 +530,9 @@
         }
         alloc->fOffset = 0;
         alloc->fSize = 0;    // hint that this is not a subheap allocation
+#ifdef SK_DEBUG
+        gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
+#endif
 
         return true;
     }
@@ -520,11 +562,11 @@
 
     // need to allocate a new subheap
     SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
-    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment));
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
     // try to recover from failed allocation by only allocating what we need
     if (subHeap->size() == 0) {
         VkDeviceSize alignedSize = align_size(size, alignment);
-        subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+        subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
         if (subHeap->size() == 0) {
             return false;
         }
@@ -539,7 +581,7 @@
 }
 
 bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
-                           uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+                           uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
     VkDeviceSize alignedSize = align_size(size, alignment);
 
     // first try to find an unallocated subheap that fits our allocation request
@@ -568,7 +610,7 @@
 
     // need to allocate a new subheap
     SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
-    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
     fAllocSize += alignedSize;
     if (subHeap->alloc(size, alloc)) {
         fUsedSize += alloc->fSize;