Some Vulkan memory fixes and cleanup

* Switch back to adding the transfer_dst usage bit only for non-dynamic buffers instead of for all buffers
* Add some missing unit tests
* Add tracking of heap usage for debugging purposes
* Fall back to non-device-local memory if device-local allocation fails

BUG=skia:5031
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2356343003

Committed: https://skia.googlesource.com/skia/+/c5850e9fdb62cc4ae5ed2b6af51aea92cac07455
Review-Url: https://codereview.chromium.org/2356343003
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index 5d2b565..82674b4 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -45,7 +45,9 @@
             bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
             break;
     }
-    bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+    if (!desc.fDynamic) {
+        bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+    }
 
     bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
     bufInfo.queueFamilyIndexCount = 0;
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index 98b2f89..f517b98 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -10,16 +10,24 @@
 #include "GrVkGpu.h"
 #include "GrVkUtil.h"
 
+#ifdef SK_DEBUG
+// for simple tracking of how much we're using in each heap
+// last counter is for non-subheap allocations
+VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
+#endif
+
 static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
                                         uint32_t typeBits,
                                         VkMemoryPropertyFlags requestedMemFlags,
-                                        uint32_t* typeIndex) {
+                                        uint32_t* typeIndex,
+                                        uint32_t* heapIndex) {
     for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
         if (typeBits & (1 << i)) {
             uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
                                       requestedMemFlags;
             if (supportedFlags == requestedMemFlags) {
                 *typeIndex = i;
+                *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
                 return true;
             }
         }
@@ -56,6 +64,7 @@
     GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
 
     uint32_t typeIndex = 0;
+    uint32_t heapIndex = 0;
     const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
     if (dynamic) {
         // try to get cached and ideally non-coherent memory first
@@ -63,12 +72,14 @@
                                          memReqs.memoryTypeBits,
                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-                                         &typeIndex)) {
+                                         &typeIndex,
+                                         &heapIndex)) {
             // some sort of host-visible memory type should always be available for dynamic buffers
             SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                          memReqs.memoryTypeBits,
                                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-                                                         &typeIndex));
+                                                         &typeIndex,
+                                                         &heapIndex));
         }
 
         VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
@@ -79,15 +90,22 @@
         SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
                                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                                                     &typeIndex));
+                                                     &typeIndex,
+                                                     &heapIndex));
         alloc->fFlags = 0x0;
     }
 
     GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
 
-    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
-        SkDebugf("Failed to alloc buffer\n");
-        return false;
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+        // if static, retry with no required property flags (takes first compatible type)
+        if (dynamic ||
+            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+                                         0, &typeIndex, &heapIndex) ||
+            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+            SkDebugf("Failed to alloc buffer\n");
+            return false;
+        }
     }
 
     // Bind buffer
@@ -130,6 +148,7 @@
     GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
 
     uint32_t typeIndex = 0;
+    uint32_t heapIndex = 0;
     GrVkHeap* heap;
     const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
     if (linearTiling) {
@@ -138,12 +157,14 @@
         if (!get_valid_memory_type_index(phDevMemProps,
                                          memReqs.memoryTypeBits,
                                          desiredMemProps,
-                                         &typeIndex)) {
+                                         &typeIndex,
+                                         &heapIndex)) {
             // some sort of host-visible memory type should always be available
             SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                          memReqs.memoryTypeBits,
                                                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
-                                                         &typeIndex));
+                                                         &typeIndex,
+                                                         &heapIndex));
         }
         heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
         VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
@@ -154,7 +175,8 @@
         SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
                                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                                                     &typeIndex));
+                                                     &typeIndex,
+                                                     &heapIndex));
         if (memReqs.size <= kMaxSmallImageSize) {
             heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
         } else {
@@ -163,9 +185,15 @@
         alloc->fFlags = 0x0;
     }
 
-    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
-        SkDebugf("Failed to alloc image\n");
-        return false;
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+        // if optimal, retry with no required property flags (takes first compatible type)
+        if (linearTiling ||
+            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
+                                         0, &typeIndex, &heapIndex) ||
+            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
+            SkDebugf("Failed to alloc image\n");
+            return false;
+        }
     }
 
     // Bind image
@@ -431,10 +459,13 @@
 #endif
 }
 
-GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
                          VkDeviceSize size, VkDeviceSize alignment)
     : INHERITED(size, alignment)
     , fGpu(gpu)
+#ifdef SK_DEBUG
+    , fHeapIndex(heapIndex)
+#endif
     , fMemoryTypeIndex(memoryTypeIndex) {
 
     VkMemoryAllocateInfo allocInfo = {
@@ -450,12 +481,20 @@
                                                                  &fAlloc));
     if (VK_SUCCESS != err) {
         this->reset();
+    } 
+#ifdef SK_DEBUG
+    else {
+        gHeapUsage[heapIndex] += size;
     }
+#endif
 }
 
 GrVkSubHeap::~GrVkSubHeap() {
     const GrVkInterface* iface = fGpu->vkInterface();
     GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+#ifdef SK_DEBUG
+    gHeapUsage[fHeapIndex] -= fSize;
+#endif
 }
 
 bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
@@ -470,7 +509,7 @@
 }
 
 bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
-                        uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+                        uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
     VkDeviceSize alignedSize = align_size(size, alignment);
 
     // if requested is larger than our subheap allocation, just alloc directly
@@ -491,6 +530,9 @@
         }
         alloc->fOffset = 0;
         alloc->fSize = 0;    // hint that this is not a subheap allocation
+#ifdef SK_DEBUG
+        gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
+#endif
 
         return true;
     }
@@ -520,11 +562,11 @@
 
     // need to allocate a new subheap
     SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
-    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment));
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
     // try to recover from failed allocation by only allocating what we need
     if (subHeap->size() == 0) {
         VkDeviceSize alignedSize = align_size(size, alignment);
-        subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+        subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
         if (subHeap->size() == 0) {
             return false;
         }
@@ -539,7 +581,7 @@
 }
 
 bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
-                           uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+                           uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
     VkDeviceSize alignedSize = align_size(size, alignment);
 
     // first try to find an unallocated subheap that fits our allocation request
@@ -568,7 +610,7 @@
 
     // need to allocate a new subheap
     SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
-    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
     fAllocSize += alignedSize;
     if (subHeap->alloc(size, alloc)) {
         fUsedSize += alloc->fSize;
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index f8d5fdf..a1d4392 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -93,7 +93,7 @@
 
 class GrVkSubHeap : public GrVkFreeListAlloc {
 public:
-    GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex,
+    GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
                 VkDeviceSize size, VkDeviceSize alignment);
     ~GrVkSubHeap();
 
@@ -105,6 +105,9 @@
 
 private:
     const GrVkGpu* fGpu;
+#ifdef SK_DEBUG
+    uint32_t       fHeapIndex;
+#endif    
     uint32_t       fMemoryTypeIndex;
     VkDeviceMemory fAlloc;
 
@@ -135,21 +138,24 @@
     VkDeviceSize allocSize() const { return fAllocSize; }
     VkDeviceSize usedSize() const { return fUsedSize; }
 
-    bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex, 
-               GrVkAlloc* alloc) {
+    bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex,
+               uint32_t heapIndex, GrVkAlloc* alloc) {
         SkASSERT(size > 0);
-        return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, alloc);
+        return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, heapIndex, alloc);
     }
     bool free(const GrVkAlloc& alloc);
 
 private:
-    typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, 
-                                        uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+    typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment,
+                                        uint32_t memoryTypeIndex, uint32_t heapIndex,
+                                        GrVkAlloc* alloc);
 
-    bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, 
-                  uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+    bool subAlloc(VkDeviceSize size, VkDeviceSize alignment,
+                  uint32_t memoryTypeIndex, uint32_t heapIndex,
+                  GrVkAlloc* alloc);
     bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
-                     uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+                     uint32_t memoryTypeIndex, uint32_t heapIndex,
+                     GrVkAlloc* alloc);
 
     const GrVkGpu*         fGpu;
     VkDeviceSize           fSubHeapSize;
diff --git a/tests/VkHeapTests.cpp b/tests/VkHeapTests.cpp
index c4a9beb..4561c90 100755
--- a/tests/VkHeapTests.cpp
+++ b/tests/VkHeapTests.cpp
@@ -21,8 +21,8 @@
 void subheap_test(skiatest::Reporter* reporter, GrContext* context) {
     GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
 
-    // heap index doesn't matter, we're just testing the suballocation algorithm so we'll use 0
-    GrVkSubHeap heap(gpu, 0, 64 * 1024, 32);
+    // memtype/heap index don't matter, we're just testing the suballocation algorithm so we'll use 0
+    GrVkSubHeap heap(gpu, 0, 0, 64 * 1024, 32);
     GrVkAlloc alloc0, alloc1, alloc2, alloc3;
     // test full allocation and free
     REPORTER_ASSERT(reporter, heap.alloc(64 * 1024, &alloc0));
@@ -118,34 +118,35 @@
 void suballoc_test(skiatest::Reporter* reporter, GrContext* context) {
     GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
 
-    // heap index doesn't matter, we're just testing the allocation algorithm so we'll use 0
+    // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
     GrVkHeap heap(gpu, GrVkHeap::kSubAlloc_Strategy, 64 * 1024);
     GrVkAlloc alloc0, alloc1, alloc2, alloc3;
     const VkDeviceSize kAlignment = 16;
+    const uint32_t kMemType = 0;
     const uint32_t kHeapIndex = 0;
 
     REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
 
     // fragment allocations so we need to grow heap
-    REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kHeapIndex, &alloc0));
-    REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kHeapIndex, &alloc1));
-    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kHeapIndex, &alloc2));
-    REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kHeapIndex, &alloc3));
+    REPORTER_ASSERT(reporter, heap.alloc(19 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 9, kAlignment, kMemType, kHeapIndex, &alloc1));
+    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 15, kAlignment, kMemType, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 6, kAlignment, kMemType, kHeapIndex, &alloc3));
     REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 42 * 1024);
     heap.free(alloc0);
     REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 23 * 1024);
     heap.free(alloc2);
     REPORTER_ASSERT(reporter, heap.allocSize() == 64 * 1024 && heap.usedSize() == 8 * 1024);
     // we expect the heap to grow here
-    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 48 * 1024);
     heap.free(alloc3);
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 45 * 1024);
     // heap should not grow here (first subheap has exactly enough room)
-    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc3));
+    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 85 * 1024);
     // heap should not grow here (second subheap has room)
-    REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.alloc(22 * 1024, kAlignment, kMemType, kHeapIndex, &alloc2));
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 107 * 1024);
     heap.free(alloc1);
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 102 * 1024);
@@ -156,45 +157,58 @@
     heap.free(alloc3);
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
     // heap should not grow here (allocating more than subheap size)
-    REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.alloc(128 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
     REPORTER_ASSERT(reporter, 0 == alloc0.fSize);
     REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 0 * 1024);
     heap.free(alloc0);
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 128 * 1024 && heap.usedSize() == 24 * 1024);
+    // heap should alloc a new subheap because the memory type is different
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType+1, kHeapIndex, &alloc1));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 192 * 1024 && heap.usedSize() == 48 * 1024);
+    // heap should alloc a new subheap because the alignment is different
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 72 * 1024);
+    heap.free(alloc2);
+    heap.free(alloc0);
+    heap.free(alloc1);
+    REPORTER_ASSERT(reporter, heap.allocSize() == 256 * 1024 && heap.usedSize() == 0 * 1024);
 }
 
 void singlealloc_test(skiatest::Reporter* reporter, GrContext* context) {
     GrVkGpu* gpu = static_cast<GrVkGpu*>(context->getGpu());
 
-    // heap index doesn't matter, we're just testing the allocation algorithm so we'll use 0
+    // memtype/heap index don't matter, we're just testing the allocation algorithm so we'll use 0
     GrVkHeap heap(gpu, GrVkHeap::kSingleAlloc_Strategy, 64 * 1024);
     GrVkAlloc alloc0, alloc1, alloc2, alloc3;
     const VkDeviceSize kAlignment = 64;
+    const uint32_t kMemType = 0;
     const uint32_t kHeapIndex = 0;
 
     REPORTER_ASSERT(reporter, heap.allocSize() == 0 && heap.usedSize() == 0);
 
     // make a few allocations
-    REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kHeapIndex, &alloc0));
-    REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kHeapIndex, &alloc1));
-    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kHeapIndex, &alloc2));
-    REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kHeapIndex, &alloc3));
+    REPORTER_ASSERT(reporter, heap.alloc(49 * 1024 - 3, kAlignment, kMemType, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.alloc(5 * 1024 - 37, kAlignment, kMemType, kHeapIndex, &alloc1));
+    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 11, kAlignment, kMemType, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.alloc(3 * 1024 - 29, kAlignment, kMemType, kHeapIndex, &alloc3));
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 72 * 1024);
     heap.free(alloc0);
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 23 * 1024);
     heap.free(alloc2);
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 8 * 1024);
     // heap should not grow here (first subheap has room)
-    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 48 * 1024);
     heap.free(alloc3);
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
     // check for exact fit -- heap should not grow here (third subheap has room)
-    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.alloc(15 * 1024 - 63, kAlignment, kMemType, kHeapIndex, &alloc2));
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 60 * 1024);
     heap.free(alloc2);
     REPORTER_ASSERT(reporter, heap.allocSize() == 72 * 1024 && heap.usedSize() == 45 * 1024);
     // heap should grow here (no subheap has room)
-    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kHeapIndex, &alloc3));
+    REPORTER_ASSERT(reporter, heap.alloc(40 * 1024, kAlignment, kMemType, kHeapIndex, &alloc3));
     REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 85 * 1024);
     heap.free(alloc1);
     REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 80 * 1024);
@@ -202,6 +216,18 @@
     REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 40 * 1024);
     heap.free(alloc3);
     REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 0 * 1024);
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType, kHeapIndex, &alloc0));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 112 * 1024 && heap.usedSize() == 24 * 1024);
+    // heap should alloc a new subheap because the memory type is different
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, kAlignment, kMemType + 1, kHeapIndex, &alloc1));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 136 * 1024 && heap.usedSize() == 48 * 1024);
+    // heap should alloc a new subheap because the alignment is different
+    REPORTER_ASSERT(reporter, heap.alloc(24 * 1024, 128, kMemType, kHeapIndex, &alloc2));
+    REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 72 * 1024);
+    heap.free(alloc1);
+    heap.free(alloc2);
+    heap.free(alloc0);
+    REPORTER_ASSERT(reporter, heap.allocSize() == 160 * 1024 && heap.usedSize() == 0 * 1024);
 }
 
 DEF_GPUTEST_FOR_VULKAN_CONTEXT(VkHeapTests, reporter, ctxInfo) {