Create free list heap for suballocation

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2029763002

Review-Url: https://codereview.chromium.org/2029763002
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index 12925db..1c7c4d0 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -56,37 +56,23 @@
         return nullptr;
     }
 
-    VkMemoryPropertyFlags requiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-                                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-
     if (!GrVkMemory::AllocAndBindBufferMemory(gpu,
                                               buffer,
-                                              requiredMemProps,
+                                              desc.fType,
                                               &alloc)) {
-        // Try again without requiring host cached memory
-        requiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-        if (!GrVkMemory::AllocAndBindBufferMemory(gpu,
-                                                  buffer,
-                                                  requiredMemProps,
-                                                  &alloc)) {
-            VK_CALL(gpu, DestroyBuffer(gpu->device(), buffer, nullptr));
-            return nullptr;
-        }
+        return nullptr;
     }
 
-    const GrVkBuffer::Resource* resource = new GrVkBuffer::Resource(buffer, alloc);
+    const GrVkBuffer::Resource* resource = new GrVkBuffer::Resource(buffer, alloc, desc.fType);
     if (!resource) {
         VK_CALL(gpu, DestroyBuffer(gpu->device(), buffer, nullptr));
-        GrVkMemory::FreeBufferMemory(gpu, alloc);
+        GrVkMemory::FreeBufferMemory(gpu, desc.fType, alloc);
         return nullptr;
     }
 
     return resource;
 }
 
-
 void GrVkBuffer::addMemoryBarrier(const GrVkGpu* gpu,
                                   VkAccessFlags srcAccessMask,
                                   VkAccessFlags dstAccesMask,
@@ -113,7 +99,7 @@
     SkASSERT(fBuffer);
     SkASSERT(fAlloc.fMemory);
     VK_CALL(gpu, DestroyBuffer(gpu->device(), fBuffer, nullptr));
-    GrVkMemory::FreeBufferMemory(gpu, fAlloc);
+    GrVkMemory::FreeBufferMemory(gpu, fType, fAlloc);
 }
 
 void GrVkBuffer::vkRelease(const GrVkGpu* gpu) {
diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h
index f7d43c7..0bfbeca 100644
--- a/src/gpu/vk/GrVkBuffer.h
+++ b/src/gpu/vk/GrVkBuffer.h
@@ -54,11 +54,12 @@
 
     class Resource : public GrVkResource {
     public:
-        Resource(VkBuffer buf, const GrVkAlloc& alloc) 
-            : INHERITED(), fBuffer(buf), fAlloc(alloc) {}
+        Resource(VkBuffer buf, const GrVkAlloc& alloc, Type type)
+            : INHERITED(), fBuffer(buf), fAlloc(alloc), fType(type) {}
 
-        VkBuffer  fBuffer;
-        GrVkAlloc fAlloc;
+        VkBuffer           fBuffer;
+        GrVkAlloc          fAlloc;
+        Type               fType;
 
     private:
         void freeGPUData(const GrVkGpu* gpu) const;
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 6f95365..b808354 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -132,6 +132,16 @@
     fCurrentCmdBuffer = fResourceProvider.createPrimaryCommandBuffer();
     SkASSERT(fCurrentCmdBuffer);
     fCurrentCmdBuffer->begin(this);
+
+    // set up our heaps
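+    // (kSubAlloc_Strategy heaps carve allocations out of large subheaps of the
+    // given size; kSingleAlloc_Strategy heaps create one subheap per allocation)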
+    fHeaps[kLinearImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
+    fHeaps[kOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024*1024));
+    fHeaps[kSmallOptimalImage_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 2*1024*1024));
+    fHeaps[kVertexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+    fHeaps[kIndexBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+    fHeaps[kUniformBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 64*1024));
+    fHeaps[kCopyReadBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSingleAlloc_Strategy, 0));
+    fHeaps[kCopyWriteBuffer_Heap].reset(new GrVkHeap(this, GrVkHeap::kSubAlloc_Strategy, 16*1024*1024));
 }
 
 GrVkGpu::~GrVkGpu() {
@@ -880,11 +890,8 @@
     usageFlags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
     usageFlags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
 
-    VkFlags memProps = (srcData && linearTiling) ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
-                                                   VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-
     VkImage image = VK_NULL_HANDLE;
-    GrVkAlloc alloc = { VK_NULL_HANDLE, 0 };
+    GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0 };
 
     VkImageTiling imageTiling = linearTiling ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
     VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageTiling)
@@ -917,7 +924,7 @@
 
     GR_VK_CALL_ERRCHECK(this->vkInterface(), CreateImage(this->device(), &imageCreateInfo, nullptr, &image));
 
-    if (!GrVkMemory::AllocAndBindImageMemory(this, image, memProps, &alloc)) {
+    if (!GrVkMemory::AllocAndBindImageMemory(this, image, linearTiling, &alloc)) {
         VK_CALL(DestroyImage(this->device(), image, nullptr));
         return 0;
     }
@@ -938,7 +945,7 @@
             err = VK_CALL(MapMemory(fDevice, alloc.fMemory, alloc.fOffset, layout.rowPitch * h,
                                     0, &mapPtr));
             if (err) {
-                GrVkMemory::FreeImageMemory(this, alloc);
+                GrVkMemory::FreeImageMemory(this, linearTiling, alloc);
                 VK_CALL(DestroyImage(this->device(), image, nullptr));
                 return 0;
             }
@@ -989,15 +996,12 @@
 }
 
 void GrVkGpu::deleteTestingOnlyBackendTexture(GrBackendObject id, bool abandon) {
-    const GrVkImageInfo* backend = reinterpret_cast<const GrVkImageInfo*>(id);
-
+    GrVkImageInfo* backend = reinterpret_cast<GrVkImageInfo*>(id);
     if (backend) {
         if (!abandon) {
             // something in the command buffer may still be using this, so force submit
             this->submitCommandBuffer(kForce_SyncQueue);
-
-            GrVkMemory::FreeImageMemory(this, backend->fAlloc);
-            VK_CALL(DestroyImage(this->device(), backend->fImage, nullptr));
+            GrVkImage::DestroyImageInfo(this, backend);
         }
         delete backend;
     }
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index cd72c69..b076de2 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -13,6 +13,7 @@
 #include "vk/GrVkBackendContext.h"
 #include "GrVkCaps.h"
 #include "GrVkIndexBuffer.h"
+#include "GrVkMemory.h"
 #include "GrVkResourceProvider.h"
 #include "GrVkVertexBuffer.h"
 #include "GrVkUtil.h"
@@ -122,6 +123,27 @@
 
     void generateMipmap(GrVkTexture* tex) const;
 
+    // Heaps
+    enum Heap {
+        kLinearImage_Heap = 0,
+        // We separate out small (i.e., <= 16K) images to reduce fragmentation
+        // in the main heap.
+        kOptimalImage_Heap,
+        kSmallOptimalImage_Heap,
+        // Buffer heaps are kept separate from the image heaps, because a
+        // given Vulkan driver may allocate them separately.
+        kVertexBuffer_Heap,
+        kIndexBuffer_Heap,
+        kUniformBuffer_Heap,
+        kCopyReadBuffer_Heap,
+        kCopyWriteBuffer_Heap,
+
+        kLastHeap = kCopyWriteBuffer_Heap
+    };
+    static const int kHeapCount = kLastHeap + 1;
+
+    GrVkHeap* getHeap(Heap heap) const { return fHeaps[heap]; }
+
 private:
     GrVkGpu(GrContext* context, const GrContextOptions& options,
             const GrVkBackendContext* backendContext);
@@ -226,6 +248,8 @@
     GrVkPrimaryCommandBuffer*              fCurrentCmdBuffer;
     VkPhysicalDeviceMemoryProperties       fPhysDevMemProps;
 
+    SkAutoTDelete<GrVkHeap>                fHeaps[kHeapCount];
+
 #ifdef ENABLE_VK_LAYERS
     // For reporting validation layer errors
     VkDebugReportCallbackEXT               fCallback;
diff --git a/src/gpu/vk/GrVkImage.cpp b/src/gpu/vk/GrVkImage.cpp
index bbef1b4..70dd448 100644
--- a/src/gpu/vk/GrVkImage.cpp
+++ b/src/gpu/vk/GrVkImage.cpp
@@ -64,9 +64,9 @@
     VkImage image = 0;
     GrVkAlloc alloc;
 
-    VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageDesc.fImageTiling)
-        ? VK_IMAGE_LAYOUT_PREINITIALIZED
-        : VK_IMAGE_LAYOUT_UNDEFINED;
+    bool isLinear = VK_IMAGE_TILING_LINEAR == imageDesc.fImageTiling;
+    VkImageLayout initialLayout = isLinear ? VK_IMAGE_LAYOUT_PREINITIALIZED
+                                           : VK_IMAGE_LAYOUT_UNDEFINED;
 
     // Create Image
     VkSampleCountFlagBits vkSamples;
@@ -102,7 +102,7 @@
     GR_VK_CALL_ERRCHECK(gpu->vkInterface(), CreateImage(gpu->device(), &imageCreateInfo, nullptr,
                                                         &image));
 
-    if (!GrVkMemory::AllocAndBindImageMemory(gpu, image, imageDesc.fMemProps, &alloc)) {
+    if (!GrVkMemory::AllocAndBindImageMemory(gpu, image, isLinear, &alloc)) {
         VK_CALL(gpu, DestroyImage(gpu->device(), image, nullptr));
         return false;
     }
@@ -118,11 +118,12 @@
 
 void GrVkImage::DestroyImageInfo(const GrVkGpu* gpu, GrVkImageInfo* info) {
     VK_CALL(gpu, DestroyImage(gpu->device(), info->fImage, nullptr));
-    GrVkMemory::FreeImageMemory(gpu, info->fAlloc);
+    bool isLinear = VK_IMAGE_TILING_LINEAR == info->fImageTiling;
+    GrVkMemory::FreeImageMemory(gpu, isLinear, info->fAlloc);
 }
 
-void GrVkImage::setNewResource(VkImage image, const GrVkAlloc& alloc) {
-    fResource = new Resource(image, alloc);
+void GrVkImage::setNewResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling) {
+    fResource = new Resource(image, alloc, tiling);
 }
 
 GrVkImage::~GrVkImage() {
@@ -146,7 +147,8 @@
 
 void GrVkImage::Resource::freeGPUData(const GrVkGpu* gpu) const {
     VK_CALL(gpu, DestroyImage(gpu->device(), fImage, nullptr));
-    GrVkMemory::FreeImageMemory(gpu, fAlloc);
+    bool isLinear = (VK_IMAGE_TILING_LINEAR == fImageTiling);
+    GrVkMemory::FreeImageMemory(gpu, isLinear, fAlloc);
 }
 
 void GrVkImage::BorrowedResource::freeGPUData(const GrVkGpu* gpu) const {
diff --git a/src/gpu/vk/GrVkImage.h b/src/gpu/vk/GrVkImage.h
index 85ee620..fe18069 100644
--- a/src/gpu/vk/GrVkImage.h
+++ b/src/gpu/vk/GrVkImage.h
@@ -33,9 +33,9 @@
         : fInfo(info)
         , fIsBorrowed(kBorrowed_Wrapped == wrapped) {
         if (kBorrowed_Wrapped == wrapped) {
-            fResource = new BorrowedResource(info.fImage, info.fAlloc);
+            fResource = new BorrowedResource(info.fImage, info.fAlloc, info.fImageTiling);
         } else {
-            fResource = new Resource(info.fImage, info.fAlloc);
+            fResource = new Resource(info.fImage, info.fAlloc, info.fImageTiling);
         }
     }
     virtual ~GrVkImage();
@@ -87,13 +87,12 @@
     void releaseImage(const GrVkGpu* gpu);
     void abandonImage();
 
-    void setNewResource(VkImage image, const GrVkAlloc& alloc);
+    void setNewResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling);
 
     GrVkImageInfo   fInfo;
     bool            fIsBorrowed;
 
 private:
-    // unlike GrVkBuffer, this needs to be public so GrVkStencilAttachment can use it
     class Resource : public GrVkResource {
     public:
         Resource()
@@ -103,8 +102,8 @@
             fAlloc.fOffset = 0;
         }
 
-    Resource(VkImage image, const GrVkAlloc& alloc) 
-            : fImage(image), fAlloc(alloc) {}
+        Resource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling)
+            : fImage(image), fAlloc(alloc), fImageTiling(tiling) {}
 
         ~Resource() override {}
 
@@ -113,6 +112,7 @@
 
         VkImage        fImage;
         GrVkAlloc      fAlloc;
+        VkImageTiling  fImageTiling;
 
         typedef GrVkResource INHERITED;
     };
@@ -120,8 +120,8 @@
     // for wrapped textures
     class BorrowedResource : public Resource {
     public:
-        BorrowedResource(VkImage image, const GrVkAlloc& alloc) 
-            : Resource(image, alloc) {
+        BorrowedResource(VkImage image, const GrVkAlloc& alloc, VkImageTiling tiling)
+            : Resource(image, alloc, tiling) {
         }
     private:
         void freeGPUData(const GrVkGpu* gpu) const override;
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index e0ab3a6..fa0bcb5 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -29,38 +29,26 @@
     return false;
 }
 
-static bool alloc_device_memory(const GrVkGpu* gpu,
-                                VkMemoryRequirements* memReqs,
-                                const VkMemoryPropertyFlags flags,
-                                VkDeviceMemory* memory) {
-    uint32_t typeIndex;
-    if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
-                                     memReqs->memoryTypeBits,
-                                     flags,
-                                     &typeIndex)) {
-        return false;
-    }
-
-    VkMemoryAllocateInfo allocInfo = {
-        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
-        NULL,                                        // pNext
-        memReqs->size,                               // allocationSize
-        typeIndex,                                   // memoryTypeIndex
+static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) {
+    const GrVkGpu::Heap kBufferToHeap[] = {
+        GrVkGpu::kVertexBuffer_Heap,
+        GrVkGpu::kIndexBuffer_Heap,
+        GrVkGpu::kUniformBuffer_Heap,
+        GrVkGpu::kCopyReadBuffer_Heap,
+        GrVkGpu::kCopyWriteBuffer_Heap,
     };
+    GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type);
+    GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type);
+    GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type);
+    GR_STATIC_ASSERT(3 == GrVkBuffer::kCopyRead_Type);
+    GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyWrite_Type);
 
-    VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
-                                                                 &allocInfo,
-                                                                 nullptr,
-                                                                 memory));
-    if (err) {
-        return false;
-    }
-    return true;
+    return kBufferToHeap[type];
 }
 
 bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
                                           VkBuffer buffer,
-                                          const VkMemoryPropertyFlags flags,
+                                          GrVkBuffer::Type type,
                                           GrVkAlloc* alloc) {
     const GrVkInterface* iface = gpu->vkInterface();
     VkDevice device = gpu->device();
@@ -68,30 +56,61 @@
     VkMemoryRequirements memReqs;
     GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
 
-    if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) {
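+    // prefer cached, host-visible memory for CPU access; fall back to an
+    // uncached host-visible type below if no cached type is available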
+    VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                                            VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+    uint32_t typeIndex;
+    if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                     memReqs.memoryTypeBits,
+                                     desiredMemProps,
+                                     &typeIndex)) {
+        // this memory type should always be available
+        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                     memReqs.memoryTypeBits,
+                                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                     &typeIndex));
+    }
+
+    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
+        SkDebugf("Failed to alloc buffer\n");
         return false;
     }
-    // for now, offset is always 0
-    alloc->fOffset = 0;
 
     // Bind Memory to device
     VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer, 
                                                       alloc->fMemory, alloc->fOffset));
     if (err) {
-        GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr));
+        SkASSERT_RELEASE(heap->free(*alloc));
         return false;
     }
+
     return true;
 }
 
-void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
-    const GrVkInterface* iface = gpu->vkInterface();
-    GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type,
+                                  const GrVkAlloc& alloc) {
+    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
+    SkASSERT_RELEASE(heap->free(alloc));
+}
+
+// for debugging
+static uint64_t gTotalImageMemory = 0;
+static uint64_t gTotalImageMemoryFullPage = 0;
+
+const VkDeviceSize kMaxSmallImageSize = 16 * 1024;
+const VkDeviceSize kMinVulkanPageSize = 16 * 1024;
+
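+// round size up to the next multiple of alignment (assumed to be a power of two)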
+static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) {
+    return (size + alignment - 1) & ~(alignment - 1);
 }
 
 bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
                                          VkImage image,
-                                         const VkMemoryPropertyFlags flags,
+                                         bool linearTiling,
                                          GrVkAlloc* alloc) {
     const GrVkInterface* iface = gpu->vkInterface();
     VkDevice device = gpu->device();
@@ -99,25 +118,76 @@
     VkMemoryRequirements memReqs;
     GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));
 
-    if (!alloc_device_memory(gpu, &memReqs, flags, &alloc->fMemory)) {
+    uint32_t typeIndex;
+    GrVkHeap* heap;
+    if (linearTiling) {
+        VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                                                VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+        if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                         memReqs.memoryTypeBits,
+                                         desiredMemProps,
+                                         &typeIndex)) {
+            // this memory type should always be available
+            SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                         memReqs.memoryTypeBits,
+                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                         &typeIndex));
+        }
+        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+    } else {
+        // this memory type should always be available
+        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+                                                     memReqs.memoryTypeBits,
+                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                                                     &typeIndex));
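+        // small optimal-tiling images get their own heap to reduce
+        // fragmentation in the main image heap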
+        if (memReqs.size <= kMaxSmallImageSize) {
+            heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+        } else {
+            heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+        }
+    }
+
+    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
+        SkDebugf("Failed to alloc image\n");
         return false;
     }
-    // for now, offset is always 0
-    alloc->fOffset = 0;
 
     // Bind Memory to device
     VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
                               alloc->fMemory, alloc->fOffset));
     if (err) {
-        GR_VK_CALL(iface, FreeMemory(device, alloc->fMemory, nullptr));
+        SkASSERT_RELEASE(heap->free(*alloc));
         return false;
     }
+
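+    // debug bookkeeping: total image memory in use, and what it would be if
+    // every allocation were rounded up to a full page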
+    gTotalImageMemory += alloc->fSize;
+
+    VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize);
+    gTotalImageMemoryFullPage += pageAlignedSize;
+
     return true;
 }
 
-void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
-    const GrVkInterface* iface = gpu->vkInterface();
-    GR_VK_CALL(iface, FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling,
+                                 const GrVkAlloc& alloc) {
+    GrVkHeap* heap;
+    if (linearTiling) {
+        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+    } else if (alloc.fSize <= kMaxSmallImageSize) {
+        heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
+    } else {
+        heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
+    }
+    if (!heap->free(alloc)) {
+        // must be an adopted allocation
+        GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
+    } else {
+        gTotalImageMemory -= alloc.fSize;
+        VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize);
+        gTotalImageMemoryFullPage -= pageAlignedSize;
+    }
 }
 
 VkPipelineStageFlags GrVkMemory::LayoutToPipelineStageFlags(const VkImageLayout layout) {
@@ -169,3 +239,289 @@
     }
     return flags;
 }
+
+GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, 
+                         VkDeviceSize size, VkDeviceSize alignment)
+    : fGpu(gpu)
+    , fMemoryTypeIndex(memoryTypeIndex) {
+
+    VkMemoryAllocateInfo allocInfo = {
+        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
+        NULL,                                        // pNext
+        size,                                        // allocationSize
+        memoryTypeIndex,                             // memoryTypeIndex
+    };
+
+    VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
+                                                                 &allocInfo,
+                                                                 nullptr,
+                                                                 &fAlloc));
+
+    if (VK_SUCCESS == err) {
+        fSize = size;
+        fAlignment = alignment;
+        fFreeSize = size;
+        fLargestBlockSize = size;
+        fLargestBlockOffset = 0;
+
+        Block* block = fFreeList.addToTail();
+        block->fOffset = 0;
+        block->fSize = fSize;
+    } else {
+        // allocation failed: zero everything out so the destructor and
+        // accessors see an empty subheap
+        fAlloc = VK_NULL_HANDLE;
+        fSize = 0;
+        fAlignment = 0;
+        fFreeSize = 0;
+        fLargestBlockSize = 0;
+        fLargestBlockOffset = 0;
+    }
+}
+
+GrVkSubHeap::~GrVkSubHeap() {
+    const GrVkInterface* iface = fGpu->vkInterface();
+    GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
+
+    fFreeList.reset();
+}
+
+bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, fAlignment);
+
+    // find the smallest block big enough for our allocation
+    FreeList::Iter iter = fFreeList.headIter();
+    FreeList::Iter bestFitIter;
+    VkDeviceSize   bestFitSize = fSize + 1;
+    VkDeviceSize   secondLargestSize = 0;
+    VkDeviceSize   secondLargestOffset = 0;
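+    // while scanning, also track the second-largest free block so we can
+    // update the cached largest block if the best fit consumes it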
+    while (iter.get()) {
+        Block* block = iter.get();
+        // block offsets should always be aligned; only sizes need adjusting
+        SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0);
+        if (block->fSize >= alignedSize && block->fSize < bestFitSize) {
+            bestFitIter = iter;
+            bestFitSize = block->fSize;
+        }
+        if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) {
+            secondLargestSize = block->fSize;
+            secondLargestOffset = block->fOffset;
+        }
+        iter.next();
+    }
+    SkASSERT(secondLargestSize <= fLargestBlockSize);
+
+    Block* bestFit = bestFitIter.get();
+    if (bestFit) {
+        alloc->fMemory = fAlloc;
+        SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset);
+        alloc->fOffset = bestFit->fOffset;
+        alloc->fSize = alignedSize;
+        // adjust or remove current block
+        VkDeviceSize originalBestFitOffset = bestFit->fOffset;
+        if (bestFit->fSize > alignedSize) {
+            bestFit->fOffset += alignedSize;
+            bestFit->fSize -= alignedSize;
+            if (fLargestBlockOffset == originalBestFitOffset) {
+                if (bestFit->fSize >= secondLargestSize) {
+                    fLargestBlockSize = bestFit->fSize;
+                    fLargestBlockOffset = bestFit->fOffset;
+                } else {
+                    fLargestBlockSize = secondLargestSize;
+                    fLargestBlockOffset = secondLargestOffset;
+                }
+            }
+#ifdef SK_DEBUG
+            VkDeviceSize largestSize = 0;
+            iter = fFreeList.headIter();
+            while (iter.get()) {
+                Block* block = iter.get();
+                if (largestSize < block->fSize) {
+                    largestSize = block->fSize;
+                }
+                iter.next();
+            }
+            SkASSERT(largestSize == fLargestBlockSize);
+#endif
+        } else {
+            SkASSERT(bestFit->fSize == alignedSize);
+            if (fLargestBlockOffset == originalBestFitOffset) {
+                fLargestBlockSize = secondLargestSize;
+                fLargestBlockOffset = secondLargestOffset;
+            }
+            fFreeList.remove(bestFit);
+#ifdef SK_DEBUG
+            VkDeviceSize largestSize = 0;
+            iter = fFreeList.headIter();
+            while (iter.get()) {
+                Block* block = iter.get();
+                if (largestSize < block->fSize) {
+                    largestSize = block->fSize;
+                }
+                iter.next();
+            }
+            SkASSERT(largestSize == fLargestBlockSize);
+#endif
+        }
+        fFreeSize -= alignedSize;
+
+        return true;
+    }
+
+    SkDebugf("Can't allocate %llu bytes, %llu bytes available, largest free block %llu\n",
+             (unsigned long long)alignedSize, (unsigned long long)fFreeSize,
+             (unsigned long long)fLargestBlockSize);
+
+    return false;
+}
+
+void GrVkSubHeap::free(const GrVkAlloc& alloc) {
+    SkASSERT(alloc.fMemory == fAlloc);
+
+    // find the block right after this allocation
+    FreeList::Iter iter = fFreeList.headIter();
+    while (iter.get() && iter.get()->fOffset < alloc.fOffset) {
+        iter.next();
+    } 
+    FreeList::Iter prev = iter;
+    prev.prev();
+    // we have four cases:
+    // we exactly follow the previous one
+    Block* block;
+    if (prev.get() && prev.get()->fOffset + prev.get()->fSize == alloc.fOffset) {
+        block = prev.get();
+        block->fSize += alloc.fSize;
+        if (block->fOffset == fLargestBlockOffset) {
+            fLargestBlockSize = block->fSize;
+        }
+        // and additionally we may exactly precede the next one
+        if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
+            block->fSize += iter.get()->fSize;
+            if (iter.get()->fOffset == fLargestBlockOffset) {
+                fLargestBlockOffset = block->fOffset;
+                fLargestBlockSize = block->fSize;
+            }
+            fFreeList.remove(iter.get());
+        }
+    // or we only exactly precede the next one
+    } else if (iter.get() && iter.get()->fOffset == alloc.fOffset + alloc.fSize) {
+        block = iter.get();
+        block->fSize += alloc.fSize;
+        if (block->fOffset == fLargestBlockOffset) {
+            fLargestBlockOffset = alloc.fOffset;
+            fLargestBlockSize = block->fSize;
+        }
+        block->fOffset = alloc.fOffset;
+    // or we fall somewhere in between, with gaps
+    } else {
+        block = fFreeList.addBefore(iter);
+        block->fOffset = alloc.fOffset;
+        block->fSize = alloc.fSize;
+    }
+    fFreeSize += alloc.fSize;
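+    // the merged (or newly inserted) block may now be the largest free block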
+    if (block->fSize > fLargestBlockSize) {
+        fLargestBlockSize = block->fSize;
+        fLargestBlockOffset = block->fOffset;
+    }
+
+#ifdef SK_DEBUG
+    VkDeviceSize   largestSize = 0;
+    iter = fFreeList.headIter();
+    while (iter.get()) {
+        Block* block = iter.get();
+        if (largestSize < block->fSize) {
+            largestSize = block->fSize;
+        }
+        iter.next();
+    }
+    SkASSERT(fLargestBlockSize == largestSize);
+#endif
+}
+
+GrVkHeap::~GrVkHeap() {
+}
+
+bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment, 
+                        uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, alignment);
+
+    // first try to find a subheap that fits our allocation request
+    int bestFitIndex = -1;
+    VkDeviceSize bestFitSize = (VkDeviceSize)-1;
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex) {
+            VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize();
+            if (heapSize >= alignedSize && heapSize < bestFitSize) {
+                bestFitIndex = i;
+                bestFitSize = heapSize;
+            }
+        }
+    }
+
+    if (bestFitIndex >= 0) {
+        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+            fUsedSize += alloc->fSize;
+            return true;
+        }
+        return false;
+    } 
+
+    // need to allocate a new subheap
+    SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, fSubHeapSize, alignment));
+    fAllocSize += fSubHeapSize;
+    if (subHeap->alloc(size, alloc)) {
+        fUsedSize += alloc->fSize;
+        return true;
+    }
+
+    return false;
+}
+
+bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment, 
+                           uint32_t memoryTypeIndex, GrVkAlloc* alloc) {
+    VkDeviceSize alignedSize = align_size(size, alignment);
+
+    // first try to find an unallocated subheap that fits our allocation request
+    int bestFitIndex = -1;
+    VkDeviceSize bestFitSize = (VkDeviceSize)-1;
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex && fSubHeaps[i]->unallocated()) {
+            VkDeviceSize heapSize = fSubHeaps[i]->size();
+            if (heapSize >= alignedSize && heapSize < bestFitSize) {
+                bestFitIndex = i;
+                bestFitSize = heapSize;
+            }
+        }
+    }
+
+    if (bestFitIndex >= 0) {
+        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
+        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
+            fUsedSize += alloc->fSize;
+            return true;
+        }
+        return false;
+    }
+
+    // need to allocate a new subheap
+    SkAutoTDelete<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
+    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, alignedSize, alignment));
+    fAllocSize += alignedSize;
+    if (subHeap->alloc(size, alloc)) {
+        fUsedSize += alloc->fSize;
+        return true;
+    }
+
+    return false;
+}
+
+bool GrVkHeap::free(const GrVkAlloc& alloc) {
+    for (auto i = 0; i < fSubHeaps.count(); ++i) {
+        if (fSubHeaps[i]->memory() == alloc.fMemory) {
+            fSubHeaps[i]->free(alloc);
+            fUsedSize -= alloc.fSize;
+            return true;
+        }
+    }
+
+    return false;
+}
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index 279dd58..197bbe8 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -8,6 +8,9 @@
 #ifndef GrVkMemory_DEFINED
 #define GrVkMemory_DEFINED
 
+#include "GrVkBuffer.h"
+#include "SkTArray.h"
+#include "SkTLList.h"
 #include "vk/GrVkDefines.h"
 #include "vk/GrVkTypes.h"
 
@@ -16,23 +19,102 @@
 namespace GrVkMemory {
     /**
     * Allocates vulkan device memory and binds it to the gpu's device for the given object.
-    * Returns true of allocation succeeded.
+    * Returns true if allocation succeeded.
     */
     bool AllocAndBindBufferMemory(const GrVkGpu* gpu,
                                   VkBuffer buffer,
-                                  const VkMemoryPropertyFlags flags,
+                                  GrVkBuffer::Type type,
                                   GrVkAlloc* alloc);
-    void FreeBufferMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc);
+    void FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type, const GrVkAlloc& alloc);
 
     bool AllocAndBindImageMemory(const GrVkGpu* gpu,
                                  VkImage image,
-                                 const VkMemoryPropertyFlags flags,
+                                 bool linearTiling,
                                  GrVkAlloc* alloc);
-    void FreeImageMemory(const GrVkGpu* gpu, const GrVkAlloc& alloc);
+    void FreeImageMemory(const GrVkGpu* gpu, bool linearTiling, const GrVkAlloc& alloc);
 
     VkPipelineStageFlags LayoutToPipelineStageFlags(const VkImageLayout layout);
 
     VkAccessFlags LayoutToSrcAccessMask(const VkImageLayout layout);
 }
 
+class GrVkSubHeap {
+public:
+    GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, 
+                VkDeviceSize size, VkDeviceSize alignment);
+    ~GrVkSubHeap();
+
+    uint32_t  memoryTypeIndex() const { return fMemoryTypeIndex;  }
+    VkDeviceSize size() const { return fSize; }
+    VkDeviceSize alignment() const { return fAlignment; }
+    VkDeviceSize freeSize() const { return fFreeSize; }
+    VkDeviceSize largestBlockSize() const { return fLargestBlockSize; }
+    VkDeviceMemory memory() const { return fAlloc; }
+
+    bool unallocated() const { return fSize == fFreeSize; }
+
+    bool alloc(VkDeviceSize size, GrVkAlloc* alloc);
+    void free(const GrVkAlloc& alloc);
+
+private:
+    struct Block {
+        VkDeviceSize fOffset;
+        VkDeviceSize fSize;
+    };
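+    // free blocks, sorted by offset and coalesced with their neighbors in free()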
+    typedef SkTLList<Block, 16> FreeList;
+
+    const GrVkGpu* fGpu;
+    uint32_t       fMemoryTypeIndex;
+    VkDeviceSize   fSize;
+    VkDeviceSize   fAlignment;
+    VkDeviceSize   fFreeSize;
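+    // cached largest free block, so GrVkHeap can choose a subheap without
+    // walking its free list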
+    VkDeviceSize   fLargestBlockSize;
+    VkDeviceSize   fLargestBlockOffset;
+    VkDeviceMemory fAlloc;
+    FreeList       fFreeList;
+};
+
+class GrVkHeap {
+public:
+    enum Strategy {
+        kSubAlloc_Strategy,       // alloc large subheaps and suballoc within them
+        kSingleAlloc_Strategy     // alloc/recycle an individual subheap per object
+    };
+
+    GrVkHeap(const GrVkGpu* gpu, Strategy strategy, VkDeviceSize subHeapSize)
+        : fGpu(gpu)
+        , fSubHeapSize(subHeapSize)
+        , fAllocSize(0)
+        , fUsedSize(0) {
+        if (strategy == kSubAlloc_Strategy) {
+            fAllocFunc = &GrVkHeap::subAlloc;
+        } else {
+            fAllocFunc = &GrVkHeap::singleAlloc;
+        }
+    }
+
+    ~GrVkHeap();
+
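+    // dispatches to subAlloc() or singleAlloc(), depending on the strategy
+    // chosen at construction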
+    bool alloc(VkDeviceSize size, VkDeviceSize alignment, uint32_t memoryTypeIndex, 
+               GrVkAlloc* alloc) {
+        return (*this.*fAllocFunc)(size, alignment, memoryTypeIndex, alloc);
+    }
+    bool free(const GrVkAlloc& alloc);
+
+private:
+    typedef bool (GrVkHeap::*AllocFunc)(VkDeviceSize size, VkDeviceSize alignment, 
+                                        uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+
+    bool subAlloc(VkDeviceSize size, VkDeviceSize alignment, 
+                  uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+    bool singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
+                     uint32_t memoryTypeIndex, GrVkAlloc* alloc);
+
+    const GrVkGpu*         fGpu;
+    VkDeviceSize           fSubHeapSize;
+    VkDeviceSize           fAllocSize;
+    VkDeviceSize           fUsedSize;
+    AllocFunc              fAllocFunc;
+    SkTArray<SkAutoTDelete<GrVkSubHeap>> fSubHeaps;
+};
 #endif
diff --git a/src/gpu/vk/GrVkTexture.cpp b/src/gpu/vk/GrVkTexture.cpp
index 0adf87b..5d15311 100644
--- a/src/gpu/vk/GrVkTexture.cpp
+++ b/src/gpu/vk/GrVkTexture.cpp
@@ -216,7 +216,7 @@
         fLinearTextureView = nullptr;
     }
 
-    this->setNewResource(info.fImage, info.fAlloc);
+    this->setNewResource(info.fImage, info.fAlloc, info.fImageTiling);
     fTextureView = textureView;
     fInfo = info;
     this->texturePriv().setMaxMipMapLevel(mipLevels);