Set up cache in vulkan to reuse GrVkPrograms (aka VkPipelines)

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1816153002

Review URL: https://codereview.chromium.org/1816153002
diff --git a/src/gpu/GrStencil.cpp b/src/gpu/GrStencil.cpp
index f37aa31..189cc38 100644
--- a/src/gpu/GrStencil.cpp
+++ b/src/gpu/GrStencil.cpp
@@ -9,6 +9,8 @@
 
 #include "GrStencil.h"
 
+#include "GrProcessor.h"
+
 ////////////////////////////////////////////////////////////////////////////////
 // Stencil Rules for Merging user stencil space into clip
 
@@ -393,3 +395,10 @@
     }
     return false;
 }
+
+void GrStencilSettings::genKey(GrProcessorKeyBuilder* b) const {
+    // Key only gCompareSize bytes, as in operator==; fFlags is skipped (assumed trailing -- confirm).
+    static const size_t kKeySize = sizeof(GrStencilSettings) - sizeof(fFlags);
+    GR_STATIC_ASSERT(0 == kKeySize % sizeof(uint32_t));
+    memcpy(b->add32n(static_cast<int>(kKeySize / sizeof(uint32_t))), this, kKeySize);
+}
diff --git a/src/gpu/GrStencil.h b/src/gpu/GrStencil.h
index 28042b0..fa7d626 100644
--- a/src/gpu/GrStencil.h
+++ b/src/gpu/GrStencil.h
@@ -13,6 +13,8 @@
 #include "GrTypes.h"
 #include "SkRegion.h"
 
+class GrProcessorKeyBuilder;
+
 /**
  * Gr uses the stencil buffer to implement complex clipping inside the
  * GrDrawTarget class. The GrDrawTarget makes a subset of the stencil buffer
@@ -285,6 +287,8 @@
         return fPassOps[0] < kStencilOpCount;
     }
 
+    void genKey(GrProcessorKeyBuilder* b) const;
+
     bool operator == (const GrStencilSettings& s) const {
         static const size_t gCompareSize = sizeof(GrStencilSettings) -
                                            sizeof(fFlags);
diff --git a/src/gpu/gl/GrGLGpu.h b/src/gpu/gl/GrGLGpu.h
index a358273..bcb3c19 100644
--- a/src/gpu/gl/GrGLGpu.h
+++ b/src/gpu/gl/GrGLGpu.h
@@ -270,7 +270,6 @@
         ProgramCache(GrGLGpu* gpu);
         ~ProgramCache();
 
-        void reset();
         void abandon();
         GrGLProgram* refProgram(const GrGLGpu* gpu, const GrPipeline&, const GrPrimitiveProcessor&);
 
diff --git a/src/gpu/gl/GrGLGpuProgramCache.cpp b/src/gpu/gl/GrGLGpuProgramCache.cpp
index f37264a..dc449f8 100644
--- a/src/gpu/gl/GrGLGpuProgramCache.cpp
+++ b/src/gpu/gl/GrGLGpuProgramCache.cpp
@@ -77,7 +77,7 @@
 #endif
 }
 
-void GrGLGpu::ProgramCache::reset() {
+void GrGLGpu::ProgramCache::abandon() {
     for (int i = 0; i < fCount; ++i) {
         SkASSERT(fEntries[i]->fProgram.get());
         fEntries[i]->fProgram->abandon();
@@ -99,10 +99,6 @@
 #endif
 }
 
-void GrGLGpu::ProgramCache::abandon() {
-    this->reset();
-}
-
 int GrGLGpu::ProgramCache::search(const GrProgramDesc& desc) const {
     ProgDescLess less;
     return SkTSearch(fEntries, fCount, desc, sizeof(Entry*), less);
diff --git a/src/gpu/glsl/GrGLSLShaderBuilder.h b/src/gpu/glsl/GrGLSLShaderBuilder.h
index c408c8a..5b3a01f 100644
--- a/src/gpu/glsl/GrGLSLShaderBuilder.h
+++ b/src/gpu/glsl/GrGLSLShaderBuilder.h
@@ -224,6 +224,6 @@
     friend class GrGLProgramBuilder;
     friend class GrGLSLVaryingHandler; // to access noperspective interpolation feature.
     friend class GrGLPathProgramBuilder; // to access fInputs.
-    friend class GrVkProgramBuilder;
+    friend class GrVkPipelineStateBuilder;
 };
 #endif
diff --git a/src/gpu/vk/GrVkCommandBuffer.cpp b/src/gpu/vk/GrVkCommandBuffer.cpp
index c05e955..ea2b1be 100644
--- a/src/gpu/vk/GrVkCommandBuffer.cpp
+++ b/src/gpu/vk/GrVkCommandBuffer.cpp
@@ -12,7 +12,7 @@
 #include "GrVkPipeline.h"
 #include "GrVkRenderPass.h"
 #include "GrVkRenderTarget.h"
-#include "GrVkProgram.h"
+#include "GrVkPipelineState.h"
 #include "GrVkTransferBuffer.h"
 #include "GrVkUtil.h"
 
@@ -347,7 +347,7 @@
 }
 
 void GrVkCommandBuffer::bindDescriptorSets(const GrVkGpu* gpu,
-                                           GrVkProgram* program,
+                                           GrVkPipelineState* pipelineState,
                                            VkPipelineLayout layout,
                                            uint32_t firstSet,
                                            uint32_t setCount,
@@ -363,7 +363,7 @@
                                                          descriptorSets,
                                                          dynamicOffsetCount,
                                                          dynamicOffsets));
-    program->addUniformResources(*this);
+    pipelineState->addUniformResources(*this);
 }
 
 void GrVkCommandBuffer::bindPipeline(const GrVkGpu* gpu, const GrVkPipeline* pipeline) {
diff --git a/src/gpu/vk/GrVkCommandBuffer.h b/src/gpu/vk/GrVkCommandBuffer.h
index f63a191..e7b2543 100644
--- a/src/gpu/vk/GrVkCommandBuffer.h
+++ b/src/gpu/vk/GrVkCommandBuffer.h
@@ -83,7 +83,7 @@
     }
 
     void bindDescriptorSets(const GrVkGpu* gpu,
-                            GrVkProgram*,
+                            GrVkPipelineState*,
                             VkPipelineLayout layout,
                             uint32_t firstSet,
                             uint32_t setCount,
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index f01c2e4..21e4ee3 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -21,9 +21,7 @@
 #include "GrVkIndexBuffer.h"
 #include "GrVkMemory.h"
 #include "GrVkPipeline.h"
-#include "GrVkProgram.h"
-#include "GrVkProgramBuilder.h"
-#include "GrVkProgramDesc.h"
+#include "GrVkPipelineState.h"
 #include "GrVkRenderPass.h"
 #include "GrVkResourceProvider.h"
 #include "GrVkTexture.h"
@@ -478,12 +476,6 @@
     if (renderTarget) {
         tex = GrVkTextureRenderTarget::CreateNewTextureRenderTarget(this, desc, lifeCycle,
                                                                     imageDesc);
-#if 0
-        // This clear can be included to fix warning described in htttps://bugs.skia.org/5045
-        // Obviously we do not want to be clearling needlessly every time we create a render target.
-        SkIRect rect = SkIRect::MakeWH(tex->width(), tex->height());
-        this->clear(rect, GrColor_TRANSPARENT_BLACK, tex->asRenderTarget());
-#endif
     } else {
         tex = GrVkTexture::CreateNewTexture(this, desc, lifeCycle, imageDesc);
     }
@@ -1291,32 +1283,22 @@
 
     return true;
 }
-
 bool GrVkGpu::prepareDrawState(const GrPipeline& pipeline,
                                const GrPrimitiveProcessor& primProc,
                                GrPrimitiveType primitiveType,
                                const GrVkRenderPass& renderPass,
-                               GrVkProgram** program) {
-    // Get GrVkProgramDesc
-    GrVkProgramDesc desc;
-    if (!GrVkProgramDescBuilder::Build(&desc, primProc, pipeline, *this->vkCaps().glslCaps())) {
-        GrCapsDebugf(this->caps(), "Failed to vk program descriptor!\n");
+                               GrVkPipelineState** pipelineState) {
+    *pipelineState = fResourceProvider.findOrCreateCompatiblePipelineState(
+            pipeline, primProc, primitiveType, renderPass);
+    // Test the object stored through the out-param. Callers pass the address of a local, so the
+    // out-param itself is never null and checking it would let a null result reach setData.
+    if (!*pipelineState) {
         return false;
     }
 
-    *program = GrVkProgramBuilder::CreateProgram(this,
-                                                 pipeline,
-                                                 primProc,
-                                                 primitiveType,
-                                                 desc,
-                                                 renderPass);
-    if (!program) {
-        return false;
-    }
+    (*pipelineState)->setData(this, primProc, pipeline);
 
-    (*program)->setData(this, primProc, pipeline);
-
-    (*program)->bind(this, fCurrentCmdBuffer);
+    (*pipelineState)->bind(this, fCurrentCmdBuffer);
 
     GrVkPipeline::SetDynamicState(this, fCurrentCmdBuffer, pipeline);
 
@@ -1337,9 +1319,9 @@
 
     fCurrentCmdBuffer->beginRenderPass(this, renderPass, *vkRT);
 
-    GrVkProgram* program = nullptr;
+    GrVkPipelineState* pipelineState = nullptr;
     GrPrimitiveType primitiveType = meshes[0].primitiveType();
-    if (!this->prepareDrawState(pipeline, primProc, primitiveType, *renderPass, &program)) {
+    if (!this->prepareDrawState(pipeline, primProc, primitiveType, *renderPass, &pipelineState)) {
         return;
     }
 
@@ -1391,21 +1373,18 @@
         do {
             if (nonIdxMesh->primitiveType() != primitiveType) {
                 // Technically we don't have to call this here (since there is a safety check in
-                // program:setData but this will allow for quicker freeing of resources if the
-                // program sits in a cache for a while.
-                program->freeTempResources(this);
-                // This free will go away once we setup a program cache, and then the cache will be
-                // responsible for call freeGpuResources.
-                program->freeGPUResources(this);
-                program->unref();
-                SkDEBUGCODE(program = nullptr);
+                // GrVkPipelineState::setData), but this will allow for quicker freeing of
+                // resources if the pipelineState sits in a cache for a while.
+                pipelineState->freeTempResources(this);
+                pipelineState->unref();
+                SkDEBUGCODE(pipelineState = nullptr);
                 primitiveType = nonIdxMesh->primitiveType();
                 if (!this->prepareDrawState(pipeline, primProc, primitiveType, *renderPass,
-                                            &program)) {
+                                            &pipelineState)) {
                     return;
                 }
             }
-            SkASSERT(program);
+            SkASSERT(pipelineState);
             this->bindGeometry(primProc, *nonIdxMesh);
 
             if (nonIdxMesh->isIndexed()) {
@@ -1429,14 +1408,11 @@
 
     fCurrentCmdBuffer->endRenderPass(this);
 
-    // Technically we don't have to call this here (since there is a safety check in program:setData
-    // but this will allow for quicker freeing of resources if the program sits in a cache for a
-    // while.
-    program->freeTempResources(this);
-    // This free will go away once we setup a program cache, and then the cache will be responsible
-    // for call freeGpuResources.
-    program->freeGPUResources(this);
-    program->unref();
+    // Technically we don't have to call this here (since there is a safety check in
+    // GrVkPipelineState::setData), but this will allow for quicker freeing of resources if the
+    // pipelineState sits in a cache for a while.
+    pipelineState->freeTempResources(this);
+    pipelineState->unref();
 
 #if SWAP_PER_DRAW
     glFlush();
diff --git a/src/gpu/vk/GrVkGpu.h b/src/gpu/vk/GrVkGpu.h
index b9f9027..4be3b8c 100644
--- a/src/gpu/vk/GrVkGpu.h
+++ b/src/gpu/vk/GrVkGpu.h
@@ -13,7 +13,6 @@
 #include "vk/GrVkBackendContext.h"
 #include "GrVkCaps.h"
 #include "GrVkIndexBuffer.h"
-#include "GrVkProgram.h"
 #include "GrVkResourceProvider.h"
 #include "GrVkVertexBuffer.h"
 #include "GrVkUtil.h"
@@ -27,6 +26,7 @@
 class GrVkBufferImpl;
 class GrVkCommandBuffer;
 class GrVkPipeline;
+class GrVkPipelineState;
 class GrVkRenderPass;
 class GrVkTexture;
 struct GrVkInterface;
@@ -186,7 +186,7 @@
                           const GrPrimitiveProcessor&,
                           GrPrimitiveType,
                           const GrVkRenderPass&,
-                          GrVkProgram** program);
+                          GrVkPipelineState** pipelineState);
 
     // Bind vertex and index buffers
     void bindGeometry(const GrPrimitiveProcessor&, const GrNonInstancedMesh&);
diff --git a/src/gpu/vk/GrVkPipeline.cpp b/src/gpu/vk/GrVkPipeline.cpp
index 7571a40..9c0fd28 100644
--- a/src/gpu/vk/GrVkPipeline.cpp
+++ b/src/gpu/vk/GrVkPipeline.cpp
@@ -479,7 +479,6 @@
     GR_VK_CALL(gpu->vkInterface(), DestroyPipeline(gpu->device(), fPipeline, nullptr));
 }
 
-
 void set_dynamic_scissor_state(GrVkGpu* gpu,
                                GrVkCommandBuffer* cmdBuffer,
                                const GrPipeline& pipeline,
@@ -544,8 +543,6 @@
     cmdBuffer->setBlendConstants(gpu, floatColors);
 }
 
-
-                                          
 void GrVkPipeline::SetDynamicState(GrVkGpu* gpu,
                                    GrVkCommandBuffer* cmdBuffer,
                                    const GrPipeline& pipeline) {
diff --git a/src/gpu/vk/GrVkProgram.cpp b/src/gpu/vk/GrVkPipelineState.cpp
similarity index 73%
rename from src/gpu/vk/GrVkProgram.cpp
rename to src/gpu/vk/GrVkPipelineState.cpp
index 9580a3a..996ce4a 100644
--- a/src/gpu/vk/GrVkProgram.cpp
+++ b/src/gpu/vk/GrVkPipelineState.cpp
@@ -5,7 +5,7 @@
 * found in the LICENSE file.
 */
 
-#include "GrVkProgram.h"
+#include "GrVkPipelineState.h"
 
 #include "GrPipeline.h"
 #include "GrVkCommandBuffer.h"
@@ -14,6 +14,7 @@
 #include "GrVkImageView.h"
 #include "GrVkMemory.h"
 #include "GrVkPipeline.h"
+#include "GrVkRenderTarget.h"
 #include "GrVkSampler.h"
 #include "GrVkTexture.h"
 #include "GrVkUniformBuffer.h"
@@ -21,25 +22,27 @@
 #include "glsl/GrGLSLGeometryProcessor.h"
 #include "glsl/GrGLSLXferProcessor.h"
 
-GrVkProgram::GrVkProgram(GrVkGpu* gpu,
-                         GrVkPipeline* pipeline,
-                         VkPipelineLayout layout,
-                         VkDescriptorSetLayout dsLayout[2],
-                         const BuiltinUniformHandles& builtinUniformHandles,
-                         const UniformInfoArray& uniforms,
-                         uint32_t vertexUniformSize,
-                         uint32_t fragmentUniformSize,
-                         uint32_t numSamplers,
-                         GrGLSLPrimitiveProcessor* geometryProcessor,
-                         GrGLSLXferProcessor* xferProcessor,
-                         const GrGLSLFragProcs& fragmentProcessors)
+GrVkPipelineState::GrVkPipelineState(GrVkGpu* gpu,
+                                     const GrVkPipelineState::Desc& desc,
+                                     GrVkPipeline* pipeline,
+                                     VkPipelineLayout layout,
+                                     VkDescriptorSetLayout dsLayout[2],
+                                     const BuiltinUniformHandles& builtinUniformHandles,
+                                     const UniformInfoArray& uniforms,
+                                     uint32_t vertexUniformSize,
+                                     uint32_t fragmentUniformSize,
+                                     uint32_t numSamplers,
+                                     GrGLSLPrimitiveProcessor* geometryProcessor,
+                                     GrGLSLXferProcessor* xferProcessor,
+                                     const GrGLSLFragProcs& fragmentProcessors)
     : fPipeline(pipeline)
     , fPipelineLayout(layout)
     , fBuiltinUniformHandles(builtinUniformHandles)
     , fGeometryProcessor(geometryProcessor)
     , fXferProcessor(xferProcessor)
     , fFragmentProcessors(fragmentProcessors)
-    , fProgramDataManager(uniforms, vertexUniformSize, fragmentUniformSize)
+    , fDesc(desc)
+    , fDataManager(uniforms, vertexUniformSize, fragmentUniformSize)
     , fSamplerPoolManager(dsLayout[GrVkUniformHandler::kSamplerDescSet],
                           VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, numSamplers, gpu)
     , fUniformPoolManager(dsLayout[GrVkUniformHandler::kUniformBufferDescSet],
@@ -65,7 +68,7 @@
     fNumSamplers = numSamplers;
 }
 
-GrVkProgram::~GrVkProgram() {
+GrVkPipelineState::~GrVkPipelineState() {
     // Must of freed all GPU resources before this is destroyed
     SkASSERT(!fPipeline);
     SkASSERT(!fPipelineLayout);
@@ -74,7 +77,7 @@
     SkASSERT(!fTextures.count());
 }
 
-void GrVkProgram::freeTempResources(const GrVkGpu* gpu) {
+void GrVkPipelineState::freeTempResources(const GrVkGpu* gpu) {
     for (int i = 0; i < fSamplers.count(); ++i) {
         fSamplers[i]->unref(gpu);
     }
@@ -91,7 +94,7 @@
     fTextures.rewind();
 }
 
-void GrVkProgram::freeGPUResources(const GrVkGpu* gpu) {
+void GrVkPipelineState::freeGPUResources(const GrVkGpu* gpu) {
     if (fPipeline) {
         fPipeline->unref(gpu);
         fPipeline = nullptr;
@@ -118,7 +121,7 @@
     this->freeTempResources(gpu);
 }
 
-void GrVkProgram::abandonGPUResources() {
+void GrVkPipelineState::abandonGPUResources() {
     fPipeline->unrefAndAbandon();
     fPipeline = nullptr;
 
@@ -157,9 +160,9 @@
     }
 }
 
-void GrVkProgram::setData(GrVkGpu* gpu,
-                          const GrPrimitiveProcessor& primProc,
-                          const GrPipeline& pipeline) {
+void GrVkPipelineState::setData(GrVkGpu* gpu,
+                                const GrPrimitiveProcessor& primProc,
+                                const GrPipeline& pipeline) {
     // This is here to protect against someone calling setData multiple times in a row without
     // freeing the tempData between calls.
     this->freeTempResources(gpu);
@@ -168,18 +171,18 @@
 
     SkSTArray<8, const GrTextureAccess*> textureBindings;
 
-    fGeometryProcessor->setData(fProgramDataManager, primProc);
+    fGeometryProcessor->setData(fDataManager, primProc);
     append_texture_bindings(primProc, &textureBindings);
 
     for (int i = 0; i < fFragmentProcessors.count(); ++i) {
         const GrFragmentProcessor& processor = pipeline.getFragmentProcessor(i);
-        fFragmentProcessors[i]->setData(fProgramDataManager, processor);
-        fGeometryProcessor->setTransformData(primProc, fProgramDataManager, i,
+        fFragmentProcessors[i]->setData(fDataManager, processor);
+        fGeometryProcessor->setTransformData(primProc, fDataManager, i,
                                              processor.coordTransforms());
         append_texture_bindings(processor, &textureBindings);
     }
 
-    fXferProcessor->setData(fProgramDataManager, pipeline.getXferProcessor());
+    fXferProcessor->setData(fDataManager, pipeline.getXferProcessor());
     append_texture_bindings(pipeline.getXferProcessor(), &textureBindings);
 
     // Get new descriptor sets
@@ -195,8 +198,8 @@
     this->writeSamplers(gpu, textureBindings);
 }
 
-void GrVkProgram::writeUniformBuffers(const GrVkGpu* gpu) {
-    fProgramDataManager.uploadUniformBuffers(gpu, fVertexUniformBuffer, fFragmentUniformBuffer);
+void GrVkPipelineState::writeUniformBuffers(const GrVkGpu* gpu) {
+    fDataManager.uploadUniformBuffers(gpu, fVertexUniformBuffer, fFragmentUniformBuffer);
 
     VkWriteDescriptorSet descriptorWrites[2];
     memset(descriptorWrites, 0, 2 * sizeof(VkWriteDescriptorSet));
@@ -223,6 +226,13 @@
         descriptorWrites[0].pImageInfo = nullptr;
         descriptorWrites[0].pBufferInfo = &vertBufferInfo;
         descriptorWrites[0].pTexelBufferView = nullptr;
+
+        fVertexUniformBuffer->addMemoryBarrier(gpu,
+                                               VK_ACCESS_HOST_WRITE_BIT,
+                                               VK_ACCESS_UNIFORM_READ_BIT,
+                                               VK_PIPELINE_STAGE_HOST_BIT,
+                                               VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+                                               false);
     }
 
     VkDescriptorBufferInfo fragBufferInfo;
@@ -247,6 +257,13 @@
         descriptorWrites[1].pImageInfo = nullptr;
         descriptorWrites[1].pBufferInfo = &fragBufferInfo;
         descriptorWrites[1].pTexelBufferView = nullptr;
+
+        fFragmentUniformBuffer->addMemoryBarrier(gpu,
+                                                 VK_ACCESS_HOST_WRITE_BIT,
+                                                 VK_ACCESS_UNIFORM_READ_BIT,
+                                                 VK_PIPELINE_STAGE_HOST_BIT,
+                                                 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+                                                 false);
     }
 
     if (uniformBindingUpdateCount) {
@@ -257,8 +274,8 @@
     }
 }
 
-void GrVkProgram::writeSamplers(GrVkGpu* gpu,
-                                const SkTArray<const GrTextureAccess*>& textureBindings) {
+void GrVkPipelineState::writeSamplers(GrVkGpu* gpu,
+                                      const SkTArray<const GrTextureAccess*>& textureBindings) {
     SkASSERT(fNumSamplers == textureBindings.count());
 
     for (int i = 0; i < textureBindings.count(); ++i) {
@@ -316,11 +333,11 @@
     }
 }
 
-void GrVkProgram::setRenderTargetState(const GrPipeline& pipeline) {
+void GrVkPipelineState::setRenderTargetState(const GrPipeline& pipeline) {
     // Load the RT height uniform if it is needed to y-flip gl_FragCoord.
     if (fBuiltinUniformHandles.fRTHeightUni.isValid() &&
         fRenderTargetState.fRenderTargetSize.fHeight != pipeline.getRenderTarget()->height()) {
-        fProgramDataManager.set1f(fBuiltinUniformHandles.fRTHeightUni,
+        fDataManager.set1f(fBuiltinUniformHandles.fRTHeightUni,
                                   SkIntToScalar(pipeline.getRenderTarget()->height()));
     }
 
@@ -336,11 +353,11 @@
 
         float rtAdjustmentVec[4];
         fRenderTargetState.getRTAdjustmentVec(rtAdjustmentVec);
-        fProgramDataManager.set4fv(fBuiltinUniformHandles.fRTAdjustmentUni, 1, rtAdjustmentVec);
+        fDataManager.set4fv(fBuiltinUniformHandles.fRTAdjustmentUni, 1, rtAdjustmentVec);
     }
 }
 
-void GrVkProgram::bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer) {
+void GrVkPipelineState::bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer) {
     commandBuffer->bindPipeline(gpu, fPipeline);
 
     if (fDSCount) {
@@ -349,7 +366,7 @@
     }
 }
 
-void GrVkProgram::addUniformResources(GrVkCommandBuffer& commandBuffer) {
+void GrVkPipelineState::addUniformResources(GrVkCommandBuffer& commandBuffer) {
     if (fSamplerPoolManager.fPool) {
         commandBuffer.addResource(fSamplerPoolManager.fPool);
     }
@@ -378,7 +395,7 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 
-void GrVkProgram::DescriptorPoolManager::getNewPool(GrVkGpu* gpu) {
+void GrVkPipelineState::DescriptorPoolManager::getNewPool(GrVkGpu* gpu) {
     if (fPool) {
         fPool->unref(gpu);
         SkASSERT(fMaxDescriptorSets < (SK_MaxU32 >> 1));
@@ -392,7 +409,7 @@
     SkASSERT(fPool || !fMaxDescriptorSets);
 }
 
-void GrVkProgram::DescriptorPoolManager::getNewDescriptorSet(GrVkGpu* gpu, VkDescriptorSet* ds) {
+void GrVkPipelineState::DescriptorPoolManager::getNewDescriptorSet(GrVkGpu* gpu, VkDescriptorSet* ds) {
     if (!fMaxDescriptorSets) {
         return;
     }
@@ -409,13 +426,12 @@
     dsAllocateInfo.descriptorPool = fPool->descPool();
     dsAllocateInfo.descriptorSetCount = 1;
     dsAllocateInfo.pSetLayouts = &fDescLayout;
-
     GR_VK_CALL_ERRCHECK(gpu->vkInterface(), AllocateDescriptorSets(gpu->device(),
                                                                    &dsAllocateInfo,
                                                                    ds));
 }
 
-void GrVkProgram::DescriptorPoolManager::freeGPUResources(const GrVkGpu* gpu) {
+void GrVkPipelineState::DescriptorPoolManager::freeGPUResources(const GrVkGpu* gpu) {
     if (fDescLayout) {
         GR_VK_CALL(gpu->vkInterface(), DestroyDescriptorSetLayout(gpu->device(), fDescLayout,
                                                                   nullptr));
@@ -428,10 +444,53 @@
     }
 }
 
-void GrVkProgram::DescriptorPoolManager::abandonGPUResources() {
+void GrVkPipelineState::DescriptorPoolManager::abandonGPUResources() {
     fDescLayout = VK_NULL_HANDLE;
     if (fPool) {
         fPool->unrefAndAbandon();
         fPool = nullptr;
     }
 }
+
+// Packs the xfer processor's blend info into a single key word: bit 0 holds fWriteColor, the next
+// two 5-bit fields hold fSrcBlend and fDstBlend, and fEquation occupies the bits above those.
+static uint32_t get_blend_info_key(const GrPipeline& pipeline) {
+    GrXferProcessor::BlendInfo blendInfo;
+    pipeline.getXferProcessor().getBlendInfo(&blendInfo);
+    static const uint32_t kBlendWriteShift = 1;
+    static const uint32_t kBlendCoeffShift = 5;
+    GR_STATIC_ASSERT(kLast_GrBlendCoeff < (1 << kBlendCoeffShift));
+    GR_STATIC_ASSERT(kFirstAdvancedGrBlendEquation - 1 < 4);
+
+    uint32_t key = blendInfo.fWriteColor;
+    key |= (blendInfo.fSrcBlend << kBlendWriteShift);
+    key |= (blendInfo.fDstBlend << (kBlendWriteShift + kBlendCoeffShift));
+    key |= (blendInfo.fEquation << (kBlendWriteShift + 2 * kBlendCoeffShift));
+    return key;
+}
+
+void GrVkPipelineState::BuildStateKey(const GrPipeline& pipeline, GrPrimitiveType primitiveType,
+                                      SkTArray<uint8_t, true>* key) {
+    // Start from an empty key and reserve kData_StateKeyOffset bytes up front for the length.
+    key->reset();
+    key->push_back_n(kData_StateKeyOffset);
+
+    GrProcessorKeyBuilder b(key);
+    // Key inputs, in order: render pass, stencil, draw face, blend info, primitive type.
+    GrVkRenderTarget* vkRT = (GrVkRenderTarget*)pipeline.getRenderTarget();
+    vkRT->simpleRenderPass()->genKey(&b);
+
+    pipeline.getStencil().genKey(&b);
+
+    SkASSERT(sizeof(GrPipelineBuilder::DrawFace) <= sizeof(uint32_t));
+    b.add32(pipeline.getDrawFace());
+
+    b.add32(get_blend_info_key(pipeline));
+
+    b.add32(primitiveType);
+
+    // Store the total key length, in bytes, in the slot reserved at the start of the key.
+    int keyLength = key->count();
+    SkASSERT(0 == (keyLength % 4));
+    *reinterpret_cast<uint32_t*>(key->begin()) = SkToU32(keyLength);
+}
diff --git a/src/gpu/vk/GrVkPipelineState.h b/src/gpu/vk/GrVkPipelineState.h
new file mode 100644
index 0000000..740004b
--- /dev/null
+++ b/src/gpu/vk/GrVkPipelineState.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#ifndef GrVkPipelineState_DEFINED
+#define GrVkPipelineState_DEFINED
+
+#include "GrVkImage.h"
+#include "GrVkProgramDesc.h"
+#include "GrVkPipelineStateDataManager.h"
+#include "glsl/GrGLSLProgramBuilder.h"
+
+#include "vulkan/vulkan.h"
+
+class GrPipeline;
+class GrVkCommandBuffer;
+class GrVkDescriptorPool;
+class GrVkGpu;
+class GrVkImageView;
+class GrVkPipeline;
+class GrVkSampler;
+class GrVkUniformBuffer;
+
+/**
+ * This class holds onto a GrVkPipeline object that we use for draws. Besides storing the actual
+ * GrVkPipeline object, this class is also responsible for handling all uniforms, descriptors,
+ * samplers, and other similar objects that are used along with the VkPipeline in the draw. This
+ * includes both allocating and freeing these objects, as well as updating their values.
+ */
+class GrVkPipelineState : public SkRefCnt {
+public:
+    typedef GrGLSLProgramBuilder::BuiltinUniformHandles BuiltinUniformHandles;
+
+    ~GrVkPipelineState();
+
+    GrVkPipeline* vkPipeline() const { return fPipeline; }
+
+    void setData(GrVkGpu*, const GrPrimitiveProcessor&, const GrPipeline&);
+
+    void bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer);
+
+    void addUniformResources(GrVkCommandBuffer&);
+
+    void freeGPUResources(const GrVkGpu* gpu);
+
+    // This releases resources that only a given instance of a GrVkPipelineState needs to hold onto
+    // and don't need to survive across new uses of the GrVkPipelineState.
+    void freeTempResources(const GrVkGpu* gpu);
+
+    void abandonGPUResources();
+
+    // The key is composed of two parts:
+    // 1. uint32_t for total key length
+    // 2. Pipeline state data
+    enum StateKeyOffsets {
+        // Part 1.
+        kLength_StateKeyOffset = 0,
+        // Part 2.
+        kData_StateKeyOffset = kLength_StateKeyOffset + sizeof(uint32_t),
+    };
+    static void BuildStateKey(const GrPipeline&, GrPrimitiveType primitiveType,
+                               SkTArray<unsigned char, true>* key);
+
+    /**
+     * For Vulkan we want to cache the entire VkPipeline for reuse of draws. The Desc here holds all
+     * the information needed to differentiate one pipeline from another.
+     *
+     * The GrVkProgramDesc contains all the information needed to create the actual shaders for the
+     * pipeline.
+     *
+     * The fStateKey is used to store all the inputs for the rest of the state stored on the
+     * pipeline. This includes stencil settings, blending information, render pass format, draw face
+     * information, and primitive type. Note that some state is set dynamically on the pipeline for
+     * each draw and thus is not included in this descriptor. This includes the viewport, scissor,
+     * and blend constant.
+     *
+     * A checksum which includes the fProgramDesc and fStateKey is included at the top of the Desc
+     * for caching purposes and faster equality checks.
+     */
+    struct Desc {
+        uint32_t                fChecksum;
+        GrVkProgramDesc         fProgramDesc;
+
+        enum {
+            kRenderPassKeyAlloc = 12, // This is typical color attachment with no stencil or msaa
+            kStencilKeyAlloc = sizeof(GrStencilSettings),
+            kDrawFaceKeyAlloc = 4,
+            kBlendingKeyAlloc = 4,
+            kPrimitiveTypeKeyAlloc = 4,
+            kPreAllocSize = kData_StateKeyOffset + kRenderPassKeyAlloc + kStencilKeyAlloc +
+                            kDrawFaceKeyAlloc + kBlendingKeyAlloc + kPrimitiveTypeKeyAlloc,
+        };
+        SkSTArray<kPreAllocSize, uint8_t, true> fStateKey;
+
+        bool operator== (const Desc& that) const {
+            if (fChecksum != that.fChecksum || fProgramDesc != that.fProgramDesc) {
+                return false;
+            }
+            // The key length is stored in the first word of fStateKey, so keys of different
+            // lengths already differ on the first comparison. Therefore we just use this Desc's
+            // length to bound the loop.
+            int keyLength = fStateKey.count();
+            SkASSERT(SkIsAlign4(keyLength));
+            int l = keyLength >> 2;
+            const uint32_t* aKey = reinterpret_cast<const uint32_t*>(fStateKey.begin());
+            const uint32_t* bKey = reinterpret_cast<const uint32_t*>(that.fStateKey.begin());
+            for (int i = 0; i < l; ++i) {
+                if (aKey[i] != bKey[i]) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        static bool Less(const Desc& a, const Desc& b) {
+            if (a.fChecksum != b.fChecksum) {
+                return a.fChecksum < b.fChecksum ? true : false;
+            }
+            bool progDescLess = GrProgramDesc::Less(a.fProgramDesc, b.fProgramDesc);
+            if (progDescLess || a.fProgramDesc != b.fProgramDesc) {
+                return progDescLess;
+            }
+            // Checksums and program descs match; order by the state key, one 32-bit word at a time.
+            int keyLength = a.fStateKey.count();
+            SkASSERT(SkIsAlign4(keyLength));
+            int l = keyLength >> 2;
+            const uint32_t* aKey = reinterpret_cast<const uint32_t*>(a.fStateKey.begin());
+            const uint32_t* bKey = reinterpret_cast<const uint32_t*>(b.fStateKey.begin());
+            for (int i = 0; i < l; ++i) {
+                if (aKey[i] != bKey[i]) {
+                    return aKey[i] < bKey[i] ? true : false;
+                }
+            }
+            return false;
+        }
+    };
+
+    const Desc& getDesc() { return fDesc; }
+
+private:
+    typedef GrVkPipelineStateDataManager::UniformInfoArray UniformInfoArray;
+    typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
+
+    GrVkPipelineState(GrVkGpu* gpu,
+                      const GrVkPipelineState::Desc&,
+                      GrVkPipeline* pipeline,
+                      VkPipelineLayout layout,
+                      VkDescriptorSetLayout dsLayout[2],
+                      const BuiltinUniformHandles& builtinUniformHandles,
+                      const UniformInfoArray& uniforms,
+                      uint32_t vertexUniformSize,
+                      uint32_t fragmentUniformSize,
+                      uint32_t numSamplers,
+                      GrGLSLPrimitiveProcessor* geometryProcessor,
+                      GrGLSLXferProcessor* xferProcessor,
+                      const GrGLSLFragProcs& fragmentProcessors);
+
+    // Each pool manages a single type of descriptor, so every descriptor set handed out by one
+    // manager has the same VkDescriptorType.
+    struct DescriptorPoolManager {
+        DescriptorPoolManager(VkDescriptorSetLayout layout, VkDescriptorType type,
+                              uint32_t descCount, GrVkGpu* gpu)
+            : fDescLayout(layout)
+            , fDescType(type)
+            , fCurrentDescriptorSet(0)
+            , fPool(nullptr) {
+            SkASSERT(descCount < (SK_MaxU32 >> 2));  // keeps the shift below from overflowing
+            fMaxDescriptorSets = descCount << 2;     // i.e. 4 * descCount
+            this->getNewPool(gpu);
+        }
+
+        ~DescriptorPoolManager() {
+            SkASSERT(!fDescLayout);  // both handles must already be null at destruction
+            SkASSERT(!fPool);
+        }
+
+        void getNewDescriptorSet(GrVkGpu* gpu, VkDescriptorSet* ds);
+
+        void freeGPUResources(const GrVkGpu* gpu);
+        void abandonGPUResources();
+
+        VkDescriptorSetLayout  fDescLayout;
+        VkDescriptorType       fDescType;
+        uint32_t               fMaxDescriptorSets;     // set to 4 * descCount by the constructor
+        uint32_t               fCurrentDescriptorSet;  // starts at 0
+        GrVkDescriptorPool*    fPool;
+
+    private:
+        void getNewPool(GrVkGpu* gpu);
+    };
+
+    void writeUniformBuffers(const GrVkGpu* gpu);
+
+    void writeSamplers(GrVkGpu* gpu, const SkTArray<const GrTextureAccess*>& textureBindings);
+
+    /**
+    * Holds the render target's size and origin. They are used to adjust from Skia device space
+    * to Vulkan normalized device space and to make device space positions have the correct
+    * origin for processors that require them.
+    */
+    struct RenderTargetState {
+        SkISize         fRenderTargetSize;
+        GrSurfaceOrigin fRenderTargetOrigin;
+
+        RenderTargetState() { this->invalidate(); }
+
+        // Marks the state invalid: width, height and origin all become -1.
+        void invalidate() {
+            fRenderTargetSize.fWidth = fRenderTargetSize.fHeight = -1;
+            fRenderTargetOrigin = (GrSurfaceOrigin)-1;
+        }
+
+        /**
+        * Writes to destVec a vec4 that adjusts a position from Skia device coords to Vulkan's
+        * normalized device coords. Assuming the transformed position, pos, is a homogeneous vec3,
+        * the vec, v, is applied as such:
+        * pos.x = dot(v.xy, pos.xz)
+        * pos.y = dot(v.zw, pos.yz)
+        */
+        void getRTAdjustmentVec(float* destVec) {
+            // A bottom-left origin negates both the y scale and the y offset.
+            const bool bottomLeft = (kBottomLeft_GrSurfaceOrigin == fRenderTargetOrigin);
+            const float ySign = bottomLeft ? -1.f : 1.f;
+
+            destVec[0] = 2.f / fRenderTargetSize.fWidth;
+            destVec[1] = -1.f;
+            destVec[2] = (ySign * 2.f) / fRenderTargetSize.fHeight;
+            destVec[3] = -ySign;
+        }
+    };
+
+    // Helper for setData() that sets the view matrix and loads the render target height uniform
+    void setRenderTargetState(const GrPipeline&);
+
+    // GrVkResources
+    GrVkPipeline* fPipeline;
+
+    // Used for binding DescriptorSets to the command buffer but does not need to survive during
+    // command buffer execution. Thus this does not need to be a GrVkResource.
+    VkPipelineLayout fPipelineLayout;
+
+    // The DescriptorSets need to survive until the gpu has finished all draws that use them.
+    // However, they will only be freed by the descriptor pool. Thus by simply keeping the
+    // descriptor pool alive through the draw, the descriptor sets will also stay alive. Thus we do
+    // not need a GrVkResource version of VkDescriptorSet. We hold on to these in the
+    // GrVkPipelineState since we update the descriptor sets and bind them at separate times.
+    VkDescriptorSet fDescriptorSets[2];
+
+    // Meta data so we know which descriptor sets we are using and need to bind.
+    int fStartDS;
+    int fDSCount;
+
+    SkAutoTDelete<GrVkUniformBuffer> fVertexUniformBuffer;
+    SkAutoTDelete<GrVkUniformBuffer> fFragmentUniformBuffer;
+
+    // GrVkResources used for sampling textures
+    SkTDArray<GrVkSampler*> fSamplers;
+    SkTDArray<const GrVkImageView*> fTextureViews;
+    SkTDArray<const GrVkImage::Resource*> fTextures;
+
+    // Tracks the current render target uniforms stored in the vertex buffer.
+    RenderTargetState fRenderTargetState;
+    BuiltinUniformHandles fBuiltinUniformHandles;
+
+    // Processors in the GrVkPipelineState
+    SkAutoTDelete<GrGLSLPrimitiveProcessor> fGeometryProcessor;
+    SkAutoTDelete<GrGLSLXferProcessor> fXferProcessor;
+    GrGLSLFragProcs fFragmentProcessors;
+
+    Desc fDesc;
+
+    GrVkPipelineStateDataManager fDataManager;
+
+    DescriptorPoolManager fSamplerPoolManager;
+    DescriptorPoolManager fUniformPoolManager;
+
+    int fNumSamplers;
+
+    friend class GrVkPipelineStateBuilder;
+};
+
+#endif
diff --git a/src/gpu/vk/GrVkProgramBuilder.cpp b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
similarity index 80%
rename from src/gpu/vk/GrVkProgramBuilder.cpp
rename to src/gpu/vk/GrVkPipelineStateBuilder.cpp
index 9dbbbb8..3b077f0 100644
--- a/src/gpu/vk/GrVkProgramBuilder.cpp
+++ b/src/gpu/vk/GrVkPipelineStateBuilder.cpp
@@ -5,21 +5,21 @@
 * found in the LICENSE file.
 */
 
-#include "vk/GrVkProgramBuilder.h"
+#include "vk/GrVkPipelineStateBuilder.h"
 
 #include "vk/GrVkGpu.h"
 #include "vk/GrVkRenderPass.h"
-#include "vk/GrVkProgram.h"
 
-GrVkProgram* GrVkProgramBuilder::CreateProgram(GrVkGpu* gpu,
-                                               const GrPipeline& pipeline,
-                                               const GrPrimitiveProcessor& primProc,
-                                               GrPrimitiveType primitiveType,
-                                               const GrVkProgramDesc& desc,
-                                               const GrVkRenderPass& renderPass) {
+GrVkPipelineState* GrVkPipelineStateBuilder::CreatePipelineState(
+                                                               GrVkGpu* gpu,
+                                                               const GrPipeline& pipeline,
+                                                               const GrPrimitiveProcessor& primProc,
+                                                               GrPrimitiveType primitiveType,
+                                                               const GrVkPipelineState::Desc& desc,
+                                                               const GrVkRenderPass& renderPass) {
     // create a builder.  This will be handed off to effects so they can use it to add
     // uniforms, varyings, textures, etc
-    GrVkProgramBuilder builder(gpu, pipeline, primProc, desc);
+    GrVkPipelineStateBuilder builder(gpu, pipeline, primProc, desc.fProgramDesc);
 
     GrGLSLExpr4 inputColor;
     GrGLSLExpr4 inputCoverage;
@@ -29,27 +29,27 @@
         return nullptr;
     }
 
-    return builder.finalize(primitiveType, renderPass);
+    return builder.finalize(primitiveType, renderPass, desc);
 }
 
-GrVkProgramBuilder::GrVkProgramBuilder(GrVkGpu* gpu,
-                                       const GrPipeline& pipeline,
-                                       const GrPrimitiveProcessor& primProc,
-                                       const GrVkProgramDesc& desc)
+GrVkPipelineStateBuilder::GrVkPipelineStateBuilder(GrVkGpu* gpu,
+                                                   const GrPipeline& pipeline,
+                                                   const GrPrimitiveProcessor& primProc,
+                                                   const GrVkProgramDesc& desc)
     : INHERITED(pipeline, primProc, desc) 
     , fGpu(gpu)
     , fVaryingHandler(this) 
     , fUniformHandler(this) {
 }
 
-const GrCaps* GrVkProgramBuilder::caps() const {
+const GrCaps* GrVkPipelineStateBuilder::caps() const {
     return fGpu->caps();
 }
-const GrGLSLCaps* GrVkProgramBuilder::glslCaps() const {
+const GrGLSLCaps* GrVkPipelineStateBuilder::glslCaps() const {
     return fGpu->vkCaps().glslCaps();
 }
 
-void GrVkProgramBuilder::finalizeFragmentOutputColor(GrGLSLShaderVar& outputColor) {
+void GrVkPipelineStateBuilder::finalizeFragmentOutputColor(GrGLSLShaderVar& outputColor) {
     outputColor.setLayoutQualifier("location = 0");
 }
 
@@ -76,11 +76,11 @@
     return shaderc_glsl_fragment_shader;
 }
 
-bool GrVkProgramBuilder::CreateVkShaderModule(const GrVkGpu* gpu,
-                                              VkShaderStageFlagBits stage,
-                                              const GrGLSLShaderBuilder& builder,
-                                              VkShaderModule* shaderModule,
-                                              VkPipelineShaderStageCreateInfo* stageInfo) {
+bool GrVkPipelineStateBuilder::CreateVkShaderModule(const GrVkGpu* gpu,
+                                                    VkShaderStageFlagBits stage,
+                                                    const GrGLSLShaderBuilder& builder,
+                                                    VkShaderModule* shaderModule,
+                                                    VkPipelineShaderStageCreateInfo* stageInfo) {
     SkString shaderString;
     for (int i = 0; i < builder.fCompilerStrings.count(); ++i) {
         if (builder.fCompilerStrings[i]) {
@@ -140,8 +140,9 @@
     return true;
 }
 
-GrVkProgram* GrVkProgramBuilder::finalize(GrPrimitiveType primitiveType,
-                                          const GrVkRenderPass& renderPass) {
+GrVkPipelineState* GrVkPipelineStateBuilder::finalize(GrPrimitiveType primitiveType,
+                                                      const GrVkRenderPass& renderPass,
+                                                      const GrVkPipelineState::Desc& desc) {
     VkDescriptorSetLayout dsLayout[2];
     VkPipelineLayout pipelineLayout;
     VkShaderModule vertShaderModule;
@@ -272,16 +273,17 @@
         return nullptr;
     }
 
-    return new GrVkProgram(fGpu,
-                           pipeline,
-                           pipelineLayout,
-                           dsLayout,
-                           fUniformHandles,
-                           fUniformHandler.fUniforms,
-                           fUniformHandler.fCurrentVertexUBOOffset,
-                           fUniformHandler.fCurrentFragmentUBOOffset,
-                           numSamplers,
-                           fGeometryProcessor,
-                           fXferProcessor,
-                           fFragmentProcessors);
+    return new GrVkPipelineState(fGpu,
+                                 desc,
+                                 pipeline,
+                                 pipelineLayout,
+                                 dsLayout,
+                                 fUniformHandles,
+                                 fUniformHandler.fUniforms,
+                                 fUniformHandler.fCurrentVertexUBOOffset,
+                                 fUniformHandler.fCurrentFragmentUBOOffset,
+                                 numSamplers,
+                                 fGeometryProcessor,
+                                 fXferProcessor,
+                                 fFragmentProcessors);
 }
diff --git a/src/gpu/vk/GrVkPipelineStateBuilder.h b/src/gpu/vk/GrVkPipelineStateBuilder.h
new file mode 100644
index 0000000..751a62a
--- /dev/null
+++ b/src/gpu/vk/GrVkPipelineStateBuilder.h
@@ -0,0 +1,74 @@
+/*
+* Copyright 2016 Google Inc.
+*
+* Use of this source code is governed by a BSD-style license that can be
+* found in the LICENSE file.
+*/
+
+#ifndef GrVkPipelineStateBuilder_DEFINED
+#define GrVkPipelineStateBuilder_DEFINED
+
+#include "glsl/GrGLSLProgramBuilder.h"
+
+#include "GrPipeline.h"
+#include "GrVkPipelineState.h"
+#include "GrVkUniformHandler.h"
+#include "GrVkVaryingHandler.h"
+
+#include "shaderc/shaderc.h"
+#include "vulkan/vulkan.h"
+
+class GrVkGpu;
+class GrVkRenderPass;
+class GrVkProgramDesc;
+
+class GrVkPipelineStateBuilder : public GrGLSLProgramBuilder {
+public:
+    /** Generates a pipeline state.
+    *
+    * The GrVkPipelineState implements what is specified in the GrPipeline and GrPrimitiveProcessor
+    * as input. After successful generation, the builder result objects are available to be used.
+    * @return the newly created GrVkPipelineState, or nullptr if generation failed.
+    */
+    static GrVkPipelineState* CreatePipelineState(GrVkGpu*,
+                                                  const GrPipeline&,
+                                                  const GrPrimitiveProcessor&,
+                                                  GrPrimitiveType,
+                                                  const GrVkPipelineState::Desc&,
+                                                  const GrVkRenderPass& renderPass);
+
+    const GrCaps* caps() const override;
+    const GrGLSLCaps* glslCaps() const override;
+
+    GrVkGpu* gpu() const { return fGpu; }
+
+    void finalizeFragmentOutputColor(GrGLSLShaderVar& outputColor) override;
+
+private:
+    GrVkPipelineStateBuilder(GrVkGpu*,
+                             const GrPipeline&,
+                             const GrPrimitiveProcessor&,
+                             const GrVkProgramDesc&);
+
+    GrVkPipelineState* finalize(GrPrimitiveType primitiveType,
+                                const GrVkRenderPass& renderPass,
+                                const GrVkPipelineState::Desc&);
+
+    static bool CreateVkShaderModule(const GrVkGpu* gpu,
+                                     VkShaderStageFlagBits stage,
+                                     const GrGLSLShaderBuilder& builder,
+                                     VkShaderModule* shaderModule,
+                                     VkPipelineShaderStageCreateInfo* stageInfo);
+
+    GrGLSLUniformHandler* uniformHandler() override { return &fUniformHandler; }
+    const GrGLSLUniformHandler* uniformHandler() const override { return &fUniformHandler; }
+    GrGLSLVaryingHandler* varyingHandler() override { return &fVaryingHandler; }
+
+    GrVkGpu* fGpu;
+    GrVkVaryingHandler        fVaryingHandler;
+    GrVkUniformHandler        fUniformHandler;
+
+    typedef GrGLSLProgramBuilder INHERITED;
+};
+
+#endif
diff --git a/src/gpu/vk/GrVkPipelineStateCache.cpp b/src/gpu/vk/GrVkPipelineStateCache.cpp
new file mode 100644
index 0000000..f2092af
--- /dev/null
+++ b/src/gpu/vk/GrVkPipelineStateCache.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "GrVkResourceProvider.h"
+
+#include "GrVkGpu.h"
+#include "GrProcessor.h"
+#include "GrVkPipelineState.h"
+#include "GrVkPipelineStateBuilder.h"
+#include "SkRTConf.h"
+#include "SkTSearch.h"
+#include "glsl/GrGLSLFragmentProcessor.h"
+#include "glsl/GrGLSLProgramDataManager.h"
+
+#ifdef PIPELINE_STATE_CACHE_STATS
+SK_CONF_DECLARE(bool, c_DisplayCache, "gpu.displayCache", false,
+                "Display pipeline state cache usage.");
+#endif
+
+typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
+
+struct GrVkResourceProvider::PipelineStateCache::Entry {
+    // One cached pipeline state plus the stamp used to pick a purge victim.
+    Entry() : fPipelineState(nullptr), fLRUStamp(0) {}
+
+    SkAutoTUnref<GrVkPipelineState> fPipelineState;  // ref held by the cache
+    unsigned int                    fLRUStamp;       // stamp of last use; lowest is purged
+};
+
+struct GrVkResourceProvider::PipelineStateCache::PipelineDescLess {
+    // Mixed Desc/Entry comparisons used by search(); both forward to Desc::Less.
+    bool operator() (const GrVkPipelineState::Desc& key, const Entry* cached) {
+        SkASSERT(cached->fPipelineState.get());
+        return GrVkPipelineState::Desc::Less(key, cached->fPipelineState->getDesc());
+    }
+    bool operator() (const Entry* cached, const GrVkPipelineState::Desc& key) {
+        SkASSERT(cached->fPipelineState.get());
+        return GrVkPipelineState::Desc::Less(cached->fPipelineState->getDesc(), key);
+    }
+};
+
+GrVkResourceProvider::PipelineStateCache::PipelineStateCache(GrVkGpu* gpu)
+    : fCount(0)
+    , fCurrLRUStamp(0)
+    , fGpu(gpu)
+#ifdef PIPELINE_STATE_CACHE_STATS
+    , fTotalRequests(0)
+    , fCacheMisses(0)
+    , fHashMisses(0)
+#endif
+{
+    for (int i = 0; i < 1 << kHashBits; ++i) {  // start with every hash slot empty
+        fHashTable[i] = nullptr;
+    }
+}
+
+GrVkResourceProvider::PipelineStateCache::~PipelineStateCache() {
+    SkASSERT(0 == fCount);  // the cache must already be empty (reset() zeroes fCount)
+    // Dump usage stats if PIPELINE_STATE_CACHE_STATS is defined and gpu.displayCache is on.
+#ifdef PIPELINE_STATE_CACHE_STATS
+    if (c_DisplayCache) {
+        SkDebugf("--- Pipeline State Cache ---\n");
+        SkDebugf("Total requests: %d\n", fTotalRequests);
+        SkDebugf("Cache misses: %d\n", fCacheMisses);
+        SkDebugf("Cache miss %%: %f\n", (fTotalRequests > 0) ?
+                 100.f * fCacheMisses / fTotalRequests :
+                 0.f);
+        int cacheHits = fTotalRequests - fCacheMisses;
+        SkDebugf("Hash miss %%: %f\n", (cacheHits > 0) ? 100.f * fHashMisses / cacheHits : 0.f);
+        SkDebugf("---------------------\n");
+    }
+#endif
+}
+
+void GrVkResourceProvider::PipelineStateCache::reset() {
+    // Destroy every live entry in order and null out its slot in fEntries.
+    for (Entry** slot = fEntries; slot != fEntries + fCount; ++slot) {
+        delete *slot;
+        *slot = nullptr;
+    }
+    fCount = 0;
+    fCurrLRUStamp = 0;
+    // Clear the hash table so that no slot still refers to a deleted entry.
+    const int hashSlots = 1 << kHashBits;
+    for (int h = 0; h < hashSlots; ++h) {
+        fHashTable[h] = nullptr;
+    }
+}
+
+void GrVkResourceProvider::PipelineStateCache::abandon() {
+    for (int i = 0; i < fCount; ++i) {  // each cached state abandons its GPU resources first
+        SkASSERT(fEntries[i]->fPipelineState.get());
+        fEntries[i]->fPipelineState->abandonGPUResources();
+    }
+    this->reset();  // then delete the entries and clear the hash table
+}
+
+void GrVkResourceProvider::PipelineStateCache::release() {
+    for (int i = 0; i < fCount; ++i) {  // each cached state frees its GPU resources via fGpu
+        SkASSERT(fEntries[i]->fPipelineState.get());
+        fEntries[i]->fPipelineState->freeGPUResources(fGpu);
+    }
+    this->reset();  // then delete the entries and clear the hash table
+}
+
+int GrVkResourceProvider::PipelineStateCache::search(const GrVkPipelineState::Desc& desc) const {
+    PipelineDescLess less;  // orders a Desc against the Desc stored in an Entry
+    return SkTSearch(fEntries, fCount, desc, sizeof(Entry*), less);  // < 0: ~(insert index)
+}
+
+// Returns a new ref to the cached pipeline state matching the given draw, building and caching
+// one on a miss (purging the least recently used entry when full). Returns nullptr on failure.
+GrVkPipelineState* GrVkResourceProvider::PipelineStateCache::refPipelineState(
+                                                               const GrPipeline& pipeline,
+                                                               const GrPrimitiveProcessor& primProc,
+                                                               GrPrimitiveType primiteType,
+                                                               const GrVkRenderPass& renderPass) {
+#ifdef PIPELINE_STATE_CACHE_STATS
+    ++fTotalRequests;
+#endif
+    Entry* entry = nullptr;
+    // Get GrVkProgramDesc
+    GrVkPipelineState::Desc desc;
+    if (!GrVkProgramDescBuilder::Build(&desc.fProgramDesc,
+                                       primProc,
+                                       pipeline,
+                                       *fGpu->vkCaps().glslCaps())) {
+        GrCapsDebugf(fGpu->caps(), "Failed to build vk program descriptor!\n");
+        return nullptr;
+    }
+
+    // Get vulkan specific descriptor key
+    GrVkPipelineState::BuildStateKey(pipeline, primiteType, &desc.fStateKey);
+    // Get checksum of entire PipelineDesc
+    int keyLength = desc.fStateKey.count();
+    SkASSERT(0 == (keyLength % 4));
+    // Seed the checksum with the checksum of the programDesc then add the vulkan key to it.
+    desc.fChecksum = SkChecksum::Murmur3(desc.fStateKey.begin(), keyLength,
+                                         desc.fProgramDesc.getChecksum());
+
+    // Fold the checksum down to a hash table slot. Shared by the lookup and the purge below.
+    auto hashSlotFor = [](uint32_t checksum) -> uint32_t {
+        checksum ^= checksum >> 16;
+        if (kHashBits <= 8) { checksum ^= checksum >> 8; }
+        return checksum & ((1 << kHashBits) - 1);
+    };
+    uint32_t hashIdx = hashSlotFor(desc.fChecksum);
+    Entry* hashedEntry = fHashTable[hashIdx];
+    if (hashedEntry && hashedEntry->fPipelineState->getDesc() == desc) {
+        SkASSERT(hashedEntry->fPipelineState);
+        entry = hashedEntry;
+    }
+
+    int entryIdx;
+    if (nullptr == entry) {
+        entryIdx = this->search(desc);
+        if (entryIdx >= 0) {
+            entry = fEntries[entryIdx];
+#ifdef PIPELINE_STATE_CACHE_STATS
+            ++fHashMisses;
+#endif
+        }
+    }
+
+    if (nullptr == entry) {
+        // We have a cache miss
+#ifdef PIPELINE_STATE_CACHE_STATS
+        ++fCacheMisses;
+#endif
+        GrVkPipelineState* pipelineState =
+            GrVkPipelineStateBuilder::CreatePipelineState(fGpu,
+                                                          pipeline,
+                                                          primProc,
+                                                          primiteType,
+                                                          desc,
+                                                          renderPass);
+        if (nullptr == pipelineState) {
+            return nullptr;
+        }
+        int purgeIdx = 0;
+        if (fCount < kMaxEntries) {
+            entry = new Entry;
+            purgeIdx = fCount++;
+            fEntries[purgeIdx] = entry;
+        } else {
+            SkASSERT(fCount == kMaxEntries);
+            for (int i = 1; i < kMaxEntries; ++i) {
+                if (fEntries[i]->fLRUStamp < fEntries[purgeIdx]->fLRUStamp) {
+                    purgeIdx = i;
+                }
+            }
+            entry = fEntries[purgeIdx];
+            uint32_t purgedHashIdx = hashSlotFor(entry->fPipelineState->getDesc().fChecksum);
+            if (fHashTable[purgedHashIdx] == entry) {
+                fHashTable[purgedHashIdx] = nullptr;
+            }
+            entry->fPipelineState->freeGPUResources(fGpu);
+        }
+        SkASSERT(fEntries[purgeIdx] == entry);
+        entry->fPipelineState.reset(pipelineState);
+        // We need to shift fEntries around so that the entry currently at purgeIdx is placed
+        // just before the entry at ~entryIdx (in order to keep fEntries sorted by descriptor).
+        entryIdx = ~entryIdx;
+        if (entryIdx < purgeIdx) {
+            //  Let E and P be the entries at index entryIdx and purgeIdx, respectively.
+            //  If the entries array looks like this:
+            //       aaaaEbbbbbPccccc
+            //  we rearrange it to look like this:
+            //       aaaaPEbbbbbccccc
+            size_t copySize = (purgeIdx - entryIdx) * sizeof(Entry*);
+            memmove(fEntries + entryIdx + 1, fEntries + entryIdx, copySize);
+            fEntries[entryIdx] = entry;
+        } else if (purgeIdx < entryIdx) {
+            //  If the entries array looks like this:
+            //       aaaaPbbbbbEccccc
+            //  we rearrange it to look like this:
+            //       aaaabbbbbPEccccc
+            size_t copySize = (entryIdx - purgeIdx - 1) * sizeof(Entry*);
+            memmove(fEntries + purgeIdx, fEntries + purgeIdx + 1, copySize);
+            fEntries[entryIdx - 1] = entry;
+        }
+#ifdef SK_DEBUG
+        SkASSERT(fEntries[0]->fPipelineState.get());
+        for (int i = 0; i < fCount - 1; ++i) {
+            SkASSERT(fEntries[i + 1]->fPipelineState.get());
+            const GrVkPipelineState::Desc& a = fEntries[i]->fPipelineState->getDesc();
+            const GrVkPipelineState::Desc& b = fEntries[i + 1]->fPipelineState->getDesc();
+            SkASSERT(GrVkPipelineState::Desc::Less(a, b));
+            SkASSERT(!GrVkPipelineState::Desc::Less(b, a));
+        }
+#endif
+    }
+
+    fHashTable[hashIdx] = entry;
+    entry->fLRUStamp = fCurrLRUStamp;
+
+    if (SK_MaxU32 == fCurrLRUStamp) {
+        // wrap around! just trash our LRU, one time hit.
+        for (int i = 0; i < fCount; ++i) {
+            fEntries[i]->fLRUStamp = 0;
+        }
+    }
+    ++fCurrLRUStamp;
+    return SkRef(entry->fPipelineState.get());
+}
diff --git a/src/gpu/vk/GrVkProgramDataManager.cpp b/src/gpu/vk/GrVkPipelineStateDataManager.cpp
similarity index 75%
rename from src/gpu/vk/GrVkProgramDataManager.cpp
rename to src/gpu/vk/GrVkPipelineStateDataManager.cpp
index e798cd7..76ad483 100644
--- a/src/gpu/vk/GrVkProgramDataManager.cpp
+++ b/src/gpu/vk/GrVkPipelineStateDataManager.cpp
@@ -5,14 +5,14 @@
 * found in the LICENSE file.
 */
 
-#include "GrVkProgramDataManager.h"
+#include "GrVkPipelineStateDataManager.h"
 
 #include "GrVkGpu.h"
 #include "GrVkUniformBuffer.h"
 
-GrVkProgramDataManager::GrVkProgramDataManager(const UniformInfoArray& uniforms,
-                                               uint32_t vertexUniformSize,
-                                               uint32_t fragmentUniformSize)
+GrVkPipelineStateDataManager::GrVkPipelineStateDataManager(const UniformInfoArray& uniforms,
+                                                           uint32_t vertexUniformSize,
+                                                           uint32_t fragmentUniformSize)
     : fVertexUniformSize(vertexUniformSize)
     , fFragmentUniformSize(fragmentUniformSize)
     , fVertexUniformsDirty(false) 
@@ -40,7 +40,7 @@
     }
 }
 
-void* GrVkProgramDataManager::getBufferPtrAndMarkDirty(const Uniform& uni) const {
+void* GrVkPipelineStateDataManager::getBufferPtrAndMarkDirty(const Uniform& uni) const {
     void* buffer;
     if (GrVkUniformHandler::kVertexBinding == uni.fBinding) {
         buffer = fVertexUniformData.get();
@@ -55,7 +55,7 @@
     return buffer;
 }
 
-void GrVkProgramDataManager::set1f(UniformHandle u, float v0) const {
+void GrVkPipelineStateDataManager::set1f(UniformHandle u, float v0) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kFloat_GrSLType);
     SkASSERT(GrGLSLShaderVar::kNonArray == uni.fArrayCount);
@@ -65,9 +65,9 @@
     memcpy(buffer, &v0, sizeof(float));
 }
 
-void GrVkProgramDataManager::set1fv(UniformHandle u,
-                                    int arrayCount,
-                                    const float v[]) const {
+void GrVkPipelineStateDataManager::set1fv(UniformHandle u,
+                                          int arrayCount,
+                                          const float v[]) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kFloat_GrSLType);
     SkASSERT(arrayCount > 0);
@@ -84,7 +84,7 @@
     }
 }
 
-void GrVkProgramDataManager::set2f(UniformHandle u, float v0, float v1) const {
+void GrVkPipelineStateDataManager::set2f(UniformHandle u, float v0, float v1) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec2f_GrSLType);
     SkASSERT(GrGLSLShaderVar::kNonArray == uni.fArrayCount);
@@ -95,9 +95,9 @@
     memcpy(buffer, v, 2 * sizeof(float));
 }
 
-void GrVkProgramDataManager::set2fv(UniformHandle u,
-                                    int arrayCount,
-                                    const float v[]) const {
+void GrVkPipelineStateDataManager::set2fv(UniformHandle u,
+                                          int arrayCount,
+                                          const float v[]) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec2f_GrSLType);
     SkASSERT(arrayCount > 0);
@@ -114,7 +114,7 @@
     }
 }
 
-void GrVkProgramDataManager::set3f(UniformHandle u, float v0, float v1, float v2) const {
+void GrVkPipelineStateDataManager::set3f(UniformHandle u, float v0, float v1, float v2) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec3f_GrSLType);
     SkASSERT(GrGLSLShaderVar::kNonArray == uni.fArrayCount);
@@ -125,9 +125,9 @@
     memcpy(buffer, v, 3 * sizeof(float));
 }
 
-void GrVkProgramDataManager::set3fv(UniformHandle u,
-                                    int arrayCount,
-                                    const float v[]) const {
+void GrVkPipelineStateDataManager::set3fv(UniformHandle u,
+                                          int arrayCount,
+                                          const float v[]) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec3f_GrSLType);
     SkASSERT(arrayCount > 0);
@@ -144,7 +144,11 @@
     }
 }
 
-void GrVkProgramDataManager::set4f(UniformHandle u, float v0, float v1, float v2, float v3) const {
+void GrVkPipelineStateDataManager::set4f(UniformHandle u,
+                                         float v0,
+                                         float v1,
+                                         float v2,
+                                         float v3) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec4f_GrSLType);
     SkASSERT(GrGLSLShaderVar::kNonArray == uni.fArrayCount);
@@ -155,9 +159,9 @@
     memcpy(buffer, v, 4 * sizeof(float));
 }
 
-void GrVkProgramDataManager::set4fv(UniformHandle u,
-                                    int arrayCount,
-                                    const float v[]) const {
+void GrVkPipelineStateDataManager::set4fv(UniformHandle u,
+                                          int arrayCount,
+                                          const float v[]) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kVec4f_GrSLType);
     SkASSERT(arrayCount > 0);
@@ -170,35 +174,41 @@
     memcpy(buffer, v, arrayCount * 4 * sizeof(float));
 }
 
-void GrVkProgramDataManager::setMatrix2f(UniformHandle u, const float matrix[]) const {
+void GrVkPipelineStateDataManager::setMatrix2f(UniformHandle u, const float matrix[]) const {
     this->setMatrices<2>(u, 1, matrix);
 }
 
-void GrVkProgramDataManager::setMatrix2fv(UniformHandle u, int arrayCount, const float m[]) const {
+void GrVkPipelineStateDataManager::setMatrix2fv(UniformHandle u,
+                                                int arrayCount,
+                                                const float m[]) const {
     this->setMatrices<2>(u, arrayCount, m);
 }
 
-void GrVkProgramDataManager::setMatrix3f(UniformHandle u, const float matrix[]) const {
+void GrVkPipelineStateDataManager::setMatrix3f(UniformHandle u, const float matrix[]) const {
     this->setMatrices<3>(u, 1, matrix);
 }
 
-void GrVkProgramDataManager::setMatrix3fv(UniformHandle u, int arrayCount, const float m[]) const {
+void GrVkPipelineStateDataManager::setMatrix3fv(UniformHandle u,
+                                                int arrayCount,
+                                                const float m[]) const {
     this->setMatrices<3>(u, arrayCount, m);
 }
 
-void GrVkProgramDataManager::setMatrix4f(UniformHandle u, const float matrix[]) const {
+void GrVkPipelineStateDataManager::setMatrix4f(UniformHandle u, const float matrix[]) const {
     this->setMatrices<4>(u, 1, matrix);
 }
 
-void GrVkProgramDataManager::setMatrix4fv(UniformHandle u, int arrayCount, const float m[]) const {
+void GrVkPipelineStateDataManager::setMatrix4fv(UniformHandle u,
+                                                int arrayCount,
+                                                const float m[]) const {
     this->setMatrices<4>(u, arrayCount, m);
 }
 
 template<int N> struct set_uniform_matrix;
 
-template<int N> inline void GrVkProgramDataManager::setMatrices(UniformHandle u,
-                                                                int arrayCount,
-                                                                const float matrices[]) const {
+template<int N> inline void GrVkPipelineStateDataManager::setMatrices(UniformHandle u,
+                                                                      int arrayCount,
+                                                                     const float matrices[]) const {
     const Uniform& uni = fUniforms[u.toIndex()];
     SkASSERT(uni.fType == kMat22f_GrSLType + (N - 2));
     SkASSERT(arrayCount > 0);
@@ -243,9 +253,9 @@
     }
 };
 
-void GrVkProgramDataManager::uploadUniformBuffers(const GrVkGpu* gpu,
-                                                  GrVkUniformBuffer* vertexBuffer,
-                                                  GrVkUniformBuffer* fragmentBuffer) const {
+void GrVkPipelineStateDataManager::uploadUniformBuffers(const GrVkGpu* gpu,
+                                                        GrVkUniformBuffer* vertexBuffer,
+                                                        GrVkUniformBuffer* fragmentBuffer) const {
     if (vertexBuffer && fVertexUniformsDirty) {
         vertexBuffer->addMemoryBarrier(gpu,
                                        VK_ACCESS_UNIFORM_READ_BIT,
diff --git a/src/gpu/vk/GrVkProgramDataManager.h b/src/gpu/vk/GrVkPipelineStateDataManager.h
similarity index 88%
rename from src/gpu/vk/GrVkProgramDataManager.h
rename to src/gpu/vk/GrVkPipelineStateDataManager.h
index bd0d688..b75d65c 100644
--- a/src/gpu/vk/GrVkProgramDataManager.h
+++ b/src/gpu/vk/GrVkPipelineStateDataManager.h
@@ -5,8 +5,8 @@
 * found in the LICENSE file.
 */
 
-#ifndef GrVkProgramDataManager_DEFINED
-#define GrVkProgramDataManager_DEFINED
+#ifndef GrVkPipelineStateDataManager_DEFINED
+#define GrVkPipelineStateDataManager_DEFINED
 
 #include "glsl/GrGLSLProgramDataManager.h"
 
@@ -15,13 +15,13 @@
 class GrVkGpu;
 class GrVkUniformBuffer;
 
-class GrVkProgramDataManager : public GrGLSLProgramDataManager {
+class GrVkPipelineStateDataManager : public GrGLSLProgramDataManager {
 public:
     typedef GrVkUniformHandler::UniformInfoArray UniformInfoArray;
 
-    GrVkProgramDataManager(const UniformInfoArray&,
-                           uint32_t vertexUniformSize,
-                           uint32_t fragmentUniformSize);
+    GrVkPipelineStateDataManager(const UniformInfoArray&,
+                                 uint32_t vertexUniformSize,
+                                 uint32_t fragmentUniformSize);
 
     void set1f(UniformHandle, float v0) const override;
     void set1fv(UniformHandle, int arrayCount, const float v[]) const override;
diff --git a/src/gpu/vk/GrVkProgram.h b/src/gpu/vk/GrVkProgram.h
deleted file mode 100644
index 23a9713..0000000
--- a/src/gpu/vk/GrVkProgram.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-
-#ifndef GrVkProgram_DEFINED
-#define GrVkProgram_DEFINED
-
-#include "GrVkImage.h"
-#include "GrVkProgramDesc.h"
-#include "GrVkProgramDataManager.h"
-#include "glsl/GrGLSLProgramBuilder.h"
-
-#include "vulkan/vulkan.h"
-
-class GrPipeline;
-class GrVkCommandBuffer;
-class GrVkDescriptorPool;
-class GrVkGpu;
-class GrVkImageView;
-class GrVkPipeline;
-class GrVkSampler;
-class GrVkUniformBuffer;
-
-class GrVkProgram : public SkRefCnt {
-public:
-    typedef GrGLSLProgramBuilder::BuiltinUniformHandles BuiltinUniformHandles;
-
-    ~GrVkProgram();
-
-    GrVkPipeline* vkPipeline() const { return fPipeline; }
-
-    void setData(GrVkGpu*, const GrPrimitiveProcessor&, const GrPipeline&);
-
-    void bind(const GrVkGpu* gpu, GrVkCommandBuffer* commandBuffer);
-
-    void addUniformResources(GrVkCommandBuffer&);
-
-    void freeGPUResources(const GrVkGpu* gpu);
-
-    // This releases resources the only a given instance of a GrVkProgram needs to hold onto and do
-    // don't need to survive across new uses of the program.
-    void freeTempResources(const GrVkGpu* gpu);
-
-    void abandonGPUResources();
-
-private:
-    typedef GrVkProgramDataManager::UniformInfoArray UniformInfoArray;
-    typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
-
-    GrVkProgram(GrVkGpu* gpu,
-                GrVkPipeline* pipeline,
-                VkPipelineLayout layout,
-                VkDescriptorSetLayout dsLayout[2],
-                const BuiltinUniformHandles& builtinUniformHandles,
-                const UniformInfoArray& uniforms,
-                uint32_t vertexUniformSize,
-                uint32_t fragmentUniformSize,
-                uint32_t numSamplers,
-                GrGLSLPrimitiveProcessor* geometryProcessor,
-                GrGLSLXferProcessor* xferProcessor,
-                const GrGLSLFragProcs& fragmentProcessors);
-
-    // Each pool will manage one type of descriptor. Thus each descriptor set we use will all be of
-    // one VkDescriptorType.
-    struct DescriptorPoolManager {
-        DescriptorPoolManager(VkDescriptorSetLayout layout, VkDescriptorType type,
-                              uint32_t descCount, GrVkGpu* gpu)
-            : fDescLayout(layout)
-            , fDescType(type)
-            , fCurrentDescriptorSet(0)
-            , fPool(nullptr) {
-            SkASSERT(descCount < (SK_MaxU32 >> 2));
-            fMaxDescriptorSets = descCount << 2;
-            this->getNewPool(gpu);
-        }
-
-        ~DescriptorPoolManager() {
-            SkASSERT(!fDescLayout);
-            SkASSERT(!fPool);
-        }
-
-        void getNewDescriptorSet(GrVkGpu* gpu, VkDescriptorSet* ds);
-
-        void freeGPUResources(const GrVkGpu* gpu);
-        void abandonGPUResources();
-
-        VkDescriptorSetLayout  fDescLayout;
-        VkDescriptorType       fDescType;
-        uint32_t               fMaxDescriptorSets;
-        uint32_t               fCurrentDescriptorSet;
-        GrVkDescriptorPool*    fPool;
-
-    private:
-        void getNewPool(GrVkGpu* gpu);
-    };
-
-    void writeUniformBuffers(const GrVkGpu* gpu);
-
-    void writeSamplers(GrVkGpu* gpu, const SkTArray<const GrTextureAccess*>& textureBindings);
-
-
-    /**
-    * We use the RT's size and origin to adjust from Skia device space to OpenGL normalized device
-    * space and to make device space positions have the correct origin for processors that require
-    * them.
-    */
-    struct RenderTargetState {
-        SkISize         fRenderTargetSize;
-        GrSurfaceOrigin fRenderTargetOrigin;
-
-        RenderTargetState() { this->invalidate(); }
-        void invalidate() {
-            fRenderTargetSize.fWidth = -1;
-            fRenderTargetSize.fHeight = -1;
-            fRenderTargetOrigin = (GrSurfaceOrigin)-1;
-        }
-
-        /**
-        * Gets a vec4 that adjusts the position from Skia device coords to GL's normalized device
-        * coords. Assuming the transformed position, pos, is a homogeneous vec3, the vec, v, is
-        * applied as such:
-        * pos.x = dot(v.xy, pos.xz)
-        * pos.y = dot(v.zw, pos.yz)
-        */
-        void getRTAdjustmentVec(float* destVec) {
-            destVec[0] = 2.f / fRenderTargetSize.fWidth;
-            destVec[1] = -1.f;
-            if (kBottomLeft_GrSurfaceOrigin == fRenderTargetOrigin) {
-                destVec[2] = -2.f / fRenderTargetSize.fHeight;
-                destVec[3] = 1.f;
-            } else {
-                destVec[2] = 2.f / fRenderTargetSize.fHeight;
-                destVec[3] = -1.f;
-            }
-        }
-    };
-
-    // Helper for setData() that sets the view matrix and loads the render target height uniform
-    void setRenderTargetState(const GrPipeline&);
-
-    // GrVkResources
-    GrVkPipeline*       fPipeline;
-
-    // Used for binding DescriptorSets to the command buffer but does not need to survive during
-    // command buffer execution. Thus this is not need to be a GrVkResource.
-    VkPipelineLayout fPipelineLayout;
-
-    // The DescriptorSets need to survive until the gpu has finished all draws that use them.
-    // However, they will only be freed by the descriptor pool. Thus by simply keeping the
-    // descriptor pool alive through the draw, the descritor sets will also stay alive. Thus we do
-    // not need a GrVkResource versions of VkDescriptorSet. We hold on to these in the program since
-    // we update the descriptor sets and bind them at separate times;
-    VkDescriptorSet       fDescriptorSets[2];
-
-    // Meta data so we know which descriptor sets we are using and need to bind.
-    int                   fStartDS;
-    int                   fDSCount;
-
-    SkAutoTDelete<GrVkUniformBuffer>    fVertexUniformBuffer;
-    SkAutoTDelete<GrVkUniformBuffer>    fFragmentUniformBuffer;
-
-    // GrVkResources used for sampling textures
-    SkTDArray<GrVkSampler*>                fSamplers;
-    SkTDArray<const GrVkImageView*>        fTextureViews;
-    SkTDArray<const GrVkImage::Resource*>  fTextures;
-
-    // Tracks the current render target uniforms stored in the vertex buffer.
-    RenderTargetState fRenderTargetState;
-    BuiltinUniformHandles fBuiltinUniformHandles;
-
-    // Processors in the program
-    SkAutoTDelete<GrGLSLPrimitiveProcessor> fGeometryProcessor;
-    SkAutoTDelete<GrGLSLXferProcessor> fXferProcessor;
-    GrGLSLFragProcs fFragmentProcessors;
-
-    GrVkProgramDataManager fProgramDataManager;
-
-    DescriptorPoolManager  fSamplerPoolManager;
-    DescriptorPoolManager  fUniformPoolManager;
-
-    int fNumSamplers;
-
-    friend class GrVkProgramBuilder;
-};
-
-#endif
diff --git a/src/gpu/vk/GrVkProgramBuilder.h b/src/gpu/vk/GrVkProgramBuilder.h
deleted file mode 100644
index 65fe546..0000000
--- a/src/gpu/vk/GrVkProgramBuilder.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-* Copyright 2016 Google Inc.
-*
-* Use of this source code is governed by a BSD-style license that can be
-* found in the LICENSE file.
-*/
-
-#ifndef GrVkProgramBuilder_DEFINED
-#define GrVkProgramBuilder_DEFINED
-
-#include "glsl/GrGLSLProgramBuilder.h"
-
-#include "GrPipeline.h"
-#include "vk/GrVkUniformHandler.h"
-#include "vk/GrVkVaryingHandler.h"
-
-#include "shaderc/shaderc.h"
-#include "vulkan/vulkan.h"
-
-class GrVkGpu;
-class GrVkRenderPass;
-class GrVkProgram;
-class GrVkProgramDesc;
-
-class GrVkProgramBuilder : public GrGLSLProgramBuilder {
-public:
-    /** Generates a shader program.
-    *
-    * The program implements what is specified in the stages given as input.
-    * After successful generation, the builder result objects are available
-    * to be used.
-    * @return true if generation was successful.
-    */
-    static GrVkProgram* CreateProgram(GrVkGpu*,
-                                      const GrPipeline&,
-                                      const GrPrimitiveProcessor&,
-                                      GrPrimitiveType,
-                                      const GrVkProgramDesc&,
-                                      const GrVkRenderPass& renderPass);
-
-    const GrCaps* caps() const override;
-    const GrGLSLCaps* glslCaps() const override;
-
-    GrVkGpu* gpu() const { return fGpu; }
-
-    void finalizeFragmentOutputColor(GrGLSLShaderVar& outputColor) override;
-
-private:
-    GrVkProgramBuilder(GrVkGpu*,
-                       const GrPipeline&,
-                       const GrPrimitiveProcessor&,
-                       const GrVkProgramDesc&);
-
-    GrVkProgram* finalize(GrPrimitiveType primitiveType, const GrVkRenderPass& renderPass);
-
-    static bool CreateVkShaderModule(const GrVkGpu* gpu,
-                                     VkShaderStageFlagBits stage,
-                                     const GrGLSLShaderBuilder& builder,
-                                     VkShaderModule* shaderModule,
-                                     VkPipelineShaderStageCreateInfo* stageInfo);
-
-    GrGLSLUniformHandler* uniformHandler() override { return &fUniformHandler; }
-    const GrGLSLUniformHandler* uniformHandler() const override { return &fUniformHandler; }
-    GrGLSLVaryingHandler* varyingHandler() override { return &fVaryingHandler; }
-
-    GrVkGpu* fGpu;
-    GrVkVaryingHandler        fVaryingHandler;
-    GrVkUniformHandler        fUniformHandler;
-
-    typedef GrGLSLProgramBuilder INHERITED;
-};
-
-#endif
diff --git a/src/gpu/vk/GrVkRenderPass.cpp b/src/gpu/vk/GrVkRenderPass.cpp
index 958d604..3538776 100644
--- a/src/gpu/vk/GrVkRenderPass.cpp
+++ b/src/gpu/vk/GrVkRenderPass.cpp
@@ -7,6 +7,7 @@
 
 #include "GrVkRenderPass.h"
 
+#include "GrProcessor.h"
 #include "GrVkFramebuffer.h"
 #include "GrVkGpu.h"
 #include "GrVkRenderTarget.h"
@@ -218,3 +219,20 @@
 
     return true;
 }
+
+// Appends fAttachmentFlags, then fFormat and fSamples of each attachment whose flag is set, to b.
+void GrVkRenderPass::genKey(GrProcessorKeyBuilder* b) const {
+    b->add32(fAttachmentFlags);
+    if (fAttachmentFlags & kColor_AttachmentFlag) {
+        b->add32(fAttachmentsDescriptor.fColor.fFormat);
+        b->add32(fAttachmentsDescriptor.fColor.fSamples);
+    }
+    if (fAttachmentFlags & kResolve_AttachmentFlag) {
+        b->add32(fAttachmentsDescriptor.fResolve.fFormat);
+        b->add32(fAttachmentsDescriptor.fResolve.fSamples);
+    }
+    if (fAttachmentFlags & kStencil_AttachmentFlag) {
+        b->add32(fAttachmentsDescriptor.fStencil.fFormat);
+        b->add32(fAttachmentsDescriptor.fStencil.fSamples);
+    }
+}
diff --git a/src/gpu/vk/GrVkRenderPass.h b/src/gpu/vk/GrVkRenderPass.h
index 2f7e287..1068ada 100644
--- a/src/gpu/vk/GrVkRenderPass.h
+++ b/src/gpu/vk/GrVkRenderPass.h
@@ -14,6 +14,7 @@
 
 #include "vulkan/vulkan.h"
 
+class GrProcessorKeyBuilder;
 class GrVkGpu;
 class GrVkRenderTarget;
 
@@ -72,6 +73,8 @@
 
     VkRenderPass vkRenderPass() const { return fRenderPass; }
 
+    void genKey(GrProcessorKeyBuilder* b) const;
+
 private:
     GrVkRenderPass(const GrVkRenderPass&);
     GrVkRenderPass& operator=(const GrVkRenderPass&);
diff --git a/src/gpu/vk/GrVkResourceProvider.cpp b/src/gpu/vk/GrVkResourceProvider.cpp
index 3e0dfa0..8bdb946 100644
--- a/src/gpu/vk/GrVkResourceProvider.cpp
+++ b/src/gpu/vk/GrVkResourceProvider.cpp
@@ -21,11 +21,13 @@
 
 GrVkResourceProvider::GrVkResourceProvider(GrVkGpu* gpu) : fGpu(gpu)
                                                          , fPipelineCache(VK_NULL_HANDLE) {
+    fPipelineStateCache = new PipelineStateCache(gpu);  // owned; deleted in the destructor
 }
 
 GrVkResourceProvider::~GrVkResourceProvider() {
     SkASSERT(0 == fSimpleRenderPasses.count());
     SkASSERT(VK_NULL_HANDLE == fPipelineCache);
+    delete fPipelineStateCache;  // allocated with new in the constructor
 }
 
 void GrVkResourceProvider::init() {
@@ -94,6 +96,14 @@
     return sampler;
 }
 
+// Hands the request to fPipelineStateCache->refPipelineState() with the arguments unchanged.
+GrVkPipelineState* GrVkResourceProvider::findOrCreateCompatiblePipelineState(
+        const GrPipeline& pipeline, const GrPrimitiveProcessor& proc,
+        GrPrimitiveType primitiveType, const GrVkRenderPass& renderPass) {
+    PipelineStateCache* cache = fPipelineStateCache;
+    return cache->refPipelineState(pipeline, proc, primitiveType, renderPass);
+}
+
 GrVkCommandBuffer* GrVkResourceProvider::createCommandBuffer() {
     GrVkCommandBuffer* cmdBuffer = GrVkCommandBuffer::Create(fGpu, fGpu->cmdPool());
     fActiveCommandBuffers.push_back(cmdBuffer);
@@ -132,6 +142,8 @@
     }
     fSamplers.reset();
 
+    fPipelineStateCache->release();
+
 #ifdef SK_TRACE_VK_RESOURCES
     SkASSERT(0 == GrVkResource::fTrace.count());
 #endif
@@ -160,6 +172,8 @@
     }
     fSamplers.reset();
 
+    fPipelineStateCache->abandon();
+
 #ifdef SK_TRACE_VK_RESOURCES
     SkASSERT(0 == GrVkResource::fTrace.count());
 #endif
diff --git a/src/gpu/vk/GrVkResourceProvider.h b/src/gpu/vk/GrVkResourceProvider.h
index 66a2556..5e2d172 100644
--- a/src/gpu/vk/GrVkResourceProvider.h
+++ b/src/gpu/vk/GrVkResourceProvider.h
@@ -8,7 +8,9 @@
 #ifndef GrVkResourceProvider_DEFINED
 #define GrVkResourceProvider_DEFINED
 
+#include "GrGpu.h"
 #include "GrVkDescriptorPool.h"
+#include "GrVkPipelineState.h"
 #include "GrVkResource.h"
 #include "GrVkUtil.h"
 #include "SkTArray.h"
@@ -61,6 +63,11 @@
     // The refcount is incremented and a pointer returned.
     GrVkSampler* findOrCreateCompatibleSampler(const GrTextureParams&);
 
+    GrVkPipelineState* findOrCreateCompatiblePipelineState(const GrPipeline&,
+                                                           const GrPrimitiveProcessor&,
+                                                           GrPrimitiveType,
+                                                           const GrVkRenderPass& renderPass);
+
     // Destroy any cached resources. To be called before destroying the VkDevice.
     // The assumption is that all queues are idle and all command buffers are finished.
     // For resource tracing to work properly, this should be called after unrefing all other
@@ -73,6 +80,52 @@
     void abandonResources();
 
 private:
+    // Cache of GrVkPipelineStates; fEntries has room for kMaxEntries of them.
+    class PipelineStateCache : public ::SkNoncopyable {
+    public:
+        PipelineStateCache(GrVkGpu* gpu);
+        ~PipelineStateCache();
+
+        void abandon();
+        void release();
+        GrVkPipelineState* refPipelineState(const GrPipeline&,
+                                            const GrPrimitiveProcessor&,
+                                            GrPrimitiveType,
+                                            const GrVkRenderPass& renderPass);
+
+    private:
+        enum {
+            // We may actually have kMaxEntries+1 PipelineStates in context because we create a new
+            // PipelineState before evicting from the cache.
+            kMaxEntries = 128,
+            kHashBits = 6,  // fHashTable has 1 << kHashBits slots
+        };
+
+        struct Entry;
+        struct PipelineDescLess;
+
+        void reset();
+
+        // Binary searches fEntries for desc. Returns the index of the matching entry or, when
+        // nothing matches, the bitwise complement (~) of the index where desc should be inserted.
+        int search(const GrVkPipelineState::Desc& desc) const;
+
+        // Sorted array of all the entries.
+        Entry*                      fEntries[kMaxEntries];
+        // Hash table based on the lowest kHashBits bits of the pipeline state key. Used to avoid
+        // binary searching fEntries.
+        Entry*                      fHashTable[1 << kHashBits];
+
+        int                         fCount;
+        unsigned int                fCurrLRUStamp;  // presumably an LRU stamp counter; confirm
+        GrVkGpu*                    fGpu;
+#ifdef PIPELINE_STATE_CACHE_STATS
+        int                         fTotalRequests;
+        int                         fCacheMisses;
+        int                         fHashMisses; // cache hit but hash table missed
+#endif
+    };
+
     GrVkGpu* fGpu;
 
     // Central cache for creating pipelines
@@ -86,8 +139,11 @@
     SkSTArray<4, GrVkCommandBuffer*> fActiveCommandBuffers;
 
     // Stores GrVkSampler objects that we've already created so we can reuse them across multiple
-    // programs
+    // GrVkPipelineStates
     SkTDynamicHash<GrVkSampler, uint8_t> fSamplers;
+
+    // Cache of GrVkPipelineStates
+    PipelineStateCache* fPipelineStateCache;
 };
 
 #endif
diff --git a/src/gpu/vk/GrVkUniformHandler.h b/src/gpu/vk/GrVkUniformHandler.h
index 37bde33..8a3f314 100644
--- a/src/gpu/vk/GrVkUniformHandler.h
+++ b/src/gpu/vk/GrVkUniformHandler.h
@@ -77,7 +77,7 @@
     uint32_t         fCurrentFragmentUBOOffset;
     uint32_t         fCurrentSamplerBinding;
 
-    friend class GrVkProgramBuilder;
+    friend class GrVkPipelineStateBuilder;
 
     typedef GrGLSLUniformHandler INHERITED;
 };
diff --git a/src/gpu/vk/GrVkVaryingHandler.h b/src/gpu/vk/GrVkVaryingHandler.h
index d471941..cebf455 100644
--- a/src/gpu/vk/GrVkVaryingHandler.h
+++ b/src/gpu/vk/GrVkVaryingHandler.h
@@ -19,9 +19,9 @@
 private:
     void onFinalize() override;
 
-    friend class GrVkProgramBuilder;
+    friend class GrVkPipelineStateBuilder;
 
     typedef GrGLSLVaryingHandler INHERITED;
 };
 
-#endif
\ No newline at end of file
+#endif