Write directly to RT/Tex when possible; use BGRA rather than RGBA intermediate texture on ANGLE

Review URL: http://codereview.appspot.com/5417046/




git-svn-id: http://skia.googlecode.com/svn/trunk@2714 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/gpu/GrGLConfig.h b/include/gpu/GrGLConfig.h
index ea7bcf3..cfb5141 100644
--- a/include/gpu/GrGLConfig.h
+++ b/include/gpu/GrGLConfig.h
@@ -78,9 +78,9 @@
  * The GrGLInterface field fCallback specifies the function ptr and there is an
  * additional field fCallbackData of type intptr_t for client data.
  *
- * GR_GL_RGBA_8888_READBACK_SLOW: Set this to 1 if it is known that performing
- * glReadPixels with format=GL_RGBA, type=GL_UNISIGNED_BYTE is significantly 
- * slower than format=GL_BGRA, type=GL_UNISIGNED_BYTE.
+ * GR_GL_RGBA_8888_PIXEL_OPS_SLOW: Set this to 1 if it is known that performing
+ * glReadPixels / glTex(Sub)Image with format=GL_RGBA, type=GL_UNSIGNED_BYTE is
+ * significantly slower than format=GL_BGRA, type=GL_UNSIGNED_BYTE.
  */
 
 #if !defined(GR_GL_LOG_CALLS)
@@ -115,8 +115,8 @@
     #define GR_GL_PER_GL_FUNC_CALLBACK          0
 #endif
 
-#if !defined(GR_GL_RGBA_8888_READBACK_SLOW)
-    #define GR_GL_RGBA_8888_READBACK_SLOW       0
+#if !defined(GR_GL_RGBA_8888_PIXEL_OPS_SLOW)
+    #define GR_GL_RGBA_8888_PIXEL_OPS_SLOW      0
 #endif
 
 #if(GR_GL_NO_CONSTANT_ATTRIBUTES) && (GR_GL_ATTRIBUTE_MATRICES)
diff --git a/include/gpu/GrGLConfig_chrome.h b/include/gpu/GrGLConfig_chrome.h
index 79324ab..72d330a 100644
--- a/include/gpu/GrGLConfig_chrome.h
+++ b/include/gpu/GrGLConfig_chrome.h
@@ -14,8 +14,8 @@
 // ANGLE creates a temp VB for vertex attributes not specified per-vertex.
 #define GR_GL_NO_CONSTANT_ATTRIBUTES    GR_WIN32_BUILD
 
-// RGBA Readbacks are a slow path in ANGLE
-#define GR_GL_RGBA_8888_READBACK_SLOW   GR_WIN32_BUILD
+// For RGBA teximage/readpixels ANGLE will sw-convert to/from BGRA.
+#define GR_GL_RGBA_8888_PIXEL_OPS_SLOW  GR_WIN32_BUILD
 
 // cmd buffer allocates memory and memsets it to zero when it sees glBufferData
 // with NULL.
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index fe85b97..3a26d2f 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -1803,6 +1803,25 @@
     // TODO: when underlying api has a direct way to do this we should use it
     // (e.g. glDrawPixels on desktop GL).
 
+    // If the RT is also a texture and we don't have to do PM/UPM conversion
+    // then take the texture path, which we expect to be at least as fast or
+    // faster since it doesn't use an intermediate texture as we do below.
+    if (NULL != target->asTexture() &&
+        GrPixelConfigIsUnpremultiplied(target->config()) ==
+        GrPixelConfigIsUnpremultiplied(config)) {
+
+        this->internalWriteTexturePixels(target->asTexture(),
+                                            left, top, width, height,
+                                            config, buffer, rowBytes, flags);
+        return;
+    }
+
+    bool swapRAndB = fGpu->preferredReadPixelsConfig(config) ==
+                     GrPixelConfigSwapRAndB(config);
+    if (swapRAndB) {
+        config = GrPixelConfigSwapRAndB(config);
+    }
+
     const GrTextureDesc desc = {
         kNone_GrTextureFlags, kNone_GrAALevel, width, height, { config }
     };
@@ -1827,6 +1846,7 @@
     sampler.setClampNoFilter();
     matrix.setIDiv(texture->width(), texture->height());
     sampler.setMatrix(matrix);
+    sampler.setRAndBSwap(swapRAndB);
     fGpu->setSamplerState(0, sampler);
 
     GrVertexLayout layout = GrDrawTarget::StagePosAsTexCoordVertexLayoutBit(0);
diff --git a/src/gpu/GrGpu.h b/src/gpu/GrGpu.h
index 9b39319..2f36b76 100644
--- a/src/gpu/GrGpu.h
+++ b/src/gpu/GrGpu.h
@@ -195,6 +195,13 @@
     }
 
     /**
+     * Same as above but applies to writeTexturePixels
+     */
+    virtual GrPixelConfig preferredWritePixelsConfig(GrPixelConfig config) {
+        return config; // base-class default: no preferred alternative, hand back the requested config
+    }
+
+    /**
      * OpenGL's readPixels returns the result bottom-to-top while the skia
      * API is top-to-bottom. Thus we have to do a y-axis flip. The obvious
      * solution is to have the subclass do the flip using either the CPU or GPU.
diff --git a/src/gpu/GrGpuGL.cpp b/src/gpu/GrGpuGL.cpp
index facfac9..83797d5 100644
--- a/src/gpu/GrGpuGL.cpp
+++ b/src/gpu/GrGpuGL.cpp
@@ -439,7 +439,15 @@
 }
 
 GrPixelConfig GrGpuGL::preferredReadPixelsConfig(GrPixelConfig config) {
-    if (GR_GL_RGBA_8888_READBACK_SLOW && GrPixelConfigIsRGBA8888(config)) {
+    if (GR_GL_RGBA_8888_PIXEL_OPS_SLOW && GrPixelConfigIsRGBA8888(config)) {
+        return GrPixelConfigSwapRAndB(config); // 8888 pixel ops flagged slow: prefer the R/B-swapped config
+    } else {
+        return config; // flag off or non-8888 config: keep the caller's config
+    }
+}
+
+GrPixelConfig GrGpuGL::preferredWritePixelsConfig(GrPixelConfig config) {
+    if (GR_GL_RGBA_8888_PIXEL_OPS_SLOW && GrPixelConfigIsRGBA8888(config)) {
         return GrPixelConfigSwapRAndB(config);
     } else {
         return config;
diff --git a/src/gpu/GrGpuGL.h b/src/gpu/GrGpuGL.h
index 18baedf..3b8e6d7 100644
--- a/src/gpu/GrGpuGL.h
+++ b/src/gpu/GrGpuGL.h
@@ -31,6 +31,8 @@
 
     virtual GrPixelConfig preferredReadPixelsConfig(GrPixelConfig config)
                                                                     SK_OVERRIDE;
+    virtual GrPixelConfig preferredWritePixelsConfig(GrPixelConfig config)
+                                                                    SK_OVERRIDE;
 
     virtual bool readPixelsWillPayForYFlip(
                                     GrRenderTarget* renderTarget,