sf: Use DirtyRect to optimize Full GPU composition

Use layer DirtyRect information in optimizing full GPU composition by
using startTile and endTile GL calls.

Change-Id: Iec0f9de2ce667438bbcc00d02e500055068df05a
diff --git a/services/surfaceflinger/DisplayDevice.cpp b/services/surfaceflinger/DisplayDevice.cpp
index a4864b3..c166aaa 100644
--- a/services/surfaceflinger/DisplayDevice.cpp
+++ b/services/surfaceflinger/DisplayDevice.cpp
@@ -145,6 +145,13 @@
     }
 }
 
+#ifdef QCOM_BSP
+void DisplayDevice::eglSwapPreserved(bool enable) const {
+    const EGLint behavior = enable ? EGL_BUFFER_PRESERVED : EGL_BUFFER_DESTROYED;
+    eglSurfaceAttrib(mDisplay, mSurface, EGL_SWAP_BEHAVIOR, behavior);
+}
+#endif
+
 void DisplayDevice::disconnect(HWComposer& hwc) {
     if (mHwcDisplayId >= 0) {
         hwc.disconnectDisplay(mHwcDisplayId);
diff --git a/services/surfaceflinger/DisplayDevice.h b/services/surfaceflinger/DisplayDevice.h
index d3f784a..76243f0 100644
--- a/services/surfaceflinger/DisplayDevice.h
+++ b/services/surfaceflinger/DisplayDevice.h
@@ -170,6 +170,12 @@
     uint32_t getPageFlipCount() const;
     void dump(String8& result) const;
 
+#ifdef QCOM_BSP
+    /* Sets the EGL surface attribute EGL_SWAP_BEHAVIOR to
+     * EGL_BUFFER_PRESERVED (true) or EGL_BUFFER_DESTROYED (false).
+     */
+    void eglSwapPreserved(bool status) const;
+#endif
 private:
     /*
      *  Constants, set during initialization
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.cpp b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
index f9928be..400053e 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.cpp
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
@@ -47,6 +47,8 @@
 #include "../Layer.h"           // needed only for debugging
 #include "../SurfaceFlinger.h"
 
+#define GPUTILERECT_DEBUG 0
+
 namespace android {
 
 #define MIN_HWC_HEADER_VERSION HWC_HEADER_VERSION
@@ -698,6 +700,10 @@
             DisplayData& disp(mDisplayData[i]);
             disp.hasFbComp = false;
             disp.hasOvComp = false;
+#ifdef QCOM_BSP
+            disp.hasBlitComp = false;
+#endif
+
             if (disp.list) {
                 for (size_t j=0 ; j<disp.list->numHwLayers ; j++) {
                     hwc_layer_1_t& l = disp.list->hwLayers[j];
@@ -716,6 +722,9 @@
                     // trigger a FLIP
                     if(l.compositionType == HWC_BLIT) {
                         disp.hasFbComp = true;
+#ifdef QCOM_BSP
+                        disp.hasBlitComp = true;
+#endif
                     }
                     if (l.compositionType == HWC_OVERLAY) {
                         disp.hasOvComp = true;
@@ -735,6 +744,13 @@
     return (status_t)err;
 }
 
+#ifdef QCOM_BSP
+bool HWComposer::hasHwcOrBlitComposition(int32_t id) const {
+    // No HWC, an out-of-range id or an unallocated display: report no such layers.
+    const bool known = mHwc && uint32_t(id) <= 31 && mAllocatedDisplayIDs.hasBit(id);
+    return known && (mDisplayData[id].hasOvComp || mDisplayData[id].hasBlitComp);
+}
+#endif
 bool HWComposer::hasHwcComposition(int32_t id) const {
     if (!mHwc || uint32_t(id)>31 || !mAllocatedDisplayIDs.hasBit(id))
         return false;
@@ -1337,5 +1353,124 @@
     free(list);
 }
 
-// ---------------------------------------------------------------------------
+#ifdef QCOM_BSP
+//======================== GPU TiledRect/DR changes =====================
+bool HWComposer::areVisibleRegionsOverlapping(int32_t id ) {
+    // Walk the display's layers, accumulating the area seen so far; a layer
+    // whose visible region touches that area means two layers overlap.
+    const Vector< sp<Layer> >& layers =
+            mFlinger->getLayerSortedByZForHwcDisplay(id);
+    Region seen;
+    bool overlapping = false;
+    for (size_t idx = 0; idx < layers.size() && !overlapping; ++idx) {
+        const Region& visible = layers[idx]->visibleRegion;
+        overlapping = !seen.intersect(visible).isEmpty();
+        if (!overlapping) {
+            seen.orSelf(visible);
+        }
+    }
+    return overlapping;
+}
+
+bool HWComposer::needsScaling(int32_t id) {
+    DisplayData& disp(mDisplayData[id]);
+    // Without a layer list there is nothing to scale; the count check also
+    // keeps the unsigned "numHwLayers - 1" bound below from wrapping around.
+    if (!disp.list || disp.list->numHwLayers == 0) {
+        return false;
+    }
+    // The final list entry is skipped (presumably the framebuffer target).
+    for (size_t i = 0; i < disp.list->numHwLayers - 1; i++) {
+        const hwc_layer_1_t& layer = disp.list->hwLayers[i];
+        const hwc_rect_t& displayFrame = layer.displayFrame;
+        // Shrink the float crop to the whole pixels it fully contains.
+        const int src_w = int(floorf(layer.sourceCropf.right)) -
+                          int(ceilf(layer.sourceCropf.left));
+        const int src_h = int(floorf(layer.sourceCropf.bottom)) -
+                          int(ceilf(layer.sourceCropf.top));
+        const int dst_w = displayFrame.right - displayFrame.left;
+        const int dst_h = displayFrame.bottom - displayFrame.top;
+        if ((src_w != dst_w) || (src_h != dst_h)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void HWComposer::computeUnionDirtyRect(int32_t id, Rect& unionDirtyRect) {
+    const Vector< sp<Layer> >& currentLayers =
+            mFlinger->getLayerSortedByZForHwcDisplay(id);
+    Region unionDirtyRegion;
+    DisplayData& disp(mDisplayData[id]);
+
+    // hwLayers[] is indexed in step with the flinger layers, so never walk
+    // past the HWC list (or touch it at all when it has not been created).
+    size_t count = disp.list ? disp.list->numHwLayers : 0;
+    if (count > currentLayers.size()) {
+        count = currentLayers.size();
+    }
+
+    // Accumulate the dirty rect of every layer that has a new frame.
+    for (size_t i = 0; i < count; i++) {
+        if (!currentLayers[i]->hasNewFrame()) {
+            continue;
+        }
+        const hwc_layer_1_t& l = disp.list->hwLayers[i];
+        Rect dr(l.dirtyRect.left, l.dirtyRect.top,
+                l.dirtyRect.right, l.dirtyRect.bottom);
+
+        // Map dirtyRect to the layer destination: shift it by the offset
+        // between the (rounded-up) crop origin and the displayFrame origin.
+        const int x_off = l.displayFrame.left - int(ceilf(l.sourceCropf.left));
+        const int y_off = l.displayFrame.top - int(ceilf(l.sourceCropf.top));
+        unionDirtyRegion.orSelf(dr.offsetBy(x_off, y_off));
+    }
+    // The output is the bounding box of the accumulated dirty region.
+    unionDirtyRect = unionDirtyRegion.getBounds();
+}
+
+bool HWComposer::isGeometryChanged(int32_t id) {
+    const hwc_display_contents_1* list = mDisplayData[id].list;
+    return !list || (list->flags & HWC_GEOMETRY_CHANGED);  // no list: treat as changed
+}
+/* Decides whether the DirtyRect (DR) optimization can be used for GPU comp.
+ * Returns false if the display id is not usable, geometry changed, a layer is
+ * composed by overlay/blit, visible regions overlap, or layers need scaling.
+ * Otherwise stores the union dirty rect in unionDr (false if it is empty). */
+bool HWComposer::canUseTiledDR(int32_t id, Rect& unionDr ){
+    bool status = true;
+    if (!mHwc || uint32_t(id) > 31 || !mAllocatedDisplayIDs.hasBit(id)) {
+        /* The checks below index mDisplayData[id]; bail out on a bad id */
+        status = false;
+    } else if (isGeometryChanged(id)) {
+        ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect : geometrychanged, disable");
+        status = false;
+    } else if ( hasHwcOrBlitComposition(id)) {
+        /* Enabled only for full GPU Comp. TODO : enable for mixed mode also */
+        ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Blit comp, disable");
+        status = false;
+    } else if (areVisibleRegionsOverlapping(id)) {
+      /* With DirtyRect optimization, on certain targets we are seeing slightly
+       * lower FPS in use cases where visible regions overlap in Full GPU Comp.
+       * Hence this optimization has been disabled for use cases where visible
+       * regions overlap. TODO : Analyse & handle overlap usecases. */
+       ALOGD_IF(GPUTILERECT_DEBUG,
+             "GPUTileRect: Visible regions overlap, disable");
+       status = false;
+    } else if (needsScaling(id)) {
+       /* Do Not use TiledDR optimization, if layers need scaling */
+       ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Layers need scaling, disable");
+       status = false;
+    } else {
+        computeUnionDirtyRect(id, unionDr);
+        if (unionDr.isEmpty()) {
+            ALOGD_IF(GPUTILERECT_DEBUG,"GPUTileRect: UnionDr is empty, "
+                  "No need to PRESERVE");
+            status = false;
+        }
+    }
+    return status;
+}
+#endif
+
 }; // namespace android
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.h b/services/surfaceflinger/DisplayHardware/HWComposer.h
index 16c0f04..9652ead 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.h
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.h
@@ -117,6 +117,14 @@
     // does this display have layers handled by GLES
     bool hasGlesComposition(int32_t id) const;
 
+#ifdef QCOM_BSP
+    // does this display have layers handled by overlays/blit
+    bool hasHwcOrBlitComposition(int32_t id) const;
+
+    //GPUTiledRect : function to find out if DR can be used in GPU Comp.
+    bool canUseTiledDR(int32_t id, Rect& dr);
+#endif
+
     // get the releaseFence file descriptor for a display's framebuffer layer.
     // the release fence is only valid after commit()
     sp<Fence> getAndResetReleaseFence(int32_t id);
@@ -338,6 +346,9 @@
         bool connected;
         bool hasFbComp;
         bool hasOvComp;
+#ifdef QCOM_BSP
+        bool hasBlitComp;
+#endif
         size_t capacity;
         hwc_display_contents_1* list;
         hwc_layer_1* framebufferTarget;
@@ -376,6 +387,14 @@
 
     // thread-safe
     mutable Mutex mEventControlLock;
+
+#ifdef QCOM_BSP
+    //GPUTileRect Optimization Functions.
+    bool isGeometryChanged(int32_t id);
+    void computeUnionDirtyRect(int32_t id, Rect& unionDirtyRect);
+    bool areVisibleRegionsOverlapping(int32_t id );
+    bool needsScaling(int32_t id);
+#endif
 };
 
 // ---------------------------------------------------------------------------
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index d0186b8..457a92d 100755
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -1449,6 +1449,12 @@
     mFlinger->onLayerDestroyed(mLayer);
 }
 
+#ifdef QCOM_BSP
+bool Layer::hasNewFrame() const {
+    return 0 < mQueuedFrames;  // true while at least one frame is still queued
+}
+#endif
+
 // ---------------------------------------------------------------------------
 }; // namespace android
 
diff --git a/services/surfaceflinger/Layer.h b/services/surfaceflinger/Layer.h
index e0ef30b..a4d0fcf 100644
--- a/services/surfaceflinger/Layer.h
+++ b/services/surfaceflinger/Layer.h
@@ -307,6 +307,11 @@
     void logFrameStats();
     void getFrameStats(FrameStats* outStats) const;
 
+#ifdef QCOM_BSP
+    //GPUTileRect : Return true if the layer has been updated in this frame.
+    bool hasNewFrame() const;
+#endif
+
 protected:
     // constant
     sp<SurfaceFlinger> mFlinger;
diff --git a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
index 8ebafbc..c131e0b 100644
--- a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
+++ b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
@@ -183,6 +183,18 @@
     glDisable(GL_BLEND);
 }
 
+#ifdef QCOM_BSP
+void GLES20RenderEngine::startTileComposition(int x , int y, int width,
+                                            int height, bool preserve) {
+    const GLbitfield mask = preserve ? GL_COLOR_BUFFER_BIT0_QCOM : GL_NONE;
+    glStartTilingQCOM(x, y, width, height, mask);  // mask: color buffer 0 or nothing
+}
+
+void GLES20RenderEngine::endTileComposition(unsigned int preserveMask) {
+    glEndTilingQCOM(preserveMask);  // hand the caller's mask straight to the GL call
+}
+#endif
+
 
 void GLES20RenderEngine::bindImageAsFramebuffer(EGLImageKHR image,
         uint32_t* texName, uint32_t* fbName, uint32_t* status) {
diff --git a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
index 3d6243e..d85be08 100644
--- a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
+++ b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
@@ -74,6 +74,11 @@
     virtual void setupFillWithColor(float r, float g, float b, float a);
     virtual void disableTexturing();
     virtual void disableBlending();
+#ifdef QCOM_BSP
+    virtual void startTileComposition(int x , int y, int width,
+          int height,bool preserve );
+    virtual void endTileComposition(unsigned int preserveMask);
+#endif
 
     virtual void drawMesh(const Mesh& mesh);
 
diff --git a/services/surfaceflinger/RenderEngine/RenderEngine.h b/services/surfaceflinger/RenderEngine/RenderEngine.h
index acbff9b..b22f194 100644
--- a/services/surfaceflinger/RenderEngine/RenderEngine.h
+++ b/services/surfaceflinger/RenderEngine/RenderEngine.h
@@ -100,6 +100,12 @@
 
     virtual void disableTexturing() = 0;
     virtual void disableBlending() = 0;
+#ifdef QCOM_BSP
+    virtual void startTileComposition(int x, int y, int width,
+          int height, bool preserve){}  // default: no-op; GLES20RenderEngine overrides
+    virtual void endTileComposition(unsigned int) {}  // default: no-op
+#endif
+
 
     // drawing
     virtual void drawMesh(const Mesh& mesh) = 0;
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index e21b936..4b39f70 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -155,7 +155,8 @@
         mPrimaryHWVsyncEnabled(false),
         mHWVsyncAvailable(false),
         mDaltonize(false),
-        mHasColorMatrix(false)
+        mHasColorMatrix(false),
+        mGpuTileRenderEnable(false)
 {
     ALOGI("SurfaceFlinger is starting");
 
@@ -176,6 +177,13 @@
             mDebugDDMS = 0;
         }
     }
+#ifdef QCOM_BSP
+    property_get("debug.sf.gpu_comp_tiling", value, "1");
+    mGpuTileRenderEnable = atoi(value) ? true : false;
+    if(mGpuTileRenderEnable)
+       ALOGV("DirtyRect optimization enabled for FULL GPU Composition");
+#endif
+
     ALOGI_IF(mDebugRegion, "showupdates enabled");
     ALOGI_IF(mDebugDDMS, "DDMS debugging enabled");
 }
@@ -1941,6 +1949,25 @@
     hw->swapBuffers(getHwComposer());
 }
 
+#ifdef QCOM_BSP
+bool SurfaceFlinger::computeTiledDr(const sp<const DisplayDevice>& hw,
+                                         Rect& unionDirtyRect) {
+    /* Fills unionDirtyRect with the union of the layers' dirty rects and
+     * reports whether tiled rendering should be used for this frame. */
+    const Rect wholeScreen(0, 0, hw->getWidth(), hw->getHeight());
+    const int32_t displayId = hw->getHwcDisplayId();
+    unionDirtyRect.clear();
+
+    if (!getHwComposer().canUseTiledDR(displayId, unionDirtyRect)) {
+        return false;
+    }
+
+    /* A dirty rect covering the full screen gains nothing from preserving
+     * the previous buffer contents, so report it as "do not tile". */
+    return unionDirtyRect != wholeScreen;
+}
+#endif
+
 bool SurfaceFlinger::doComposeSurfaces(const sp<const DisplayDevice>& hw, const Region& dirty)
 {
     RenderEngine& engine(getRenderEngine());
@@ -1949,7 +1976,10 @@
     HWComposer::LayerListIterator cur = hwc.begin(id);
     const HWComposer::LayerListIterator end = hwc.end(id);
 
+    Rect unionDirtyRect;
+    Region clearRegion;
     bool hasGlesComposition = hwc.hasGlesComposition(id);
+    bool canUseGpuTileRender = false;
     if (hasGlesComposition) {
         if (!hw->makeCurrent(mEGLDisplay, mEGLContext)) {
             ALOGW("DisplayDevice::makeCurrent failed. Aborting surface composition for display %s",
@@ -1960,6 +1990,12 @@
             }
             return false;
         }
+#ifdef QCOM_BSP
+        /* Compute the union dirty rect if the DR optimization for GPU
+         * composition is ON and there is exactly one display. */
+        if(mGpuTileRenderEnable && (mDisplays.size()==1))
+            canUseGpuTileRender = computeTiledDr(hw, unionDirtyRect);
+#endif
 
         // Never touch the framebuffer if we don't have any framebuffer layers
         const bool hasHwcComposition = hwc.hasHwcComposition(id);
@@ -1985,14 +2021,31 @@
             // but limit it to the dirty region
             region.andSelf(dirty);
 
+
             // screen is already cleared here
-            if (!region.isEmpty()) {
-                if (cur != end) {
-                    if (cur->getCompositionType() != HWC_BLIT)
-                        // can happen with SurfaceView
-                        drawWormhole(hw, region);
-                } else
+#ifdef QCOM_BSP
+            clearRegion.clear();
+            if(mGpuTileRenderEnable && (mDisplays.size()==1)) {
+                clearRegion = region;
+                if (cur == end) {
                     drawWormhole(hw, region);
+                } else if(canUseGpuTileRender) {
+                   /* If the GPUTileRect DR optimization is on, clear only the
+                    * UnionDR (computed by computeTiledDr), which is the actual
+                    * region that will be drawn on the FB in this cycle. */
+                    clearRegion = clearRegion.andSelf(Region(unionDirtyRect));
+                }
+            } else
+#endif
+            {
+                if (!region.isEmpty()) {
+                    if (cur != end) {
+                        if (cur->getCompositionType() != HWC_BLIT)
+                            // can happen with SurfaceView
+                            drawWormhole(hw, region);
+                    } else
+                        drawWormhole(hw, region);
+                }
             }
         }
 
@@ -2024,6 +2077,32 @@
     const Transform& tr = hw->getTransform();
     if (cur != end) {
         // we're using h/w composer
+#ifdef QCOM_BSP
+        int fbWidth= hw->getWidth();
+        int fbHeight= hw->getHeight();
+        /* if GPUTileRender optimization property is on & can be used
+         * i) Enable EGL_SWAP_PRESERVED flag
+         * ii) do startTile with union DirtyRect
+         * else , Disable EGL_SWAP_PRESERVED */
+        if(mGpuTileRenderEnable && (mDisplays.size()==1)) {
+            if(canUseGpuTileRender) {
+                hw->eglSwapPreserved(true);
+                Rect dr = unionDirtyRect;
+                engine.startTileComposition(dr.left, (fbHeight-dr.bottom),
+                      (dr.right-dr.left),
+                      (dr.bottom-dr.top), 0);
+            } else {
+                // Unset the EGL_SWAP_PRESERVED flag if no tiling is required.
+                hw->eglSwapPreserved(false);
+            }
+            // DrawWormHole/Any Draw has to be within startTile & EndTile
+            if (cur->getCompositionType() != HWC_BLIT &&
+                  !clearRegion.isEmpty()){
+                drawWormhole(hw, clearRegion);
+            }
+        }
+#endif
+
         for (size_t i=0 ; i<count && cur!=end ; ++i, ++cur) {
             const sp<Layer>& layer(layers[i]);
             const Region clip(dirty.intersect(tr.transform(layer->visibleRegion)));
@@ -2059,6 +2138,15 @@
             }
             layer->setAcquireFence(hw, *cur);
         }
+
+#ifdef QCOM_BSP
+        // Call endTile if startTile has been called in this cycle.
+        if(mGpuTileRenderEnable && (mDisplays.size()==1)) {
+            if(canUseGpuTileRender) {
+                engine.endTileComposition(GL_PRESERVE);
+            }
+        }
+#endif
     } else {
         // we're not using h/w composer
         for (size_t i=0 ; i<count ; ++i) {
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index 53b616c..233f72c 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -480,6 +480,19 @@
     nsecs_t mLastTransactionTime;
     bool mBootFinished;
 
+    // Set if the Gpu Tile render DR optimization enabled
+    bool mGpuTileRenderEnable;
+#ifdef QCOM_BSP
+    // Find out whether GPU composition can use the dirty-region optimization.
+    // On return dirtyRect holds the union of the layers' dirty rects; the
+    // result is false when tiling cannot be used or that rect is full screen.
+    bool computeTiledDr(const sp<const DisplayDevice>& hw,Rect& dirtyRect);
+    enum {
+       GL_PRESERVE_NONE = 0,  // mask value 0
+       GL_PRESERVE      = 1   // mask handed to endTileComposition() after tiled drawing
+    };
+#endif
+
     // these are thread safe
     mutable MessageQueue mEventQueue;
     FrameTracker mAnimFrameTracker;