sf: Enable GPU Tiled DR optimization for overlap and mixed mode use cases

Extend the GPU Tiled DR (dirty rect) optimization to use cases with
overlapping layers and to mixed-mode composition.

Change-Id: Iaba496be21d5c4ceeedfe3405cf26b36677cd14b
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.cpp b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
index 00668f0..fa9e662 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.cpp
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
@@ -202,6 +202,11 @@
         // we don't have VSYNC support, we need to fake it
         mVSyncThread = new VSyncThread(*this);
     }
+#ifdef QCOM_BSP
+    // Overlap-area to FB-area ratio below which GPU Tiled Rect is allowed.
+    property_get("debug.hwc.gpuTiledThreshold", value, "1.9");
+    mDynThreshold = atof(value);
+#endif
 }
 
 HWComposer::~HWComposer() {
@@ -705,6 +710,12 @@
 #endif
 
             if (disp.list) {
+#ifdef QCOM_BSP
+               //GPUTILERECT
+               prev_comp_map[i] = current_comp_map[i];
+               current_comp_map[i].reset();
+               current_comp_map[i].count = disp.list->numHwLayers-1;
+#endif
                 for (size_t j=0 ; j<disp.list->numHwLayers ; j++) {
                     hwc_layer_1_t& l = disp.list->hwLayers[j];
 
@@ -732,6 +743,12 @@
                     if (l.compositionType == HWC_CURSOR_OVERLAY) {
                         disp.hasOvComp = true;
                     }
+#ifdef QCOM_BSP
+                    //GPUTILERECT
+                    if(l.compositionType != HWC_FRAMEBUFFER_TARGET) {
+                        current_comp_map[i].compType[j] = l.compositionType;
+                    }
+#endif
                 }
                 if (disp.list->numHwLayers == (disp.framebufferTarget ? 1 : 0)) {
                     disp.hasFbComp = true;
@@ -745,10 +762,10 @@
 }
 
 #ifdef QCOM_BSP
-bool HWComposer::hasHwcOrBlitComposition(int32_t id) const {
+bool HWComposer::hasBlitComposition(int32_t id) const {
     if (!mHwc || uint32_t(id) > 31 || !mAllocatedDisplayIDs.hasBit(id))
         return false;
-    return mDisplayData[id].hasOvComp || mDisplayData[id].hasBlitComp;
+    return mDisplayData[id].hasBlitComp;
 }
 #endif
 bool HWComposer::hasHwcComposition(int32_t id) const {
@@ -1374,6 +1391,37 @@
     return false;
 }
 
+// GPUTileRect: returns true when the area of FB-composed layers clipped to
+// unionDr, divided by the FB target area, is below mDynThreshold.
+bool HWComposer::canHandleOverlapArea(int32_t id, Rect unionDr) {
+    if (!mHwc || uint32_t(id)>31 || !mAllocatedDisplayIDs.hasBit(id))
+        return false;
+    DisplayData& disp(mDisplayData[id]);
+    // The last hwLayers entry is read as the FB target, so one must exist.
+    if (!disp.list || disp.list->numHwLayers == 0)
+        return false;
+    const size_t fbIndex = disp.list->numHwLayers - 1;
+    const hwc_rect_t& fbFrame = disp.list->hwLayers[fbIndex].displayFrame;
+    const float fbLayerArea = float(fbFrame.right - fbFrame.left) *
+              float(fbFrame.bottom - fbFrame.top);
+    if (fbLayerArea <= 0) return false; // degenerate target: no divide by 0
+    // Sum the areas of FB-composed layers intersecting the union dirty rect.
+    float layerAreaSum = 0;
+    for (size_t i=0; i<fbIndex; i++) {
+        const hwc_layer_1_t& layer = disp.list->hwLayers[i];
+        const hwc_rect_t& f = layer.displayFrame;
+        Rect df(f.left, f.top, f.right, f.bottom);
+        Rect df_dirty;
+        df_dirty.clear();
+        if (layer.compositionType == HWC_FRAMEBUFFER && df.intersect(unionDr, &df_dirty))
+            layerAreaSum += float(df_dirty.getWidth()) * float(df_dirty.getHeight());
+    }
+    ALOGD_IF(GPUTILERECT_DEBUG,"GPUTileRect: overlap/FB : %f",
+           (layerAreaSum/fbLayerArea));
+    // At or above the threshold the optimization does not pay off: report false.
+    return (mDynThreshold > (layerAreaSum/fbLayerArea));
+}
+
 bool HWComposer::needsScaling(int32_t id) {
     if (!mHwc || uint32_t(id)>31 || !mAllocatedDisplayIDs.hasBit(id))
         return false;
@@ -1413,8 +1461,10 @@
     // Find UnionDr of all layers
     for (size_t i=0; i<count; i++) {
         hwc_layer_1_t& l = disp.list->hwLayers[i];
-        Rect dr(0,0,0,0);
-        if(currentLayers[i]->hasNewFrame()) {
+        Rect dr;
+        dr.clear();
+        if((l.compositionType == HWC_FRAMEBUFFER) &&
+              currentLayers[i]->hasNewFrame()) {
             dr = Rect(l.dirtyRect.left, l.dirtyRect.top, l.dirtyRect.right,
                   l.dirtyRect.bottom);
             hwc_rect_t dst = l.displayFrame;
@@ -1434,7 +1484,12 @@
     }
     unionDirtyRect = unionDirtyRegion.getBounds();
 }
-
+// GPUTileRect: true when display id's per-layer composition types differ
+// from the previous frame; out-of-range ids report true (keeps TiledDR off).
+bool HWComposer::isCompositionMapChanged(int32_t id) {
+    if (uint32_t(id) >= uint32_t(MAX_HWC_DISPLAYS)) return true;
+    return !(prev_comp_map[id] == current_comp_map[id]);
+}
 bool HWComposer::isGeometryChanged(int32_t id) {
     if (!mHwc || uint32_t(id)>31 || !mAllocatedDisplayIDs.hasBit(id))
         return false;
@@ -1453,27 +1508,28 @@
     if (isGeometryChanged(id)) {
         ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect : geometrychanged, disable");
         status = false;
-    } else if ( hasHwcOrBlitComposition(id)) {
-     /* Currently enabled only for full GPU Comp
-      * TODO : enable for mixed mode also */
+    } else if ( hasBlitComposition(id)) {
         ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Blit comp, disable");
         status = false;
-    } else if (areVisibleRegionsOverlapping(id)) {
-      /* With DirtyRect optimiaton, On certain targets we are  seeing slightly
-       * lower FPS in use cases where visible regions overlap in Full GPU Comp.
-       * Hence this optimizatin has been disabled for usecases where visible
-       * regions overlap. TODO : Analyse & handle overlap usecases. */
-       ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Visible \
-             regions overlap, disable");
-       status = false;
+    } else if ( isCompositionMapChanged(id)) {
+        ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: comp map changed, disable");
+        status = false;
     } else if (needsScaling(id)) {
        /* Do Not use TiledDR optimization, if layers need scaling */
        ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Layers need scaling, disable");
        status = false;
     } else {
         computeUnionDirtyRect(id, unionDr);
-        if(unionDr.isEmpty())
-        {
+        if(areVisibleRegionsOverlapping(id) &&
+              !canHandleOverlapArea(id, unionDr)){
+           /* With the DR optimization, on certain targets we see slightly
+            * lower FPS in use cases where visible regions overlap and
+            * the total dirty area of layers is greater than a threshold value.
+            * Hence this optimization is disabled for such use cases. */
+            ALOGD_IF(GPUTILERECT_DEBUG, "GPUTileRect: Visible \
+                 regions overlap & Total Dirty Area > Threashold, disable");
+            status = false;
+        } else if(unionDr.isEmpty()) {
             ALOGD_IF(GPUTILERECT_DEBUG,"GPUTileRect: UnionDr is emtpy, \
                   No need to PRESERVE");
             status = false;
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.h b/services/surfaceflinger/DisplayHardware/HWComposer.h
index 9652ead..2d76ef1 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.h
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.h
@@ -32,6 +32,8 @@
 #include <utils/Timers.h>
 #include <utils/Vector.h>
 
+#define MAX_LAYER_COUNT 32
+
 extern "C" int clock_nanosleep(clockid_t clock_id, int flags,
                            const struct timespec *request,
                            struct timespec *remain);
@@ -118,8 +120,8 @@
     bool hasGlesComposition(int32_t id) const;
 
 #ifdef QCOM_BSP
-    // does this display have layers handled by overlays/blit
-    bool hasHwcOrBlitComposition(int32_t id) const;
+    // does this display have layers handled by BLIT HW
+    bool hasBlitComposition(int32_t id) const;
 
     //GPUTiledRect : function to find out if DR can be used in GPU Comp.
     bool canUseTiledDR(int32_t id, Rect& dr);
@@ -388,12 +390,50 @@
     // thread-safe
     mutable Mutex mEventControlLock;
 
+    //GPUTileRect : CompMap tracks the composition type of each layer.
+    //Code that indexes compType directly must stay below MAX_LAYER_COUNT.
+    struct CompMap {
+        int32_t count;                      // number of tracked layers
+        int32_t compType[MAX_LAYER_COUNT];  // per-layer type, -1 when unset
+        CompMap () { reset(); }
+        // Forget all layers: count becomes 0 and every slot becomes -1.
+        void reset () {
+            count=0;
+            for(size_t i= 0; i <MAX_LAYER_COUNT; i++) {
+                compType[i] = -1;
+            }
+        }
+        // Entries that are copied/compared; never more than the array holds.
+        int32_t used () const {
+            return (count > MAX_LAYER_COUNT) ? MAX_LAYER_COUNT : count;
+        }
+        CompMap& operator=(const CompMap &rhs) {
+            if(this != &rhs) {
+                reset();
+                count = rhs.count;
+                for(int32_t i=0; i<used(); i++)
+                    compType[i] = rhs.compType[i];
+            }
+            return *this;
+        }
+        // Equal when the counts match and the used entries match.
+        bool operator== (const CompMap &rhs) const {
+            if( count != rhs.count) return false;
+            for(int32_t i=0; i<used(); i++)
+                if(compType[i] != rhs.compType[i]) return false;
+            return true;
+        }
+    };
+
 #ifdef QCOM_BSP
     //GPUTileRect Optimization Functions.
+    CompMap prev_comp_map[MAX_HWC_DISPLAYS], current_comp_map[MAX_HWC_DISPLAYS];
+    bool isCompositionMapChanged(int32_t id);
     bool isGeometryChanged(int32_t id);
     void computeUnionDirtyRect(int32_t id, Rect& unionDirtyRect);
     bool areVisibleRegionsOverlapping(int32_t id );
     bool needsScaling(int32_t id);
+    float mDynThreshold;
+    bool canHandleOverlapArea(int32_t id, Rect unionDr);
 #endif
 };
 
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index fc8b125..d6140dd 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -2085,6 +2085,7 @@
 
     Region clearRegion;
     bool hasGlesComposition = hwc.hasGlesComposition(id);
+    const bool hasHwcComposition = hwc.hasHwcComposition(id);
     if (hasGlesComposition) {
         if (!hw->makeCurrent(mEGLDisplay, mEGLContext)) {
             ALOGW("DisplayDevice::makeCurrent failed. Aborting surface composition for display %s",
@@ -2097,14 +2098,14 @@
         }
 
         // Never touch the framebuffer if we don't have any framebuffer layers
-        const bool hasHwcComposition = hwc.hasHwcComposition(id);
         if (hasHwcComposition) {
             // when using overlays, we assume a fully transparent framebuffer
             // NOTE: we could reduce how much we need to clear, for instance
             // remove where there are opaque FB layers. however, on some
             // GPUs doing a "clean slate" clear might be more efficient.
             // We'll revisit later if needed.
-            engine.clearWithColor(0, 0, 0, 0);
+            if(!(mGpuTileRenderEnable && (mDisplays.size()==1)))
+                engine.clearWithColor(0, 0, 0, 0);
         } else {
             // we start with the whole screen area
             const Region bounds(hw->getBounds());
@@ -2195,9 +2196,21 @@
                 hw->eglSwapPreserved(false);
             }
             // DrawWormHole/Any Draw has to be within startTile & EndTile
-            if (cur->getCompositionType() != HWC_BLIT &&
-                  !clearRegion.isEmpty()){
-                drawWormhole(hw, clearRegion);
+            if (hasHwcComposition) {
+                if(mCanUseGpuTileRender && !mUnionDirtyRect.isEmpty()) {
+                    const Rect& scissor(mUnionDirtyRect);
+                    engine.setScissor(scissor.left, hw->getHeight()- scissor.bottom,
+                    scissor.getWidth(), scissor.getHeight());
+                    engine.clearWithColor(0, 0, 0, 0);
+                    engine.disableScissor();
+                } else {
+                    engine.clearWithColor(0, 0, 0, 0);
+                }
+            } else {
+                if (cur->getCompositionType() != HWC_BLIT &&
+                      !clearRegion.isEmpty()){
+                    drawWormhole(hw, clearRegion);
+                }
             }
         }
 #endif