Merge "qdutils: Remove fb open dependency to read MDSS capabiliites"
diff --git a/libhwcomposer/hwc_mdpcomp.cpp b/libhwcomposer/hwc_mdpcomp.cpp
index bdc2dbd..2a1b123 100644
--- a/libhwcomposer/hwc_mdpcomp.cpp
+++ b/libhwcomposer/hwc_mdpcomp.cpp
@@ -44,7 +44,7 @@
 bool MDPComp::sEnablePartialFrameUpdate = false;
 int MDPComp::sMaxPipesPerMixer = MAX_PIPES_PER_MIXER;
 float MDPComp::sMaxBw = 2.3f;
-uint32_t MDPComp::sCompBytesClaimed = 0;
+double MDPComp::sBwClaimed = 0.0;
 
 MDPComp* MDPComp::getObject(hwc_context_t *ctx, const int& dpy) {
     if(isDisplaySplit(ctx, dpy)) {
@@ -605,7 +605,8 @@
 
     bool ret = false;
     if(isLoadBasedCompDoable(ctx, list)) {
-        ret = loadBasedComp(ctx, list);
+        ret = loadBasedCompPreferGPU(ctx, list) ||
+                loadBasedCompPreferMDP(ctx, list);
     }
 
     if(!ret) {
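As an aside (an illustration, not part of the patch): the short-circuiting || above tries the GPU-preferring strategy first and only falls back to the MDP-preferring one if it declines, equivalent to:

    // Equivalent control flow for the fallback chain (illustration only):
    if(isLoadBasedCompDoable(ctx, list)) {
        ret = loadBasedCompPreferGPU(ctx, list);     // try GPU-heavy split first
        if(!ret)
            ret = loadBasedCompPreferMDP(ctx, list); // then MDP-heavy split
    }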
@@ -657,14 +658,16 @@
     return true;
 }
 
-bool MDPComp::loadBasedComp(hwc_context_t *ctx,
+bool MDPComp::loadBasedCompPreferGPU(hwc_context_t *ctx,
         hwc_display_contents_1_t* list) {
     int numAppLayers = ctx->listStats[mDpy].numAppLayers;
     mCurrentFrame.reset(numAppLayers);
 
-    //TODO BatchSize could be optimized further based on available pipes, split
-    //displays etc.
-    const int batchSize = numAppLayers - (sMaxPipesPerMixer - 1);
+    int stagesForMDP = min(sMaxPipesPerMixer, ctx->mOverlay->availablePipes(
+            mDpy, Overlay::MIXER_DEFAULT));
+    //If MDP has X possible stages, it can take X layers.
+    const int batchSize = numAppLayers - (stagesForMDP - 1); //1 for FB
+
     if(batchSize <= 0) {
         ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
         return false;
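To make the batch-size arithmetic above concrete, a worked example with hypothetical numbers (not part of the patch):

    // Illustration only: 8 app layers, 4 usable MDP stages.
    int numAppLayers = 8;
    int stagesForMDP = 4;  // min(sMaxPipesPerMixer, available pipes)
    int batchSize = numAppLayers - (stagesForMDP - 1); // 8 - 3 = 5
    // One stage is reserved for the FB target, so 3 layers go to MDP
    // and the 5 top-most layers are batched onto the GPU.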
@@ -720,6 +723,59 @@
     return true;
 }
 
+bool MDPComp::loadBasedCompPreferMDP(hwc_context_t *ctx,
+        hwc_display_contents_1_t* list) {
+    const int numAppLayers = ctx->listStats[mDpy].numAppLayers;
+    //TODO get the ib (instantaneous bandwidth) from the sysfs node.
+    //"Full screen" here is from the ib perspective, not actual full screen.
+    const int bpp = 4;
+    double panelRefRate =
+                1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
+
+    double bwLeft = sMaxBw - sBwClaimed;
+
+    const int fullScreenLayers = bwLeft * 1000000000 / (ctx->dpyAttr[mDpy].xres
+            * ctx->dpyAttr[mDpy].yres * bpp * panelRefRate);
+
+    const int fbBatchSize = numAppLayers - (fullScreenLayers - 1);
+    //If the batch size is not at least 2, we aren't really preferring MDP:
+    //sending just 1 layer to the GPU could translate into an entire FB that
+    //MDP then has to fetch, needing more b/w rather than less.
+    if(fbBatchSize < 2 || fbBatchSize > numAppLayers) {
+        ALOGD_IF(isDebug(), "%s: Not attempting", __FUNCTION__);
+        return false;
+    }
+
+    //Top-most layers constitute FB batch
+    const int fbBatchStart = numAppLayers - fbBatchSize;
+
+    //Bottom-most layers constitute MDP batch
+    for(int i = 0; i < fbBatchStart; i++) {
+        hwc_layer_1_t* layer = &list->hwLayers[i];
+        if(not isSupportedForMDPComp(ctx, layer)) {
+            ALOGD_IF(isDebug(), "%s: MDP unsupported layer found at %d",
+                    __FUNCTION__, i);
+            return false;
+        }
+        mCurrentFrame.isFBComposed[i] = false;
+    }
+
+    mCurrentFrame.fbZ = fbBatchStart;
+    mCurrentFrame.fbCount = fbBatchSize;
+    mCurrentFrame.mdpCount = mCurrentFrame.layerCount - fbBatchSize;
+
+    if(!resourceCheck(ctx, list)) {
+        ALOGD_IF(isDebug(), "%s: resource check failed", __FUNCTION__);
+        return false;
+    }
+
+    ALOGD_IF(isDebug(), "%s: FB Z %d, num app layers %d, MDP Batch Size %d",
+                __FUNCTION__, mCurrentFrame.fbZ, numAppLayers,
+                numAppLayers - fbBatchSize);
+
+    return true;
+}
+
 bool MDPComp::isLoadBasedCompDoable(hwc_context_t *ctx,
         hwc_display_contents_1_t* list) {
     if(mDpy or isSecurePresent(ctx, mDpy) or
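Working the bandwidth math in loadBasedCompPreferMDP through with hypothetical numbers (a sketch, assuming a 1080p RGBA panel at 60Hz):

    // Illustration only: how many full-screen layers fit in the budget.
    double bwLeft = 2.3;                                // GBps still free
    double panelRefRate = 1000000000.0 / 16666666;      // ~60 fps
    int fullScreenLayers = (int)(bwLeft * 1000000000 /
            (1920 * 1080 * 4 * panelRefRate));          // ~4 layers
    // One 1080p RGBA layer costs ~0.5 GBps at 60fps, so 2.3 GBps
    // sustains 4 of them. With 8 app layers:
    int fbBatchSize = 8 - (fullScreenLayers - 1);       // 8 - 3 = 5 >= 2, OK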
@@ -736,7 +792,6 @@
     mCurrentFrame.reset(numAppLayers);
     updateYUV(ctx, list, secureOnly);
     int mdpCount = mCurrentFrame.mdpCount;
-    int fbNeeded = (mCurrentFrame.fbCount != 0);
 
     if(!isYuvPresent(ctx, mDpy)) {
         return false;
@@ -1132,10 +1187,10 @@
     //Will be added for other targets if we run into bandwidth issues and when
     //we have profiling data to set an upper limit.
     if(qdutils::MDPVersion::getInstance().is8x74v2()) {
-        const uint32_t ONE_GIG = 1024 * 1024 * 1024;
+        const uint32_t ONE_GIG = 1000 * 1000 * 1000;
         double panelRefRate =
                 1000000000.0 / ctx->dpyAttr[mDpy].vsync_period;
-        if((size + sCompBytesClaimed) > ((sMaxBw / panelRefRate) * ONE_GIG)) {
+        if((size * panelRefRate) > ((sMaxBw - sBwClaimed) * ONE_GIG)) {
             return false;
         }
     }
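A worked instance of the reworked denial check (hypothetical numbers; note ONE_GIG is now decimal so its units line up with the GBps values of sMaxBw and sBwClaimed):

    // Illustration only: demanded GBps vs remaining GBps budget.
    uint32_t size = 30 * 1000 * 1000;        // MDP reads ~30MB per frame
    double panelRefRate = 60.0;              // frames per second
    double maxBw = 2.3, bwClaimed = 1.0;     // GBps
    // 30MB * 60fps = 1.8 GBps demanded, but only 1.3 GBps left -> deny.
    bool deny = (size * panelRefRate) > ((maxBw - bwClaimed) * 1000000000.0);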
@@ -1250,7 +1305,9 @@
     }
 
 exit:
-    sCompBytesClaimed += calcMDPBytesRead(ctx, list);
+    //GBps (bytes / nanosec == gigabytes / sec)
+    sBwClaimed += calcMDPBytesRead(ctx, list) /
+            (double)ctx->dpyAttr[mDpy].vsync_period;
     return ret;
 }
 
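The unit conversion in the accumulation above can be sanity-checked with hypothetical numbers:

    // Illustration only: bytes-per-frame over ns-per-frame is GBps.
    uint32_t bytesThisFrame = 33333333;      // from calcMDPBytesRead()
    uint32_t vsyncPeriodNs  = 16666666;      // 60Hz panel
    double gbps = bytesThisFrame / (double)vsyncPeriodNs; // ~2.0 GBps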
diff --git a/libhwcomposer/hwc_mdpcomp.h b/libhwcomposer/hwc_mdpcomp.h
index 7063d32..adf74bb 100644
--- a/libhwcomposer/hwc_mdpcomp.h
+++ b/libhwcomposer/hwc_mdpcomp.h
@@ -52,7 +52,7 @@
     /* Initialize MDP comp*/
     static bool init(hwc_context_t *ctx);
     static void resetIdleFallBack() { sIdleFallBack = false; }
-    static void reset() { sCompBytesClaimed = 0; };
+    static void reset() { sBwClaimed = 0.0; };
 
 protected:
     enum { MAX_SEC_LAYERS = 1 }; //TODO add property support
@@ -147,8 +147,16 @@
     bool partialMDPComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
     /* Partial MDP comp that uses caching to save power as primary goal */
     bool cacheBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
-    /* Partial MDP comp that uses number of pixels to optimize perf goal */
-    bool loadBasedComp(hwc_context_t *ctx, hwc_display_contents_1_t* list);
+    /* Partial MDP comp that prefers GPU perf-wise. Since the GPU's
+     * perf is proportional to the pixels it processes, we use the number of
+     * pixels as a heuristic */
+    bool loadBasedCompPreferGPU(hwc_context_t *ctx,
+            hwc_display_contents_1_t* list);
+    /* Partial MDP comp that prefers MDP perf-wise. Since the MDP's perf is
+     * proportional to the bandwidth and overlaps it sees, we use those as
+     * the heuristic */
+    bool loadBasedCompPreferMDP(hwc_context_t *ctx,
+            hwc_display_contents_1_t* list);
     /* Checks if its worth doing load based partial comp */
     bool isLoadBasedCompDoable(hwc_context_t *ctx,
             hwc_display_contents_1_t* list);
@@ -205,9 +213,9 @@
     static int sMaxPipesPerMixer;
     //Max bandwidth. Value is in GBPS. For ex: 2.3 means 2.3GBPS
     static float sMaxBw;
-    //Tracks composition bytes claimed. Represented as the total w*h*bpp
-    //going to MDP mixers
-    static uint32_t sCompBytesClaimed;
+    //Tracks composition bandwidth claimed. Represented as the total
+    //w*h*bpp*fps (gigabytes-per-second) going to MDP mixers.
+    static double sBwClaimed;
     static IdleInvalidator *idleInvalidator;
     struct FrameInfo mCurrentFrame;
     struct LayerCache mCachedFrame;
diff --git a/libqdutils/qdMetaData.cpp b/libqdutils/qdMetaData.cpp
index 89ca92e..f39eef9 100644
--- a/libqdutils/qdMetaData.cpp
+++ b/libqdutils/qdMetaData.cpp
@@ -81,6 +81,26 @@
         case UPDATE_BUFFER_GEOMETRY:
             memcpy((void *)&data->bufferDim, param, sizeof(BufferDim_t));
             break;
+        case PP_PARAM_VFM_DATA:
+        {
+            int32_t     indx = 0;
+            VfmData_t*  pVfmData = reinterpret_cast<VfmData_t *>(param);
+            int32_t     dataType = pVfmData->dataType;
+
+            if(dataType > 0) {
+                indx = getVfmDataIdx(dataType);
+                if(indx < MAX_VFM_DATA_COUNT) {
+                    data->vfmDataBitMap |= dataType;
+                    memcpy((void *)&data->vfmData[indx], param,
+                        sizeof(VfmData_t));
+                } else {
+                    ALOGE("unknown dataType %d", dataType);
+                }
+            } else {
+                ALOGE("invalid dataType in PP_PARAM_VFM_DATA %d", dataType);
+            }
+        }
+        break;
         default:
             ALOGE("Unknown paramType %d", paramType);
             break;
diff --git a/libqdutils/qdMetaData.h b/libqdutils/qdMetaData.h
index d5354a4..4b6e678 100644
--- a/libqdutils/qdMetaData.h
+++ b/libqdutils/qdMetaData.h
@@ -31,6 +31,19 @@
 #define _QDMETADATA_H
 
 #define MAX_IGC_LUT_ENTRIES 256
+#define MAX_VFM_DATA_SIZE   64 //bytes per data buffer
+#define MAX_VFM_DATA_COUNT  16 //number of data buffers
+
+/* This inline function finds the index corresponding to a (one-hot) type */
+/* This is equivalent to indx = LOG_2(type) */
+inline int32_t getVfmDataIdx(int32_t type) {
+    int32_t indx = 0, x = type;
+    while(x >> 1) {
+        x = (x >> 1);
+        indx++;
+    }
+    return indx;
+}
 
 struct HSICData_t {
     int32_t hue;
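getVfmDataIdx effectively returns floor(log2(type)), i.e. the index of the highest set bit, so it assumes dataType carries a single one-hot bit; a few illustrative values:

    // Illustration only: one-hot type bit -> slot index.
    getVfmDataIdx(0x0001);  // == 0
    getVfmDataIdx(0x0004);  // == 2
    getVfmDataIdx(0x8000);  // == 15, the last slot (MAX_VFM_DATA_COUNT - 1)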
@@ -57,6 +70,11 @@
     int32_t sliceHeight;
 };
 
+struct VfmData_t {
+    int32_t dataType;
+    char    data[MAX_VFM_DATA_SIZE];
+};
+
 struct MetaData_t {
     int32_t operation;
     int32_t interlaced;
@@ -67,6 +85,8 @@
     IGCData_t igcData;
     Sharp2Data_t Sharp2Data;
     int64_t timestamp;
+    int32_t vfmDataBitMap;
+    VfmData_t vfmData[MAX_VFM_DATA_COUNT];
 };
 
 typedef enum {
@@ -78,6 +98,7 @@
     PP_PARAM_SHARP2     = 0x0020,
     PP_PARAM_TIMESTAMP  = 0x0040,
     UPDATE_BUFFER_GEOMETRY = 0x0080,
+    PP_PARAM_VFM_DATA   = 0x0100,
 } DispParamType;
 
 int setMetaData(private_handle_t *handle, DispParamType paramType, void *param);
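A minimal usage sketch for the new PP_PARAM_VFM_DATA path (the handle hnd and the type bit 0x0004 are hypothetical, not defined by this patch):

    // Illustration only: stash a small blob in the buffer's metadata.
    VfmData_t vfm;
    vfm.dataType = 0x0004;                   // hypothetical one-hot type bit
    memset(vfm.data, 0, MAX_VFM_DATA_SIZE);
    memcpy(vfm.data, "example-payload", 16); // up to MAX_VFM_DATA_SIZE bytes
    int err = setMetaData(hnd, PP_PARAM_VFM_DATA, (void *)&vfm);
    // On success, metadata->vfmData[2] holds the blob and bit 0x0004 is
    // set in metadata->vfmDataBitMap (2 == getVfmDataIdx(0x0004)).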