msm: vidc: Introduce Performance Monitoring System

Enhance the current DCVS algorithm to handle Venus performance
fluctuations by adjusting the clock rate and bus bandwidth based
on pending buffer counts.

CRs-Fixed: 2012520
Change-Id: I448201ec596b71b692ee5b993fc36716f420612d
Signed-off-by: Praneeth Paladugu <ppaladug@codeaurora.org>
diff --git a/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c b/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
index 0618768..9daf053 100644
--- a/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
+++ b/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
@@ -387,13 +387,23 @@
 
 	integer_part = d->compression_ratio >> 16;
 	frac_part =
-		((d->compression_ratio - (integer_part * 65536)) * 100) >> 16;
+		((d->compression_ratio - (integer_part << 16)) * 100) >> 16;
 
 	dpb_read_compression_factor = FP(integer_part, frac_part, 100);
 
+	integer_part = d->complexity_factor >> 16;
+	frac_part =
+		((d->complexity_factor - (integer_part << 16)) * 100) >> 16;
+
+	motion_vector_complexity = FP(integer_part, frac_part, 100);
+
 	dpb_write_compression_factor = !dpb_compression_enabled ? FP_ONE :
 		__compression_ratio(__lut(width, height, fps), opb_bpp);
 
+	dpb_write_compression_factor = d->use_dpb_read ?
+		dpb_read_compression_factor :
+		dpb_write_compression_factor;
+
 	opb_compression_factor = !opb_compression_enabled ? FP_ONE :
 		__compression_ratio(__lut(width, height, fps), opb_bpp);
 
@@ -437,8 +447,6 @@
 			lcu_per_frame * fps / bps(1));
 	ddr.line_buffer_write = ddr.line_buffer_read;
 
-	motion_vector_complexity = FP_INT(4);
-
 	bw_for_1x_8bpc = fp_div(FP_INT(width * height), FP_INT(32 * 8));
 
 	bw_for_1x_8bpc = fp_mult(bw_for_1x_8bpc,
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_clocks.c b/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
index 6867735..cb3c526 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
+++ b/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
@@ -16,6 +16,9 @@
 #include "msm_vidc_debug.h"
 #include "msm_vidc_clocks.h"
 
+#define MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR 1
+#define MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR 4
+
 static inline unsigned long int get_ubwc_compression_ratio(
 	struct ubwc_cr_stats_info_type ubwc_stats_info)
 {
@@ -91,16 +94,30 @@
 	struct vidc_bus_vote_data *vote_data)
 {
 	struct recon_buf *binfo;
-	u32 CR = 0, CF = 0;
+	u32 CR = 0, min_cf = MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR,
+		max_cf = MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR;
 
 	mutex_lock(&inst->reconbufs.lock);
 	list_for_each_entry(binfo, &inst->reconbufs.list, list) {
 		CR = max(CR, binfo->CR);
-		CF = max(CF, binfo->CF);
+		min_cf = min(min_cf, binfo->CF);
+		max_cf = max(max_cf, binfo->CF);
 	}
 	mutex_unlock(&inst->reconbufs.lock);
-	vote_data->complexity_factor = CF;
 	vote_data->compression_ratio = CR;
+
+	vote_data->complexity_factor = max_cf;
+	vote_data->use_dpb_read = false;
+	if (inst->clk_data.load <= inst->clk_data.load_norm) {
+		vote_data->complexity_factor = min_cf;
+		vote_data->use_dpb_read = true;
+	}
+
+	dprintk(VIDC_DBG,
+		"Complression Ratio = %d Complexity Factor = %d\n",
+			vote_data->compression_ratio,
+			vote_data->complexity_factor);
+
 	return 0;
 }
 
@@ -129,6 +146,32 @@
 	mutex_lock(&core->lock);
 	list_for_each_entry(inst, &core->instances, list) {
 		int codec = 0;
+		struct msm_vidc_buffer *temp, *next;
+		u32 filled_len = 0;
+		u32 device_addr = 0;
+
+		if (!inst) {
+			dprintk(VIDC_ERR, "%s Invalid args\n",
+				__func__);
+			return -EINVAL;
+		}
+
+		mutex_lock(&inst->registeredbufs.lock);
+		list_for_each_entry_safe(temp, next,
+				&inst->registeredbufs.list, list) {
+			if (temp->vvb.vb2_buf.type ==
+				V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
+				filled_len = max(filled_len,
+					temp->vvb.vb2_buf.planes[0].bytesused);
+				device_addr = temp->smem[0].device_addr;
+			}
+		}
+		mutex_unlock(&inst->registeredbufs.lock);
+
+		if (!filled_len || !device_addr) {
+			dprintk(VIDC_DBG, "%s No ETBs\n", __func__);
+			continue;
+		}
 
 		++vote_data_count;
 
@@ -253,17 +296,17 @@
 
 	/* Buffers outside FW are with display */
 	buffers_outside_fw = total_output_buf - fw_pending_bufs;
-	dprintk(VIDC_DBG,
+	dprintk(VIDC_PROF,
 		"Counts : total_output_buf = %d fw_pending_bufs = %d buffers_outside_fw = %d\n",
 		total_output_buf, fw_pending_bufs, buffers_outside_fw);
 
-	if (buffers_outside_fw >=  dcvs->min_threshold &&
-			dcvs->load > dcvs->load_low) {
+	if (buffers_outside_fw >=  dcvs->min_threshold)
 		dcvs->load = dcvs->load_low;
-	} else if (buffers_outside_fw < dcvs->min_threshold &&
-			dcvs->load == dcvs->load_low) {
+	else if (buffers_outside_fw <= dcvs->max_threshold)
 		dcvs->load = dcvs->load_high;
-	}
+	else
+		dcvs->load = dcvs->load_norm;
+
 	return rc;
 }
 
@@ -291,8 +334,6 @@
 	mutex_unlock(&inst->freqs.lock);
 }
 
-// TODO this needs to be removed later and use queued_list
-
 void msm_vidc_clear_freq_entry(struct msm_vidc_inst *inst,
 	u32 device_addr)
 {
@@ -322,9 +363,8 @@
 
 	/* If current requirement is within DCVS limits, try DCVS. */
 
-	if (freq < inst->clk_data.load_high) {
+	if (freq < inst->clk_data.load_norm) {
 		dprintk(VIDC_DBG, "Calling DCVS now\n");
-		// TODO calling DCVS here may reduce the residency. Re-visit.
 		msm_dcvs_scale_clocks(inst);
 		freq = inst->clk_data.load;
 	}
@@ -346,6 +386,18 @@
 	mutex_unlock(&inst->freqs.lock);
 }
 
+static unsigned long msm_vidc_max_freq(struct msm_vidc_core *core)
+{
+	struct allowed_clock_rates_table *allowed_clks_tbl = NULL;
+	unsigned long freq = 0;
+
+	allowed_clks_tbl = core->resources.allowed_clks_tbl;
+	freq = allowed_clks_tbl[0].clock_rate;
+	dprintk(VIDC_PROF, "Max rate = %lu", freq);
+
+	return freq;
+}
+
 static unsigned long msm_vidc_calc_freq(struct msm_vidc_inst *inst,
 	u32 filled_len)
 {
@@ -377,17 +429,17 @@
 
 		vsp_cycles = mbs_per_second * inst->clk_data.entry->vsp_cycles;
 		/* 10 / 7 is overhead factor */
-		vsp_cycles += (inst->prop.fps * filled_len * 8 * 10) / 7;
+		vsp_cycles += ((inst->prop.fps * filled_len * 8) / 7) * 10;
 
 	} else {
-		// TODO return Min or Max ?
 		dprintk(VIDC_ERR, "Unknown session type = %s\n", __func__);
-		return freq;
+		return msm_vidc_max_freq(inst->core);
 	}
 
 	freq = max(vpp_cycles, vsp_cycles);
 
-	dprintk(VIDC_PROF, "%s Inst %pK : Freq = %lu\n", __func__, inst, freq);
+	dprintk(VIDC_PROF, "%s Inst %pK : Filled Len = %d Freq = %lu\n",
+		__func__, inst, filled_len, freq);
 
 	return freq;
 }
@@ -429,18 +481,6 @@
 	return rc;
 }
 
-static unsigned long msm_vidc_max_freq(struct msm_vidc_core *core)
-{
-	struct allowed_clock_rates_table *allowed_clks_tbl = NULL;
-	unsigned long freq = 0;
-
-	allowed_clks_tbl = core->resources.allowed_clks_tbl;
-	freq = allowed_clks_tbl[0].clock_rate;
-	dprintk(VIDC_PROF, "Max rate = %lu", freq);
-
-	return freq;
-}
-
 int msm_vidc_update_operating_rate(struct msm_vidc_inst *inst)
 {
 	struct v4l2_ctrl *ctrl = NULL;
@@ -531,8 +571,7 @@
 	mutex_lock(&inst->registeredbufs.lock);
 	list_for_each_entry_safe(temp, next, &inst->registeredbufs.list, list) {
 		if (temp->vvb.vb2_buf.type ==
-				V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE &&
-				temp->deferred) {
+				V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
 			filled_len = max(filled_len,
 				temp->vvb.vb2_buf.planes[0].bytesused);
 			device_addr = temp->smem[0].device_addr;
@@ -541,8 +580,8 @@
 	mutex_unlock(&inst->registeredbufs.lock);
 
 	if (!filled_len || !device_addr) {
-		dprintk(VIDC_PROF, "No Change in frequency\n");
-		goto decision_done;
+		dprintk(VIDC_DBG, "%s No ETBs\n", __func__);
+		goto no_clock_change;
 	}
 
 	freq = msm_vidc_calc_freq(inst, filled_len);
@@ -559,8 +598,9 @@
 	else
 		inst->clk_data.curr_freq = freq;
 
-decision_done:
 	msm_vidc_set_clocks(inst->core);
+
+no_clock_change:
 	return 0;
 }
 
@@ -607,7 +647,6 @@
 	}
 	inst->clk_data.dcvs_mode = true;
 
-	// TODO : Update with proper number based on on-target tuning.
 	inst->clk_data.extra_capture_buffer_count =
 		DCVS_DEC_EXTRA_OUTPUT_BUFFERS;
 	inst->clk_data.extra_output_buffer_count =
@@ -645,12 +684,13 @@
 
 static inline void msm_dcvs_print_dcvs_stats(struct clock_data *dcvs)
 {
-	dprintk(VIDC_DBG,
-		"DCVS: Load_Low %d, Load High %d\n",
+	dprintk(VIDC_PROF,
+		"DCVS: Load_Low %d, Load Norm %d, Load High %d\n",
 		dcvs->load_low,
+		dcvs->load_norm,
 		dcvs->load_high);
 
-	dprintk(VIDC_DBG,
+	dprintk(VIDC_PROF,
 		"DCVS: min_threshold %d, max_threshold %d\n",
 		dcvs->min_threshold, dcvs->max_threshold);
 }
@@ -663,6 +703,7 @@
 	u64 total_freq = 0, rate = 0, load;
 	int cycles;
 	struct clock_data *dcvs;
+	struct hal_buffer_requirements *output_buf_req;
 
 	dprintk(VIDC_DBG, "Init DCVS Load\n");
 
@@ -683,12 +724,22 @@
 			cycles;
 
 		dcvs->buffer_type = HAL_BUFFER_INPUT;
-		// TODO : Update with proper no based on Buffer counts change.
-		dcvs->min_threshold = 7;
+		dcvs->min_threshold =
+			msm_vidc_get_extra_buff_count(inst, HAL_BUFFER_INPUT);
 	} else if (inst->session_type == MSM_VIDC_DECODER) {
 		dcvs->buffer_type = msm_comm_get_hal_output_buffer(inst);
-		// TODO : Update with proper no based on Buffer counts change.
-		dcvs->min_threshold = 4;
+		output_buf_req = get_buff_req_buffer(inst,
+				dcvs->buffer_type);
+		if (!output_buf_req) {
+			dprintk(VIDC_ERR,
+				"%s: No bufer req for buffer type %x\n",
+				__func__, dcvs->buffer_type);
+			return;
+		}
+		dcvs->max_threshold = output_buf_req->buffer_count_actual -
+			output_buf_req->buffer_count_min_host + 1;
+		dcvs->min_threshold =
+			msm_vidc_get_extra_buff_count(inst, dcvs->buffer_type);
 	} else {
 		return;
 	}
@@ -701,8 +752,12 @@
 			break;
 	}
 
-	dcvs->load = dcvs->load_high = rate;
-	dcvs->load_low = allowed_clks_tbl[i+1].clock_rate;
+	dcvs->load = dcvs->load_norm = rate;
+
+	dcvs->load_low = i < (core->resources.allowed_clks_tbl_size - 1) ?
+		allowed_clks_tbl[i+1].clock_rate : dcvs->load_norm;
+	dcvs->load_high = i > 0 ? allowed_clks_tbl[i-1].clock_rate :
+		dcvs->load_norm;
 
 	inst->clk_data.buffer_counter = 0;
 
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_clocks.h b/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
index e1226e4..705cb7c 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
+++ b/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
@@ -15,21 +15,11 @@
 #define _MSM_VIDC_CLOCKS_H_
 #include "msm_vidc_internal.h"
 
-/* Low threshold for encoder dcvs */
-#define DCVS_ENC_LOW_THR 4
-/* High threshold for encoder dcvs */
-#define DCVS_ENC_HIGH_THR 9
 /* extra o/p buffers in case of encoder dcvs */
 #define DCVS_ENC_EXTRA_OUTPUT_BUFFERS 2
+
 /* extra o/p buffers in case of decoder dcvs */
 #define DCVS_DEC_EXTRA_OUTPUT_BUFFERS 4
-/* Default threshold to reduce the core frequency */
-#define DCVS_NOMINAL_THRESHOLD 8
-/* Default threshold to increase the core frequency */
-#define DCVS_TURBO_THRESHOLD 4
-
-/* Considering one safeguard buffer */
-#define DCVS_BUFFER_SAFEGUARD (DCVS_DEC_EXTRA_OUTPUT_BUFFERS - 1)
 
 void msm_clock_data_reset(struct msm_vidc_inst *inst);
 int msm_vidc_update_operating_rate(struct msm_vidc_inst *inst);
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_internal.h b/drivers/media/platform/msm/vidc/msm_vidc_internal.h
index 22772ef..373dbba 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_internal.h
+++ b/drivers/media/platform/msm/vidc/msm_vidc_internal.h
@@ -243,6 +243,7 @@
 	int buffer_counter;
 	int load;
 	int load_low;
+	int load_norm;
 	int load_high;
 	int min_threshold;
 	int max_threshold;
diff --git a/drivers/media/platform/msm/vidc/vidc_hfi_api.h b/drivers/media/platform/msm/vidc/vidc_hfi_api.h
index 4b3b2fd..79ce858 100644
--- a/drivers/media/platform/msm/vidc/vidc_hfi_api.h
+++ b/drivers/media/platform/msm/vidc/vidc_hfi_api.h
@@ -1339,6 +1339,7 @@
 	int output_height, output_width;
 	int compression_ratio;
 	int complexity_factor;
+	bool use_dpb_read;
 	unsigned int lcu_size;
 	enum msm_vidc_power_mode power_mode;
 	enum hal_work_mode work_mode;