msm: vidc: Introduce Performance Monitoring System
Enhance the current DCVS algorithm to handle Venus
performance fluctuations by adjusting the clock and bus
bandwidth based on pending buffer counts.
CRs-Fixed: 2012520
Change-Id: I448201ec596b71b692ee5b993fc36716f420612d
Signed-off-by: Praneeth Paladugu <ppaladug@codeaurora.org>
diff --git a/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c b/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
index 0618768..9daf053 100644
--- a/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
+++ b/drivers/media/platform/msm/vidc/governors/msm_vidc_dyn_gov.c
@@ -387,13 +387,23 @@
integer_part = d->compression_ratio >> 16;
frac_part =
- ((d->compression_ratio - (integer_part * 65536)) * 100) >> 16;
+ ((d->compression_ratio - (integer_part << 16)) * 100) >> 16;
dpb_read_compression_factor = FP(integer_part, frac_part, 100);
+ integer_part = d->complexity_factor >> 16;
+ frac_part =
+ ((d->complexity_factor - (integer_part << 16)) * 100) >> 16;
+
+ motion_vector_complexity = FP(integer_part, frac_part, 100);
+
dpb_write_compression_factor = !dpb_compression_enabled ? FP_ONE :
__compression_ratio(__lut(width, height, fps), opb_bpp);
+ dpb_write_compression_factor = d->use_dpb_read ?
+ dpb_read_compression_factor :
+ dpb_write_compression_factor;
+
opb_compression_factor = !opb_compression_enabled ? FP_ONE :
__compression_ratio(__lut(width, height, fps), opb_bpp);
@@ -437,8 +447,6 @@
lcu_per_frame * fps / bps(1));
ddr.line_buffer_write = ddr.line_buffer_read;
- motion_vector_complexity = FP_INT(4);
-
bw_for_1x_8bpc = fp_div(FP_INT(width * height), FP_INT(32 * 8));
bw_for_1x_8bpc = fp_mult(bw_for_1x_8bpc,
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_clocks.c b/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
index 6867735..cb3c526 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
+++ b/drivers/media/platform/msm/vidc/msm_vidc_clocks.c
@@ -16,6 +16,9 @@
#include "msm_vidc_debug.h"
#include "msm_vidc_clocks.h"
+#define MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR 1
+#define MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR 4
+
static inline unsigned long int get_ubwc_compression_ratio(
struct ubwc_cr_stats_info_type ubwc_stats_info)
{
@@ -91,16 +94,30 @@
struct vidc_bus_vote_data *vote_data)
{
struct recon_buf *binfo;
- u32 CR = 0, CF = 0;
+ u32 CR = 0, min_cf = MSM_VIDC_MIN_UBWC_COMPLEXITY_FACTOR,
+ max_cf = MSM_VIDC_MAX_UBWC_COMPLEXITY_FACTOR;
mutex_lock(&inst->reconbufs.lock);
list_for_each_entry(binfo, &inst->reconbufs.list, list) {
CR = max(CR, binfo->CR);
- CF = max(CF, binfo->CF);
+ min_cf = min(min_cf, binfo->CF);
+ max_cf = max(max_cf, binfo->CF);
}
mutex_unlock(&inst->reconbufs.lock);
- vote_data->complexity_factor = CF;
vote_data->compression_ratio = CR;
+
+ vote_data->complexity_factor = max_cf;
+ vote_data->use_dpb_read = false;
+ if (inst->clk_data.load <= inst->clk_data.load_norm) {
+ vote_data->complexity_factor = min_cf;
+ vote_data->use_dpb_read = true;
+ }
+
+ dprintk(VIDC_DBG,
+ "Complression Ratio = %d Complexity Factor = %d\n",
+ vote_data->compression_ratio,
+ vote_data->complexity_factor);
+
return 0;
}
@@ -129,6 +146,32 @@
mutex_lock(&core->lock);
list_for_each_entry(inst, &core->instances, list) {
int codec = 0;
+ struct msm_vidc_buffer *temp, *next;
+ u32 filled_len = 0;
+ u32 device_addr = 0;
+
+ if (!inst) {
+ dprintk(VIDC_ERR, "%s Invalid args\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&inst->registeredbufs.lock);
+ list_for_each_entry_safe(temp, next,
+ &inst->registeredbufs.list, list) {
+ if (temp->vvb.vb2_buf.type ==
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
+ filled_len = max(filled_len,
+ temp->vvb.vb2_buf.planes[0].bytesused);
+ device_addr = temp->smem[0].device_addr;
+ }
+ }
+ mutex_unlock(&inst->registeredbufs.lock);
+
+ if (!filled_len || !device_addr) {
+ dprintk(VIDC_DBG, "%s No ETBs\n", __func__);
+ continue;
+ }
++vote_data_count;
@@ -253,17 +296,17 @@
/* Buffers outside FW are with display */
buffers_outside_fw = total_output_buf - fw_pending_bufs;
- dprintk(VIDC_DBG,
+ dprintk(VIDC_PROF,
"Counts : total_output_buf = %d fw_pending_bufs = %d buffers_outside_fw = %d\n",
total_output_buf, fw_pending_bufs, buffers_outside_fw);
- if (buffers_outside_fw >= dcvs->min_threshold &&
- dcvs->load > dcvs->load_low) {
+ if (buffers_outside_fw >= dcvs->min_threshold)
dcvs->load = dcvs->load_low;
- } else if (buffers_outside_fw < dcvs->min_threshold &&
- dcvs->load == dcvs->load_low) {
+ else if (buffers_outside_fw <= dcvs->max_threshold)
dcvs->load = dcvs->load_high;
- }
+ else
+ dcvs->load = dcvs->load_norm;
+
return rc;
}
@@ -291,8 +334,6 @@
mutex_unlock(&inst->freqs.lock);
}
-// TODO this needs to be removed later and use queued_list
-
void msm_vidc_clear_freq_entry(struct msm_vidc_inst *inst,
u32 device_addr)
{
@@ -322,9 +363,8 @@
/* If current requirement is within DCVS limits, try DCVS. */
- if (freq < inst->clk_data.load_high) {
+ if (freq < inst->clk_data.load_norm) {
dprintk(VIDC_DBG, "Calling DCVS now\n");
- // TODO calling DCVS here may reduce the residency. Re-visit.
msm_dcvs_scale_clocks(inst);
freq = inst->clk_data.load;
}
@@ -346,6 +386,18 @@
mutex_unlock(&inst->freqs.lock);
}
+static unsigned long msm_vidc_max_freq(struct msm_vidc_core *core)
+{
+ struct allowed_clock_rates_table *allowed_clks_tbl = NULL;
+ unsigned long freq = 0;
+
+ allowed_clks_tbl = core->resources.allowed_clks_tbl;
+ freq = allowed_clks_tbl[0].clock_rate;
+ dprintk(VIDC_PROF, "Max rate = %lu", freq);
+
+ return freq;
+}
+
static unsigned long msm_vidc_calc_freq(struct msm_vidc_inst *inst,
u32 filled_len)
{
@@ -377,17 +429,17 @@
vsp_cycles = mbs_per_second * inst->clk_data.entry->vsp_cycles;
/* 10 / 7 is overhead factor */
- vsp_cycles += (inst->prop.fps * filled_len * 8 * 10) / 7;
+ vsp_cycles += ((inst->prop.fps * filled_len * 8) / 7) * 10;
} else {
- // TODO return Min or Max ?
dprintk(VIDC_ERR, "Unknown session type = %s\n", __func__);
- return freq;
+ return msm_vidc_max_freq(inst->core);
}
freq = max(vpp_cycles, vsp_cycles);
- dprintk(VIDC_PROF, "%s Inst %pK : Freq = %lu\n", __func__, inst, freq);
+ dprintk(VIDC_PROF, "%s Inst %pK : Filled Len = %d Freq = %lu\n",
+ __func__, inst, filled_len, freq);
return freq;
}
@@ -429,18 +481,6 @@
return rc;
}
-static unsigned long msm_vidc_max_freq(struct msm_vidc_core *core)
-{
- struct allowed_clock_rates_table *allowed_clks_tbl = NULL;
- unsigned long freq = 0;
-
- allowed_clks_tbl = core->resources.allowed_clks_tbl;
- freq = allowed_clks_tbl[0].clock_rate;
- dprintk(VIDC_PROF, "Max rate = %lu", freq);
-
- return freq;
-}
-
int msm_vidc_update_operating_rate(struct msm_vidc_inst *inst)
{
struct v4l2_ctrl *ctrl = NULL;
@@ -531,8 +571,7 @@
mutex_lock(&inst->registeredbufs.lock);
list_for_each_entry_safe(temp, next, &inst->registeredbufs.list, list) {
if (temp->vvb.vb2_buf.type ==
- V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE &&
- temp->deferred) {
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
filled_len = max(filled_len,
temp->vvb.vb2_buf.planes[0].bytesused);
device_addr = temp->smem[0].device_addr;
@@ -541,8 +580,8 @@
mutex_unlock(&inst->registeredbufs.lock);
if (!filled_len || !device_addr) {
- dprintk(VIDC_PROF, "No Change in frequency\n");
- goto decision_done;
+ dprintk(VIDC_DBG, "%s No ETBs\n", __func__);
+ goto no_clock_change;
}
freq = msm_vidc_calc_freq(inst, filled_len);
@@ -559,8 +598,9 @@
else
inst->clk_data.curr_freq = freq;
-decision_done:
msm_vidc_set_clocks(inst->core);
+
+no_clock_change:
return 0;
}
@@ -607,7 +647,6 @@
}
inst->clk_data.dcvs_mode = true;
- // TODO : Update with proper number based on on-target tuning.
inst->clk_data.extra_capture_buffer_count =
DCVS_DEC_EXTRA_OUTPUT_BUFFERS;
inst->clk_data.extra_output_buffer_count =
@@ -645,12 +684,13 @@
static inline void msm_dcvs_print_dcvs_stats(struct clock_data *dcvs)
{
- dprintk(VIDC_DBG,
- "DCVS: Load_Low %d, Load High %d\n",
+ dprintk(VIDC_PROF,
+ "DCVS: Load_Low %d, Load Norm %d, Load High %d\n",
dcvs->load_low,
+ dcvs->load_norm,
dcvs->load_high);
- dprintk(VIDC_DBG,
+ dprintk(VIDC_PROF,
"DCVS: min_threshold %d, max_threshold %d\n",
dcvs->min_threshold, dcvs->max_threshold);
}
@@ -663,6 +703,7 @@
u64 total_freq = 0, rate = 0, load;
int cycles;
struct clock_data *dcvs;
+ struct hal_buffer_requirements *output_buf_req;
dprintk(VIDC_DBG, "Init DCVS Load\n");
@@ -683,12 +724,22 @@
cycles;
dcvs->buffer_type = HAL_BUFFER_INPUT;
- // TODO : Update with proper no based on Buffer counts change.
- dcvs->min_threshold = 7;
+ dcvs->min_threshold =
+ msm_vidc_get_extra_buff_count(inst, HAL_BUFFER_INPUT);
} else if (inst->session_type == MSM_VIDC_DECODER) {
dcvs->buffer_type = msm_comm_get_hal_output_buffer(inst);
- // TODO : Update with proper no based on Buffer counts change.
- dcvs->min_threshold = 4;
+ output_buf_req = get_buff_req_buffer(inst,
+ dcvs->buffer_type);
+ if (!output_buf_req) {
+ dprintk(VIDC_ERR,
+ "%s: No bufer req for buffer type %x\n",
+ __func__, dcvs->buffer_type);
+ return;
+ }
+ dcvs->max_threshold = output_buf_req->buffer_count_actual -
+ output_buf_req->buffer_count_min_host + 1;
+ dcvs->min_threshold =
+ msm_vidc_get_extra_buff_count(inst, dcvs->buffer_type);
} else {
return;
}
@@ -701,8 +752,12 @@
break;
}
- dcvs->load = dcvs->load_high = rate;
- dcvs->load_low = allowed_clks_tbl[i+1].clock_rate;
+ dcvs->load = dcvs->load_norm = rate;
+
+ dcvs->load_low = i < (core->resources.allowed_clks_tbl_size - 1) ?
+ allowed_clks_tbl[i+1].clock_rate : dcvs->load_norm;
+ dcvs->load_high = i > 0 ? allowed_clks_tbl[i-1].clock_rate :
+ dcvs->load_norm;
inst->clk_data.buffer_counter = 0;
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_clocks.h b/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
index e1226e4..705cb7c 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
+++ b/drivers/media/platform/msm/vidc/msm_vidc_clocks.h
@@ -15,21 +15,11 @@
#define _MSM_VIDC_CLOCKS_H_
#include "msm_vidc_internal.h"
-/* Low threshold for encoder dcvs */
-#define DCVS_ENC_LOW_THR 4
-/* High threshold for encoder dcvs */
-#define DCVS_ENC_HIGH_THR 9
/* extra o/p buffers in case of encoder dcvs */
#define DCVS_ENC_EXTRA_OUTPUT_BUFFERS 2
+
/* extra o/p buffers in case of decoder dcvs */
#define DCVS_DEC_EXTRA_OUTPUT_BUFFERS 4
-/* Default threshold to reduce the core frequency */
-#define DCVS_NOMINAL_THRESHOLD 8
-/* Default threshold to increase the core frequency */
-#define DCVS_TURBO_THRESHOLD 4
-
-/* Considering one safeguard buffer */
-#define DCVS_BUFFER_SAFEGUARD (DCVS_DEC_EXTRA_OUTPUT_BUFFERS - 1)
void msm_clock_data_reset(struct msm_vidc_inst *inst);
int msm_vidc_update_operating_rate(struct msm_vidc_inst *inst);
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_internal.h b/drivers/media/platform/msm/vidc/msm_vidc_internal.h
index 22772ef..373dbba 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_internal.h
+++ b/drivers/media/platform/msm/vidc/msm_vidc_internal.h
@@ -243,6 +243,7 @@
int buffer_counter;
int load;
int load_low;
+ int load_norm;
int load_high;
int min_threshold;
int max_threshold;
diff --git a/drivers/media/platform/msm/vidc/vidc_hfi_api.h b/drivers/media/platform/msm/vidc/vidc_hfi_api.h
index 4b3b2fd..79ce858 100644
--- a/drivers/media/platform/msm/vidc/vidc_hfi_api.h
+++ b/drivers/media/platform/msm/vidc/vidc_hfi_api.h
@@ -1339,6 +1339,7 @@
int output_height, output_width;
int compression_ratio;
int complexity_factor;
+ bool use_dpb_read;
unsigned int lcu_size;
enum msm_vidc_power_mode power_mode;
enum hal_work_mode work_mode;