blob: cdcfa96e7a4bdcfe53a5862bede2f4728a0524f8 [file] [log] [blame]
/* Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/module.h>
#include "governor.h"
#include "fixedpoint.h"
#include "msm_vidc_internal.h"
#include "msm_vidc_debug.h"
#include "vidc_hfi_api.h"
/* Capacity of the per-entry compression_ratio[] table inside struct lut */
#define COMPRESSION_RATIO_MAX 5
/*
 * When set, the decoder/encoder calculators dump their inputs and
 * intermediate values through __dump().
 */
static bool debug;
module_param(debug, bool, 0644);
/*
 * Selects which of the two computed totals a governor instance reports:
 * the DDR total or the LLC total.
 */
enum governor_mode {
GOVERNOR_DDR,
GOVERNOR_LLCC,
};
/*
 * Pairs a devfreq governor with the bandwidth mode it votes for.
 * __get_target_freq() recovers this wrapper from the embedded devfreq_gov
 * member with container_of().
 */
struct governor {
enum governor_mode mode;
struct devfreq_governor devfreq_gov;
};
/*
 * Minimum dimensions that the governor is willing to calculate
 * bandwidth for. Any smaller frame is raised to these dimensions before
 * the calculation, which means that bandwidth(0, 0) ==
 * bandwidth(BASELINE_DIMENSIONS.width, BASELINE_DIMENSIONS.height)
 */
const struct {
int height, width;
} BASELINE_DIMENSIONS = {
.width = 1280,
.height = 720,
};
/*
 * These are hardcoded AB values that the governor votes for in certain
 * situations, where a certain bus frequency is desired. It isn't exactly
 * scalable since different platforms have different bus widths, but we'll
 * deal with that in the future.
 */
const unsigned long NOMINAL_BW_MBPS = 6000 /* ideally 320 MHz */,
SVS_BW_MBPS = 2000 /* ideally 100 MHz */;
/* converts Mbps to bps (the "b" part can be bits or bytes based on context) */
#define kbps(__mbps) ((__mbps) * 1000)
#define bps(__mbps) (kbps(__mbps) * 1000)
#define GENERATE_COMPRESSION_PROFILE(__bpp, __worst) { \
.bpp = __bpp, \
.ratio = __worst, \
}
/*
 * Lookup table of per-resolution bitrates and compression ratios.
 *
 * It is a structural representation of the following table; the 30 fps
 * rows carry the "High" bitrate and the "Worst" 8bpc/10bpc ratios from it:
 * Resolution | Bitrate | Compression Ratio |
 * ............|............|.........................................|
 * Width Height|Average High|Avg_8bpc Worst_8bpc Avg_10bpc Worst_10bpc|
 * 1280 720| 7 14| 1.69 1.28 1.49 1.23|
 * 1920 1080| 20 40| 1.69 1.28 1.49 1.23|
 * 2560 1440| 32 64| 2.2 1.26 1.97 1.22|
 * 3840 2160| 42 84| 2.2 1.26 1.97 1.22|
 * 4096 2160| 44 88| 2.2 1.26 1.97 1.22|
 * 4096 2304| 48 96| 2.2 1.26 1.97 1.22|
 *
 * Every resolution additionally has a 60 fps row with its own bitrate.
 * Rows are ordered by frame size, then frame rate; __lut() returns the
 * first row that is large enough in both.
 */

/*
 * One LUT row. __frac8 and __frac10 are the hundredths above 1.0 of the
 * 8 bpp and 10 bpp ratios, i.e. the ratio stored is FP(1, __frac, 100).
 */
#define LUT_ROW(__w, __h, __fps, __mbps, __frac8, __frac10) { \
	.frame_size = (__w) * (__h), \
	.frame_rate = (__fps), \
	.bitrate = (__mbps), \
	.compression_ratio = { \
		GENERATE_COMPRESSION_PROFILE(8, FP(1, __frac8, 100)), \
		GENERATE_COMPRESSION_PROFILE(10, FP(1, __frac10, 100)), \
	} \
}

static const struct lut {
	int frame_size; /* width x height */
	int frame_rate;
	unsigned long bitrate;
	struct {
		int bpp;
		fp_t ratio;
	} compression_ratio[COMPRESSION_RATIO_MAX];
} LUT[] = {
	/*      width height fps bitrate 8bpp 10bpp */
	LUT_ROW(1280,  720, 30,  14, 28, 23),
	LUT_ROW(1280,  720, 60,  22, 28, 23),
	LUT_ROW(1920, 1088, 30,  40, 28, 23),
	LUT_ROW(1920, 1088, 60,  64, 28, 23),
	LUT_ROW(2560, 1440, 30,  64, 26, 22),
	LUT_ROW(2560, 1440, 60, 102, 26, 22),
	LUT_ROW(3840, 2160, 30,  84, 26, 22),
	LUT_ROW(3840, 2160, 60, 134, 26, 22),
	LUT_ROW(4096, 2160, 30,  88, 26, 22),
	LUT_ROW(4096, 2160, 60, 141, 26, 22),
	LUT_ROW(4096, 2304, 30,  96, 26, 22),
	LUT_ROW(4096, 2304, 60, 154, 26, 22),
};
static struct lut const *__lut(int width, int height, int fps)
{
int frame_size = height * width, c = 0;
do {
if (LUT[c].frame_size >= frame_size && LUT[c].frame_rate >= fps)
return &LUT[c];
} while (++c < ARRAY_SIZE(LUT));
return &LUT[ARRAY_SIZE(LUT) - 1];
}
/*
 * Looks up the compression ratio recorded in a LUT row for the given bit
 * depth. Warns and returns FP_ZERO when the row has no slot for bpp.
 */
static fp_t __compression_ratio(struct lut const *entry, int bpp)
{
	int slot = 0;

	while (slot < COMPRESSION_RATIO_MAX) {
		if (entry->compression_ratio[slot].bpp == bpp)
			return entry->compression_ratio[slot].ratio;
		++slot;
	}

	WARN(true, "Shouldn't be here, LUT possibly corrupted?\n");
	return FP_ZERO; /* impossible */
}
/* Sentinel stored in dump.val to mark an entry as a section header */
#define DUMP_HEADER_MAGIC 0xdeadbeef
#define DUMP_FP_FMT "%FP" /* special format for fp_t */
/* One line of debug output consumed by __dump() */
struct dump {
/* label; printed on its own when val is DUMP_HEADER_MAGIC */
char *key;
/* printf-style conversion applied to val, or DUMP_FP_FMT for fp_t values */
char *format;
size_t val;
};
static void __dump(struct dump dump[], int len)
{
int c = 0;
for (c = 0; c < len; ++c) {
char format_line[128] = "", formatted_line[128] = "";
if (dump[c].val == DUMP_HEADER_MAGIC) {
snprintf(formatted_line, sizeof(formatted_line), "%s\n",
dump[c].key);
} else {
bool fp_format = !strcmp(dump[c].format, DUMP_FP_FMT);
if (!fp_format) {
snprintf(format_line, sizeof(format_line),
" %-35s: %s\n", dump[c].key,
dump[c].format);
snprintf(formatted_line, sizeof(formatted_line),
format_line, dump[c].val);
} else {
size_t integer_part, fractional_part;
integer_part = fp_int(dump[c].val);
fractional_part = fp_frac(dump[c].val);
snprintf(formatted_line, sizeof(formatted_line),
" %-35s: %zd + %zd/%zd\n",
dump[c].key, integer_part,
fractional_part,
fp_frac_base());
}
}
dprintk(VIDC_DBG, "%s", formatted_line);
}
}
/* VPE sessions contribute no bandwidth: the vote is always zero. */
static unsigned long __calculate_vpe(struct vidc_bus_vote_data *d,
		enum governor_mode gm)
{
	const unsigned long vpe_vote = 0;

	return vpe_vote;
}
/* True for the two UBWC colour formats, false for every other format. */
static bool __ubwc(enum hal_uncompressed_format f)
{
	return f == HAL_COLOR_FORMAT_NV12_UBWC ||
		f == HAL_COLOR_FORMAT_NV12_TP10_UBWC;
}
/*
 * Maps a colour format to its bit depth: 8 or 10. Formats that are not
 * handled are logged and reported as INT_MAX.
 */
static int __bpp(enum hal_uncompressed_format f)
{
	if (f == HAL_COLOR_FORMAT_NV12 ||
			f == HAL_COLOR_FORMAT_NV21 ||
			f == HAL_COLOR_FORMAT_NV12_UBWC)
		return 8;

	if (f == HAL_COLOR_FORMAT_NV12_TP10_UBWC ||
			f == HAL_COLOR_FORMAT_P010)
		return 10;

	dprintk(VIDC_ERR,
		"What's this? We don't support this colorformat (%x)",
		f);
	return INT_MAX;
}
/*
 * Computes the bandwidth vote for one decoder session described by d.
 *
 * A DDR total and an LLC total are both accumulated in fixed point (fp_t).
 * gm selects which one is returned, as kbps(fp_round(total)). An
 * unrecognised gm logs an error and returns 0. When the "debug" module
 * parameter is set, the inputs and intermediate values are printed
 * through __dump().
 */
static unsigned long __calculate_decoder(struct vidc_bus_vote_data *d,
enum governor_mode gm) {
/*
* XXX: Don't fool around with any of the hardcoded numbers unless you
* know /exactly/ what you're doing. Many of these numbers are
* measured heuristics and hardcoded numbers taken from the firmware.
*/
/* Decoder parameters */
int width, height, lcu_size, dpb_bpp, opb_bpp, fps, opb_factor;
bool unified_dpb_opb, dpb_compression_enabled, opb_compression_enabled,
llc_ref_read_l2_cache_enabled = false,
llc_vpss_ds_line_buf_enabled = false;
fp_t dpb_opb_scaling_ratio, dpb_read_compression_factor,
dpb_write_compression_factor, opb_compression_factor,
qsmmu_bw_overhead_factor, height_ratio;
/* Derived parameters */
int lcu_per_frame, tnbr_per_lcu, colocated_bytes_per_lcu;
unsigned long bitrate;
fp_t bins_to_bit_factor, dpb_write_factor, ten_bpc_packing_factor,
ten_bpc_bpp_factor, vsp_read_factor, vsp_write_factor,
bw_for_1x_8bpc, dpb_bw_for_1x,
motion_vector_complexity = 0, row_cache_penalty = 0, opb_bw = 0,
dpb_total = 0;
/* Output parameters */
struct {
fp_t vsp_read, vsp_write, collocated_read, collocated_write,
line_buffer_read, line_buffer_write, recon_read,
recon_write, opb_read, opb_write, dpb_read, dpb_write,
total;
} ddr = {0};
struct {
fp_t dpb_read, opb_read, total;
} llc = {0};
unsigned long ret = 0;
unsigned int integer_part, frac_part;
/* Frames smaller than the baseline are costed as baseline-sized */
width = max(d->input_width, BASELINE_DIMENSIONS.width);
height = max(d->input_height, BASELINE_DIMENSIONS.height);
lcu_size = d->lcu_size;
/*
 * color_formats[0] describes the DPB and color_formats[1] the OPB.
 * INT_MAX stands in when no format was supplied; with a single format
 * the OPB inherits the DPB bit depth.
 */
dpb_bpp = d->num_formats >= 1 ? __bpp(d->color_formats[0]) : INT_MAX;
opb_bpp = d->num_formats >= 2 ? __bpp(d->color_formats[1]) : dpb_bpp;
fps = d->fps;
unified_dpb_opb = d->num_formats == 1;
dpb_opb_scaling_ratio = fp_div(FP_INT(d->input_width * d->input_height),
FP_INT(d->output_width * d->output_height));
/*
 * NOTE(review): the operands here are raw integers, unlike the
 * FP_INT()-wrapped operands of the division above - confirm that
 * fp_div() yields the intended ratio for both forms.
 */
height_ratio = fp_div(d->input_height, d->output_height);
dpb_compression_enabled = d->num_formats >= 1 &&
__ubwc(d->color_formats[0]);
opb_compression_enabled = d->num_formats >= 2 &&
__ubwc(d->color_formats[1]);
/*
* Convert Q16 number into Integer and Fractional part up to 2 places.
* Ex : 105752 / 65536 = 1.61; 1.61 in Q16 = 105752;
* Integer part = 105752 / 65536 = 1;
* Remainder = 105752 - 1 * 65536 = 40216;
* Fractional part = 40216 * 100 / 65536 = 61;
* Now convert to FP(1, 61, 100) for below code.
*/
integer_part = d->compression_ratio >> 16;
frac_part =
((d->compression_ratio - (integer_part << 16)) * 100) >> 16;
dpb_read_compression_factor = FP(integer_part, frac_part, 100);
/* Same Q16 conversion, applied to the complexity factor */
integer_part = d->complexity_factor >> 16;
frac_part =
((d->complexity_factor - (integer_part << 16)) * 100) >> 16;
motion_vector_complexity = FP(integer_part, frac_part, 100);
/*
 * NOTE(review): the DPB write factor is looked up with opb_bpp, not
 * dpb_bpp - confirm this is intentional before changing it.
 */
dpb_write_compression_factor = !dpb_compression_enabled ? FP_ONE :
__compression_ratio(__lut(width, height, fps), opb_bpp);
/* use_dpb_read replaces the LUT-based value with the read-side factor */
dpb_write_compression_factor = d->use_dpb_read ?
dpb_read_compression_factor :
dpb_write_compression_factor;
opb_compression_factor = !opb_compression_enabled ? FP_ONE :
__compression_ratio(__lut(width, height, fps), opb_bpp);
/* Both flags already start out false at their declaration above */
llc_ref_read_l2_cache_enabled = llc_vpss_ds_line_buf_enabled = false;
if (d->use_sys_cache) {
llc_ref_read_l2_cache_enabled = true;
llc_vpss_ds_line_buf_enabled = true;
}
/* Derived parameters setup */
lcu_per_frame = DIV_ROUND_UP(width, lcu_size) *
DIV_ROUND_UP(height, lcu_size);
bitrate = __lut(width, height, fps)->bitrate;
bins_to_bit_factor = d->work_mode == VIDC_WORK_MODE_1 ?
FP_INT(0) : FP_INT(4);
vsp_read_factor = bins_to_bit_factor + FP_INT(2);
dpb_write_factor = FP(1, 5, 100);
ten_bpc_packing_factor = FP(1, 67, 1000);
ten_bpc_bpp_factor = FP(1, 1, 4);
vsp_write_factor = bins_to_bit_factor;
/* Per-LCU constants keyed on LCU size; the last value covers all other sizes */
tnbr_per_lcu = lcu_size == 16 ? 128 :
lcu_size == 32 ? 64 : 128;
colocated_bytes_per_lcu = lcu_size == 16 ? 16 :
lcu_size == 32 ? 64 : 256;
/* ........................................ for DDR */
ddr.vsp_read = fp_div(fp_mult(FP_INT(bitrate),
vsp_read_factor), FP_INT(8));
ddr.vsp_write = fp_div(fp_mult(FP_INT(bitrate),
vsp_write_factor), FP_INT(8));
/*
 * The division by bps(1) below is plain integer division carried out
 * before the FP_INT() conversion, so any remainder is dropped.
 */
ddr.collocated_read = FP_INT(lcu_per_frame *
colocated_bytes_per_lcu * fps / bps(1));
ddr.collocated_write = FP_INT(lcu_per_frame *
colocated_bytes_per_lcu * fps / bps(1));
/*
 * NOTE(review): the line buffer terms are computed and dumped, but they
 * are not part of the ddr.total sum further down - confirm.
 */
ddr.line_buffer_read = FP_INT(tnbr_per_lcu *
lcu_per_frame * fps / bps(1));
ddr.line_buffer_write = ddr.line_buffer_read;
bw_for_1x_8bpc = fp_div(FP_INT(width * height), FP_INT(32 * 8));
bw_for_1x_8bpc = fp_mult(bw_for_1x_8bpc,
fp_div(FP_INT(((int)(256 * fps))), FP_INT(1000 * 1000)));
/* Any DPB depth other than 8 gets the 10bpc packing and bpp factors */
dpb_bw_for_1x = dpb_bpp == 8 ? bw_for_1x_8bpc :
fp_mult(bw_for_1x_8bpc, fp_mult(ten_bpc_packing_factor,
ten_bpc_bpp_factor));
ddr.dpb_read = fp_div(fp_mult(fp_mult(dpb_bw_for_1x,
motion_vector_complexity), dpb_write_factor),
dpb_read_compression_factor);
ddr.dpb_write = fp_div(fp_mult(dpb_bw_for_1x, dpb_write_factor),
dpb_write_compression_factor);
dpb_total = ddr.dpb_read + ddr.dpb_write;
/*
 * With the cache enabled the DDR read is divided by the row cache
 * penalty, and LLC is charged dpb_total (read + write as computed
 * above) minus that reduced DDR read.
 */
if (llc_ref_read_l2_cache_enabled) {
row_cache_penalty = FP(1, 30, 100);
ddr.dpb_read = fp_div(ddr.dpb_read, row_cache_penalty);
llc.dpb_read = dpb_total - ddr.dpb_read;
}
opb_factor = dpb_bpp == 8 ? 8 : 4;
/* A unified DPB/OPB has no separate OPB traffic */
ddr.opb_read = unified_dpb_opb ? 0 : opb_compression_enabled ?
fp_div(fp_mult(fp_div(dpb_bw_for_1x, dpb_opb_scaling_ratio),
FP_INT(opb_factor)), height_ratio) : 0;
ddr.opb_write = unified_dpb_opb ? 0 : opb_compression_enabled ?
ddr.dpb_read : fp_div(fp_div(fp_mult(dpb_bw_for_1x,
FP(1, 50, 100)), dpb_opb_scaling_ratio),
opb_compression_factor);
/* Move the OPB read from DDR to LLC and take it off the DDR OPB write */
if (llc_vpss_ds_line_buf_enabled) {
llc.opb_read = ddr.opb_read;
ddr.opb_write -= ddr.opb_read;
ddr.opb_read = 0;
}
ddr.total = ddr.vsp_read + ddr.vsp_write +
ddr.collocated_read + ddr.collocated_write +
ddr.opb_read + ddr.opb_write +
ddr.dpb_read + ddr.dpb_write;
qsmmu_bw_overhead_factor = FP(1, 3, 100);
ddr.total = fp_mult(ddr.total, qsmmu_bw_overhead_factor);
llc.total = llc.dpb_read + llc.opb_read;
/* Dump all the variables for easier debugging */
/*
 * opb_bw, ddr.recon_read and ddr.recon_write are never assigned after
 * their zero initialisation, so they always dump as zero.
 */
if (debug) {
struct dump dump[] = {
{"DECODER PARAMETERS", "", DUMP_HEADER_MAGIC},
{"LCU size", "%d", lcu_size},
{"DPB bitdepth", "%d", dpb_bpp},
{"frame rate", "%d", fps},
{"DPB/OPB unified", "%d", unified_dpb_opb},
{"DPB/OPB downscaling ratio", DUMP_FP_FMT,
dpb_opb_scaling_ratio},
{"DPB compression", "%d", dpb_compression_enabled},
{"OPB compression", "%d", opb_compression_enabled},
{"DPB Read compression factor", DUMP_FP_FMT,
dpb_read_compression_factor},
{"DPB Write compression factor", DUMP_FP_FMT,
dpb_write_compression_factor},
{"OPB compression factor", DUMP_FP_FMT,
opb_compression_factor},
{"frame width", "%d", width},
{"frame height", "%d", height},
{"DERIVED PARAMETERS (1)", "", DUMP_HEADER_MAGIC},
{"LCUs/frame", "%d", lcu_per_frame},
{"bitrate (Mbit/sec)", "%d", bitrate},
{"bins to bit factor", DUMP_FP_FMT, bins_to_bit_factor},
{"DPB write factor", DUMP_FP_FMT, dpb_write_factor},
{"10bpc packing factor", DUMP_FP_FMT,
ten_bpc_packing_factor},
{"10bpc,BPP factor", DUMP_FP_FMT, ten_bpc_bpp_factor},
{"VSP read factor", DUMP_FP_FMT, vsp_read_factor},
{"VSP write factor", DUMP_FP_FMT, vsp_write_factor},
{"TNBR/LCU", "%d", tnbr_per_lcu},
{"colocated bytes/LCU", "%d", colocated_bytes_per_lcu},
{"B/W for 1x (NV12 8bpc)", DUMP_FP_FMT, bw_for_1x_8bpc},
{"DPB B/W For 1x (NV12)", DUMP_FP_FMT, dpb_bw_for_1x},
{"DERIVED PARAMETERS (2)", "", DUMP_HEADER_MAGIC},
{"MV complexity", DUMP_FP_FMT, motion_vector_complexity},
{"row cache penalty", DUMP_FP_FMT, row_cache_penalty},
{"qsmmu_bw_overhead_factor", DUMP_FP_FMT,
qsmmu_bw_overhead_factor},
{"OPB B/W (single instance)", DUMP_FP_FMT, opb_bw},
{"INTERMEDIATE DDR B/W", "", DUMP_HEADER_MAGIC},
{"VSP read", DUMP_FP_FMT, ddr.vsp_read},
{"VSP write", DUMP_FP_FMT, ddr.vsp_write},
{"collocated read", DUMP_FP_FMT, ddr.collocated_read},
{"collocated write", DUMP_FP_FMT, ddr.collocated_write},
{"line buffer read", DUMP_FP_FMT, ddr.line_buffer_read},
{"line buffer write", DUMP_FP_FMT, ddr.line_buffer_write},
{"recon read", DUMP_FP_FMT, ddr.recon_read},
{"recon write", DUMP_FP_FMT, ddr.recon_write},
{"OPB read", DUMP_FP_FMT, ddr.opb_read},
{"OPB write", DUMP_FP_FMT, ddr.opb_write},
{"DPB read", DUMP_FP_FMT, ddr.dpb_read},
{"DPB write", DUMP_FP_FMT, ddr.dpb_write},
{"LLC DPB read", DUMP_FP_FMT, llc.dpb_read},
{"LLC OPB read", DUMP_FP_FMT, llc.opb_read},
};
__dump(dump, ARRAY_SIZE(dump));
}
/* Report the total that matches the calling governor's mode */
switch (gm) {
case GOVERNOR_DDR:
ret = kbps(fp_round(ddr.total));
break;
case GOVERNOR_LLCC:
ret = kbps(fp_round(llc.total));
break;
default:
dprintk(VIDC_ERR, "%s - Unknown governor\n", __func__);
}
return ret;
}
/*
 * Computes the bandwidth vote for one encoder session described by d.
 *
 * A DDR total and an LLC total are both accumulated in fixed point (fp_t).
 * gm selects which one is returned, as kbps(fp_round(total)). An
 * unrecognised gm logs an error and returns 0. When the "debug" module
 * parameter is set, the inputs and intermediate values are printed
 * through __dump().
 */
static unsigned long __calculate_encoder(struct vidc_bus_vote_data *d,
enum governor_mode gm)
{
/*
* XXX: Don't fool around with any of the hardcoded numbers unless you
* know /exactly/ what you're doing. Many of these numbers are
* measured heuristics and hardcoded numbers taken from the firmware.
*/
/* Encoder Parameters */
/*
 * NOTE(review): ref_overlap_bw_factor is declared int but is assigned
 * FP_INT() values below, and bitrate is an int that the debug dump
 * prints with "%lu" - confirm both are intended.
 */
int width, height, fps, dpb_bpp, lcu_per_frame, lcu_size,
vertical_tile_width, colocated_bytes_per_lcu, bitrate,
ref_overlap_bw_factor;
enum hal_uncompressed_format dpb_color_format, original_color_format;
bool dpb_compression_enabled, original_compression_enabled,
work_mode_1, low_power, rotation, cropping_or_scaling,
b_frames_enabled = false,
llc_dual_core_ref_read_buf_enabled = false,
llc_top_line_buf_enabled = false,
llc_ref_chroma_cache_enabled = false;
fp_t dpb_compression_factor, original_compression_factor,
input_compression_factor, qsmmu_bw_overhead_factor,
ref_y_bw_factor, ref_cb_cr_bw_factor, ten_bpc_bpp_factor,
bw_for_1x_8bpc, dpb_bw_for_1x, ref_cb_cr_read,
bins_to_bit_factor, ref_y_read, ten_bpc_packing_factor,
dpb_write_factor, ref_overlap_bw, llc_ref_y_read,
llc_ref_cb_cr_read;
fp_t integer_part, frac_part;
unsigned long ret = 0;
/* Output parameters */
struct {
fp_t vsp_read, vsp_write, collocated_read, collocated_write,
line_buffer_read, line_buffer_write, original_read,
original_write, dpb_read, dpb_write, total;
} ddr = {0};
struct {
fp_t dpb_read, line_buffer, total;
} llc = {0};
/* Encoder Parameters setup */
ten_bpc_packing_factor = FP(1, 67, 1000);
ten_bpc_bpp_factor = FP(1, 1, 4);
/* rotation and cropping_or_scaling are fixed here and only feed the dump */
rotation = false;
cropping_or_scaling = false;
vertical_tile_width = 960;
ref_y_bw_factor = FP(1, 30, 100);
ref_cb_cr_bw_factor = FP(1, 50, 100);
dpb_write_factor = FP(1, 8, 100);
/* Derived Parameters */
lcu_size = d->lcu_size;
fps = d->fps;
b_frames_enabled = d->b_frames_enabled;
/* Frames smaller than the baseline are costed as baseline-sized */
width = max(d->input_width, BASELINE_DIMENSIONS.width);
height = max(d->input_height, BASELINE_DIMENSIONS.height);
bitrate = __lut(width, height, fps)->bitrate;
lcu_per_frame = DIV_ROUND_UP(width, lcu_size) *
DIV_ROUND_UP(height, lcu_size);
/* The DPB format is fixed; only the original (input) format comes from d */
dpb_color_format = HAL_COLOR_FORMAT_NV12_UBWC;
original_color_format = d->num_formats >= 1 ?
d->color_formats[0] : HAL_UNUSED_COLOR;
/* INT_MAX stands in when no format was supplied */
dpb_bpp = d->num_formats >= 1 ? __bpp(d->color_formats[0]) : INT_MAX;
dpb_compression_enabled = __ubwc(dpb_color_format);
original_compression_enabled = __ubwc(original_color_format);
work_mode_1 = d->work_mode == VIDC_WORK_MODE_1;
/* low_power is only reported in the debug dump */
low_power = d->power_mode == VIDC_POWER_LOW;
bins_to_bit_factor = work_mode_1 ?
FP_INT(0) : FP_INT(4);
/*
 * NOTE(review): llc_top_line_buf_enabled is not set here (or anywhere
 * else in this function), so it keeps its initial value of false.
 */
if (d->use_sys_cache) {
llc_dual_core_ref_read_buf_enabled = true;
llc_ref_chroma_cache_enabled = true;
}
/*
* Convert Q16 number into Integer and Fractional part up to 2 places.
* Ex : 105752 / 65536 = 1.61; 1.61 in Q16 = 105752;
* Integer part = 105752 / 65536 = 1;
* Remainder = 105752 - 1 * 65536 = 40216;
* Fractional part = 40216 * 100 / 65536 = 61;
* Now convert to FP(1, 61, 100) for below code.
*/
integer_part = d->compression_ratio >> 16;
frac_part =
((d->compression_ratio - (integer_part * 65536)) * 100) >> 16;
dpb_compression_factor = FP(integer_part, frac_part, 100);
/* Same Q16 conversion, applied to the input compression ratio */
integer_part = d->input_cr >> 16;
frac_part =
((d->input_cr - (integer_part * 65536)) * 100) >> 16;
input_compression_factor = FP(integer_part, frac_part, 100);
/*
 * NOTE(review): original_compression_factor is only reported in the
 * debug dump; the bandwidth terms below divide by
 * input_compression_factor instead - confirm.
 */
original_compression_factor =
original_compression_enabled ? d->use_dpb_read ?
dpb_compression_factor : input_compression_factor :
FP_ONE;
ddr.vsp_read = fp_mult(fp_div(FP_INT(bitrate), FP_INT(8)),
bins_to_bit_factor);
ddr.vsp_write = ddr.vsp_read + fp_div(FP_INT(bitrate), FP_INT(8));
/* Per-LCU constant keyed on LCU size; 256 covers all other sizes */
colocated_bytes_per_lcu = lcu_size == 16 ? 16 :
lcu_size == 32 ? 64 : 256;
/*
 * The division by bps(1) below is plain integer division carried out
 * before the FP_INT() conversion, so any remainder is dropped.
 */
ddr.collocated_read = FP_INT(lcu_per_frame *
colocated_bytes_per_lcu * fps / bps(1));
ddr.collocated_write = ddr.collocated_read;
ddr.line_buffer_read = FP_INT(16 * lcu_per_frame * fps / bps(1));
ddr.line_buffer_write = ddr.line_buffer_read;
/*
 * llc.line_buffer ends up as whatever line buffer traffic was removed
 * from DDR: start from the full amount, then subtract what DDR keeps.
 */
llc.line_buffer = ddr.line_buffer_read + ddr.line_buffer_write;
if (llc_top_line_buf_enabled)
ddr.line_buffer_read = ddr.line_buffer_write = FP_INT(0);
llc.line_buffer -= (ddr.line_buffer_read + ddr.line_buffer_write);
bw_for_1x_8bpc = fp_div(FP_INT(width * height), FP_INT(32 * 8));
bw_for_1x_8bpc = fp_mult(bw_for_1x_8bpc,
fp_div(FP_INT(((int)(256 * fps))), FP_INT(1000 * 1000)));
/* Any DPB depth other than 8 gets the 10bpc packing and bpp factors */
dpb_bw_for_1x = dpb_bpp == 8 ? bw_for_1x_8bpc :
fp_mult(bw_for_1x_8bpc, fp_mult(ten_bpc_packing_factor,
ten_bpc_bpp_factor));
ddr.original_read = fp_div(fp_mult(FP(1, 50, 100), dpb_bw_for_1x),
input_compression_factor);
ddr.original_write = FP_ZERO;
/* A frame exactly one vertical tile wide uses a luma factor of 1 */
ref_y_bw_factor =
width == vertical_tile_width ? FP_INT(1) : ref_y_bw_factor;
ref_y_read = fp_mult(ref_y_bw_factor, dpb_bw_for_1x);
ref_y_read = fp_div(ref_y_read, dpb_compression_factor);
/* B frames double the reference read */
ref_y_read =
b_frames_enabled ? fp_mult(ref_y_read, FP_INT(2)) : ref_y_read;
/*
 * Same pattern as the line buffer: remember the full amount, reduce the
 * DDR share, and attribute the difference to LLC.
 */
llc_ref_y_read = ref_y_read;
if (llc_dual_core_ref_read_buf_enabled)
ref_y_read = fp_div(ref_y_read, FP_INT(2));
llc_ref_y_read -= ref_y_read;
ref_cb_cr_read = fp_mult(ref_cb_cr_bw_factor, dpb_bw_for_1x);
ref_cb_cr_read = fp_div(ref_cb_cr_read, dpb_compression_factor);
ref_cb_cr_read =
b_frames_enabled ? fp_mult(ref_cb_cr_read, FP_INT(2)) :
ref_cb_cr_read;
llc_ref_cb_cr_read = ref_cb_cr_read;
if (llc_ref_chroma_cache_enabled)
ref_cb_cr_read = fp_div(ref_cb_cr_read, ref_cb_cr_bw_factor);
if (llc_dual_core_ref_read_buf_enabled)
ref_cb_cr_read = fp_div(ref_cb_cr_read, FP_INT(2));
llc_ref_cb_cr_read -= ref_cb_cr_read;
ddr.dpb_write = fp_mult(dpb_write_factor, dpb_bw_for_1x);
ddr.dpb_write = fp_mult(ddr.dpb_write, FP(1, 50, 100));
ddr.dpb_write = fp_div(ddr.dpb_write, input_compression_factor);
/* Overlap bandwidth only applies to frames wider than one vertical tile */
ref_overlap_bw_factor =
width <= vertical_tile_width ? FP_INT(0) : FP_INT(1);
ref_overlap_bw = fp_mult(ddr.dpb_write, ref_overlap_bw_factor);
ref_overlap_bw = fp_div(ref_overlap_bw, dpb_write_factor);
ref_overlap_bw = fp_mult(ref_overlap_bw,
(dpb_write_factor - FP_INT(1)));
ddr.dpb_read = ref_y_read + ref_cb_cr_read + ref_overlap_bw;
llc.dpb_read = llc_ref_y_read + llc_ref_cb_cr_read;
ddr.total = ddr.vsp_read + ddr.vsp_write +
ddr.collocated_read + ddr.collocated_write +
ddr.line_buffer_read + ddr.line_buffer_write +
ddr.original_read + ddr.original_write +
ddr.dpb_read + ddr.dpb_write;
llc.total = llc.dpb_read + llc.line_buffer;
qsmmu_bw_overhead_factor = FP(1, 3, 100);
ddr.total = fp_mult(ddr.total, qsmmu_bw_overhead_factor);
/* Dump all the variables for easier debugging */
if (debug) {
struct dump dump[] = {
{"ENCODER PARAMETERS", "", DUMP_HEADER_MAGIC},
{"width", "%d", width},
{"height", "%d", height},
{"DPB format", "%#x", dpb_color_format},
{"original frame format", "%#x", original_color_format},
{"fps", "%d", fps},
{"DPB compression enable", "%d", dpb_compression_enabled},
{"original compression enable", "%d",
original_compression_enabled},
{"low power mode", "%d", low_power},
{"Work Mode", "%d", work_mode_1},
{"DPB compression factor", DUMP_FP_FMT,
dpb_compression_factor},
{"original compression factor", DUMP_FP_FMT,
original_compression_factor},
{"rotation", "%d", rotation},
{"cropping or scaling", "%d", cropping_or_scaling},
{"DERIVED PARAMETERS", "", DUMP_HEADER_MAGIC},
{"LCU size", "%d", lcu_size},
{"bitrate (Mbit/sec)", "%lu", bitrate},
{"bins to bit factor", DUMP_FP_FMT, bins_to_bit_factor},
{"qsmmu_bw_overhead_factor",
DUMP_FP_FMT, qsmmu_bw_overhead_factor},
{"INTERMEDIATE B/W DDR", "", DUMP_HEADER_MAGIC},
{"ref_y_read", DUMP_FP_FMT, ref_y_read},
{"ref_cb_cr_read", DUMP_FP_FMT, ref_cb_cr_read},
{"ref_overlap_bw", DUMP_FP_FMT, ref_overlap_bw},
{"VSP read", DUMP_FP_FMT, ddr.vsp_read},
{"VSP write", DUMP_FP_FMT, ddr.vsp_write},
{"collocated read", DUMP_FP_FMT, ddr.collocated_read},
{"collocated write", DUMP_FP_FMT, ddr.collocated_write},
{"line buffer read", DUMP_FP_FMT, ddr.line_buffer_read},
{"line buffer write", DUMP_FP_FMT, ddr.line_buffer_write},
{"original read", DUMP_FP_FMT, ddr.original_read},
{"original write", DUMP_FP_FMT, ddr.original_write},
{"DPB read", DUMP_FP_FMT, ddr.dpb_read},
{"DPB write", DUMP_FP_FMT, ddr.dpb_write},
{"LLC DPB read", DUMP_FP_FMT, llc.dpb_read},
{"LLC Line buffer", DUMP_FP_FMT, llc.line_buffer},
};
__dump(dump, ARRAY_SIZE(dump));
}
/* Report the total that matches the calling governor's mode */
switch (gm) {
case GOVERNOR_DDR:
ret = kbps(fp_round(ddr.total));
break;
case GOVERNOR_LLCC:
ret = kbps(fp_round(llc.total));
break;
default:
dprintk(VIDC_ERR, "%s - Unknown governor\n", __func__);
}
return ret;
}
/*
 * Dispatches a bus vote request to the calculator for its video domain
 * (VPE, encoder or decoder) and returns that calculator's result.
 *
 * Returns 0, after logging an error, when d->domain has no calculator:
 * either it lies beyond the table, or it falls on a slot the designated
 * initializers left NULL.
 */
static unsigned long __calculate(struct vidc_bus_vote_data *d,
		enum governor_mode gm)
{
	/* Built once; the table never changes at run time */
	static unsigned long (* const calc[])(struct vidc_bus_vote_data *,
			enum governor_mode) = {
		[HAL_VIDEO_DOMAIN_VPE] = __calculate_vpe,
		[HAL_VIDEO_DOMAIN_ENCODER] = __calculate_encoder,
		[HAL_VIDEO_DOMAIN_DECODER] = __calculate_decoder,
	};

	/*
	 * Designated initializers leave any index that is not named above
	 * as NULL, so check the slot as well as the bound before calling.
	 */
	if (d->domain >= ARRAY_SIZE(calc) || !calc[d->domain]) {
		dprintk(VIDC_ERR, "%s: invalid domain %d\n",
			__func__, d->domain);
		return 0;
	}

	return calc[d->domain](d, gm);
}
/*
 * devfreq get_target_freq callback shared by both governors.
 *
 * Fetches the per-session vote data from the device status, sums the
 * bandwidth computed by __calculate() for every session, and stores the
 * result in *freq after clamping it to the device's min/max limits.
 * If any session is in VIDC_POWER_TURBO mode, INT_MAX is requested
 * instead (and then clamped). With no vote data the request is 0
 * before clamping.
 *
 * Returns 0 on success, -EINVAL if dev or freq is NULL.
 */
static int __get_target_freq(struct devfreq *dev, unsigned long *freq)
{
	unsigned long ab_kbps = 0, c = 0;
	struct devfreq_dev_status stats = {0};
	struct msm_vidc_gov_data *vidc_data = NULL;
	struct governor *gov = NULL;

	if (!dev || !freq)
		return -EINVAL;

	/* The governor's mode decides whether DDR or LLC totals are summed */
	gov = container_of(dev->governor,
			struct governor, devfreq_gov);
	dev->profile->get_dev_status(dev->dev.parent, &stats);
	vidc_data = (struct msm_vidc_gov_data *)stats.private_data;

	if (!vidc_data || !vidc_data->data_count)
		goto exit;

	/*
	 * Check every session, not just the first one: the index has to be
	 * applied to data[], otherwise a turbo session at any position
	 * other than 0 goes unnoticed.
	 */
	for (c = 0; c < vidc_data->data_count; ++c) {
		if (vidc_data->data[c].power_mode == VIDC_POWER_TURBO) {
			ab_kbps = INT_MAX;
			goto exit;
		}
	}

	for (c = 0; c < vidc_data->data_count; ++c)
		ab_kbps += __calculate(&vidc_data->data[c], gov->mode);

exit:
	/* A max_freq of 0 is treated as "no upper limit" */
	*freq = clamp(ab_kbps, dev->min_freq, dev->max_freq ?: UINT_MAX);
	trace_msm_vidc_perf_bus_vote(gov->devfreq_gov.name, *freq);
	return 0;
}
/*
 * devfreq event callback shared by both governors. On start, resume and
 * suspend events it calls update_devfreq() with the devfreq lock held
 * and returns that call's result; every other event is ignored and
 * reports 0. Returns -EINVAL if devfreq is NULL.
 */
static int __event_handler(struct devfreq *devfreq, unsigned int event,
		void *data)
{
	int rc;

	if (!devfreq)
		return -EINVAL;

	if (event != DEVFREQ_GOV_START &&
			event != DEVFREQ_GOV_RESUME &&
			event != DEVFREQ_GOV_SUSPEND)
		return 0;

	mutex_lock(&devfreq->lock);
	rc = update_devfreq(devfreq);
	mutex_unlock(&devfreq->lock);

	return rc;
}
/*
 * The governors registered by this module: one per governor_mode. Both
 * share the same callbacks; __get_target_freq() reads .mode to decide
 * which total to report.
 */
static struct governor governors[] = {
{
.mode = GOVERNOR_DDR,
.devfreq_gov = {
.name = "msm-vidc-ddr",
.get_target_freq = __get_target_freq,
.event_handler = __event_handler,
},
},
{
.mode = GOVERNOR_LLCC,
.devfreq_gov = {
.name = "msm-vidc-llcc",
.get_target_freq = __get_target_freq,
.event_handler = __event_handler,
},
},
};
/*
 * Module init: registers every entry of governors[] with devfreq.
 *
 * Returns 0 when all governors were added. If one registration fails,
 * the governors added before it are removed again, so that a failed
 * init does not leave devfreq holding pointers into this module, and
 * the error from devfreq_add_governor() is returned.
 */
static int __init msm_vidc_bw_gov_init(void)
{
	int c = 0, rc = 0;

	for (c = 0; c < ARRAY_SIZE(governors); ++c) {
		dprintk(VIDC_DBG, "Adding governor %s\n",
			governors[c].devfreq_gov.name);

		rc = devfreq_add_governor(&governors[c].devfreq_gov);
		if (rc) {
			dprintk(VIDC_ERR, "Error adding governor %s: %d\n",
				governors[c].devfreq_gov.name, rc);
			goto err_unwind;
		}
	}

	return 0;

err_unwind:
	/* governors[c] itself was not added; undo the ones before it */
	while (--c >= 0)
		devfreq_remove_governor(&governors[c].devfreq_gov);

	return rc;
}
module_init(msm_vidc_bw_gov_init);
/*
 * Module exit: removes every entry of governors[] from devfreq. A failed
 * removal is logged and the remaining governors are still processed.
 */
static void __exit msm_vidc_bw_gov_exit(void)
{
	int c = 0;

	for (c = 0; c < ARRAY_SIZE(governors); ++c) {
		int rc;

		dprintk(VIDC_DBG, "Removing governor %s\n",
			governors[c].devfreq_gov.name);

		rc = devfreq_remove_governor(&governors[c].devfreq_gov);
		if (rc)
			dprintk(VIDC_ERR, "Error removing governor %s: %d\n",
				governors[c].devfreq_gov.name, rc);
	}
}
module_exit(msm_vidc_bw_gov_exit);
MODULE_LICENSE("GPL v2");