cpufreq: governor: Implement per policy instances of governors

Currently, there can't be multiple instances of single governor_type.
If we have a multi-package system, where we have multiple instances
of struct policy (per package), we can't have multiple instances of
same governor. i.e. We can't have multiple instances of ondemand
governor for multiple packages.

Governors directory in sysfs is created at /sys/devices/system/cpu/cpufreq/
governor-name/. Which again reflects that there can be only one
instance of a governor_type in the system.

This is a bottleneck for multicluster system, where we want different
packages to use same governor type, but with different tunables.

This patch uses the infrastructure provided by earlier patch and
implements init/exit routines for ondemand and conservative
governors.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 5a76086..26fbb72 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -22,12 +22,29 @@
 #include <linux/export.h>
 #include <linux/kernel_stat.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/tick.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
 #include "cpufreq_governor.h"
 
+static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
+{
+	if (have_governor_per_policy())
+		return &policy->kobj;
+	else
+		return cpufreq_global_kobject;
+}
+
+static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
+{
+	if (have_governor_per_policy())
+		return dbs_data->cdata->attr_group_gov_pol;
+	else
+		return dbs_data->cdata->attr_group_gov_sys;
+}
+
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
 	u64 idle_time;
@@ -65,7 +82,7 @@
 
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 {
-	struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
+	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	struct cpufreq_policy *policy;
@@ -73,7 +90,7 @@
 	unsigned int ignore_nice;
 	unsigned int j;
 
-	if (dbs_data->governor == GOV_ONDEMAND)
+	if (dbs_data->cdata->governor == GOV_ONDEMAND)
 		ignore_nice = od_tuners->ignore_nice;
 	else
 		ignore_nice = cs_tuners->ignore_nice;
@@ -87,7 +104,7 @@
 		unsigned int idle_time, wall_time, iowait_time;
 		unsigned int load;
 
-		j_cdbs = dbs_data->get_cpu_cdbs(j);
+		j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
 
 		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
 
@@ -117,9 +134,9 @@
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
-		if (dbs_data->governor == GOV_ONDEMAND) {
+		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
 			struct od_cpu_dbs_info_s *od_j_dbs_info =
-				dbs_data->get_cpu_dbs_info_s(cpu);
+				dbs_data->cdata->get_cpu_dbs_info_s(cpu);
 
 			cur_iowait_time = get_cpu_iowait_time_us(j,
 					&cur_wall_time);
@@ -145,7 +162,7 @@
 
 		load = 100 * (wall_time - idle_time) / wall_time;
 
-		if (dbs_data->governor == GOV_ONDEMAND) {
+		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
 			int freq_avg = __cpufreq_driver_getavg(policy, j);
 			if (freq_avg <= 0)
 				freq_avg = policy->cur;
@@ -157,7 +174,7 @@
 			max_load = load;
 	}
 
-	dbs_data->gov_check_cpu(cpu, max_load);
+	dbs_data->cdata->gov_check_cpu(cpu, max_load);
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
@@ -165,14 +182,14 @@
 				  unsigned int sampling_rate)
 {
 	int delay = delay_for_sampling_rate(sampling_rate);
-	struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
+	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 
 	schedule_delayed_work_on(cpu, &cdbs->work, delay);
 }
 
 static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu)
 {
-	struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu);
+	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 
 	cancel_delayed_work_sync(&cdbs->work);
 }
@@ -196,31 +213,128 @@
 }
 EXPORT_SYMBOL_GPL(need_load_eval);
 
-int cpufreq_governor_dbs(struct dbs_data *dbs_data,
-		struct cpufreq_policy *policy, unsigned int event)
+static void set_sampling_rate(struct dbs_data *dbs_data,
+		unsigned int sampling_rate)
 {
+	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
+		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+		cs_tuners->sampling_rate = sampling_rate;
+	} else {
+		struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+		od_tuners->sampling_rate = sampling_rate;
+	}
+}
+
+int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+		struct common_dbs_data *cdata, unsigned int event)
+{
+	struct dbs_data *dbs_data;
 	struct od_cpu_dbs_info_s *od_dbs_info = NULL;
 	struct cs_cpu_dbs_info_s *cs_dbs_info = NULL;
-	struct cs_ops *cs_ops = NULL;
 	struct od_ops *od_ops = NULL;
-	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
-	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
+	struct od_dbs_tuners *od_tuners = NULL;
+	struct cs_dbs_tuners *cs_tuners = NULL;
 	struct cpu_dbs_common_info *cpu_cdbs;
-	unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
+	unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
 	int rc;
 
-	cpu_cdbs = dbs_data->get_cpu_cdbs(cpu);
+	if (have_governor_per_policy())
+		dbs_data = policy->governor_data;
+	else
+		dbs_data = cdata->gdbs_data;
 
-	if (dbs_data->governor == GOV_CONSERVATIVE) {
-		cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
-		sampling_rate = &cs_tuners->sampling_rate;
+	WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT));
+
+	switch (event) {
+	case CPUFREQ_GOV_POLICY_INIT:
+		if (have_governor_per_policy()) {
+			WARN_ON(dbs_data);
+		} else if (dbs_data) {
+			policy->governor_data = dbs_data;
+			return 0;
+		}
+
+		dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
+		if (!dbs_data) {
+			pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
+			return -ENOMEM;
+		}
+
+		dbs_data->cdata = cdata;
+		rc = cdata->init(dbs_data);
+		if (rc) {
+			pr_err("%s: POLICY_INIT: init() failed\n", __func__);
+			kfree(dbs_data);
+			return rc;
+		}
+
+		rc = sysfs_create_group(get_governor_parent_kobj(policy),
+				get_sysfs_attr(dbs_data));
+		if (rc) {
+			cdata->exit(dbs_data);
+			kfree(dbs_data);
+			return rc;
+		}
+
+		policy->governor_data = dbs_data;
+
+		/* policy latency is in nS. Convert it to uS first */
+		latency = policy->cpuinfo.transition_latency / 1000;
+		if (latency == 0)
+			latency = 1;
+
+		/* Bring kernel and HW constraints together */
+		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
+				MIN_LATENCY_MULTIPLIER * latency);
+		set_sampling_rate(dbs_data, max(dbs_data->min_sampling_rate,
+					latency * LATENCY_MULTIPLIER));
+
+		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
+			struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;
+
+			cpufreq_register_notifier(cs_ops->notifier_block,
+					CPUFREQ_TRANSITION_NOTIFIER);
+		}
+
+		if (!have_governor_per_policy())
+			cdata->gdbs_data = dbs_data;
+
+		return 0;
+	case CPUFREQ_GOV_POLICY_EXIT:
+		if ((policy->governor->initialized == 1) ||
+				have_governor_per_policy()) {
+			sysfs_remove_group(get_governor_parent_kobj(policy),
+					get_sysfs_attr(dbs_data));
+
+			if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
+				struct cs_ops *cs_ops = dbs_data->cdata->gov_ops;
+
+				cpufreq_unregister_notifier(cs_ops->notifier_block,
+						CPUFREQ_TRANSITION_NOTIFIER);
+			}
+
+			cdata->exit(dbs_data);
+			kfree(dbs_data);
+			cdata->gdbs_data = NULL;
+		}
+
+		policy->governor_data = NULL;
+		return 0;
+	}
+
+	cpu_cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+
+	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
+		cs_tuners = dbs_data->tuners;
+		cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+		sampling_rate = cs_tuners->sampling_rate;
 		ignore_nice = cs_tuners->ignore_nice;
-		cs_ops = dbs_data->gov_ops;
 	} else {
-		od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu);
-		sampling_rate = &od_tuners->sampling_rate;
+		od_tuners = dbs_data->tuners;
+		od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+		sampling_rate = od_tuners->sampling_rate;
 		ignore_nice = od_tuners->ignore_nice;
-		od_ops = dbs_data->gov_ops;
+		od_ops = dbs_data->cdata->gov_ops;
 	}
 
 	switch (event) {
@@ -232,7 +346,7 @@
 
 		for_each_cpu(j, policy->cpus) {
 			struct cpu_dbs_common_info *j_cdbs =
-				dbs_data->get_cpu_cdbs(j);
+				dbs_data->cdata->get_cpu_cdbs(j);
 
 			j_cdbs->cpu = j;
 			j_cdbs->cur_policy = policy;
@@ -244,69 +358,34 @@
 
 			mutex_init(&j_cdbs->timer_mutex);
 			INIT_DEFERRABLE_WORK(&j_cdbs->work,
-					     dbs_data->gov_dbs_timer);
-		}
-
-		if (!policy->governor->initialized) {
-			rc = sysfs_create_group(cpufreq_global_kobject,
-					dbs_data->attr_group);
-			if (rc) {
-				mutex_unlock(&dbs_data->mutex);
-				return rc;
-			}
+					     dbs_data->cdata->gov_dbs_timer);
 		}
 
 		/*
 		 * conservative does not implement micro like ondemand
 		 * governor, thus we are bound to jiffes/HZ
 		 */
-		if (dbs_data->governor == GOV_CONSERVATIVE) {
+		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 			cs_dbs_info->down_skip = 0;
 			cs_dbs_info->enable = 1;
 			cs_dbs_info->requested_freq = policy->cur;
-
-			if (!policy->governor->initialized) {
-				cpufreq_register_notifier(cs_ops->notifier_block,
-						CPUFREQ_TRANSITION_NOTIFIER);
-
-				dbs_data->min_sampling_rate =
-					MIN_SAMPLING_RATE_RATIO *
-					jiffies_to_usecs(10);
-			}
 		} else {
 			od_dbs_info->rate_mult = 1;
 			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 			od_ops->powersave_bias_init_cpu(cpu);
-
-			if (!policy->governor->initialized)
-				od_tuners->io_is_busy = od_ops->io_busy();
 		}
 
-		if (policy->governor->initialized)
-			goto unlock;
-
-		/* policy latency is in nS. Convert it to uS first */
-		latency = policy->cpuinfo.transition_latency / 1000;
-		if (latency == 0)
-			latency = 1;
-
-		/* Bring kernel and HW constraints together */
-		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
-				MIN_LATENCY_MULTIPLIER * latency);
-		*sampling_rate = max(dbs_data->min_sampling_rate, latency *
-				LATENCY_MULTIPLIER);
-unlock:
 		mutex_unlock(&dbs_data->mutex);
 
 		/* Initiate timer time stamp */
 		cpu_cdbs->time_stamp = ktime_get();
 
 		for_each_cpu(j, policy->cpus)
-			dbs_timer_init(dbs_data, j, *sampling_rate);
+			dbs_timer_init(dbs_data, j, sampling_rate);
 		break;
 
 	case CPUFREQ_GOV_STOP:
-		if (dbs_data->governor == GOV_CONSERVATIVE)
+		if (dbs_data->cdata->governor == GOV_CONSERVATIVE)
 			cs_dbs_info->enable = 0;
 
 		for_each_cpu(j, policy->cpus)
@@ -315,13 +394,6 @@
 		mutex_lock(&dbs_data->mutex);
 		mutex_destroy(&cpu_cdbs->timer_mutex);
 
-		if (policy->governor->initialized == 1) {
-			sysfs_remove_group(cpufreq_global_kobject,
-					dbs_data->attr_group);
-			if (dbs_data->governor == GOV_CONSERVATIVE)
-				cpufreq_unregister_notifier(cs_ops->notifier_block,
-						CPUFREQ_TRANSITION_NOTIFIER);
-		}
 		mutex_unlock(&dbs_data->mutex);
 
 		break;