ANDROID: sched/fair: add tunable to force selection at cpu granularity

EAS assumes that clusters with smaller-capacity cores are more
energy-efficient. This may not be true on non-big.LITTLE devices,
so EAS can make incorrect cluster selections when finding a CPU
to wake. The "sched_is_big_little" hint can be used to switch from
cluster-based selection to per-cpu selection.

This change also incorporates the sync hint enable patch:

EAS did not honour synchronous wakeup hints. A new sysctl,
"sched_sync_hint_enable", is created to ask EAS to use this
information when selecting a CPU.
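
For reference, both controls land in the kernel sysctl table, so they
can be toggled at runtime. A minimal sketch (assuming the standard
/proc/sys/kernel/ namespace; not part of this patch):

  # select at cpu granularity via find_best_target() (default)
  echo 0 > /proc/sys/kernel/sched_is_big_little
  # honour synchronous wakeup hints in EAS (default)
  echo 1 > /proc/sys/kernel/sched_sync_hint_enable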

Also contains:

EAS: sched/fair: for SMP bias toward idle core with capacity

For SMP devices, on wakeup bias towards idle cores that have spare
capacity rather than busy cpus that would need a higher OPP.

eas: favor idle cpus for boosted tasks

BUG: 29533997
BUG: 29512132
Change-Id: I0cc9a1b1b88fb52916f18bf2d25715bdc3634f9c
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>

eas/sched/fair: Favoring busy cpus with low OPPs

BUG: 29533997
BUG: 29512132
Change-Id: I9305b3239698d64278db715a2e277ea0bb4ece79

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Andres Oportus <andresoportus@google.com>
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index abe17fc..d2a69b5 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -18,6 +18,8 @@
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_is_big_little;
+extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_cstate_aware;
 
 enum sched_tunable_scaling {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0523b7e..8128ccd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -51,7 +51,10 @@
 unsigned int sysctl_sched_latency = 6000000ULL;
 unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 
+unsigned int sysctl_sched_is_big_little = 0;
+unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_cstate_aware = 1;
+
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -6217,7 +6220,97 @@
 	return target;
 }
 
-static int energy_aware_wake_cpu(struct task_struct *p, int target)
+static inline int find_best_target(struct task_struct *p)
+{
+	int i, boosted;
+	int target_cpu = -1;
+	int target_capacity = 0;
+	int backup_capacity = 0;
+	int idle_cpu = -1;
+	int best_idle_cstate = INT_MAX;
+	int backup_cpu = -1;
+	unsigned long task_util_boosted, new_util;
+
+	/*
+	 * Favor 1) busy cpu with lowest capacity (OPP) that fits the task
+	 *       2) idle_cpu with capacity at current OPP
+	 *       3) busy cpu with capacity at higher OPP
+	 */
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+	boosted = schedtune_task_boost(p);
+#else
+	boosted = 0;
+#endif
+	task_util_boosted = boosted_task_util(p);
+	for_each_cpu(i, tsk_cpus_allowed(p)) {
+		int cur_capacity = capacity_curr_of(i);
+		struct rq *rq = cpu_rq(i);
+		int idle_idx = idle_get_state_idx(rq);
+
+		/*
+		 * p's blocked utilization is still accounted for on prev_cpu
+		 * so prev_cpu will receive a negative bias due to the double
+		 * accounting. However, the blocked utilization may be zero.
+		 */
+		new_util = cpu_util(i) + task_util_boosted;
+
+		/*
+		 * Ensure minimum capacity to grant the required boost.
+		 * The target CPU can be already at a capacity level higher
+		 * than the one required to boost the task.
+		 */
+
+		if (new_util > capacity_orig_of(i))
+			continue;
+
+		/*
+		 * For boosted tasks we favor idle cpus unconditionally to
+		 * improve latency.
+		 */
+		if (idle_idx >= 0 && boosted) {
+			if (idle_cpu < 0 ||
+				(sysctl_sched_cstate_aware &&
+				 best_idle_cstate > idle_idx)) {
+				best_idle_cstate = idle_idx;
+				idle_cpu = i;
+			}
+			continue;
+		}
+
+		if (new_util < cur_capacity) {
+			if (cpu_rq(i)->nr_running) {
+				if (target_capacity == 0 ||
+					target_capacity > cur_capacity) {
+					/* busy CPU with lowest capacity at current OPP */
+					target_cpu = i;
+					target_capacity = cur_capacity;
+				}
+			} else if (!boosted) {
+				if (idle_cpu < 0 ||
+					(sysctl_sched_cstate_aware &&
+						best_idle_cstate > idle_idx)) {
+					best_idle_cstate = idle_idx;
+					idle_cpu = i;
+				}
+			}
+		} else if (backup_capacity == 0 ||
+				backup_capacity > cur_capacity) {
+			/* first busy CPU with capacity at higher OPP */
+			backup_capacity = cur_capacity;
+			backup_cpu = i;
+		}
+	}
+
+	if (!boosted && target_cpu < 0) {
+		target_cpu = idle_cpu >= 0 ? idle_cpu : backup_cpu;
+	}
+
+	if (boosted && idle_cpu >= 0)
+		target_cpu = idle_cpu;
+	return target_cpu;
+}
+
+static int energy_aware_wake_cpu(struct task_struct *p, int target, int sync)
 {
 	struct sched_domain *sd;
 	struct sched_group *sg, *sg_target;
@@ -6225,6 +6318,14 @@
 	int target_cpu = task_cpu(p);
 	int i;
 
+	if (sysctl_sched_sync_hint_enable && sync) {
+		int cpu = smp_processor_id();
+		cpumask_t search_cpus;
+		cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+		if (cpumask_test_cpu(cpu, &search_cpus))
+			return cpu;
+	}
+
 	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
 
 	if (!sd)
@@ -6233,50 +6334,60 @@
 	sg = sd->groups;
 	sg_target = sg;
 
-	/*
-	 * Find group with sufficient capacity. We only get here if no cpu is
-	 * overutilized. We may end up overutilizing a cpu by adding the task,
-	 * but that should not be any worse than select_idle_sibling().
-	 * load_balance() should sort it out later as we get above the tipping
-	 * point.
-	 */
-	do {
-		/* Assuming all cpus are the same in group */
-		int max_cap_cpu = group_first_cpu(sg);
+	if (sysctl_sched_is_big_little) {
 
 		/*
-		 * Assume smaller max capacity means more energy-efficient.
-		 * Ideally we should query the energy model for the right
-		 * answer but it easily ends up in an exhaustive search.
+		 * Find group with sufficient capacity. We only get here if no cpu is
+		 * overutilized. We may end up overutilizing a cpu by adding the task,
+		 * but that should not be any worse than select_idle_sibling().
+		 * load_balance() should sort it out later as we get above the tipping
+		 * point.
 		 */
-		if (capacity_of(max_cap_cpu) < target_max_cap &&
-		    task_fits_max(p, max_cap_cpu)) {
-			sg_target = sg;
-			target_max_cap = capacity_of(max_cap_cpu);
-		}
-	} while (sg = sg->next, sg != sd->groups);
+		do {
+			/* Assuming all cpus are the same in group */
+			int max_cap_cpu = group_first_cpu(sg);
 
-	/* Find cpu with sufficient capacity */
-	for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+			/*
+			 * Assume smaller max capacity means more energy-efficient.
+			 * Ideally we should query the energy model for the right
+			 * answer but it easily ends up in an exhaustive search.
+			 */
+			if (capacity_of(max_cap_cpu) < target_max_cap &&
+			    task_fits_max(p, max_cap_cpu)) {
+				sg_target = sg;
+				target_max_cap = capacity_of(max_cap_cpu);
+			}
+		} while (sg = sg->next, sg != sd->groups);
+
+		/* Find cpu with sufficient capacity */
+		for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+			/*
+			 * p's blocked utilization is still accounted for on prev_cpu
+			 * so prev_cpu will receive a negative bias due to the double
+			 * accounting. However, the blocked utilization may be zero.
+			 */
+			int new_util = cpu_util(i) + boosted_task_util(p);
+
+			if (new_util > capacity_orig_of(i))
+				continue;
+
+			if (new_util < capacity_curr_of(i)) {
+				target_cpu = i;
+				if (cpu_rq(i)->nr_running)
+					break;
+			}
+
+			/* cpu has capacity at higher OPP, keep it as fallback */
+			if (target_cpu == task_cpu(p))
+				target_cpu = i;
+		}
+	} else {
 		/*
-		 * p's blocked utilization is still accounted for on prev_cpu
-		 * so prev_cpu will receive a negative bias due to the double
-		 * accounting. However, the blocked utilization may be zero.
+		 * Find a cpu with sufficient capacity
 		 */
-		int new_util = cpu_util(i) + boosted_task_util(p);
-
-		if (new_util > capacity_orig_of(i))
-			continue;
-
-		if (new_util < capacity_curr_of(i)) {
-			target_cpu = i;
-			if (cpu_rq(i)->nr_running)
-				break;
-		}
-
-		/* cpu has capacity at higher OPP, keep it as fallback */
-		if (target_cpu == task_cpu(p))
-			target_cpu = i;
+		int tmp_target = find_best_target(p);
+		if (tmp_target >= 0)
+			target_cpu = tmp_target;
 	}
 
 	if (target_cpu != task_cpu(p)) {
@@ -6355,7 +6466,7 @@
 
 	if (!sd) {
 		if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
-			new_cpu = energy_aware_wake_cpu(p, prev_cpu);
+			new_cpu = energy_aware_wake_cpu(p, prev_cpu, sync);
 		else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3a35679..a025ba1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -308,6 +308,20 @@
 		.extra2		= &max_sched_granularity_ns,
 	},
 	{
+		.procname	= "sched_is_big_little",
+		.data		= &sysctl_sched_is_big_little,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_sync_hint_enable",
+		.data		= &sysctl_sched_sync_hint_enable,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "sched_cstate_aware",
 		.data		= &sysctl_sched_cstate_aware,
 		.maxlen		= sizeof(unsigned int),