sched: utilize big cluster CPUs evenly

Utilize big cluster CPUs evenly by rotating the first candidate CPU and
avoiding the task's previous CPU once every second.
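
For example, on a system whose big cluster is CPUs 4-7, wakeups within
one interval (HZ jiffies, i.e. one second) all start the candidate
search from the same CPU; once the interval expires, the starting CPU
advances to the next big CPU, wrapping around the cluster, and the
task's previous CPU is skipped for that placement.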

Change-Id: Idecb3df0080bfec0ec8fd86b1c5b0449ccfc551b
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 08fd4be..d43ad69 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9624,3 +9624,50 @@
 #endif /* CONFIG_SCHED_WALT */
 
 __read_mostly bool sched_predl = 1;
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
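+/*
+ * find_first_cpu_bit - choose the bit from which the caller should start
+ * scanning @search_cpus.  Rotation applies only when @search_cpus falls
+ * in the maximum-capacity (big) cluster.
+ *
+ * Returns the bit just before the rotated start CPU, so that a subsequent
+ * cpumask_next() lands on the start CPU itself, or -1 when no rotation is
+ * done.  Once per @env->interval the start CPU is advanced and
+ * *@avoid_prev_cpu is set so the caller skips the task's previous CPU.
+ */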
+int
+find_first_cpu_bit(struct task_struct *p, const cpumask_t *search_cpus,
+		   struct sched_group *sg_target, bool *avoid_prev_cpu,
+		   bool *do_rotate, struct find_first_cpu_bit_env *env)
+{
+	int i = -1;
+	unsigned long mcc;
+	int cpu = smp_processor_id();
+
+	mcc = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+	/* do rotation only for big CPUs. */
+	*do_rotate = (cpumask_first(search_cpus) < nr_cpu_ids &&
+		     capacity_orig_of(cpumask_first(search_cpus)) == mcc);
+
+	if (*do_rotate) {
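+		/*
+		 * Fast path: still within the rotation interval, so keep
+		 * the current start CPU without taking the lock.
+		 */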
+		if (time_before_eq(jiffies, *env->avoid_prev_cpu_last +
+				   env->interval))
+			return *env->rotate_cpu_start;
+
+		spin_lock(env->rotate_lock);
+		if (time_after(jiffies, *env->avoid_prev_cpu_last +
+					env->interval)) {
+			cpumask_t tmpmask;
+
+			*env->avoid_prev_cpu_last = jiffies;
+			*avoid_prev_cpu = true;
+
+			cpumask_copy(&tmpmask, sched_group_cpus(sg_target));
+			cpumask_andnot(&tmpmask, &tmpmask, cpu_isolated_mask);
+
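+			/*
+			 * Advance to the next candidate bit; on wrap-around,
+			 * fall back to the bit just before the group's first
+			 * CPU so cpumask_next() restarts from it.
+			 */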
+			i = cpumask_next(*env->rotate_cpu_start, &tmpmask);
+			if (i >= nr_cpu_ids)
+				i = cpumask_first(&tmpmask) - 1;
+			/* Change start CPU every interval. */
+			*env->rotate_cpu_start = i;
+		} else {
+			i = *env->rotate_cpu_start;
+		}
+		spin_unlock(env->rotate_lock);
+	}
+
+	return i;
+}
+#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d61c570..7b459ca 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5034,6 +5034,19 @@
 DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
 
+#ifdef CONFIG_SCHED_CORE_ROTATE
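+/* Rotation state for CFS placement; advances at most once per interval. */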
+static int rotate_cpu_start;
+static DEFINE_SPINLOCK(rotate_lock);
+static unsigned long avoid_prev_cpu_last;
+
+static struct find_first_cpu_bit_env first_cpu_bit_env = {
+	.avoid_prev_cpu_last = &avoid_prev_cpu_last,
+	.rotate_cpu_start = &rotate_cpu_start,
+	.interval = HZ,
+	.rotate_lock = &rotate_lock,
+};
+#endif
+
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * per rq 'load' array crap; XXX kill this.
@@ -6883,7 +6896,7 @@
 	int target_cpu, targeted_cpus = 0;
 	unsigned long task_util_boosted = 0, curr_util = 0;
 	long new_util, new_util_cum;
-	int i;
+	int i = -1;
 	int ediff = -1;
 	int cpu = smp_processor_id();
 	int min_util_cpu = -1;
@@ -6902,8 +6915,17 @@
 	enum sched_boost_policy placement_boost = task_sched_boost(p) ?
 				sched_boost_policy() : SCHED_BOOST_NONE;
 	struct related_thread_group *grp;
+	cpumask_t search_cpus;
+	int prev_cpu = task_cpu(p);
+#ifdef CONFIG_SCHED_CORE_ROTATE
+	bool do_rotate = false;
+	bool avoid_prev_cpu = false;
+#else
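+/* Compile-time false: the rotation branches below get optimized away. */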
+#define do_rotate false
+#define avoid_prev_cpu false
+#endif
 
-	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
+	sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
 
 	if (!sd)
 		return target;
@@ -6922,7 +6944,7 @@
 		rtg_target = &grp->preferred_cluster->cpus;
 
 	if (sync && bias_to_waker_cpu(p, cpu, rtg_target)) {
-		trace_sched_task_util_bias_to_waker(p, task_cpu(p),
+		trace_sched_task_util_bias_to_waker(p, prev_cpu,
 					task_util(p), cpu, cpu, 0, need_idle);
 		return cpu;
 	}
@@ -6985,14 +7007,30 @@
 
 		target_cpu = -1;
 
+		cpumask_copy(&search_cpus, tsk_cpus_allowed(p));
+		cpumask_and(&search_cpus, &search_cpus,
+			    sched_group_cpus(sg_target));
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+		i = find_first_cpu_bit(p, &search_cpus, sg_target,
+				       &avoid_prev_cpu, &do_rotate,
+				       &first_cpu_bit_env);
+
+retry:
+#endif
 		/* Find cpu with sufficient capacity */
-		for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+		while ((i = cpumask_next(i, &search_cpus)) < nr_cpu_ids) {
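+			/* Clear visited CPUs so the retry pass skips them. */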
+			cpumask_clear_cpu(i, &search_cpus);
+
 			if (cpu_isolated(i))
 				continue;
 
 			if (isolated_candidate == -1)
 				isolated_candidate = i;
 
+			if (avoid_prev_cpu && i == prev_cpu)
+				continue;
+
 			if (is_reserved(i))
 				continue;
 
@@ -7061,9 +7099,9 @@
 						    new_util ||
 						    (target_cpu_util ==
 						     new_util &&
-						     (i == task_cpu(p) ||
+						     (i == prev_cpu ||
 						      (target_cpu !=
-						       task_cpu(p) &&
+						       prev_cpu &&
 						       target_cpu_new_util_cum >
 						       new_util_cum))))) {
 						min_idle_idx_cpu = i;
@@ -7075,6 +7113,9 @@
 					}
 				} else if (cpu_rq(i)->nr_running) {
 					target_cpu = i;
+#ifdef CONFIG_SCHED_CORE_ROTATE
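+					/*
+					 * A target was chosen; don't rescan
+					 * from bit 0 after the break.
+					 */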
+					do_rotate = false;
+#endif
 					break;
 				}
 			} else if (!need_idle) {
@@ -7114,6 +7155,19 @@
 			}
 		}
 
+#ifdef CONFIG_SCHED_CORE_ROTATE
+		if (do_rotate) {
+			/*
+			 * We started iterating somewhere in the middle of
+			 * the cpumask.  Iterate once again from bit 0 to the
+			 * previous starting point bit.
+			 */
+			do_rotate = false;
+			i = -1;
+			goto retry;
+		}
+#endif
+
 		if (target_cpu == -1 ||
 		    (target_cpu != min_util_cpu && !safe_to_pack &&
 		     !is_packing_eligible(p, task_util_boosted, sg_target,
@@ -7148,7 +7202,8 @@
 		}
 	}
 
-	if (target_cpu != task_cpu(p) && !cpu_isolated(task_cpu(p))) {
+	if (target_cpu != task_cpu(p) && !avoid_prev_cpu &&
+	    !cpu_isolated(task_cpu(p))) {
 		struct energy_env eenv = {
 			.util_delta	= task_util(p),
 			.src_cpu	= task_cpu(p),
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 35382df..7feba85 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1711,6 +1711,19 @@
 	return NULL;
 }
 
+#ifdef CONFIG_SCHED_CORE_ROTATE
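+/* Rotation state for RT placement, separate from the CFS state in fair.c. */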
+static int rotate_cpu_start;
+static DEFINE_SPINLOCK(rotate_lock);
+static unsigned long avoid_prev_cpu_last;
+
+static struct find_first_cpu_bit_env first_cpu_bit_env = {
+	.avoid_prev_cpu_last = &avoid_prev_cpu_last,
+	.rotate_cpu_start = &rotate_cpu_start,
+	.interval = HZ,
+	.rotate_lock = &rotate_lock,
+};
+#endif
+
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
 static int find_lowest_rq(struct task_struct *task)
@@ -1719,7 +1732,7 @@
 	struct sched_group *sg, *sg_target;
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
 	int this_cpu = smp_processor_id();
-	int cpu, best_cpu;
+	int cpu = -1, best_cpu;
 	struct cpumask search_cpu, backup_search_cpu;
 	unsigned long cpu_capacity;
 	unsigned long best_capacity;
@@ -1730,6 +1743,13 @@
 	int best_cpu_idle_idx = INT_MAX;
 	int cpu_idle_idx = -1;
 	bool placement_boost;
+#ifdef CONFIG_SCHED_CORE_ROTATE
+	bool do_rotate = false;
+	bool avoid_prev_cpu = false;
+#else
+#define do_rotate false
+#define avoid_prev_cpu false
+#endif
 
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
@@ -1761,6 +1781,10 @@
 
 		sg = sd->groups;
 		do {
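+			/* Skip groups with no CPUs in the lowest mask. */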
+			if (!cpumask_intersects(lowest_mask,
+						sched_group_cpus(sg)))
+				continue;
+
 			cpu = group_first_cpu(sg);
 			cpu_capacity = capacity_orig_of(cpu);
 
@@ -1778,20 +1802,37 @@
 		} while (sg = sg->next, sg != sd->groups);
 		rcu_read_unlock();
 
-		cpumask_and(&search_cpu, lowest_mask,
-			    sched_group_cpus(sg_target));
-		cpumask_copy(&backup_search_cpu, lowest_mask);
-		cpumask_andnot(&backup_search_cpu, &backup_search_cpu,
-			       &search_cpu);
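+		/*
+		 * sg_target stays NULL when every group was skipped above;
+		 * in that case search the whole lowest_mask without rotation.
+		 */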
+		if (sg_target) {
+			cpumask_and(&search_cpu, lowest_mask,
+				    sched_group_cpus(sg_target));
+			cpumask_copy(&backup_search_cpu, lowest_mask);
+			cpumask_andnot(&backup_search_cpu, &backup_search_cpu,
+				       &search_cpu);
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+			cpu = find_first_cpu_bit(task, &search_cpu, sg_target,
+						 &avoid_prev_cpu, &do_rotate,
+						 &first_cpu_bit_env);
+#endif
+		} else {
+			cpumask_copy(&search_cpu, lowest_mask);
+			cpumask_clear(&backup_search_cpu);
+			cpu = -1;
+		}
 
 retry:
-		for_each_cpu(cpu, &search_cpu) {
+		while ((cpu = cpumask_next(cpu, &search_cpu)) < nr_cpu_ids) {
+			cpumask_clear_cpu(cpu, &search_cpu);
+
 			/*
 			 * Don't use capacity_curr_of() since it will
 			 * double count rt task load.
 			 */
 			util = cpu_util(cpu);
 
+			if (avoid_prev_cpu && cpu == task_cpu(task))
+				continue;
+
 			if (__cpu_overutilized(cpu, util + tutil))
 				continue;
 
@@ -1838,6 +1879,19 @@
 			best_cpu = cpu;
 		}
 
+#ifdef CONFIG_SCHED_CORE_ROTATE
+		if (do_rotate) {
+			/*
+			 * We started iterating somewhere in the middle of
+			 * the cpumask.  Iterate once again from bit 0 to the
+			 * previous starting point bit.
+			 */
+			do_rotate = false;
+			cpu = -1;
+			goto retry;
+		}
+#endif
+
 		if (best_cpu != -1) {
 			return best_cpu;
 		} else if (!cpumask_empty(&backup_search_cpu)) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a5b1377..7ac731d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2841,3 +2841,17 @@
 {
 	return sched_feat(ENERGY_AWARE);
 }
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
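+/*
+ * Rotation state referenced by find_first_cpu_bit(): the jiffy stamp of
+ * the last previous-CPU avoidance, the current start CPU, the rotation
+ * interval in jiffies, and a lock serializing interval rollover.
+ */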
+struct find_first_cpu_bit_env {
+	unsigned long *avoid_prev_cpu_last;
+	int *rotate_cpu_start;
+	int interval;
+	spinlock_t *rotate_lock;
+};
+
+int
+find_first_cpu_bit(struct task_struct *p, const cpumask_t *search_cpus,
+		   struct sched_group *sg_target, bool *avoid_prev_cpu,
+		   bool *do_rotate, struct find_first_cpu_bit_env *env);
+#endif