sched: utilize big cluster CPUs evenly
Utilize big cluster CPUs evenly by rotating the first candidate CPU of
the placement search and by avoiding the task's previous CPU once per
second, so that wakeups do not keep piling up on the same big CPU.
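
The rotation is implemented by find_first_cpu_bit(): a shared start CPU
is advanced at most once per env->interval jiffies (HZ here, i.e. once
per second) under a spinlock, the task's previous CPU is flagged to be
avoided for that one search, and the candidate scan then begins from the
rotated CPU instead of always from the first bit of the mask.  A
simplified sketch of the rotation step follows; rotate_start_cpu() is a
hypothetical helper shown only for illustration and is not part of this
patch:

#include <linux/cpumask.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>

/*
 * Illustration only -- not part of this patch.  Advance the shared
 * start CPU at most once per 'interval' jiffies so that successive
 * searches over 'cluster' begin at different CPUs.
 */
static int rotate_start_cpu(const cpumask_t *cluster, int *start,
			    unsigned long *last, int interval,
			    spinlock_t *lock)
{
	int next = *start;

	if (time_after(jiffies, *last + interval)) {
		spin_lock(lock);
		/* Re-check under the lock; another CPU may have rotated. */
		if (time_after(jiffies, *last + interval)) {
			*last = jiffies;
			next = cpumask_next(*start, cluster);
			if (next >= nr_cpu_ids)		/* wrap around */
				next = cpumask_first(cluster);
			*start = next;
		} else {
			next = *start;
		}
		spin_unlock(lock);
	}

	return next;
}

In the patch itself find_first_cpu_bit() returns the value that is fed
to cpumask_next(), which is why the real code stores cpumask_first() - 1
on wrap-around rather than the first CPU itself.
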
Change-Id: Idecb3df0080bfec0ec8fd86b1c5b0449ccfc551b
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 08fd4be..d43ad69 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9624,3 +9624,50 @@
#endif /* CONFIG_SCHED_WALT */
__read_mostly bool sched_predl = 1;
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+int
+find_first_cpu_bit(struct task_struct *p, const cpumask_t *search_cpus,
+ struct sched_group *sg_target, bool *avoid_prev_cpu,
+ bool *do_rotate, struct find_first_cpu_bit_env *env)
+{
+ int i = -1;
+ unsigned long mcc;
+ int cpu = smp_processor_id();
+
+ mcc = cpu_rq(cpu)->rd->max_cpu_capacity.val;
+
+ /* do rotation only for big CPUs. */
+ *do_rotate = (cpumask_first(search_cpus) < nr_cpu_ids &&
+ capacity_orig_of(cpumask_first(search_cpus)) == mcc);
+
+ if (*do_rotate) {
+ if (time_before_eq(jiffies, *env->avoid_prev_cpu_last +
+ env->interval))
+ return *env->rotate_cpu_start;
+
+ spin_lock(env->rotate_lock);
+ if (time_after(jiffies, *env->avoid_prev_cpu_last +
+ env->interval)) {
+ cpumask_t tmpmask;
+
+ *env->avoid_prev_cpu_last = jiffies;
+ *avoid_prev_cpu = true;
+
+ cpumask_copy(&tmpmask, sched_group_cpus(sg_target));
+ cpumask_andnot(&tmpmask, &tmpmask, cpu_isolated_mask);
+
+ i = cpumask_next(*env->rotate_cpu_start, &tmpmask);
+ if (i >= nr_cpu_ids)
+ i = cpumask_first(&tmpmask) - 1;
+ /* Change start CPU every interval. */
+ *env->rotate_cpu_start = i;
+ } else {
+ i = *env->rotate_cpu_start;
+ }
+ spin_unlock(env->rotate_lock);
+ }
+
+ return i;
+}
+#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d61c570..7b459ca 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5034,6 +5034,19 @@
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+#ifdef CONFIG_SCHED_CORE_ROTATE
+static int rotate_cpu_start;
+static DEFINE_SPINLOCK(rotate_lock);
+static unsigned long avoid_prev_cpu_last;
+
+static struct find_first_cpu_bit_env first_cpu_bit_env = {
+ .avoid_prev_cpu_last = &avoid_prev_cpu_last,
+ .rotate_cpu_start = &rotate_cpu_start,
+ .interval = HZ,
+ .rotate_lock = &rotate_lock,
+};
+#endif
+
#ifdef CONFIG_NO_HZ_COMMON
/*
* per rq 'load' arrray crap; XXX kill this.
@@ -6883,7 +6896,7 @@
int target_cpu, targeted_cpus = 0;
unsigned long task_util_boosted = 0, curr_util = 0;
long new_util, new_util_cum;
- int i;
+ int i = -1;
int ediff = -1;
int cpu = smp_processor_id();
int min_util_cpu = -1;
@@ -6902,8 +6915,17 @@
enum sched_boost_policy placement_boost = task_sched_boost(p) ?
sched_boost_policy() : SCHED_BOOST_NONE;
struct related_thread_group *grp;
+ cpumask_t search_cpus;
+ int prev_cpu = task_cpu(p);
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ bool do_rotate = false;
+ bool avoid_prev_cpu = false;
+#else
+#define do_rotate false
+#define avoid_prev_cpu false
+#endif
- sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
+ sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
if (!sd)
return target;
@@ -6922,7 +6944,7 @@
rtg_target = &grp->preferred_cluster->cpus;
if (sync && bias_to_waker_cpu(p, cpu, rtg_target)) {
- trace_sched_task_util_bias_to_waker(p, task_cpu(p),
+ trace_sched_task_util_bias_to_waker(p, prev_cpu,
task_util(p), cpu, cpu, 0, need_idle);
return cpu;
}
@@ -6985,14 +7007,30 @@
target_cpu = -1;
+ cpumask_copy(&search_cpus, tsk_cpus_allowed(p));
+ cpumask_and(&search_cpus, &search_cpus,
+ sched_group_cpus(sg_target));
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ i = find_first_cpu_bit(p, &search_cpus, sg_target,
+ &avoid_prev_cpu, &do_rotate,
+ &first_cpu_bit_env);
+
+retry:
+#endif
/* Find cpu with sufficient capacity */
- for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
+ while ((i = cpumask_next(i, &search_cpus)) < nr_cpu_ids) {
+ cpumask_clear_cpu(i, &search_cpus);
+
if (cpu_isolated(i))
continue;
if (isolated_candidate == -1)
isolated_candidate = i;
+ if (avoid_prev_cpu && i == prev_cpu)
+ continue;
+
if (is_reserved(i))
continue;
@@ -7061,9 +7099,9 @@
new_util ||
(target_cpu_util ==
new_util &&
- (i == task_cpu(p) ||
+ (i == prev_cpu ||
(target_cpu !=
- task_cpu(p) &&
+ prev_cpu &&
target_cpu_new_util_cum >
new_util_cum))))) {
min_idle_idx_cpu = i;
@@ -7075,6 +7113,9 @@
}
} else if (cpu_rq(i)->nr_running) {
target_cpu = i;
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ do_rotate = false;
+#endif
break;
}
} else if (!need_idle) {
@@ -7114,6 +7155,19 @@
}
}
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ if (do_rotate) {
+ /*
+		 * We started the iteration somewhere in the middle of the
+		 * cpumask.  Iterate once more, from bit 0 up to the
+		 * previous starting point.
+ */
+ do_rotate = false;
+ i = -1;
+ goto retry;
+ }
+#endif
+
if (target_cpu == -1 ||
(target_cpu != min_util_cpu && !safe_to_pack &&
!is_packing_eligible(p, task_util_boosted, sg_target,
@@ -7148,7 +7202,8 @@
}
}
- if (target_cpu != task_cpu(p) && !cpu_isolated(task_cpu(p))) {
+ if (target_cpu != task_cpu(p) && !avoid_prev_cpu &&
+ !cpu_isolated(task_cpu(p))) {
struct energy_env eenv = {
.util_delta = task_util(p),
.src_cpu = task_cpu(p),
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 35382df..7feba85 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1711,6 +1711,19 @@
return NULL;
}
+#ifdef CONFIG_SCHED_CORE_ROTATE
+static int rotate_cpu_start;
+static DEFINE_SPINLOCK(rotate_lock);
+static unsigned long avoid_prev_cpu_last;
+
+static struct find_first_cpu_bit_env first_cpu_bit_env = {
+ .avoid_prev_cpu_last = &avoid_prev_cpu_last,
+ .rotate_cpu_start = &rotate_cpu_start,
+ .interval = HZ,
+ .rotate_lock = &rotate_lock,
+};
+#endif
+
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
static int find_lowest_rq(struct task_struct *task)
@@ -1719,7 +1732,7 @@
struct sched_group *sg, *sg_target;
struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
int this_cpu = smp_processor_id();
- int cpu, best_cpu;
+ int cpu = -1, best_cpu;
struct cpumask search_cpu, backup_search_cpu;
unsigned long cpu_capacity;
unsigned long best_capacity;
@@ -1730,6 +1743,13 @@
int best_cpu_idle_idx = INT_MAX;
int cpu_idle_idx = -1;
bool placement_boost;
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ bool do_rotate = false;
+ bool avoid_prev_cpu = false;
+#else
+#define do_rotate false
+#define avoid_prev_cpu false
+#endif
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
@@ -1761,6 +1781,10 @@
sg = sd->groups;
do {
+ if (!cpumask_intersects(lowest_mask,
+ sched_group_cpus(sg)))
+ continue;
+
cpu = group_first_cpu(sg);
cpu_capacity = capacity_orig_of(cpu);
@@ -1778,20 +1802,37 @@
} while (sg = sg->next, sg != sd->groups);
rcu_read_unlock();
- cpumask_and(&search_cpu, lowest_mask,
- sched_group_cpus(sg_target));
- cpumask_copy(&backup_search_cpu, lowest_mask);
- cpumask_andnot(&backup_search_cpu, &backup_search_cpu,
- &search_cpu);
+ if (sg_target) {
+ cpumask_and(&search_cpu, lowest_mask,
+ sched_group_cpus(sg_target));
+ cpumask_copy(&backup_search_cpu, lowest_mask);
+ cpumask_andnot(&backup_search_cpu, &backup_search_cpu,
+ &search_cpu);
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ cpu = find_first_cpu_bit(task, &search_cpu, sg_target,
+ &avoid_prev_cpu, &do_rotate,
+ &first_cpu_bit_env);
+#endif
+ } else {
+ cpumask_copy(&search_cpu, lowest_mask);
+ cpumask_clear(&backup_search_cpu);
+ cpu = -1;
+ }
retry:
- for_each_cpu(cpu, &search_cpu) {
+ while ((cpu = cpumask_next(cpu, &search_cpu)) < nr_cpu_ids) {
+ cpumask_clear_cpu(cpu, &search_cpu);
+
/*
* Don't use capcity_curr_of() since it will
* double count rt task load.
*/
util = cpu_util(cpu);
+ if (avoid_prev_cpu && cpu == task_cpu(task))
+ continue;
+
if (__cpu_overutilized(cpu, util + tutil))
continue;
@@ -1838,6 +1879,19 @@
best_cpu = cpu;
}
+#ifdef CONFIG_SCHED_CORE_ROTATE
+ if (do_rotate) {
+ /*
+		 * We started the iteration somewhere in the middle of the
+		 * cpumask.  Iterate once more, from bit 0 up to the
+		 * previous starting point.
+ */
+ do_rotate = false;
+ cpu = -1;
+ goto retry;
+ }
+#endif
+
if (best_cpu != -1) {
return best_cpu;
} else if (!cpumask_empty(&backup_search_cpu)) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a5b1377..7ac731d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2841,3 +2841,17 @@
{
return sched_feat(ENERGY_AWARE);
}
+
+#ifdef CONFIG_SCHED_CORE_ROTATE
+struct find_first_cpu_bit_env {
+ unsigned long *avoid_prev_cpu_last;
+ int *rotate_cpu_start;
+ int interval;
+ spinlock_t *rotate_lock;
+};
+
+int
+find_first_cpu_bit(struct task_struct *p, const cpumask_t *search_cpus,
+ struct sched_group *sg_target, bool *avoid_prev_cpu,
+ bool *do_rotate, struct find_first_cpu_bit_env *env);
+#endif