sched: walt: add big task rotation

When the number of big tasks in the system is at least the number of
CPUs, a misfit task stuck on a little CPU may wait a long time for a
big CPU. Add an optional "big task rotation" mode to WALT to spread
big-CPU time across such tasks.

core_ctl feeds the running average of big tasks to
walt_rotation_checkpoint(), which enables rotation only when the new
sched_walt_rotate_big_tasks sysctl is set on an HMP system, no sched
boost is active, and the big task average is at least the number of
possible CPUs. When rotation is enabled and no upmigration is
performed, check_for_migration() calls walt_check_for_rotation(),
which picks the little CPU whose misfit task has waited longest since
enqueue and the big CPU whose current fair task has run longest (at
least 16ms) while alone on its runqueue, reserves both CPUs, and
swaps the two tasks with migrate_swap() from a per-CPU worker.

To support this, rename last_cpu_selected_ts to last_enqueued_ts and
update it on every enqueue, move the migrate_swap() declaration out
of CONFIG_NUMA_BALANCING, treat rotation like sched boost in the
early detection path, and report the rotation state in the
sched_load_to_gov trace event.
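Usage (assuming the sysctl is registered in the kernel table, so it
shows up under /proc/sys/kernel/ like its neighbours in
kernel/sysctl.c):

  echo 1 > /proc/sys/kernel/sched_walt_rotate_big_tasks

The value is clamped to 0 or 1 by proc_dointvec_minmax; rotation
additionally requires an HMP system and no active sched boost.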
Change-Id: Ice980dde340bff8362b4f2adc679423d8f54e8e4
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c770d..290e2b2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1777,7 +1777,7 @@
u32 init_load_pct;
u64 last_wake_ts;
u64 last_switch_out_ts;
- u64 last_cpu_selected_ts;
+ u64 last_enqueued_ts;
struct related_thread_group *grp;
struct list_head grp_list;
u64 cpu_cycles;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 12bd032..3e97574 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -34,6 +34,7 @@
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_sched_group_upmigrate_pct;
extern unsigned int sysctl_sched_group_downmigrate_pct;
+extern unsigned int sysctl_sched_walt_rotate_big_tasks;
extern int
walt_proc_update_handler(struct ctl_table *table, int write,
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0125cde..63f2baf 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -596,8 +596,8 @@
TRACE_EVENT(sched_load_to_gov,
- TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, u64 freq_aggr_thresh, u64 load, int policy),
- TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy),
+ TP_PROTO(struct rq *rq, u64 aggr_grp_load, u32 tt_load, u64 freq_aggr_thresh, u64 load, int policy, int big_task_rotation),
+ TP_ARGS(rq, aggr_grp_load, tt_load, freq_aggr_thresh, load, policy, big_task_rotation),
TP_STRUCT__entry(
__field( int, cpu )
@@ -612,6 +612,7 @@
__field( u64, grp_nt_ps )
__field( u64, pl )
__field( u64, load )
+ __field( int, big_task_rotation )
),
TP_fast_assign(
@@ -627,13 +628,15 @@
__entry->grp_nt_ps = rq->grp_time.nt_prev_runnable_sum;
__entry->pl = rq->walt_stats.pred_demands_sum;
__entry->load = load;
+ __entry->big_task_rotation = big_task_rotation;
),
- TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu",
+ TP_printk("cpu=%d policy=%d ed_task_pid=%d aggr_grp_load=%llu freq_aggr_thresh=%llu tt_load=%llu rq_ps=%llu grp_rq_ps=%llu nt_ps=%llu grp_nt_ps=%llu pl=%llu load=%llu big_task_rotation=%d",
__entry->cpu, __entry->policy, __entry->ed_task_pid,
__entry->aggr_grp_load, __entry->freq_aggr_thresh,
__entry->tt_load, __entry->rq_ps, __entry->grp_rq_ps,
- __entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load)
+ __entry->nt_ps, __entry->grp_nt_ps, __entry->pl, __entry->load,
+ __entry->big_task_rotation)
);
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7b02ae6..31b45b7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -775,6 +775,7 @@
if (!(flags & ENQUEUE_RESTORE))
sched_info_queued(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
+ walt_update_last_enqueue(p);
trace_sched_enq_deq_task(p, 1, cpumask_bits(&p->cpus_allowed)[0]);
}
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
index cc5a97c..c0a8a2a 100644
--- a/kernel/sched/core_ctl.c
+++ b/kernel/sched/core_ctl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2014-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -482,6 +482,7 @@
sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
&max_nr, &big_max_nr);
+ walt_rotation_checkpoint(big_avg);
spin_lock_irqsave(&state_lock, flags);
for_each_cluster(cluster, index) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1ff2e5e..55c3957 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11430,6 +11430,141 @@
return rc;
}
+#ifdef CONFIG_SCHED_WALT
+struct walt_rotate_work {
+ struct work_struct w;
+ struct task_struct *src_task;
+ struct task_struct *dst_task;
+ int src_cpu;
+ int dst_cpu;
+};
+
+static DEFINE_PER_CPU(struct walt_rotate_work, walt_rotate_works);
+
+static void walt_rotate_work_func(struct work_struct *work)
+{
+ struct walt_rotate_work *wr = container_of(work,
+ struct walt_rotate_work, w);
+
+ migrate_swap(wr->src_task, wr->dst_task);
+
+ put_task_struct(wr->src_task);
+ put_task_struct(wr->dst_task);
+
+ clear_reserved(wr->src_cpu);
+ clear_reserved(wr->dst_cpu);
+}
+
+void walt_rotate_work_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct walt_rotate_work *wr = &per_cpu(walt_rotate_works, i);
+
+ INIT_WORK(&wr->w, walt_rotate_work_func);
+ }
+}
+
+#define WALT_ROTATION_THRESHOLD_NS 16000000
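+/*
+ * Check whether the misfit task on this little CPU should swap with
+ * the current task on a big CPU. The swap happens only when this
+ * CPU's misfit task has waited longest among all little CPUs, and it
+ * targets the big CPU whose current fair task has run the longest,
+ * so every big task eventually gets its turn on a big CPU.
+ */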
+static void walt_check_for_rotation(struct rq *src_rq)
+{
+ u64 wc, wait, max_wait = 0, run, max_run = 0;
+ int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
+ int i, src_cpu = cpu_of(src_rq);
+ struct rq *dst_rq;
+ struct walt_rotate_work *wr = NULL;
+
+ if (!walt_rotation_enabled)
+ return;
+
+ if (got_boost_kick())
+ return;
+
+ if (is_max_capacity_cpu(src_cpu))
+ return;
+
+ wc = ktime_get_ns();
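+
+ /*
+ * Pass 1: find the little CPU whose running misfit task has waited
+ * longest since enqueue. The scan stops at the first max capacity
+ * CPU, relying on little CPUs being numbered first.
+ */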
+ for_each_possible_cpu(i) {
+ struct rq *rq = cpu_rq(i);
+
+ if (is_max_capacity_cpu(i))
+ break;
+
+ if (is_reserved(i))
+ continue;
+
+ if (!rq->misfit_task || rq->curr->sched_class !=
+ &fair_sched_class)
+ continue;
+
+ wait = wc - rq->curr->last_enqueued_ts;
+ if (wait > max_wait) {
+ max_wait = wait;
+ deserved_cpu = i;
+ }
+ }
+
+ if (deserved_cpu != src_cpu)
+ return;
+
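+ /*
+ * Pass 2: find the big CPU whose current fair task has run longest
+ * since enqueue, provided it has run for at least
+ * WALT_ROTATION_THRESHOLD_NS and is alone on its runqueue.
+ */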
+ for_each_possible_cpu(i) {
+ struct rq *rq = cpu_rq(i);
+
+ if (!is_max_capacity_cpu(i))
+ continue;
+
+ if (is_reserved(i))
+ continue;
+
+ if (rq->curr->sched_class != &fair_sched_class)
+ continue;
+
+ if (rq->nr_running > 1)
+ continue;
+
+ run = wc - rq->curr->last_enqueued_ts;
+
+ if (run < WALT_ROTATION_THRESHOLD_NS)
+ continue;
+
+ if (run > max_run) {
+ max_run = run;
+ dst_cpu = i;
+ }
+ }
+
+ if (dst_cpu == nr_cpu_ids)
+ return;
+
+ dst_rq = cpu_rq(dst_cpu);
+
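+ /*
+ * Revalidate the destination under both runqueue locks, reserve
+ * both CPUs and hand the swap off to the per-CPU work, since
+ * migrate_swap() may sleep. The work drops the reservations.
+ */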
+ double_rq_lock(src_rq, dst_rq);
+ if (dst_rq->curr->sched_class == &fair_sched_class) {
+ get_task_struct(src_rq->curr);
+ get_task_struct(dst_rq->curr);
+
+ mark_reserved(src_cpu);
+ mark_reserved(dst_cpu);
+ wr = &per_cpu(walt_rotate_works, src_cpu);
+
+ wr->src_task = src_rq->curr;
+ wr->dst_task = dst_rq->curr;
+
+ wr->src_cpu = src_cpu;
+ wr->dst_cpu = dst_cpu;
+ }
+ double_rq_unlock(src_rq, dst_rq);
+
+ if (wr)
+ queue_work_on(src_cpu, system_highpri_wq, &wr->w);
+}
+#else
+static inline void walt_check_for_rotation(struct rq *rq)
+{
+}
+#endif
+
static DEFINE_RAW_SPINLOCK(migration_lock);
void check_for_migration(struct rq *rq, struct task_struct *p)
{
@@ -11459,6 +11594,8 @@
&rq->active_balance_work);
return;
}
+ } else {
+ walt_check_for_rotation(rq);
}
raw_spin_unlock(&migration_lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5508248..e0aa30d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -943,8 +943,8 @@
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
-extern int migrate_swap(struct task_struct *, struct task_struct *);
#endif /* CONFIG_NUMA_BALANCING */
+extern int migrate_swap(struct task_struct *cur, struct task_struct *p);
#ifdef CONFIG_SMP
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index b7da03f..23fd885 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -88,6 +88,9 @@
__read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);
+unsigned int sysctl_sched_walt_rotate_big_tasks;
+unsigned int walt_rotation_enabled;
+
/*
* sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
* associated with them. This is required for atomic update of those variables
@@ -316,7 +319,8 @@
struct task_struct *p;
int loop_max = 10;
- if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running)
+ if ((!walt_rotation_enabled && sched_boost_policy() ==
+ SCHED_BOOST_NONE) || !rq->cfs.h_nr_running)
return 0;
rq->ed_task = NULL;
@@ -487,7 +491,7 @@
done:
trace_sched_load_to_gov(rq, aggr_grp_load, tt_load, freq_aggr_thresh,
- load, reporting_policy);
+ load, reporting_policy, walt_rotation_enabled);
return load;
}
@@ -2019,7 +2023,7 @@
wallclock = ktime_get_ns();
p->ravg.mark_start = p->last_wake_ts = wallclock;
- p->last_cpu_selected_ts = wallclock;
+ p->last_enqueued_ts = wallclock;
p->last_switch_out_ts = 0;
update_task_cpu_cycles(p, cpu_of(rq));
}
@@ -3143,6 +3147,19 @@
core_ctl_check(this_rq()->window_start);
}
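+/*
+ * Called from core_ctl with the running average of big tasks.
+ * Rotation is enabled only on HMP systems when the
+ * sched_walt_rotate_big_tasks sysctl is set, no sched boost is
+ * active, and there are at least as many big tasks as CPUs.
+ */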
+void walt_rotation_checkpoint(int nr_big)
+{
+ if (!hmp_capable())
+ return;
+
+ if (!sysctl_sched_walt_rotate_big_tasks || sched_boost() != NO_BOOST) {
+ walt_rotation_enabled = 0;
+ return;
+ }
+
+ walt_rotation_enabled = nr_big >= num_possible_cpus();
+}
+
int walt_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -3178,6 +3195,8 @@
cpumask_set_cpu(cpu_of(rq), &rq->freq_domain_cpumask);
init_irq_work(&walt_migration_irq_work, walt_irq_work);
init_irq_work(&walt_cpufreq_irq_work, walt_irq_work);
+ walt_rotate_work_init();
+
rq->walt_stats.cumulative_runnable_avg = 0;
rq->window_start = 0;
rq->cum_window_start = 0;
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index c8780cf..da53ea4 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -291,9 +291,20 @@
return sysctl_sched_is_big_little ? prev_cpu : min_power_cpu;
}
+static inline void walt_update_last_enqueue(struct task_struct *p)
+{
+ p->last_enqueued_ts = ktime_get_ns();
+}
+extern void walt_rotate_work_init(void);
+extern void walt_rotation_checkpoint(int nr_big);
+extern unsigned int walt_rotation_enabled;
+
#else /* CONFIG_SCHED_WALT */
static inline void walt_sched_init(struct rq *rq) { }
+static inline void walt_rotate_work_init(void) { }
+static inline void walt_rotation_checkpoint(int nr_big) { }
+static inline void walt_update_last_enqueue(struct task_struct *p) { }
static inline void update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime) { }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a01c821..b057784 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -325,6 +325,15 @@
.extra1 = &zero,
.extra2 = &three,
},
+ {
+ .procname = "sched_walt_rotate_big_tasks",
+ .data = &sysctl_sched_walt_rotate_big_tasks,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
#endif
{
.procname = "sched_upmigrate",