sched: remove wait_runtime limit
Remove the wait_runtime-limit fields and the code depending on them, now
that the math has been changed over to rely on the vruntime metric.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e5c457..353630d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,7 +905,6 @@
u64 vruntime;
u64 prev_sum_exec_runtime;
u64 wait_start_fair;
- u64 sleep_start_fair;
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a4ac0b..21cc3b2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -180,7 +180,6 @@
u64 exec_clock;
u64 min_vruntime;
s64 wait_runtime;
- u64 sleeper_bonus;
unsigned long wait_runtime_overruns, wait_runtime_underruns;
struct rb_root tasks_timeline;
@@ -673,19 +672,6 @@
}
#endif
-static u64 div64_likely32(u64 divident, unsigned long divisor)
-{
-#if BITS_PER_LONG == 32
- if (likely(divident <= 0xffffffffULL))
- return (u32)divident / divisor;
- do_div(divident, divisor);
-
- return divident;
-#else
- return divident / divisor;
-#endif
-}
-
#if BITS_PER_LONG == 32
# define WMULT_CONST (~0UL)
#else
@@ -1016,8 +1002,6 @@
if (p->se.wait_start_fair)
p->se.wait_start_fair -= fair_clock_offset;
- if (p->se.sleep_start_fair)
- p->se.sleep_start_fair -= fair_clock_offset;
#ifdef CONFIG_SCHEDSTATS
if (p->se.wait_start)
@@ -1592,7 +1576,6 @@
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.wait_runtime = 0;
- p->se.sleep_start_fair = 0;
#ifdef CONFIG_SCHEDSTATS
p->se.wait_start = 0;
@@ -6582,7 +6565,6 @@
p->se.wait_runtime = 0;
p->se.exec_start = 0;
p->se.wait_start_fair = 0;
- p->se.sleep_start_fair = 0;
#ifdef CONFIG_SCHEDSTATS
p->se.wait_start = 0;
p->se.sleep_start = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 62965f0..3350169 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -148,7 +148,6 @@
P(wait_runtime);
P(wait_runtime_overruns);
P(wait_runtime_underruns);
- P(sleeper_bonus);
#undef P
print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
@@ -272,7 +271,6 @@
P(se.wait_runtime);
P(se.wait_start_fair);
P(se.exec_start);
- P(se.sleep_start_fair);
P(se.vruntime);
P(se.sum_exec_runtime);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72f202a..a94189c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -249,41 +249,11 @@
return period;
}
-static inline void
-limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- long limit = sysctl_sched_runtime_limit;
-
- /*
- * Niced tasks have the same history dynamic range as
- * non-niced tasks:
- */
- if (unlikely(se->wait_runtime > limit)) {
- se->wait_runtime = limit;
- schedstat_inc(se, wait_runtime_overruns);
- schedstat_inc(cfs_rq, wait_runtime_overruns);
- }
- if (unlikely(se->wait_runtime < -limit)) {
- se->wait_runtime = -limit;
- schedstat_inc(se, wait_runtime_underruns);
- schedstat_inc(cfs_rq, wait_runtime_underruns);
- }
-}
-
-static inline void
-__add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
- se->wait_runtime += delta;
- schedstat_add(se, sum_wait_runtime, delta);
- limit_wait_runtime(cfs_rq, se);
-}
-
static void
add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
{
- schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
- __add_wait_runtime(cfs_rq, se, delta);
- schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+ se->wait_runtime += delta;
+ schedstat_add(cfs_rq, wait_runtime, delta);
}
/*
@@ -294,7 +264,7 @@
__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
unsigned long delta_exec)
{
- unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
+ unsigned long delta_fair, delta_mine, delta_exec_weighted;
struct load_weight *lw = &cfs_rq->load;
unsigned long load = lw->weight;
@@ -318,14 +288,6 @@
delta_fair = calc_delta_fair(delta_exec, lw);
delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
- if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) {
- delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
- delta = min(delta, (unsigned long)(
- (long)sysctl_sched_runtime_limit - curr->wait_runtime));
- cfs_rq->sleeper_bonus -= delta;
- delta_mine -= delta;
- }
-
cfs_rq->fair_clock += delta_fair;
/*
* We executed delta_exec amount of time on the CPU,
@@ -461,58 +423,8 @@
* Scheduling class queueing methods:
*/
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
- unsigned long delta_fair)
-{
- unsigned long load = cfs_rq->load.weight;
- long prev_runtime;
-
- /*
- * Do not boost sleepers if there's too much bonus 'in flight'
- * already:
- */
- if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
- return;
-
- if (sched_feat(SLEEPER_LOAD_AVG))
- load = rq_of(cfs_rq)->cpu_load[2];
-
- /*
- * Fix up delta_fair with the effect of us running
- * during the whole sleep period:
- */
- if (sched_feat(SLEEPER_AVG))
- delta_fair = div64_likely32((u64)delta_fair * load,
- load + se->load.weight);
-
- delta_fair = calc_weighted(delta_fair, se);
-
- prev_runtime = se->wait_runtime;
- __add_wait_runtime(cfs_rq, se, delta_fair);
- delta_fair = se->wait_runtime - prev_runtime;
-
- /*
- * Track the amount of bonus we've given to sleepers:
- */
- cfs_rq->sleeper_bonus += delta_fair;
-}
-
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- struct task_struct *tsk = task_of(se);
- unsigned long delta_fair;
-
- if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) ||
- !sched_feat(FAIR_SLEEPERS))
- return;
-
- delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
- (u64)(cfs_rq->fair_clock - se->sleep_start_fair));
-
- __enqueue_sleeper(cfs_rq, se, delta_fair);
-
- se->sleep_start_fair = 0;
-
#ifdef CONFIG_SCHEDSTATS
if (se->sleep_start) {
u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
@@ -544,6 +456,8 @@
* time that the task spent sleeping:
*/
if (unlikely(prof_on == SLEEP_PROFILING)) {
+ struct task_struct *tsk = task_of(se);
+
profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
delta >> 20);
}
@@ -604,7 +518,6 @@
{
update_stats_dequeue(cfs_rq, se);
if (sleep) {
- se->sleep_start_fair = cfs_rq->fair_clock;
#ifdef CONFIG_SCHEDSTATS
if (entity_is_task(se)) {
struct task_struct *tsk = task_of(se);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9b1b0d4..97b15c2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -266,17 +266,6 @@
},
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_runtime_limit_ns",
- .data = &sysctl_sched_runtime_limit,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &min_sched_granularity_ns,
- .extra2 = &max_sched_granularity_ns,
- },
- {
- .ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
.maxlen = sizeof(unsigned int),