sched: fix wakeup granularity for buddies
The wakeup buddy logic didn't use the same wakeup granularity logic as the
wakeup preemption did, this might cause the ->next buddy to be selected past
the point where we would have preempted had the task been a single running
instance.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 86a9337..b01f8e7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@
/*
* SCHED_OTHER wake-up granularity.
- * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/
-unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
@@ -629,20 +629,16 @@
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+
static struct sched_entity *
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- s64 diff, gran;
-
if (!cfs_rq->next)
return se;
- diff = cfs_rq->next->vruntime - se->vruntime;
- if (diff < 0)
- return se;
-
- gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
- if (diff > gran)
+ if (wakeup_preempt_entity(cfs_rq->next, se) != 0)
return se;
return cfs_rq->next;
@@ -1101,6 +1097,48 @@
}
#endif /* CONFIG_SMP */
+static unsigned long wakeup_gran(struct sched_entity *se)
+{
+ unsigned long gran = sysctl_sched_wakeup_granularity;
+
+ /*
+ * More easily preempt - nice tasks, while not making
+ * it harder for + nice tasks.
+ */
+ if (unlikely(se->load.weight > NICE_0_LOAD))
+ gran = calc_delta_fair(gran, &se->load);
+
+ return gran;
+}
+
+/*
+ * Should 'se' preempt 'curr'.
+ *
+ * |s1
+ * |s2
+ * |s3
+ * g
+ * |<--->|c
+ *
+ * w(c, s1) = -1
+ * w(c, s2) = 0
+ * w(c, s3) = 1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+ s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+ if (vdiff < 0)
+ return -1;
+
+ gran = wakeup_gran(curr);
+ if (vdiff > gran)
+ return 1;
+
+ return 0;
+}
/*
* Preempt the current task with a newly woken task if needed:
@@ -1110,7 +1148,6 @@
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;
- unsigned long gran;
if (unlikely(rt_prio(p->prio))) {
update_rq_clock(rq);
@@ -1140,15 +1177,7 @@
pse = parent_entity(pse);
}
- gran = sysctl_sched_wakeup_granularity;
- /*
- * More easily preempt - nice tasks, while not making
- * it harder for + nice tasks.
- */
- if (unlikely(se->load.weight > NICE_0_LOAD))
- gran = calc_delta_fair(gran, &se->load);
-
- if (pse->vruntime + gran < se->vruntime)
+ if (wakeup_preempt_entity(se, pse) == 1)
resched_task(curr);
}