sched: reintroduce cache-hot affinity

reintroduce a simplified version of cache-hot/cold scheduling
affinity. This improves performance with certain SMP workloads,
such as sysbench.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8be5b57..fcc9a5a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1415,6 +1415,7 @@
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
+extern unsigned int sysctl_sched_migration_cost;
 #endif
 
 extern unsigned int sysctl_sched_compat_yield;
diff --git a/kernel/sched.c b/kernel/sched.c
index 791dd08..089d8b1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2119,6 +2119,17 @@
 }
 
 /*
+ * Is this task likely cache-hot:
+ */
+static inline int
+task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+{
+	s64 delta = now - p->se.exec_start;
+
+	return delta < (long long)sysctl_sched_migration_cost;
+}
+
+/*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
@@ -2139,6 +2150,22 @@
 	if (task_running(rq, p))
 		return 0;
 
+	/*
+	 * Aggressive migration if:
+	 * 1) task is cache cold, or
+	 * 2) too many balance attempts have failed.
+	 */
+
+	if (sd->nr_balance_failed > sd->cache_nice_tries) {
+#ifdef CONFIG_SCHEDSTATS
+		if (task_hot(p, rq->clock, sd))
+			schedstat_inc(sd, lb_hot_gained[idle]);
+#endif
+		return 1;
+	}
+
+	if (task_hot(p, rq->clock, sd))
+		return 0;
 	return 1;
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cea1fa3..a17b785 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -74,6 +74,8 @@
  */
 const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
 
+const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+
 /**************************************************************
  * CFS operations on generic schedulable entities:
  */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 230ca4e..ec14aa8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -277,6 +277,14 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_migration_cost",
+		.data		= &sysctl_sched_migration_cost,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,