sched: reintroduce the sched_min_granularity tunable

we lost the sched_min_granularity tunable to a clever optimization
that uses the sched_latency/min_granularity ratio - but the ratio
is quite unintuitive to users and can also crash the kernel if the
ratio is set to 0. So reintroduce the min_granularity tunable,
while keeping the ratio maintained internally.

no functionality changed.

[ mingo@elte.hu: some fixlets. ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 415e5c3..ca198a7 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -211,7 +211,7 @@
 #define PN(x) \
 	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 	PN(sysctl_sched_latency);
-	PN(sysctl_sched_nr_latency);
+	PN(sysctl_sched_min_granularity);
 	PN(sysctl_sched_wakeup_granularity);
 	PN(sysctl_sched_batch_wakeup_granularity);
 	PN(sysctl_sched_child_runs_first);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8763bee..c495dcf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -35,18 +35,23 @@
 const_debug unsigned int sysctl_sched_latency = 20000000ULL;
 
 /*
+ * Minimal preemption granularity for CPU-bound tasks:
+ * (default: 1 msec, units: nanoseconds)
+ */
+const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL;
+
+/*
+ * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
+ */
+const_debug unsigned int sched_nr_latency = 20;
+
+/*
  * After fork, child runs first. (default) If set to 0 then
  * parent will (try to) run first.
  */
 const_debug unsigned int sysctl_sched_child_runs_first = 1;
 
 /*
- * Minimal preemption granularity for CPU-bound tasks:
- * (default: 2 msec, units: nanoseconds)
- */
-const_debug unsigned int sysctl_sched_nr_latency = 20;
-
-/*
  * sys_sched_yield() compat mode
  *
  * This option switches the agressive yield implementation of the
@@ -212,6 +217,22 @@
  * Scheduling class statistics methods:
  */
 
+#ifdef CONFIG_SCHED_DEBUG
+int sched_nr_latency_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	if (ret || !write)
+		return ret;
+
+	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
+					sysctl_sched_min_granularity);
+
+	return 0;
+}
+#endif
 
 /*
  * The idea is to set a period in which each task runs once.
@@ -224,7 +245,7 @@
 static u64 __sched_period(unsigned long nr_running)
 {
 	u64 period = sysctl_sched_latency;
-	unsigned long nr_latency = sysctl_sched_nr_latency;
+	unsigned long nr_latency = sched_nr_latency;
 
 	if (unlikely(nr_running > nr_latency)) {
 		period *= nr_running;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3b4efbe..6e3b63c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -235,11 +235,14 @@
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_nr_latency",
-		.data		= &sysctl_sched_nr_latency,
+		.procname	= "sched_min_granularity_ns",
+		.data		= &sysctl_sched_min_granularity,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &sched_nr_latency_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
 	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -247,7 +250,7 @@
 		.data		= &sysctl_sched_latency,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
+		.proc_handler	= &sched_nr_latency_handler,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &min_sched_granularity_ns,
 		.extra2		= &max_sched_granularity_ns,