Merge branch 'timers/nohz' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz-full enabling patches from Frederic Weisbecker:

  " This handles perf and CPUs that get more than one task and fix posix cpu timers
    handling.

    This can finally stop the tick."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
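
As a rough sketch of how the pieces below compose (illustrative only, not
part of the patches; the stubbed predicates stand in for the real kernel
helpers and ignore their arguments and per-cpu state), the tick-stop
decision on a full dynticks CPU is a chain of per-subsystem vetoes:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-ins for the predicates introduced by this series */
    static bool sched_can_stop_tick(void)            { return true; } /* nr_running <= 1 */
    static bool posix_cpu_timers_can_stop_tick(void) { return true; } /* no cputime expiry armed */
    static bool perf_event_can_stop_tick(void)       { return true; } /* rotation list empty */

    /* Mirrors the veto chain of can_stop_full_tick() in kernel/time/tick-sched.c */
    static bool can_stop_full_tick(void)
    {
            if (!sched_can_stop_tick())
                    return false;
            if (!posix_cpu_timers_can_stop_tick())
                    return false;
            if (!perf_event_can_stop_tick())
                    return false;
            return true;
    }

    int main(void)
    {
            printf("tick can be stopped: %s\n",
                   can_stop_full_tick() ? "yes" : "no");
            return 0;
    }

Any of these subsystems can later pull the tick back: perf kicks the CPU via
tick_nohz_full_kick(), and the scheduler sends a reschedule IPI on the
1 -> 2 nr_running transition, as the hooks in the diff show.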
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e47ee46..0140830 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -799,6 +799,12 @@
 static inline void perf_event_task_tick(void)				{ }
 #endif
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
+extern bool perf_event_can_stop_tick(void);
+#else
+static inline bool perf_event_can_stop_tick(void)			{ return true; }
+#endif
+
 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1ff9e0a..a74aded 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1856,6 +1856,12 @@
 static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(void);
+#else
+static inline bool sched_can_stop_tick(void) { return false; }
+#endif
+
 #ifdef CONFIG_SCHED_AUTOGROUP
 extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
diff --git a/include/linux/tick.h b/include/linux/tick.h
index d290168..9180f4b 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -160,13 +160,17 @@
 #ifdef CONFIG_NO_HZ_FULL
 extern void tick_nohz_init(void);
 extern int tick_nohz_full_cpu(int cpu);
+extern void tick_nohz_full_check(void);
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_all(void);
+extern void tick_nohz_task_switch(struct task_struct *tsk);
 #else
 static inline void tick_nohz_init(void) { }
 static inline int tick_nohz_full_cpu(int cpu) { return 0; }
+static inline void tick_nohz_full_check(void) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
+static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
 #endif
 
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0cd865..ddb993b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -18,6 +18,7 @@
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/hash.h>
+#include <linux/tick.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -655,8 +656,13 @@
 
 	WARN_ON(!irqs_disabled());
 
-	if (list_empty(&cpuctx->rotation_list))
+	if (list_empty(&cpuctx->rotation_list)) {
+		int was_empty = list_empty(head);
 		list_add(&cpuctx->rotation_list, head);
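+		/* List went from empty to non-empty: the tick is needed again */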
+		if (was_empty)
+			tick_nohz_full_kick();
+	}
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -2555,6 +2560,14 @@
 		list_del_init(&cpuctx->rotation_list);
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+bool perf_event_can_stop_tick(void)
+{
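+	/* No events waiting for rotation means the tick can be stopped */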
+	return list_empty(&__get_cpu_var(rotation_list));
+}
+#endif
+
 void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 84d5cb3..42670e9 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -673,12 +673,12 @@
 bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
 {
 	if (!task_cputime_zero(&tsk->cputime_expires))
-		return true;
+		return false;
 
 	if (tsk->signal->cputimer.running)
-		return true;
+		return false;
 
-	return false;
+	return true;
 }
 #else
 static inline void posix_cpu_timer_kick_nohz(void) { }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f0a5b3..dd09def 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -650,6 +650,24 @@
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
+#ifdef CONFIG_NO_HZ_FULL
+bool sched_can_stop_tick(void)
+{
+	struct rq *rq;
+
+	rq = this_rq();
+
+	/* Make sure rq->nr_running update is visible after the IPI */
+	smp_rmb();
+
+	/* More than one running task needs preemption */
+	if (rq->nr_running > 1)
+		return false;
+
+	return true;
+}
+#endif /* CONFIG_NO_HZ_FULL */
+
 void sched_avg_update(struct rq *rq)
 {
 	s64 period = sched_avg_period();
@@ -1380,7 +1398,8 @@
 
 void scheduler_ipi(void)
 {
-	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
+	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() &&
+	    !tick_nohz_full_cpu(smp_processor_id()))
 		return;
 
 	/*
@@ -1397,6 +1416,8 @@
 	 * somewhat pessimize the simple resched case.
 	 */
 	irq_enter();
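+	/* Full dynticks CPUs re-evaluate (and maybe restart) the tick here */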
+	tick_nohz_full_check();
 	sched_ttwu_pending();
 
 	/*
@@ -1876,6 +1896,8 @@
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+
+	tick_nohz_task_switch(current);
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 889904d..eb363aa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/tick.h>
 
 #include "cpupri.h"
 
@@ -1106,6 +1107,20 @@
 static inline void inc_nr_running(struct rq *rq)
 {
 	rq->nr_running++;
+
+#ifdef CONFIG_NO_HZ_FULL
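+	/*
+	 * A second runnable task means the tick is needed again for
+	 * preemption: only the 1 -> 2 transition needs the kick.
+	 */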
+	if (rq->nr_running == 2) {
+		if (tick_nohz_full_cpu(rq->cpu)) {
+			/* Order rq->nr_running write against the IPI */
+			smp_wmb();
+			smp_send_reschedule(rq->cpu);
+		}
+	}
+#endif
 }
 
 static inline void dec_nr_running(struct rq *rq)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index de15813..8b1446d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -337,6 +337,19 @@
 	}
 }
 
+static inline void tick_irq_exit(void)
+{
+#ifdef CONFIG_NO_HZ_COMMON
+	int cpu = smp_processor_id();
+
+	/* Make sure that timer wheel updates are propagated */
+	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
+		if (!in_interrupt())
+			tick_nohz_irq_exit();
+	}
+#endif
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
@@ -348,11 +361,7 @@
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 
-#ifdef CONFIG_NO_HZ_COMMON
-	/* Make sure that timer wheel updates are propagated */
-	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
-		tick_nohz_irq_exit();
-#endif
+	tick_irq_exit();
 	rcu_irq_exit();
 	sched_preempt_enable_no_resched();
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 884a9f3..12a900d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -21,6 +21,8 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/irq_work.h>
+#include <linux/posix-timers.h>
+#include <linux/perf_event.h>
 
 #include <asm/irq_regs.h>
 
@@ -147,16 +149,49 @@
 static cpumask_var_t nohz_full_mask;
 bool have_nohz_full_mask;
 
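+/* Any of the tick dependencies checked below may veto stopping the tick */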
+static bool can_stop_full_tick(void)
+{
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (!sched_can_stop_tick())
+		return false;
+
+	if (!posix_cpu_timers_can_stop_tick(current))
+		return false;
+
+	if (!perf_event_can_stop_tick())
+		return false;
+
+	/* sched_clock_tick() needs us? */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+	/*
+	 * TODO: kick full dynticks CPUs when
+	 * sched_clock_stable is set.
+	 */
+	if (!sched_clock_stable)
+		return false;
+#endif
+
+	return true;
+}
+
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
+
 /*
  * Re-evaluate the need for the tick on the current CPU
  * and restart it if necessary.
  */
-static void tick_nohz_full_check(void)
+void tick_nohz_full_check(void)
 {
-	/*
-	 * STUB for now, will be filled with the full tick stop/restart
-	 * infrastructure patches
-	 */
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		if (ts->tick_stopped && !is_idle_task(current)) {
+			if (!can_stop_full_tick())
+				tick_nohz_restart_sched_tick(ts, ktime_get());
+		}
+	}
 }
 
 static void nohz_full_kick_work_func(struct irq_work *work)
@@ -198,6 +232,26 @@
 	preempt_enable();
 }
 
+/*
+ * Re-evaluate the need for the tick as we switch the current task.
+ * It might need the tick due to per task/process properties:
+ * perf events, posix cpu timers, ...
+ */
+void tick_nohz_task_switch(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		return;
+
+	local_irq_save(flags);
+
+	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
+		tick_nohz_full_kick();
+
+	local_irq_restore(flags);
+}
+
 int tick_nohz_full_cpu(int cpu)
 {
 	if (!have_nohz_full_mask)
@@ -613,6 +667,25 @@
 	return ret;
 }
 
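+/* Stop the tick from irq exit on a full dynticks CPU, if nothing prevents it */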
+static void tick_nohz_full_stop_tick(struct tick_sched *ts)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	int cpu = smp_processor_id();
+
+	if (!tick_nohz_full_cpu(cpu) || is_idle_task(current))
+		return;
+
+	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+		return;
+
+	if (!can_stop_full_tick())
+		return;
+
+	tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+#endif
+}
+
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 {
 	/*
@@ -739,12 +811,13 @@
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (!ts->inidle)
-		return;
-
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
-	__tick_nohz_idle_enter(ts);
+	if (ts->inidle) {
+		/* Cancel the timer because the CPU already woke up from the C-states */
+		menu_hrtimer_cancel();
+		__tick_nohz_idle_enter(ts);
+	} else {
+		tick_nohz_full_stop_tick(ts);
+	}
 }
 
 /**