fix for account_group_exec_runtime(), make sure ->signal can't be freed under rq->lock
Impact: fix hang/crash on ia64 under high load
This is ugly, but the simplest patch by far.
Unlike other similar routines, account_group_exec_runtime() could be
called "implicitly" from within scheduler after exit_notify(). This
means we can race with the parent doing release_task(), we can't just
check ->signal != NULL.
Change __exit_signal() to do spin_unlock_wait(&task_rq(tsk)->lock)
before __cleanup_signal() to make sure ->signal can't be freed under
task_rq(tsk)->lock. Note that task_rq_unlock_wait() doesn't care
about the case when tsk changes cpu/rq under us, this should be OK.
Thanks to Ingo who nacked my previous buggy patch.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Doug Chapman <doug.chapman@hp.com>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 295b7c7..644ffbd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -247,6 +247,7 @@
extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(void);
+extern void task_rq_unlock_wait(struct task_struct *p);
extern cpumask_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5..ae2b92b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,6 +141,11 @@
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
+ /*
+ * Make sure ->signal can't go away under rq->lock,
+ * see account_group_exec_runtime().
+ */
+ task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}
diff --git a/kernel/sched.c b/kernel/sched.c
index f314924..50a21f9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -969,6 +969,14 @@
}
}
+void task_rq_unlock_wait(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+
+ smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+ spin_unlock_wait(&rq->lock);
+}
+
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{