Blame - kernel/rcupreempt.c - kernel/msm-4.9

blob: 27827931ca0dd6ca905040955616c970b2e7539d [file] [log] [blame]

Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1	/*
				2	* Read-Copy Update mechanism for mutual exclusion, realtime implementation
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				17	*
				18	* Copyright IBM Corporation, 2006
				19	*
				20	* Authors: Paul E. McKenney <paulmck@us.ibm.com>
				21	* With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
				22	* for pushing me away from locks and towards counters, and
				23	* to Suparna Bhattacharya for pushing me completely away
				24	* from atomic instructions on the read side.
				25	*
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	26	* - Added handling of Dynamic Ticks
				27	* Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
				28	* - Steven Rostedt <srostedt@redhat.com>
				29	*
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	30	* Papers: http://www.rdrop.com/users/paulmck/RCU
				31	*
				32	* Design Document: http://lwn.net/Articles/253651/
				33	*
				34	* For detailed explanation of Read-Copy Update mechanism see -
				35	* Documentation/RCU/ *.txt
				36	*
				37	*/
				38	#include <linux/types.h>
				39	#include <linux/kernel.h>
				40	#include <linux/init.h>
				41	#include <linux/spinlock.h>
				42	#include <linux/smp.h>
				43	#include <linux/rcupdate.h>
				44	#include <linux/interrupt.h>
				45	#include <linux/sched.h>
				46	#include <asm/atomic.h>
				47	#include <linux/bitops.h>
				48	#include <linux/module.h>
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	49	#include <linux/kthread.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	50	#include <linux/completion.h>
				51	#include <linux/moduleparam.h>
				52	#include <linux/percpu.h>
				53	#include <linux/notifier.h>
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	54	#include <linux/cpu.h>
				55	#include <linux/random.h>
				56	#include <linux/delay.h>
				57	#include <linux/byteorder/swabb.h>
				58	#include <linux/cpumask.h>
				59	#include <linux/rcupreempt_trace.h>
				60
				61	/*
				62	* Macro that prevents the compiler from reordering accesses, but does
				63	* absolutely -nothing- to prevent CPUs from reordering. This is used
				64	* only to mediate communication between mainline code and hardware
				65	* interrupt and NMI handlers.
				66	*/
				67	#define ACCESS_ONCE(x) ((volatile typeof(x) )&(x))
				68
				69	/*
				70	* PREEMPT_RCU data structures.
				71	*/
				72
				73	/*
				74	* GP_STAGES specifies the number of times the state machine has
				75	* to go through the all the rcu_try_flip_states (see below)
				76	* in a single Grace Period.
				77	*
				78	* GP in GP_STAGES stands for Grace Period ;)
				79	*/
				80	#define GP_STAGES 2
				81	struct rcu_data {
				82	spinlock_t lock; /* Protect rcu_data fields. */
				83	long completed; /* Number of last completed batch. */
				84	int waitlistcount;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	85	struct rcu_head *nextlist;
				86	struct rcu_head **nexttail;
				87	struct rcu_head *waitlist[GP_STAGES];
				88	struct rcu_head **waittail[GP_STAGES];
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	89	struct rcu_head donelist; / from waitlist & waitschedlist */
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	90	struct rcu_head **donetail;
				91	long rcu_flipctr[2];
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	92	struct rcu_head *nextschedlist;
				93	struct rcu_head **nextschedtail;
				94	struct rcu_head *waitschedlist;
				95	struct rcu_head **waitschedtail;
				96	int rcu_sched_sleeping;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	97	#ifdef CONFIG_RCU_TRACE
				98	struct rcupreempt_trace trace;
				99	#endif /* #ifdef CONFIG_RCU_TRACE */
				100	};
				101
				102	/*
				103	* States for rcu_try_flip() and friends.
				104	*/
				105
				106	enum rcu_try_flip_states {
				107
				108	/*
				109	* Stay here if nothing is happening. Flip the counter if somthing
				110	* starts happening. Denoted by "I"
				111	*/
				112	rcu_try_flip_idle_state,
				113
				114	/*
				115	* Wait here for all CPUs to notice that the counter has flipped. This
				116	* prevents the old set of counters from ever being incremented once
				117	* we leave this state, which in turn is necessary because we cannot
				118	* test any individual counter for zero -- we can only check the sum.
				119	* Denoted by "A".
				120	*/
				121	rcu_try_flip_waitack_state,
				122
				123	/*
				124	* Wait here for the sum of the old per-CPU counters to reach zero.
				125	* Denoted by "Z".
				126	*/
				127	rcu_try_flip_waitzero_state,
				128
				129	/*
				130	* Wait here for each of the other CPUs to execute a memory barrier.
				131	* This is necessary to ensure that these other CPUs really have
				132	* completed executing their RCU read-side critical sections, despite
				133	* their CPUs wildly reordering memory. Denoted by "M".
				134	*/
				135	rcu_try_flip_waitmb_state,
				136	};
				137
/*
 * Values taken by the sched_sleep field of struct rcu_ctrlblk.
 */
enum rcu_sched_sleep_states {
	/* Not sleeping, callbacks need GP. */
	rcu_sched_not_sleeping,
	/* Thinking of sleeping, rechecking. */
	rcu_sched_sleep_prep,
	/* Sleeping, awaken if GP needed. */
	rcu_sched_sleeping,
};
				147
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	148	struct rcu_ctrlblk {
				149	spinlock_t fliplock; /* Protect state-machine transitions. */
				150	long completed; /* Number of last completed batch. */
				151	enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
				152	the rcu state machine */
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	153	spinlock_t schedlock; /* Protect rcu_sched sleep state. */
				154	enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
				155	wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	156	};
				157
				158	static DEFINE_PER_CPU(struct rcu_data, rcu_data);
				159	static struct rcu_ctrlblk rcu_ctrlblk = {
				160	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
				161	.completed = 0,
				162	.rcu_try_flip_state = rcu_try_flip_idle_state,
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	163	.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
				164	.sched_sleep = rcu_sched_not_sleeping,
				165	.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	166	};
				167
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	168	static struct task_struct *rcu_sched_grace_period_task;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	169
				170	#ifdef CONFIG_RCU_TRACE
				171	static char *rcu_try_flip_state_names[] =
				172	{ "idle", "waitack", "waitzero", "waitmb" };
				173	#endif /* #ifdef CONFIG_RCU_TRACE */
				174
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	175	static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
				176
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	177	/*
				178	* Enum and per-CPU flag to determine when each CPU has seen
				179	* the most recent counter flip.
				180	*/
				181
				182	enum rcu_flip_flag_values {
				183	rcu_flip_seen, /* Steady/initial state, last flip seen. */
				184	/* Only GP detector can update. */
				185	rcu_flipped /* Flip just completed, need confirmation. */
				186	/* Only corresponding CPU can update. */
				187	};
				188	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
				189	= rcu_flip_seen;
				190
				191	/*
				192	* Enum and per-CPU flag to determine when each CPU has executed the
				193	* needed memory barrier to fence in memory references from its last RCU
				194	* read-side critical section in the just-completed grace period.
				195	*/
				196
				197	enum rcu_mb_flag_values {
				198	rcu_mb_done, /* Steady/initial state, no mb()s required. */
				199	/* Only GP detector can update. */
				200	rcu_mb_needed /* Flip just completed, need an mb(). */
				201	/* Only corresponding CPU can update. */
				202	};
				203	static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
				204	= rcu_mb_done;
				205
				206	/*
				207	* RCU_DATA_ME: find the current CPU's rcu_data structure.
				208	* RCU_DATA_CPU: find the specified CPU's rcu_data structure.
				209	*/
				210	#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
				211	#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
				212
				213	/*
				214	* Helper macro for tracing when the appropriate rcu_data is not
				215	* cached in a local variable, but where the CPU number is so cached.
				216	*/
				217	#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
				218
				219	/*
				220	* Helper macro for tracing when the appropriate rcu_data is not
				221	* cached in a local variable.
				222	*/
				223	#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
				224
				225	/*
				226	* Helper macro for tracing when the appropriate rcu_data is pointed
				227	* to by a local variable.
				228	*/
				229	#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
				230
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	231	#define RCU_SCHED_BATCH_TIME (HZ / 50)
				232
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	233	/*
				234	* Return the number of RCU batches processed thus far. Useful
				235	* for debug and statistics.
				236	*/
				237	long rcu_batches_completed(void)
				238	{
				239	return rcu_ctrlblk.completed;
				240	}
				241	EXPORT_SYMBOL_GPL(rcu_batches_completed);
				242
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	243	void __rcu_read_lock(void)
				244	{
				245	int idx;
				246	struct task_struct *t = current;
				247	int nesting;
				248
				249	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				250	if (nesting != 0) {
				251
				252	/* An earlier rcu_read_lock() covers us, just count it. */
				253
				254	t->rcu_read_lock_nesting = nesting + 1;
				255
				256	} else {
				257	unsigned long flags;
				258
				259	/*
				260	* We disable interrupts for the following reasons:
				261	* - If we get scheduling clock interrupt here, and we
				262	* end up acking the counter flip, it's like a promise
				263	* that we will never increment the old counter again.
				264	* Thus we will break that promise if that
				265	* scheduling clock interrupt happens between the time
				266	* we pick the .completed field and the time that we
				267	* increment our counter.
				268	*
				269	* - We don't want to be preempted out here.
				270	*
				271	* NMIs can still occur, of course, and might themselves
				272	* contain rcu_read_lock().
				273	*/
				274
				275	local_irq_save(flags);
				276
				277	/*
				278	* Outermost nesting of rcu_read_lock(), so increment
				279	* the current counter for the current CPU. Use volatile
				280	* casts to prevent the compiler from reordering.
				281	*/
				282
				283	idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
				284	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
				285
				286	/*
				287	* Now that the per-CPU counter has been incremented, we
				288	* are protected from races with rcu_read_lock() invoked
				289	* from NMI handlers on this CPU. We can therefore safely
				290	* increment the nesting counter, relieving further NMIs
				291	* of the need to increment the per-CPU counter.
				292	*/
				293
				294	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
				295
				296	/*
				297	* Now that we have preventing any NMIs from storing
				298	* to the ->rcu_flipctr_idx, we can safely use it to
				299	* remember which counter to decrement in the matching
				300	* rcu_read_unlock().
				301	*/
				302
				303	ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
				304	local_irq_restore(flags);
				305	}
				306	}
				307	EXPORT_SYMBOL_GPL(__rcu_read_lock);
				308
				309	void __rcu_read_unlock(void)
				310	{
				311	int idx;
				312	struct task_struct *t = current;
				313	int nesting;
				314
				315	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
				316	if (nesting > 1) {
				317
				318	/*
				319	* We are still protected by the enclosing rcu_read_lock(),
				320	* so simply decrement the counter.
				321	*/
				322
				323	t->rcu_read_lock_nesting = nesting - 1;
				324
				325	} else {
				326	unsigned long flags;
				327
				328	/*
				329	* Disable local interrupts to prevent the grace-period
				330	* detection state machine from seeing us half-done.
				331	* NMIs can still occur, of course, and might themselves
				332	* contain rcu_read_lock() and rcu_read_unlock().
				333	*/
				334
				335	local_irq_save(flags);
				336
				337	/*
				338	* Outermost nesting of rcu_read_unlock(), so we must
				339	* decrement the current counter for the current CPU.
				340	* This must be done carefully, because NMIs can
				341	* occur at any point in this code, and any rcu_read_lock()
				342	* and rcu_read_unlock() pairs in the NMI handlers
				343	* must interact non-destructively with this code.
				344	* Lots of volatile casts, and -very- careful ordering.
				345	*
				346	* Changes to this code, including this one, must be
				347	* inspected, validated, and tested extremely carefully!!!
				348	*/
				349
				350	/*
				351	* First, pick up the index.
				352	*/
				353
				354	idx = ACCESS_ONCE(t->rcu_flipctr_idx);
				355
				356	/*
				357	* Now that we have fetched the counter index, it is
				358	* safe to decrement the per-task RCU nesting counter.
				359	* After this, any interrupts or NMIs will increment and
				360	* decrement the per-CPU counters.
				361	*/
				362	ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
				363
				364	/*
				365	* It is now safe to decrement this task's nesting count.
				366	* NMIs that occur after this statement will route their
				367	* rcu_read_lock() calls through this "else" clause, and
				368	* will thus start incrementing the per-CPU counter on
				369	* their own. They will also clobber ->rcu_flipctr_idx,
				370	* but that is OK, since we have already fetched it.
				371	*/
				372
				373	ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
				374	local_irq_restore(flags);
				375	}
				376	}
				377	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
				378
				379	/*
				380	* If a global counter flip has occurred since the last time that we
				381	* advanced callbacks, advance them. Hardware interrupts must be
				382	* disabled when calling this function.
				383	*/
				384	static void __rcu_advance_callbacks(struct rcu_data *rdp)
				385	{
				386	int cpu;
				387	int i;
				388	int wlc = 0;
				389
				390	if (rdp->completed != rcu_ctrlblk.completed) {
				391	if (rdp->waitlist[GP_STAGES - 1] != NULL) {
				392	*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
				393	rdp->donetail = rdp->waittail[GP_STAGES - 1];
				394	RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
				395	}
				396	for (i = GP_STAGES - 2; i >= 0; i--) {
				397	if (rdp->waitlist[i] != NULL) {
				398	rdp->waitlist[i + 1] = rdp->waitlist[i];
				399	rdp->waittail[i + 1] = rdp->waittail[i];
				400	wlc++;
				401	} else {
				402	rdp->waitlist[i + 1] = NULL;
				403	rdp->waittail[i + 1] =
				404	&rdp->waitlist[i + 1];
				405	}
				406	}
				407	if (rdp->nextlist != NULL) {
				408	rdp->waitlist[0] = rdp->nextlist;
				409	rdp->waittail[0] = rdp->nexttail;
				410	wlc++;
				411	rdp->nextlist = NULL;
				412	rdp->nexttail = &rdp->nextlist;
				413	RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
				414	} else {
				415	rdp->waitlist[0] = NULL;
				416	rdp->waittail[0] = &rdp->waitlist[0];
				417	}
				418	rdp->waitlistcount = wlc;
				419	rdp->completed = rcu_ctrlblk.completed;
				420	}
				421
				422	/*
				423	* Check to see if this CPU needs to report that it has seen
				424	* the most recent counter flip, thereby declaring that all
				425	* subsequent rcu_read_lock() invocations will respect this flip.
				426	*/
				427
				428	cpu = raw_smp_processor_id();
				429	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
				430	smp_mb(); /* Subsequent counter accesses must see new value */
				431	per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
				432	smp_mb(); /* Subsequent RCU read-side critical sections */
				433	/* seen -after- acknowledgement. */
				434	}
				435	}
				436
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	437	DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
				438	.dynticks = 1,
				439	};
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	440
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	441	#ifdef CONFIG_NO_HZ
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	442	static DEFINE_PER_CPU(int, rcu_update_flag);
				443
				444	/**
				445	* rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
				446	*
				447	* If the CPU was idle with dynamic ticks active, this updates the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	448	* rcu_dyntick_sched.dynticks to let the RCU handling know that the
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	449	* CPU is active.
				450	*/
				451	void rcu_irq_enter(void)
				452	{
				453	int cpu = smp_processor_id();
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	454	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	455
				456	if (per_cpu(rcu_update_flag, cpu))
				457	per_cpu(rcu_update_flag, cpu)++;
				458
				459	/*
				460	* Only update if we are coming from a stopped ticks mode
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	461	* (rcu_dyntick_sched.dynticks is even).
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	462	*/
				463	if (!in_interrupt() &&
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	464	(rdssp->dynticks & 0x1) == 0) {
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	465	/*
				466	* The following might seem like we could have a race
				467	* with NMI/SMIs. But this really isn't a problem.
				468	* Here we do a read/modify/write, and the race happens
				469	* when an NMI/SMI comes in after the read and before
				470	* the write. But NMI/SMIs will increment this counter
				471	* twice before returning, so the zero bit will not
				472	* be corrupted by the NMI/SMI which is the most important
				473	* part.
				474	*
				475	* The only thing is that we would bring back the counter
				476	* to a postion that it was in during the NMI/SMI.
				477	* But the zero bit would be set, so the rest of the
				478	* counter would again be ignored.
				479	*
				480	* On return from the IRQ, the counter may have the zero
				481	* bit be 0 and the counter the same as the return from
				482	* the NMI/SMI. If the state machine was so unlucky to
				483	* see that, it still doesn't matter, since all
				484	* RCU read-side critical sections on this CPU would
				485	* have already completed.
				486	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	487	rdssp->dynticks++;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	488	/*
				489	* The following memory barrier ensures that any
				490	* rcu_read_lock() primitives in the irq handler
				491	* are seen by other CPUs to follow the above
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	492	* increment to rcu_dyntick_sched.dynticks. This is
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	493	* required in order for other CPUs to correctly
				494	* determine when it is safe to advance the RCU
				495	* grace-period state machine.
				496	*/
				497	smp_mb(); /* see above block comment. */
				498	/*
				499	* Since we can't determine the dynamic tick mode from
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	500	* the rcu_dyntick_sched.dynticks after this routine,
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	501	* we use a second flag to acknowledge that we came
				502	* from an idle state with ticks stopped.
				503	*/
				504	per_cpu(rcu_update_flag, cpu)++;
				505	/*
				506	* If we take an NMI/SMI now, they will also increment
				507	* the rcu_update_flag, and will not update the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	508	* rcu_dyntick_sched.dynticks on exit. That is for
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	509	* this IRQ to do.
				510	*/
				511	}
				512	}
				513
				514	/**
				515	* rcu_irq_exit - Called from exiting Hard irq context.
				516	*
				517	* If the CPU was idle with dynamic ticks active, update the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	518	* rcu_dyntick_sched.dynticks to put let the RCU handling be
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	519	* aware that the CPU is going back to idle with no ticks.
				520	*/
				521	void rcu_irq_exit(void)
				522	{
				523	int cpu = smp_processor_id();
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	524	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	525
				526	/*
				527	* rcu_update_flag is set if we interrupted the CPU
				528	* when it was idle with ticks stopped.
				529	* Once this occurs, we keep track of interrupt nesting
				530	* because a NMI/SMI could also come in, and we still
				531	* only want the IRQ that started the increment of the
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	532	* rcu_dyntick_sched.dynticks to be the one that modifies
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	533	* it on exit.
				534	*/
				535	if (per_cpu(rcu_update_flag, cpu)) {
				536	if (--per_cpu(rcu_update_flag, cpu))
				537	return;
				538
				539	/* This must match the interrupt nesting */
				540	WARN_ON(in_interrupt());
				541
				542	/*
				543	* If an NMI/SMI happens now we are still
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	544	* protected by the rcu_dyntick_sched.dynticks being odd.
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	545	*/
				546
				547	/*
				548	* The following memory barrier ensures that any
				549	* rcu_read_unlock() primitives in the irq handler
				550	* are seen by other CPUs to preceed the following
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	551	* increment to rcu_dyntick_sched.dynticks. This
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	552	* is required in order for other CPUs to determine
				553	* when it is safe to advance the RCU grace-period
				554	* state machine.
				555	*/
				556	smp_mb(); /* see above block comment. */
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	557	rdssp->dynticks++;
				558	WARN_ON(rdssp->dynticks & 0x1);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	559	}
				560	}
				561
				562	static void dyntick_save_progress_counter(int cpu)
				563	{
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	564	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				565
				566	rdssp->dynticks_snap = rdssp->dynticks;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	567	}
				568
				569	static inline int
				570	rcu_try_flip_waitack_needed(int cpu)
				571	{
				572	long curr;
				573	long snap;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	574	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	575
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	576	curr = rdssp->dynticks;
				577	snap = rdssp->dynticks_snap;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	578	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				579
				580	/*
				581	* If the CPU remained in dynticks mode for the entire time
				582	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				583	* then it cannot be in the middle of an rcu_read_lock(), so
				584	* the next rcu_read_lock() it executes must use the new value
				585	* of the counter. So we can safely pretend that this CPU
				586	* already acknowledged the counter.
				587	*/
				588
				589	if ((curr == snap) && ((curr & 0x1) == 0))
				590	return 0;
				591
				592	/*
				593	* If the CPU passed through or entered a dynticks idle phase with
				594	* no active irq handlers, then, as above, we can safely pretend
				595	* that this CPU already acknowledged the counter.
				596	*/
				597
Paul E. McKenney	d7c0651	2008-05-12 21:21:06 +0200	[diff] [blame]	598	if ((curr - snap) > 2 \|\| (curr & 0x1) == 0)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	599	return 0;
				600
				601	/* We need this CPU to explicitly acknowledge the counter flip. */
				602
				603	return 1;
				604	}
				605
				606	static inline int
				607	rcu_try_flip_waitmb_needed(int cpu)
				608	{
				609	long curr;
				610	long snap;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	611	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	612
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	613	curr = rdssp->dynticks;
				614	snap = rdssp->dynticks_snap;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	615	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				616
				617	/*
				618	* If the CPU remained in dynticks mode for the entire time
				619	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				620	* then it cannot have executed an RCU read-side critical section
				621	* during that time, so there is no need for it to execute a
				622	* memory barrier.
				623	*/
				624
				625	if ((curr == snap) && ((curr & 0x1) == 0))
				626	return 0;
				627
				628	/*
				629	* If the CPU either entered or exited an outermost interrupt,
				630	* SMI, NMI, or whatever handler, then we know that it executed
				631	* a memory barrier when doing so. So we don't need another one.
				632	*/
				633	if (curr != snap)
				634	return 0;
				635
				636	/* We need the CPU to execute a memory barrier. */
				637
				638	return 1;
				639	}
				640
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	641	static void dyntick_save_progress_counter_sched(int cpu)
				642	{
				643	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				644
				645	rdssp->sched_dynticks_snap = rdssp->dynticks;
				646	}
				647
				648	static int rcu_qsctr_inc_needed_dyntick(int cpu)
				649	{
				650	long curr;
				651	long snap;
				652	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				653
				654	curr = rdssp->dynticks;
				655	snap = rdssp->sched_dynticks_snap;
				656	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
				657
				658	/*
				659	* If the CPU remained in dynticks mode for the entire time
				660	* and didn't take any interrupts, NMIs, SMIs, or whatever,
				661	* then it cannot be in the middle of an rcu_read_lock(), so
				662	* the next rcu_read_lock() it executes must use the new value
				663	* of the counter. Therefore, this CPU has been in a quiescent
				664	* state the entire time, and we don't need to wait for it.
				665	*/
				666
				667	if ((curr == snap) && ((curr & 0x1) == 0))
				668	return 0;
				669
				670	/*
				671	* If the CPU passed through or entered a dynticks idle phase with
				672	* no active irq handlers, then, as above, this CPU has already
				673	* passed through a quiescent state.
				674	*/
				675
				676	if ((curr - snap) > 2 \|\| (snap & 0x1) == 0)
				677	return 0;
				678
				679	/* We need this CPU to go through a quiescent state. */
				680
				681	return 1;
				682	}
				683
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	684	#else /* !CONFIG_NO_HZ */
				685
/*
 * Without CONFIG_NO_HZ there is no dynticks state to consult: the
 * snapshot helpers expand to nothing, and the "needed" predicates
 * always answer 1, so every CPU is always waited for.
 */

/* Preemptible-RCU flip state machine. */
# define dyntick_save_progress_counter(cpu) do { } while (0)
# define rcu_try_flip_waitack_needed(cpu) (1)
# define rcu_try_flip_waitmb_needed(cpu) (1)

/* rcu_sched quiescent-state tracking. */
# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	692
				693	#endif /* CONFIG_NO_HZ */
				694
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	695	static void save_qsctr_sched(int cpu)
				696	{
				697	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				698
				699	rdssp->sched_qs_snap = rdssp->sched_qs;
				700	}
				701
				702	static inline int rcu_qsctr_inc_needed(int cpu)
				703	{
				704	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
				705
				706	/*
				707	* If there has been a quiescent state, no more need to wait
				708	* on this CPU.
				709	*/
				710
				711	if (rdssp->sched_qs != rdssp->sched_qs_snap) {
				712	smp_mb(); /* force ordering with cpu entering schedule(). */
				713	return 0;
				714	}
				715
				716	/* We need this CPU to go through a quiescent state. */
				717
				718	return 1;
				719	}
				720
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	721	/*
				722	* Get here when RCU is idle. Decide whether we need to
				723	* move out of idle state, and return non-zero if so.
				724	* "Straightforward" approach for the moment, might later
				725	* use callback-list lengths, grace-period duration, or
				726	* some such to determine when to exit idle state.
				727	* Might also need a pre-idle test that does not acquire
				728	* the lock, but let's get the simple case working first...
				729	*/
				730
				731	static int
				732	rcu_try_flip_idle(void)
				733	{
				734	int cpu;
				735
				736	RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
				737	if (!rcu_pending(smp_processor_id())) {
				738	RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
				739	return 0;
				740	}
				741
				742	/*
				743	* Do the flip.
				744	*/
				745
				746	RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
				747	rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
				748
				749	/*
				750	* Need a memory barrier so that other CPUs see the new
				751	* counter value before they see the subsequent change of all
				752	* the rcu_flip_flag instances to rcu_flipped.
				753	*/
				754
				755	smp_mb(); /* see above block comment. */
				756
				757	/* Now ask each CPU for acknowledgement of the flip. */
				758
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	759	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	760	per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	761	dyntick_save_progress_counter(cpu);
				762	}
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	763
				764	return 1;
				765	}
				766
				767	/*
				768	* Wait for CPUs to acknowledge the flip.
				769	*/
				770
				771	static int
				772	rcu_try_flip_waitack(void)
				773	{
				774	int cpu;
				775
				776	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	777	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	778	if (rcu_try_flip_waitack_needed(cpu) &&
				779	per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	780	RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
				781	return 0;
				782	}
				783
				784	/*
				785	* Make sure our checks above don't bleed into subsequent
				786	* waiting for the sum of the counters to reach zero.
				787	*/
				788
				789	smp_mb(); /* see above block comment. */
				790	RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
				791	return 1;
				792	}
				793
				794	/*
				795	* Wait for collective ``last'' counter to reach zero,
				796	* then tell all CPUs to do an end-of-grace-period memory barrier.
				797	*/
				798
				799	static int
				800	rcu_try_flip_waitzero(void)
				801	{
				802	int cpu;
				803	int lastidx = !(rcu_ctrlblk.completed & 0x1);
				804	int sum = 0;
				805
				806	/* Check to see if the sum of the "last" counters is zero. */
				807
				808	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	809	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	810	sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
				811	if (sum != 0) {
				812	RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
				813	return 0;
				814	}
				815
				816	/*
				817	* This ensures that the other CPUs see the call for
				818	* memory barriers -after- the sum to zero has been
				819	* detected here
				820	*/
				821	smp_mb(); /* ^^^^^^^^^^^^ */
				822
				823	/* Call for a memory barrier from each CPU. */
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	824	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	825	per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	826	dyntick_save_progress_counter(cpu);
				827	}
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	828
				829	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
				830	return 1;
				831	}
				832
				833	/*
				834	* Wait for all CPUs to do their end-of-grace-period memory barrier.
				835	* Return 0 once all CPUs have done so.
				836	*/
				837
				838	static int
				839	rcu_try_flip_waitmb(void)
				840	{
				841	int cpu;
				842
				843	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
Mike Travis	363ab6f	2008-05-12 21:21:13 +0200	[diff] [blame]	844	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
Steven Rostedt	2232c2d	2008-02-29 18:46:50 +0100	[diff] [blame]	845	if (rcu_try_flip_waitmb_needed(cpu) &&
				846	per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	847	RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
				848	return 0;
				849	}
				850
				851	smp_mb(); /* Ensure that the above checks precede any following flip. */
				852	RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
				853	return 1;
				854	}
				855
				856	/*
				857	* Attempt a single flip of the counters. Remember, a single flip does
				858	* -not- constitute a grace period. Instead, the interval between
				859	* at least GP_STAGES consecutive flips is a grace period.
				860	*
				861	* If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
				862	* on a large SMP, they might want to use a hierarchical organization of
				863	* the per-CPU-counter pairs.
				864	*/
				865	static void rcu_try_flip(void)
				866	{
				867	unsigned long flags;
				868
				869	RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
				870	if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
				871	RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
				872	return;
				873	}
				874
				875	/*
				876	* Take the next transition(s) through the RCU grace-period
				877	* flip-counter state machine.
				878	*/
				879
				880	switch (rcu_ctrlblk.rcu_try_flip_state) {
				881	case rcu_try_flip_idle_state:
				882	if (rcu_try_flip_idle())
				883	rcu_ctrlblk.rcu_try_flip_state =
				884	rcu_try_flip_waitack_state;
				885	break;
				886	case rcu_try_flip_waitack_state:
				887	if (rcu_try_flip_waitack())
				888	rcu_ctrlblk.rcu_try_flip_state =
				889	rcu_try_flip_waitzero_state;
				890	break;
				891	case rcu_try_flip_waitzero_state:
				892	if (rcu_try_flip_waitzero())
				893	rcu_ctrlblk.rcu_try_flip_state =
				894	rcu_try_flip_waitmb_state;
				895	break;
				896	case rcu_try_flip_waitmb_state:
				897	if (rcu_try_flip_waitmb())
				898	rcu_ctrlblk.rcu_try_flip_state =
				899	rcu_try_flip_idle_state;
				900	}
				901	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
				902	}
				903
				904	/*
				905	* Check to see if this CPU needs to do a memory barrier in order to
				906	* ensure that any prior RCU read-side critical sections have committed
				907	* their counter manipulations and critical-section memory references
				908	* before declaring the grace period to be completed.
				909	*/
				910	static void rcu_check_mb(int cpu)
				911	{
				912	if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
				913	smp_mb(); /* Ensure RCU read-side accesses are visible. */
				914	per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
				915	}
				916	}
				917
				918	void rcu_check_callbacks(int cpu, int user)
				919	{
				920	unsigned long flags;
				921	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				922
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	923	/*
				924	* If this CPU took its interrupt from user mode or from the
				925	* idle loop, and this is not a nested interrupt, then
				926	* this CPU has to have exited all prior preept-disable
				927	* sections of code. So increment the counter to note this.
				928	*
				929	* The memory barrier is needed to handle the case where
				930	* writes from a preempt-disable section of code get reordered
				931	* into schedule() by this CPU's write buffer. So the memory
				932	* barrier makes sure that the rcu_qsctr_inc() is seen by other
				933	* CPUs to happen after any such write.
				934	*/
				935
				936	if (user \|\|
				937	(idle_cpu(cpu) && !in_softirq() &&
				938	hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
				939	smp_mb(); /* Guard against aggressive schedule(). */
				940	rcu_qsctr_inc(cpu);
				941	}
				942
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	943	rcu_check_mb(cpu);
				944	if (rcu_ctrlblk.completed == rdp->completed)
				945	rcu_try_flip();
				946	spin_lock_irqsave(&rdp->lock, flags);
				947	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				948	__rcu_advance_callbacks(rdp);
				949	if (rdp->donelist == NULL) {
				950	spin_unlock_irqrestore(&rdp->lock, flags);
				951	} else {
				952	spin_unlock_irqrestore(&rdp->lock, flags);
				953	raise_softirq(RCU_SOFTIRQ);
				954	}
				955	}
				956
				957	/*
				958	* Needed by dynticks, to make sure all RCU processing has finished
				959	* when we go idle:
				960	*/
				961	void rcu_advance_callbacks(int cpu, int user)
				962	{
				963	unsigned long flags;
				964	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				965
				966	if (rcu_ctrlblk.completed == rdp->completed) {
				967	rcu_try_flip();
				968	if (rcu_ctrlblk.completed == rdp->completed)
				969	return;
				970	}
				971	spin_lock_irqsave(&rdp->lock, flags);
				972	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
				973	__rcu_advance_callbacks(rdp);
				974	spin_unlock_irqrestore(&rdp->lock, flags);
				975	}
				976
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	977	#ifdef CONFIG_HOTPLUG_CPU
				978	#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
				979	*dsttail = srclist; \
				980	if (srclist != NULL) { \
				981	dsttail = srctail; \
				982	srclist = NULL; \
				983	srctail = &srclist;\
				984	} \
				985	} while (0)
				986
				987	void rcu_offline_cpu(int cpu)
				988	{
				989	int i;
				990	struct rcu_head *list = NULL;
				991	unsigned long flags;
				992	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	993	struct rcu_head *schedlist = NULL;
				994	struct rcu_head **schedtail = &schedlist;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	995	struct rcu_head **tail = &list;
				996
				997	/*
				998	* Remove all callbacks from the newly dead CPU, retaining order.
				999	* Otherwise rcu_barrier() will fail
				1000	*/
				1001
				1002	spin_lock_irqsave(&rdp->lock, flags);
				1003	rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
				1004	for (i = GP_STAGES - 1; i >= 0; i--)
				1005	rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
				1006	list, tail);
				1007	rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1008	rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
				1009	schedlist, schedtail);
				1010	rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
				1011	schedlist, schedtail);
				1012	rdp->rcu_sched_sleeping = 0;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1013	spin_unlock_irqrestore(&rdp->lock, flags);
				1014	rdp->waitlistcount = 0;
				1015
				1016	/* Disengage the newly dead CPU from the grace-period computation. */
				1017
				1018	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
				1019	rcu_check_mb(cpu);
				1020	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
				1021	smp_mb(); /* Subsequent counter accesses must see new value */
				1022	per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
				1023	smp_mb(); /* Subsequent RCU read-side critical sections */
				1024	/* seen -after- acknowledgement. */
				1025	}
				1026
				1027	RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
				1028	RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
				1029
				1030	RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
				1031	RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
				1032
				1033	cpu_clear(cpu, rcu_cpu_online_map);
				1034
				1035	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
				1036
				1037	/*
				1038	* Place the removed callbacks on the current CPU's queue.
				1039	* Make them all start a new grace period: simple approach,
				1040	* in theory could starve a given set of callbacks, but
				1041	* you would need to be doing some serious CPU hotplugging
				1042	* to make this happen. If this becomes a problem, adding
				1043	* a synchronize_rcu() to the hotplug path would be a simple
				1044	* fix.
				1045	*/
				1046
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1047	local_irq_save(flags); /* disable preempt till we know what lock. */
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1048	rdp = RCU_DATA_ME();
Paul E. McKenney	ae77886	2008-02-27 16:21:10 -0800	[diff] [blame]	1049	spin_lock(&rdp->lock);
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1050	*rdp->nexttail = list;
				1051	if (list)
				1052	rdp->nexttail = tail;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1053	*rdp->nextschedtail = schedlist;
				1054	if (schedlist)
				1055	rdp->nextschedtail = schedtail;
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1056	spin_unlock_irqrestore(&rdp->lock, flags);
				1057	}
				1058
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1059	#else /* #ifdef CONFIG_HOTPLUG_CPU */
				1060
				1061	void rcu_offline_cpu(int cpu)
				1062	{
				1063	}
				1064
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1065	#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
				1066
Nick Piggin	70ff055	2008-07-10 17:25:35 +1000	[diff] [blame]	1067	void __cpuinit rcu_online_cpu(int cpu)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1068	{
				1069	unsigned long flags;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1070	struct rcu_data *rdp;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1071
				1072	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
				1073	cpu_set(cpu, rcu_cpu_online_map);
				1074	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1075
				1076	/*
				1077	* The rcu_sched grace-period processing might have bypassed
				1078	* this CPU, given that it was not in the rcu_cpu_online_map
				1079	* when the grace-period scan started. This means that the
				1080	* grace-period task might sleep. So make sure that if this
				1081	* should happen, the first callback posted to this CPU will
				1082	* wake up the grace-period task if need be.
				1083	*/
				1084
				1085	rdp = RCU_DATA_CPU(cpu);
				1086	spin_lock_irqsave(&rdp->lock, flags);
				1087	rdp->rcu_sched_sleeping = 1;
				1088	spin_unlock_irqrestore(&rdp->lock, flags);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1089	}
				1090
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1091	static void rcu_process_callbacks(struct softirq_action *unused)
				1092	{
				1093	unsigned long flags;
				1094	struct rcu_head next, list;
Paul E. McKenney	c9e7100	2008-02-28 11:51:07 -0800	[diff] [blame]	1095	struct rcu_data *rdp;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1096
Paul E. McKenney	c9e7100	2008-02-28 11:51:07 -0800	[diff] [blame]	1097	local_irq_save(flags);
				1098	rdp = RCU_DATA_ME();
				1099	spin_lock(&rdp->lock);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1100	list = rdp->donelist;
				1101	if (list == NULL) {
				1102	spin_unlock_irqrestore(&rdp->lock, flags);
				1103	return;
				1104	}
				1105	rdp->donelist = NULL;
				1106	rdp->donetail = &rdp->donelist;
				1107	RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
				1108	spin_unlock_irqrestore(&rdp->lock, flags);
				1109	while (list) {
				1110	next = list->next;
				1111	list->func(list);
				1112	list = next;
				1113	RCU_TRACE_ME(rcupreempt_trace_invoke);
				1114	}
				1115	}
				1116
				1117	void call_rcu(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1118	{
				1119	unsigned long flags;
				1120	struct rcu_data *rdp;
				1121
				1122	head->func = func;
				1123	head->next = NULL;
				1124	local_irq_save(flags);
				1125	rdp = RCU_DATA_ME();
				1126	spin_lock(&rdp->lock);
				1127	__rcu_advance_callbacks(rdp);
				1128	*rdp->nexttail = head;
				1129	rdp->nexttail = &head->next;
				1130	RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1131	spin_unlock_irqrestore(&rdp->lock, flags);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1132	}
				1133	EXPORT_SYMBOL_GPL(call_rcu);
				1134
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1135	void call_rcu_sched(struct rcu_head head, void (func)(struct rcu_head *rcu))
				1136	{
				1137	unsigned long flags;
				1138	struct rcu_data *rdp;
				1139	int wake_gp = 0;
				1140
				1141	head->func = func;
				1142	head->next = NULL;
				1143	local_irq_save(flags);
				1144	rdp = RCU_DATA_ME();
				1145	spin_lock(&rdp->lock);
				1146	*rdp->nextschedtail = head;
				1147	rdp->nextschedtail = &head->next;
				1148	if (rdp->rcu_sched_sleeping) {
				1149
				1150	/* Grace-period processing might be sleeping... */
				1151
				1152	rdp->rcu_sched_sleeping = 0;
				1153	wake_gp = 1;
				1154	}
				1155	spin_unlock_irqrestore(&rdp->lock, flags);
				1156	if (wake_gp) {
				1157
				1158	/* Wake up grace-period processing, unless someone beat us. */
				1159
				1160	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1161	if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
				1162	wake_gp = 0;
				1163	rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
				1164	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1165	if (wake_gp)
				1166	wake_up_interruptible(&rcu_ctrlblk.sched_wq);
				1167	}
				1168	}
				1169	EXPORT_SYMBOL_GPL(call_rcu_sched);
				1170
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1171	/*
				1172	* Wait until all currently running preempt_disable() code segments
				1173	* (including hardware-irq-disable segments) complete. Note that
				1174	* in -rt this does -not- necessarily result in all currently executing
				1175	* interrupt -handlers- having completed.
 *
 * NOTE(review): synchronize_rcu_xxx() is a macro defined outside this
 * file section; presumably it expands to the definition of
 * __synchronize_sched() built on call_rcu_sched() -- confirm against
 * the macro's definition.
				1176	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1177	synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1178	EXPORT_SYMBOL_GPL(__synchronize_sched);
				1179
				1180	/*
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1181	* kthread function that manages call_rcu_sched grace periods.
				1182	*/
				1183	static int rcu_sched_grace_period(void *arg)
				1184	{
				1185	int couldsleep; /* might sleep after current pass. */
				1186	int couldsleepnext = 0; /* might sleep after next pass. */
				1187	int cpu;
				1188	unsigned long flags;
				1189	struct rcu_data *rdp;
				1190	int ret;
				1191
				1192	/*
				1193	* Each pass through the following loop handles one
				1194	* rcu_sched grace period cycle.
				1195	*/
				1196	do {
				1197	/* Save each CPU's current state. */
				1198
				1199	for_each_online_cpu(cpu) {
				1200	dyntick_save_progress_counter_sched(cpu);
				1201	save_qsctr_sched(cpu);
				1202	}
				1203
				1204	/*
				1205	* Sleep for about an RCU grace-period's worth to
				1206	* allow better batching and to consume less CPU.
				1207	*/
				1208	schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
				1209
				1210	/*
				1211	* If there was nothing to do last time, prepare to
				1212	* sleep at the end of the current grace period cycle.
				1213	*/
				1214	couldsleep = couldsleepnext;
				1215	couldsleepnext = 1;
				1216	if (couldsleep) {
				1217	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1218	rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
				1219	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1220	}
				1221
				1222	/*
				1223	* Wait on each CPU in turn to have either visited
				1224	* a quiescent state or been in dynticks-idle mode.
				1225	*/
				1226	for_each_online_cpu(cpu) {
				1227	while (rcu_qsctr_inc_needed(cpu) &&
				1228	rcu_qsctr_inc_needed_dyntick(cpu)) {
				1229	/* resched_cpu(cpu); @@@ */
				1230	schedule_timeout_interruptible(1);
				1231	}
				1232	}
				1233
				1234	/* Advance callbacks for each CPU. */
				1235
				1236	for_each_online_cpu(cpu) {
				1237
				1238	rdp = RCU_DATA_CPU(cpu);
				1239	spin_lock_irqsave(&rdp->lock, flags);
				1240
				1241	/*
				1242	* We are running on this CPU irq-disabled, so no
				1243	* CPU can go offline until we re-enable irqs.
				1244	* The current CPU might have already gone
				1245	* offline (between the for_each_offline_cpu and
				1246	* the spin_lock_irqsave), but in that case all its
				1247	* callback lists will be empty, so no harm done.
				1248	*
				1249	* Advance the callbacks! We share normal RCU's
				1250	* donelist, since callbacks are invoked the
				1251	* same way in either case.
				1252	*/
				1253	if (rdp->waitschedlist != NULL) {
				1254	*rdp->donetail = rdp->waitschedlist;
				1255	rdp->donetail = rdp->waitschedtail;
				1256
				1257	/*
				1258	* Next rcu_check_callbacks() will
				1259	* do the required raise_softirq().
				1260	*/
				1261	}
				1262	if (rdp->nextschedlist != NULL) {
				1263	rdp->waitschedlist = rdp->nextschedlist;
				1264	rdp->waitschedtail = rdp->nextschedtail;
				1265	couldsleep = 0;
				1266	couldsleepnext = 0;
				1267	} else {
				1268	rdp->waitschedlist = NULL;
				1269	rdp->waitschedtail = &rdp->waitschedlist;
				1270	}
				1271	rdp->nextschedlist = NULL;
				1272	rdp->nextschedtail = &rdp->nextschedlist;
				1273
				1274	/* Mark sleep intention. */
				1275
				1276	rdp->rcu_sched_sleeping = couldsleep;
				1277
				1278	spin_unlock_irqrestore(&rdp->lock, flags);
				1279	}
				1280
				1281	/* If we saw callbacks on the last scan, go deal with them. */
				1282
				1283	if (!couldsleep)
				1284	continue;
				1285
				1286	/* Attempt to block... */
				1287
				1288	spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
				1289	if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
				1290
				1291	/*
				1292	* Someone posted a callback after we scanned.
				1293	* Go take care of it.
				1294	*/
				1295	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1296	couldsleepnext = 0;
				1297	continue;
				1298	}
				1299
				1300	/* Block until the next person posts a callback. */
				1301
				1302	rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
				1303	spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
				1304	ret = 0;
				1305	__wait_event_interruptible(rcu_ctrlblk.sched_wq,
				1306	rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
				1307	ret);
				1308
				1309	/*
				1310	* Signals would prevent us from sleeping, and we cannot
				1311	* do much with them in any case. So flush them.
				1312	*/
				1313	if (ret)
				1314	flush_signals(current);
				1315	couldsleepnext = 0;
				1316
				1317	} while (!kthread_should_stop());
				1318
				1319	return (0);
				1320	}
				1321
				1322	/*
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1323	* Check to see if any future RCU-related work will need to be done
				1324	* by the current CPU, even if none need be done immediately, returning
				1325	* 1 if so. Assumes that notifiers would take care of handling any
				1326	* outstanding requests from the RCU core.
				1327	*
				1328	* This function is part of the RCU implementation; it is -not-
				1329	* an exported member of the RCU API.
				1330	*/
				1331	int rcu_needs_cpu(int cpu)
				1332	{
				1333	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1334
				1335	return (rdp->donelist != NULL \|\|
				1336	!!rdp->waitlistcount \|\|
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1337	rdp->nextlist != NULL \|\|
				1338	rdp->nextschedlist != NULL \|\|
				1339	rdp->waitschedlist != NULL);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1340	}
				1341
				1342	int rcu_pending(int cpu)
				1343	{
				1344	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1345
				1346	/* The CPU has at least one callback queued somewhere. */
				1347
				1348	if (rdp->donelist != NULL \|\|
				1349	!!rdp->waitlistcount \|\|
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1350	rdp->nextlist != NULL \|\|
				1351	rdp->nextschedlist != NULL \|\|
				1352	rdp->waitschedlist != NULL)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1353	return 1;
				1354
				1355	/* The RCU core needs an acknowledgement from this CPU. */
				1356
				1357	if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) \|\|
				1358	(per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
				1359	return 1;
				1360
				1361	/* This CPU has fallen behind the global grace-period number. */
				1362
				1363	if (rdp->completed != rcu_ctrlblk.completed)
				1364	return 1;
				1365
				1366	/* Nothing needed from this CPU. */
				1367
				1368	return 0;
				1369	}
				1370
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1371	static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				1372	unsigned long action, void *hcpu)
				1373	{
				1374	long cpu = (long)hcpu;
				1375
				1376	switch (action) {
				1377	case CPU_UP_PREPARE:
				1378	case CPU_UP_PREPARE_FROZEN:
				1379	rcu_online_cpu(cpu);
				1380	break;
				1381	case CPU_UP_CANCELED:
				1382	case CPU_UP_CANCELED_FROZEN:
				1383	case CPU_DEAD:
				1384	case CPU_DEAD_FROZEN:
				1385	rcu_offline_cpu(cpu);
				1386	break;
				1387	default:
				1388	break;
				1389	}
				1390	return NOTIFY_OK;
				1391	}
				1392
/* Notifier block whose callback is rcu_cpu_notify(). */
				1393	static struct notifier_block __cpuinitdata rcu_nb = {
				1394	.notifier_call = rcu_cpu_notify,
				1395	};
				1396
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1397	void __init __rcu_init(void)
				1398	{
				1399	int cpu;
				1400	int i;
				1401	struct rcu_data *rdp;
				1402
				1403	printk(KERN_NOTICE "Preemptible RCU implementation.\n");
				1404	for_each_possible_cpu(cpu) {
				1405	rdp = RCU_DATA_CPU(cpu);
				1406	spin_lock_init(&rdp->lock);
				1407	rdp->completed = 0;
				1408	rdp->waitlistcount = 0;
				1409	rdp->nextlist = NULL;
				1410	rdp->nexttail = &rdp->nextlist;
				1411	for (i = 0; i < GP_STAGES; i++) {
				1412	rdp->waitlist[i] = NULL;
				1413	rdp->waittail[i] = &rdp->waitlist[i];
				1414	}
				1415	rdp->donelist = NULL;
				1416	rdp->donetail = &rdp->donelist;
				1417	rdp->rcu_flipctr[0] = 0;
				1418	rdp->rcu_flipctr[1] = 0;
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1419	rdp->nextschedlist = NULL;
				1420	rdp->nextschedtail = &rdp->nextschedlist;
				1421	rdp->waitschedlist = NULL;
				1422	rdp->waitschedtail = &rdp->waitschedlist;
				1423	rdp->rcu_sched_sleeping = 0;
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1424	}
Paul E. McKenney	eaf649e	2008-01-25 21:08:25 +0100	[diff] [blame]	1425	register_cpu_notifier(&rcu_nb);
				1426
				1427	/*
				1428	* We don't need protection against CPU-Hotplug here
				1429	* since
				1430	* a) If a CPU comes online while we are iterating over the
				1431	* cpu_online_map below, we would only end up making a
				1432	* duplicate call to rcu_online_cpu() which sets the corresponding
				1433	* CPU's mask in the rcu_cpu_online_map.
				1434	*
				1435	* b) A CPU cannot go offline at this point in time since the user
				1436	* does not have access to the sysfs interface, nor do we
				1437	* suspend the system.
				1438	*/
				1439	for_each_online_cpu(cpu)
				1440	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);
				1441
Carlos R. Mafra	962cf36	2008-05-15 11:15:37 -0300	[diff] [blame]	1442	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1443	}
				1444
				1445	/*
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1446	* Late-boot-time RCU initialization that must wait until after scheduler
				1447	* has been initialized.
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1448	*/
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1449	void __init rcu_init_sched(void)
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1450	{
Paul E. McKenney	4446a36	2008-05-12 21:21:05 +0200	[diff] [blame]	1451	rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
				1452	NULL,
				1453	"rcu_sched_grace_period");
				1454	WARN_ON(IS_ERR(rcu_sched_grace_period_task));
Paul E. McKenney	e260be6	2008-01-25 21:08:24 +0100	[diff] [blame]	1455	}
				1456
				1457	#ifdef CONFIG_RCU_TRACE
				1458	long *rcupreempt_flipctr(int cpu)
				1459	{
				1460	return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
				1461	}
				1462	EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
				1463
				1464	int rcupreempt_flip_flag(int cpu)
				1465	{
				1466	return per_cpu(rcu_flip_flag, cpu);
				1467	}
				1468	EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
				1469
				1470	int rcupreempt_mb_flag(int cpu)
				1471	{
				1472	return per_cpu(rcu_mb_flag, cpu);
				1473	}
				1474	EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
				1475
				1476	char *rcupreempt_try_flip_state_name(void)
				1477	{
				1478	return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
				1479	}
				1480	EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
				1481
				1482	struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
				1483	{
				1484	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
				1485
				1486	return &rdp->trace;
				1487	}
				1488	EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
				1489
				1490	#endif /* #ifdef CONFIG_RCU_TRACE */