/*
 * RCU expedited grace periods
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

/* Wrapper functions for expedited grace periods. */
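/*
 * The bottom bit of ->expedited_sequence is set while an expedited grace
 * period is in progress, and the remaining bits count completed expedited
 * grace periods, so the counter advances by two per grace period.  A
 * snapshot taken by rcu_exp_gp_seq_snap() is therefore the value that
 * ->expedited_sequence must reach before a full expedited grace period
 * is known to have elapsed since the snapshot was taken.
 */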
static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
{
	rcu_seq_start(&rsp->expedited_sequence);
}
static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
{
	rcu_seq_end(&rsp->expedited_sequence);
	smp_mb(); /* Ensure that consecutive grace periods serialize. */
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
	unsigned long s;

	smp_mb(); /* Caller's modifications seen first by other CPUs. */
	s = rcu_seq_snap(&rsp->expedited_sequence);
	trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
	return s;
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
	return rcu_seq_done(&rsp->expedited_sequence, s);
}

/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity.  Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online.  This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
{
	bool done;
	unsigned long flags;
	unsigned long mask;
	unsigned long oldmask;
	int ncpus = READ_ONCE(rsp->ncpus);
	struct rcu_node *rnp;
	struct rcu_node *rnp_up;

	/* If no new CPUs onlined since last time, nothing to do. */
	if (likely(ncpus == rsp->ncpus_snap))
		return;
	rsp->ncpus_snap = ncpus;

	/*
	 * Each pass through the following loop propagates newly onlined
	 * CPUs for the current rcu_node structure up the rcu_node tree.
	 */
	rcu_for_each_leaf_node(rsp, rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		if (rnp->expmaskinit == rnp->expmaskinitnext) {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			continue; /* No new CPUs, nothing to do. */
		}

		/* Update this node's mask, track old value for propagation. */
		oldmask = rnp->expmaskinit;
		rnp->expmaskinit = rnp->expmaskinitnext;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

		/* If was already nonzero, nothing to propagate. */
		if (oldmask)
			continue;

		/* Propagate the new CPU up the tree. */
		mask = rnp->grpmask;
		rnp_up = rnp->parent;
		done = false;
		while (rnp_up) {
			raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
			if (rnp_up->expmaskinit)
				done = true;
			rnp_up->expmaskinit |= mask;
			raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
			if (done)
				break;
			mask = rnp_up->grpmask;
			rnp_up = rnp_up->parent;
		}
	}
}

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
{
	unsigned long flags;
	struct rcu_node *rnp;

	sync_exp_reset_tree_hotplug(rsp);
	rcu_for_each_node_breadth_first(rsp, rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		WARN_ON_ONCE(rnp->expmask);
		rnp->expmask = rnp->expmaskinit;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}

/*
 * Return non-zero if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.  Works only for preemptible
 * RCU -- other RCU implementations use other means.
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
	return rnp->exp_tasks == NULL &&
	       READ_ONCE(rnp->expmask) == 0;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 *
 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
 * structure's ->lock.
 */
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
				 bool wake, unsigned long flags)
	__releases(rnp->lock)
{
	unsigned long mask;

	for (;;) {
		if (!sync_rcu_preempt_exp_done(rnp)) {
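			/*
			 * Not yet done: if no CPUs remain (->expmask is
			 * zero), only blocked tasks are holding up this
			 * expedited grace period, so give them a chance
			 * at priority boosting where that is configured.
			 * Either branch releases rnp->lock, because
			 * rcu_initiate_boost() drops it internally.
			 */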
			if (!rnp->expmask)
				rcu_initiate_boost(rnp, flags);
			else
				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			break;
		}
		if (rnp->parent == NULL) {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			if (wake) {
				smp_mb(); /* EGP done before wake_up(). */
				swake_up(&rsp->expedited_wq);
			}
			break;
		}
		mask = rnp->grpmask;
		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
		rnp = rnp->parent;
		raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
		WARN_ON_ONCE(!(rnp->expmask & mask));
		rnp->expmask &= ~mask;
	}
}

/*
 * Report expedited quiescent state for specified node.  This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 *
 * Caller must hold the rcu_state's exp_mutex.
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
					      struct rcu_node *rnp, bool wake)
{
	unsigned long flags;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	__rcu_report_exp_rnp(rsp, rnp, wake, flags);
}

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure.  Caller must hold the rcu_state's
 * exp_mutex.
 */
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
				    unsigned long mask, bool wake)
{
	unsigned long flags;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!(rnp->expmask & mask)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	rnp->expmask &= ~mask;
	__rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
}

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
			       bool wake)
{
	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}

/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
			       unsigned long s)
{
	if (rcu_exp_gp_seq_done(rsp, s)) {
		trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
		/* Ensure test happens before caller kfree(). */
		smp_mb__before_atomic(); /* ^^^ */
		atomic_long_inc(stat);
		return true;
	}
	return false;
}

/*
 * Funnel-lock acquisition for expedited grace periods.  Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held.  Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
	struct rcu_node *rnp = rdp->mynode;
	struct rcu_node *rnp_root = rcu_get_root(rsp);

	/* Low-contention fastpath. */
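	/*
	 * If neither this CPU's leaf rcu_node structure nor the root has
	 * yet recorded a request for a sequence number at least as large
	 * as s, and if the exp_mutex can be acquired without blocking,
	 * skip the funnel walk entirely.
	 */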
	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
	    (rnp == rnp_root ||
	     ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
	    mutex_trylock(&rsp->exp_mutex))
		goto fastpath;

	/*
	 * Each pass through the following loop works its way up
	 * the rcu_node tree, returning if others have done the work,
	 * and otherwise falling through to acquire rsp->exp_mutex.  The
	 * mapping from CPU to rcu_node structure can be inexact, as it is
	 * just promoting locality and is not strictly needed for correctness.
	 */
	for (; rnp != NULL; rnp = rnp->parent) {
		if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
			return true;

		/* Work not done, either wait here or go up. */
		spin_lock(&rnp->exp_lock);
		if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {

			/* Someone else doing GP, so wait for them. */
			spin_unlock(&rnp->exp_lock);
			trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
						  rnp->grplo, rnp->grphi,
						  TPS("wait"));
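			/*
			 * Sleep until the grace period corresponding to s
			 * has completed.  The bottom bit of the sequence
			 * counter is the grace-period-in-progress flag, so
			 * s >> 1 counts grace periods, and its low-order
			 * two bits select among this rcu_node structure's
			 * four expedited wait queues.
			 */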
			wait_event(rnp->exp_wq[(s >> 1) & 0x3],
				   sync_exp_work_done(rsp,
						      &rdp->exp_workdone2, s));
			return true;
		}
		rnp->exp_seq_rq = s; /* Followers can wait on us. */
		spin_unlock(&rnp->exp_lock);
		trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
					  rnp->grphi, TPS("nxtlvl"));
	}
	mutex_lock(&rsp->exp_mutex);
fastpath:
	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
		mutex_unlock(&rsp->exp_mutex);
		return true;
	}
	rcu_exp_gp_seq_start(rsp);
	trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
	return false;
}

/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	struct rcu_state *rsp = data;

	rdp = this_cpu_ptr(rsp->rda);
	rnp = rdp->mynode;
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	    __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
		return;
	if (rcu_is_cpu_rrupt_from_idle()) {
		rcu_report_exp_rdp(&rcu_sched_state,
				   this_cpu_ptr(&rcu_sched_data), true);
		return;
	}
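	/*
	 * Not idle, so record that this CPU still owes an expedited
	 * quiescent state and force a reschedule.  The resulting context
	 * switch is a quiescent state for RCU-sched, at which point the
	 * deferred quiescent state will be reported.
	 */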
	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
	resched_cpu(smp_processor_id());
}

/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
	struct rcu_data *rdp;
	int ret;
	struct rcu_node *rnp;
	struct rcu_state *rsp = &rcu_sched_state;

	rdp = per_cpu_ptr(rsp->rda, cpu);
	rnp = rdp->mynode;
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
		return;
	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
	WARN_ON_ONCE(ret);
}

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
				     smp_call_func_t func)
{
	int cpu;
	unsigned long flags;
	unsigned long mask_ofl_test;
	unsigned long mask_ofl_ipi;
	int ret;
	struct rcu_node *rnp;

	sync_exp_reset_tree(rsp);
	rcu_for_each_leaf_node(rsp, rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);

		/* Each pass checks a CPU for identity, offline, and idle. */
		mask_ofl_test = 0;
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

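			/*
			 * Treat the CPU as already quiescent (no IPI needed)
			 * if it is the CPU running this code, if its
			 * ->dynticks counter is even (the CPU is idle,
			 * hence in an extended quiescent state; the
			 * atomic_add_return(0, ...) is a full-barrier read),
			 * or if its bit is clear in ->qsmaskinitnext
			 * (the CPU is offline).
			 */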
			if (raw_smp_processor_id() == cpu ||
			    !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
			    !(rnp->qsmaskinitnext & rdp->grpmask))
				mask_ofl_test |= rdp->grpmask;
		}
		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;

		/*
		 * Need to wait for any blocked tasks as well.  Note that
		 * additional blocking tasks will also block the expedited
		 * GP until such time as the ->expmask bits are cleared.
		 */
		if (rcu_preempt_has_tasks(rnp))
			rnp->exp_tasks = rnp->blkd_tasks.next;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

		/* IPI the remaining CPUs for expedited quiescent state. */
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
			if (!(mask_ofl_ipi & mask))
				continue;
retry_ipi:
			ret = smp_call_function_single(cpu, func, rsp, 0);
			if (!ret) {
				mask_ofl_ipi &= ~mask;
				continue;
			}
			/* Failed, raced with CPU hotplug operation. */
			raw_spin_lock_irqsave_rcu_node(rnp, flags);
			if ((rnp->qsmaskinitnext & mask) &&
			    (rnp->expmask & mask)) {
				/* Online, so delay for a bit and try again. */
				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
				schedule_timeout_uninterruptible(1);
				goto retry_ipi;
			}
			/* CPU really is offline, so we can ignore it. */
			if (!(rnp->expmask & mask))
				mask_ofl_ipi &= ~mask;
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}
		/* Report quiescent states for those that went offline. */
		mask_ofl_test |= mask_ofl_ipi;
		if (mask_ofl_test)
			rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
	}
}

static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
{
	int cpu;
	unsigned long jiffies_stall;
	unsigned long jiffies_start;
	unsigned long mask;
	int ndetected;
	struct rcu_node *rnp;
	struct rcu_node *rnp_root = rcu_get_root(rsp);
	int ret;

	jiffies_stall = rcu_jiffies_till_stall_check();
	jiffies_start = jiffies;

	for (;;) {
		ret = swait_event_timeout(
				rsp->expedited_wq,
				sync_rcu_preempt_exp_done(rnp_root),
				jiffies_stall);
		if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
			return;
		WARN_ON(ret < 0); /* workqueues should not be signaled. */
		if (rcu_cpu_stall_suppress)
			continue;
		panic_on_rcu_stall();
		pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
		       rsp->name);
		ndetected = 0;
		rcu_for_each_leaf_node(rsp, rnp) {
			ndetected += rcu_print_task_exp_stall(rnp);
			for_each_leaf_node_possible_cpu(rnp, cpu) {
				struct rcu_data *rdp;

				mask = leaf_node_cpu_bit(rnp, cpu);
				if (!(rnp->expmask & mask))
					continue;
				ndetected++;
				rdp = per_cpu_ptr(rsp->rda, cpu);
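				/*
				 * Print "O" if the CPU is currently online,
				 * "o" if its bit is set in ->expmaskinit
				 * (so it was counted when this expedited GP
				 * was initialized), and "N" if its bit is
				 * set in ->expmaskinitnext (so it will be
				 * counted at the next initialization).
				 */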
				pr_cont(" %d-%c%c%c", cpu,
					"O."[!!cpu_online(cpu)],
					"o."[!!(rdp->grpmask & rnp->expmaskinit)],
					"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
			}
		}
		pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
			jiffies - jiffies_start, rsp->expedited_sequence,
			rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
		if (ndetected) {
			pr_err("blocking rcu_node structures:");
			rcu_for_each_node_breadth_first(rsp, rnp) {
				if (rnp == rnp_root)
					continue; /* printed unconditionally */
				if (sync_rcu_preempt_exp_done(rnp))
					continue;
				pr_cont(" l=%u:%d-%d:%#lx/%c",
					rnp->level, rnp->grplo, rnp->grphi,
					rnp->expmask,
					".T"[!!rnp->exp_tasks]);
			}
			pr_cont("\n");
		}
		rcu_for_each_leaf_node(rsp, rnp) {
			for_each_leaf_node_possible_cpu(rnp, cpu) {
				mask = leaf_node_cpu_bit(rnp, cpu);
				if (!(rnp->expmask & mask))
					continue;
				dump_cpu_task(cpu);
			}
		}
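		/* Use a longer timeout so later warnings are less frequent. */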
		jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
	}
}

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period.  Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
{
	struct rcu_node *rnp;

	synchronize_sched_expedited_wait(rsp);
	rcu_exp_gp_seq_end(rsp);
	trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));

	/*
	 * Switch over to wakeup mode, allowing the next GP, but -only- the
	 * next GP, to proceed.
	 */
	mutex_lock(&rsp->exp_wake_mutex);

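	/*
	 * Advance each rcu_node structure's ->exp_seq_rq at least to the
	 * just-completed sequence number in order to keep counter wrap from
	 * confusing later requests, then wake up everyone sleeping on this
	 * grace period.  The (sequence >> 1) & 0x3 expression selects the
	 * per-node wait queue used by waiters on this grace period.
	 */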
	rcu_for_each_node_breadth_first(rsp, rnp) {
		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
			spin_lock(&rnp->exp_lock);
			/* Recheck, avoid hang in case someone just arrived. */
			if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
				rnp->exp_seq_rq = s;
			spin_unlock(&rnp->exp_lock);
		}
		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
	}
	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
	mutex_unlock(&rsp->exp_wake_mutex);
}

/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
	smp_call_func_t rew_func;
	struct rcu_state *rew_rsp;
	unsigned long rew_s;
	struct work_struct rew_work;
};

/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
				  smp_call_func_t func, unsigned long s)
{
	/* Initialize the rcu_node tree in preparation for the wait. */
	sync_rcu_exp_select_cpus(rsp, func);

	/* Wait and clean up, including waking everyone. */
	rcu_exp_wait_wake(rsp, s);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct work_struct *wp)
{
	struct rcu_exp_work *rewp;

	rewp = container_of(wp, struct rcu_exp_work, rew_work);
	rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
}

/*
 * Given an rcu_state pointer and a smp_call_function() handler, kick
 * off the specified flavor of expedited grace period.
 */
static void _synchronize_rcu_expedited(struct rcu_state *rsp,
				       smp_call_func_t func)
{
	struct rcu_data *rdp;
	struct rcu_exp_work rew;
	struct rcu_node *rnp;
	unsigned long s;

	/* If expedited grace periods are prohibited, fall back to normal. */
	if (rcu_gp_is_normal()) {
		wait_rcu_gp(rsp->call);
		return;
	}

	/* Take a snapshot of the sequence number. */
	s = rcu_exp_gp_seq_snap(rsp);
	if (exp_funnel_lock(rsp, s))
		return; /* Someone else did our work for us. */

	/* Ensure that load happens before action based on it. */
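	/*
	 * Mid-boot dead zone: drive the expedited grace period directly
	 * from the calling context, since the workqueue path cannot be
	 * relied upon this early in boot.
	 */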
	if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
		/* Direct call during scheduler init and early_initcalls(). */
		rcu_exp_sel_wait_wake(rsp, func, s);
	} else {
		/* Marshal arguments & schedule the expedited grace period. */
		rew.rew_func = func;
		rew.rew_rsp = rsp;
		rew.rew_s = s;
		INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
		schedule_work(&rew.rew_work);
	}

	/* Wait for expedited grace period to complete. */
	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
	rnp = rcu_get_root(rsp);
	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
		   sync_exp_work_done(rsp,
				      &rdp->exp_workdone0, s));

	/* Let the next expedited grace period start. */
	mutex_unlock(&rsp->exp_mutex);
}

/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code.  In fact,
 * if you are using synchronize_sched_expedited() in a loop, please
 * restructure your code to batch your updates, and then use a single
 * synchronize_sched() instead.
 *
 * This implementation can be thought of as an application of sequence
 * locking to expedited grace periods, but using the sequence counter to
 * determine when someone else has already done the work instead of for
 * retrying readers.
 */
void synchronize_sched_expedited(void)
{
	struct rcu_state *rsp = &rcu_sched_state;

	/* If only one CPU, this is automatically a grace period. */
	if (rcu_blocking_is_gp())
		return;

	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
Paul E. McKenney40e0a6c2016-04-15 16:44:07 -0700623
624#ifdef CONFIG_PREEMPT_RCU
625
626/*
627 * Remote handler for smp_call_function_single(). If there is an
628 * RCU read-side critical section in effect, request that the
629 * next rcu_read_unlock() record the quiescent state up the
630 * ->expmask fields in the rcu_node tree. Otherwise, immediately
631 * report the quiescent state.
632 */
633static void sync_rcu_exp_handler(void *info)
634{
635 struct rcu_data *rdp;
636 struct rcu_state *rsp = info;
637 struct task_struct *t = current;
638
639 /*
640 * Within an RCU read-side critical section, request that the next
641 * rcu_read_unlock() report. Unless this RCU read-side critical
642 * section has already blocked, in which case it is already set
643 * up for the expedited grace period to wait on it.
644 */
645 if (t->rcu_read_lock_nesting > 0 &&
646 !t->rcu_read_unlock_special.b.blocked) {
647 t->rcu_read_unlock_special.b.exp_need_qs = true;
648 return;
649 }
650
651 /*
652 * We are either exiting an RCU read-side critical section (negative
653 * values of t->rcu_read_lock_nesting) or are not in one at all
654 * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
655 * read-side critical section that blocked before this expedited
656 * grace period started. Either way, we can immediately report
657 * the quiescent state.
658 */
659 rdp = this_cpu_ptr(rsp->rda);
660 rcu_report_exp_rdp(rsp, rdp, true);
661}

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it.  The basic
 * idea is to IPI all non-idle non-nohz online CPUs.  The IPI handler
 * checks whether the CPU is in an RCU-preempt critical section, and
 * if so, it sets a flag that causes the outermost rcu_read_unlock()
 * to report the quiescent state.  On the other hand, if the CPU is
 * not in an RCU read-side critical section, the IPI handler reports
 * the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, so is
 * thus not recommended for any sort of common-case code.  In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 */
void synchronize_rcu_expedited(void)
{
	struct rcu_state *rsp = rcu_state_p;

	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return;
	_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#else /* #ifdef CONFIG_PREEMPT_RCU */

/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptible RCU does not exist, map to rcu-sched.
 */
void synchronize_rcu_expedited(void)
{
	synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/*
 * Switch to run-time mode once Tree RCU has fully initialized.
 */
static int __init rcu_exp_runtime_mode(void)
{
	rcu_test_sync_prims();
	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
	rcu_test_sync_prims();
	return 0;
}
core_initcall(rcu_exp_runtime_mode);