Blame - drivers/staging/lustre/lustre/ptlrpc/service.c - kernel/msm-4.9

blob: 003344ccfffcde1d88a43556fa404be11072be36 [file] [log] [blame]

Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1	/*
				2	* GPL HEADER START
				3	*
				4	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				5	*
				6	* This program is free software; you can redistribute it and/or modify
				7	* it under the terms of the GNU General Public License version 2 only,
				8	* as published by the Free Software Foundation.
				9	*
				10	* This program is distributed in the hope that it will be useful, but
				11	* WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	* General Public License version 2 for more details (a copy is included
				14	* in the LICENSE file that accompanied this code).
				15	*
				16	* You should have received a copy of the GNU General Public License
				17	* version 2 along with this program; If not, see
				18	* http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
				19	*
				20	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				21	* CA 95054 USA or visit www.sun.com if you need additional information or
				22	* have any questions.
				23	*
				24	* GPL HEADER END
				25	*/
				26	/*
				27	* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
				28	* Use is subject to license terms.
				29	*
				30	* Copyright (c) 2010, 2012, Intel Corporation.
				31	*/
				32	/*
				33	* This file is part of Lustre, http://www.lustre.org/
				34	* Lustre is a trademark of Sun Microsystems, Inc.
				35	*/
				36
				37	#define DEBUG_SUBSYSTEM S_RPC
Greg Kroah-Hartman	e27db14	2014-07-11 22:29:36 -0700	[diff] [blame]	38	#include "../include/obd_support.h"
				39	#include "../include/obd_class.h"
				40	#include "../include/lustre_net.h"
				41	#include "../include/lu_object.h"
Greg Kroah-Hartman	9fdaf8c	2014-07-11 20:51:16 -0700	[diff] [blame]	42	#include "../../include/linux/lnet/types.h"
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	43	#include "ptlrpc_internal.h"
				44
/* The following are visible and mutable through /sys/module/ptlrpc */

/*
 * Only test_req_buffer_pressure is defined here; it is registered with
 * mode 0444. The at_* variables are registered with mode 0644 and are
 * not defined in this part of the file.
 */
int test_req_buffer_pressure;
module_param(test_req_buffer_pressure, int, 0444);
MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools");
module_param(at_min, int, 0644);
MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)");
module_param(at_max, int, 0644);
MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)");
module_param(at_history, int, 0644);
MODULE_PARM_DESC(at_history,
		 "Adaptive timeouts remember the slowest event that took place within this period (sec)");
module_param(at_early_margin, int, 0644);
MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply");
module_param(at_extra, int, 0644);
MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply");
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	60
				61
/* forward ref */
/* Needed by ptlrpc_grow_req_bufs(), which is defined before this function. */
static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req);
static void ptlrpc_at_remove_timed(struct ptlrpc_request *req);

/** Holds a list of all PTLRPC services */
LIST_HEAD(ptlrpc_all_services);
/**
 * Used to protect the \e ptlrpc_all_services list.
 * NOTE(review): not statically initialised here; presumably mutex_init()
 * is called elsewhere before first use -- confirm.
 */
struct mutex ptlrpc_all_services_mutex;
				71
Zoltán Lajos Kis	a96389d	2015-08-02 22:36:31 +0200	[diff] [blame]	72	static struct ptlrpc_request_buffer_desc *
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	73	ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
				74	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	75	struct ptlrpc_service *svc = svcpt->scp_service;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	76	struct ptlrpc_request_buffer_desc *rqbd;
				77
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	78	rqbd = kzalloc_node(sizeof(*rqbd), GFP_NOFS,
				79	cfs_cpt_spread_node(svc->srv_cptable,
				80	svcpt->scp_cpt));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	81	if (rqbd == NULL)
				82	return NULL;
				83
				84	rqbd->rqbd_svcpt = svcpt;
				85	rqbd->rqbd_refcount = 0;
				86	rqbd->rqbd_cbid.cbid_fn = request_in_callback;
				87	rqbd->rqbd_cbid.cbid_arg = rqbd;
				88	INIT_LIST_HEAD(&rqbd->rqbd_reqs);
				89	OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable,
				90	svcpt->scp_cpt, svc->srv_buf_size);
				91	if (rqbd->rqbd_buffer == NULL) {
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	92	kfree(rqbd);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	93	return NULL;
				94	}
				95
				96	spin_lock(&svcpt->scp_lock);
				97	list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
				98	svcpt->scp_nrqbds_total++;
				99	spin_unlock(&svcpt->scp_lock);
				100
				101	return rqbd;
				102	}
				103
Zoltán Lajos Kis	a96389d	2015-08-02 22:36:31 +0200	[diff] [blame]	104	static void
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	105	ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
				106	{
				107	struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
				108
				109	LASSERT(rqbd->rqbd_refcount == 0);
				110	LASSERT(list_empty(&rqbd->rqbd_reqs));
				111
				112	spin_lock(&svcpt->scp_lock);
				113	list_del(&rqbd->rqbd_list);
				114	svcpt->scp_nrqbds_total--;
				115	spin_unlock(&svcpt->scp_lock);
				116
Julia Lawall	ee0ec19	2015-06-11 14:02:58 +0200	[diff] [blame]	117	kvfree(rqbd->rqbd_buffer);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	118	kfree(rqbd);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	119	}
				120
Zoltán Lajos Kis	a96389d	2015-08-02 22:36:31 +0200	[diff] [blame]	121	static int
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	122	ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
				123	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	124	struct ptlrpc_service *svc = svcpt->scp_service;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	125	struct ptlrpc_request_buffer_desc *rqbd;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	126	int rc = 0;
				127	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	128
				129	if (svcpt->scp_rqbd_allocating)
				130	goto try_post;
				131
				132	spin_lock(&svcpt->scp_lock);
				133	/* check again with lock */
				134	if (svcpt->scp_rqbd_allocating) {
				135	/* NB: we might allow more than one thread in the future */
				136	LASSERT(svcpt->scp_rqbd_allocating == 1);
				137	spin_unlock(&svcpt->scp_lock);
				138	goto try_post;
				139	}
				140
				141	svcpt->scp_rqbd_allocating++;
				142	spin_unlock(&svcpt->scp_lock);
				143
				144
				145	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
				146	/* NB: another thread might have recycled enough rqbds, we
				147	* need to make sure it wouldn't over-allocate, see LU-1212. */
				148	if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
				149	break;
				150
				151	rqbd = ptlrpc_alloc_rqbd(svcpt);
				152
				153	if (rqbd == NULL) {
				154	CERROR("%s: Can't allocate request buffer\n",
				155	svc->srv_name);
				156	rc = -ENOMEM;
				157	break;
				158	}
				159	}
				160
				161	spin_lock(&svcpt->scp_lock);
				162
				163	LASSERT(svcpt->scp_rqbd_allocating == 1);
				164	svcpt->scp_rqbd_allocating--;
				165
				166	spin_unlock(&svcpt->scp_lock);
				167
				168	CDEBUG(D_RPCTRACE,
				169	"%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
				170	svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
				171	svcpt->scp_nrqbds_total, rc);
				172
				173	try_post:
				174	if (post && rc == 0)
				175	rc = ptlrpc_server_post_idle_rqbds(svcpt);
				176
				177	return rc;
				178	}
				179
				180	/**
				181	* Part of Rep-Ack logic.
Masanari Iida	369e5c9	2014-02-08 00:30:36 +0900	[diff] [blame]	182	* Puts a lock and its mode into reply state associated to request reply.
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	183	*/
				184	void
				185	ptlrpc_save_lock(struct ptlrpc_request *req,
				186	struct lustre_handle *lock, int mode, int no_ack)
				187	{
				188	struct ptlrpc_reply_state *rs = req->rq_reply_state;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	189	int idx;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	190
				191	LASSERT(rs != NULL);
				192	LASSERT(rs->rs_nlocks < RS_MAX_LOCKS);
				193
				194	if (req->rq_export->exp_disconnected) {
				195	ldlm_lock_decref(lock, mode);
				196	} else {
				197	idx = rs->rs_nlocks++;
				198	rs->rs_locks[idx] = *lock;
				199	rs->rs_modes[idx] = mode;
				200	rs->rs_difficult = 1;
				201	rs->rs_no_ack = !!no_ack;
				202	}
				203	}
				204	EXPORT_SYMBOL(ptlrpc_save_lock);
				205
				206
struct ptlrpc_hr_partition;

/* One reply-handling thread; reply states are queued on hrt_queue. */
struct ptlrpc_hr_thread {
	int				hrt_id;		/* thread ID */
	spinlock_t			hrt_lock;	/* taken around hrt_queue updates */
	wait_queue_head_t		hrt_waitq;	/* woken after queueing replies */
	struct list_head		hrt_queue;	/* RS queue */
	struct ptlrpc_hr_partition	*hrt_partition;
};

/* Set of reply-handling threads belonging to one CPU partition. */
struct ptlrpc_hr_partition {
	/* # of started threads */
	atomic_t			hrp_nstarted;
	/* # of stopped threads */
	atomic_t			hrp_nstopped;
	/* cpu partition id */
	int				hrp_cpt;
	/* round-robin rotor for choosing thread */
	int				hrp_rotor;
	/* total number of threads on this partition */
	int				hrp_nthrs;
	/* threads table */
	struct ptlrpc_hr_thread		*hrp_thrs;
};

#define HRT_RUNNING 0
#define HRT_STOPPING 1

/* Global state of the reply-handling service (see ptlrpc_hr below). */
struct ptlrpc_hr_service {
	/* CPU partition table, it's just cfs_cpt_table for now */
	struct cfs_cpt_table		*hr_cpt_table;
	/** controller sleep waitq */
	wait_queue_head_t		hr_waitq;
	unsigned int			hr_stopping;
	/** roundrobin rotor for non-affinity service */
	unsigned int			hr_rotor;
	/* partition data */
	struct ptlrpc_hr_partition	**hr_partitions;
};

/*
 * Batch of reply states collected for one service partition so they can
 * be handed to a reply-handling thread in one go.
 */
struct rs_batch {
	struct list_head		rsb_replies;	/* collected reply states */
	unsigned int			rsb_n_replies;	/* # of entries on rsb_replies */
	struct ptlrpc_service_part	*rsb_svcpt;	/* partition whose scp_rep_lock is held */
};

/** reply handling service. */
static struct ptlrpc_hr_service ptlrpc_hr;

/**
 * maximum number of replies scheduled in one batch
 */
#define MAX_SCHEDULED 256
				260
				261	/**
				262	* Initialize a reply batch.
				263	*
				264	* \param b batch
				265	*/
				266	static void rs_batch_init(struct rs_batch *b)
				267	{
Joe Perches	ec83e61	2013-10-13 20:22:03 -0700	[diff] [blame]	268	memset(b, 0, sizeof(*b));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	269	INIT_LIST_HEAD(&b->rsb_replies);
				270	}
				271
				272	/**
				273	* Choose an hr thread to dispatch requests to.
				274	*/
				275	static struct ptlrpc_hr_thread *
				276	ptlrpc_hr_select(struct ptlrpc_service_part *svcpt)
				277	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	278	struct ptlrpc_hr_partition *hrp;
				279	unsigned int rotor;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	280
				281	if (svcpt->scp_cpt >= 0 &&
				282	svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) {
				283	/* directly match partition */
				284	hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt];
				285
				286	} else {
				287	rotor = ptlrpc_hr.hr_rotor++;
				288	rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table);
				289
				290	hrp = ptlrpc_hr.hr_partitions[rotor];
				291	}
				292
				293	rotor = hrp->hrp_rotor++;
				294	return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs];
				295	}
				296
				297	/**
				298	* Dispatch all replies accumulated in the batch to one from
				299	* dedicated reply handling threads.
				300	*
				301	* \param b batch
				302	*/
				303	static void rs_batch_dispatch(struct rs_batch *b)
				304	{
				305	if (b->rsb_n_replies != 0) {
				306	struct ptlrpc_hr_thread *hrt;
				307
				308	hrt = ptlrpc_hr_select(b->rsb_svcpt);
				309
				310	spin_lock(&hrt->hrt_lock);
				311	list_splice_init(&b->rsb_replies, &hrt->hrt_queue);
				312	spin_unlock(&hrt->hrt_lock);
				313
				314	wake_up(&hrt->hrt_waitq);
				315	b->rsb_n_replies = 0;
				316	}
				317	}
				318
				319	/**
				320	* Add a reply to a batch.
				321	* Add one reply object to a batch, schedule batched replies if overload.
				322	*
				323	* \param b batch
				324	* \param rs reply
				325	*/
				326	static void rs_batch_add(struct rs_batch b, struct ptlrpc_reply_state rs)
				327	{
				328	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
				329
				330	if (svcpt != b->rsb_svcpt \|\| b->rsb_n_replies >= MAX_SCHEDULED) {
				331	if (b->rsb_svcpt != NULL) {
				332	rs_batch_dispatch(b);
				333	spin_unlock(&b->rsb_svcpt->scp_rep_lock);
				334	}
				335	spin_lock(&svcpt->scp_rep_lock);
				336	b->rsb_svcpt = svcpt;
				337	}
				338	spin_lock(&rs->rs_lock);
				339	rs->rs_scheduled_ever = 1;
				340	if (rs->rs_scheduled == 0) {
				341	list_move(&rs->rs_list, &b->rsb_replies);
				342	rs->rs_scheduled = 1;
				343	b->rsb_n_replies++;
				344	}
				345	rs->rs_committed = 1;
				346	spin_unlock(&rs->rs_lock);
				347	}
				348
				349	/**
				350	* Reply batch finalization.
				351	* Dispatch remaining replies from the batch
				352	* and release remaining spinlock.
				353	*
				354	* \param b batch
				355	*/
				356	static void rs_batch_fini(struct rs_batch *b)
				357	{
				358	if (b->rsb_svcpt != NULL) {
				359	rs_batch_dispatch(b);
				360	spin_unlock(&b->rsb_svcpt->scp_rep_lock);
				361	}
				362	}
				363
/* Declare a reply batch named \a b; it still needs rs_batch_init(). */
#define DECLARE_RS_BATCH(b) struct rs_batch b
				365
				366
				367	/**
				368	* Put reply state into a queue for processing because we received
				369	* ACK from the client
				370	*/
				371	void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
				372	{
				373	struct ptlrpc_hr_thread *hrt;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	374
				375	LASSERT(list_empty(&rs->rs_list));
				376
				377	hrt = ptlrpc_hr_select(rs->rs_svcpt);
				378
				379	spin_lock(&hrt->hrt_lock);
				380	list_add_tail(&rs->rs_list, &hrt->hrt_queue);
				381	spin_unlock(&hrt->hrt_lock);
				382
				383	wake_up(&hrt->hrt_waitq);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	384	}
				385
				386	void
				387	ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
				388	{
Li Xi	5e42bc9	2014-04-27 13:07:06 -0400	[diff] [blame]	389	assert_spin_locked(&rs->rs_svcpt->scp_rep_lock);
				390	assert_spin_locked(&rs->rs_lock);
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	391	LASSERT(rs->rs_difficult);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	392	rs->rs_scheduled_ever = 1; /* flag any notification attempt */
				393
				394	if (rs->rs_scheduled) { /* being set up or already notified */
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	395	return;
				396	}
				397
				398	rs->rs_scheduled = 1;
				399	list_del_init(&rs->rs_list);
				400	ptlrpc_dispatch_difficult_reply(rs);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	401	}
				402	EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply);
				403
				404	void ptlrpc_commit_replies(struct obd_export *exp)
				405	{
				406	struct ptlrpc_reply_state rs, nxt;
				407	DECLARE_RS_BATCH(batch);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	408
				409	rs_batch_init(&batch);
				410	/* Find any replies that have been committed and get their service
				411	* to attend to complete them. */
				412
				413	/* CAVEAT EMPTOR: spinlock ordering!!! */
				414	spin_lock(&exp->exp_uncommitted_replies_lock);
				415	list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies,
				416	rs_obd_list) {
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	417	LASSERT(rs->rs_difficult);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	418	/* VBR: per-export last_committed */
				419	LASSERT(rs->rs_export);
				420	if (rs->rs_transno <= exp->exp_last_committed) {
				421	list_del_init(&rs->rs_obd_list);
				422	rs_batch_add(&batch, rs);
				423	}
				424	}
				425	spin_unlock(&exp->exp_uncommitted_replies_lock);
				426	rs_batch_fini(&batch);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	427	}
				428	EXPORT_SYMBOL(ptlrpc_commit_replies);
				429
				430	static int
				431	ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
				432	{
				433	struct ptlrpc_request_buffer_desc *rqbd;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	434	int rc;
				435	int posted = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	436
				437	for (;;) {
				438	spin_lock(&svcpt->scp_lock);
				439
				440	if (list_empty(&svcpt->scp_rqbd_idle)) {
				441	spin_unlock(&svcpt->scp_lock);
				442	return posted;
				443	}
				444
				445	rqbd = list_entry(svcpt->scp_rqbd_idle.next,
				446	struct ptlrpc_request_buffer_desc,
				447	rqbd_list);
				448	list_del(&rqbd->rqbd_list);
				449
				450	/* assume we will post successfully */
				451	svcpt->scp_nrqbds_posted++;
				452	list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
				453
				454	spin_unlock(&svcpt->scp_lock);
				455
				456	rc = ptlrpc_register_rqbd(rqbd);
				457	if (rc != 0)
				458	break;
				459
				460	posted = 1;
				461	}
				462
				463	spin_lock(&svcpt->scp_lock);
				464
				465	svcpt->scp_nrqbds_posted--;
				466	list_del(&rqbd->rqbd_list);
				467	list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
				468
				469	/* Don't complain if no request buffers are posted right now; LNET
				470	* won't drop requests because we set the portal lazy! */
				471
				472	spin_unlock(&svcpt->scp_lock);
				473
				474	return -1;
				475	}
				476
				477	static void ptlrpc_at_timer(unsigned long castmeharder)
				478	{
				479	struct ptlrpc_service_part *svcpt;
				480
				481	svcpt = (struct ptlrpc_service_part *)castmeharder;
				482
				483	svcpt->scp_at_check = 1;
				484	svcpt->scp_at_checktime = cfs_time_current();
				485	wake_up(&svcpt->scp_waitq);
				486	}
				487
/**
 * Work out the per-CPT thread numbers of \a svc from its thread
 * configuration and store them in svc->srv_nthrs_cpt_init (threads to
 * start with) and svc->srv_nthrs_cpt_limit (upper bound per CPT).
 *
 * \param svc	service being set up; srv_ncpts, srv_cptable and srv_ops
 *		are read, the two srv_nthrs_cpt_* fields are written
 * \param conf	service configuration; only conf->psc_thr is used
 */
static void
ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
			     struct ptlrpc_service_conf *conf)
{
	struct ptlrpc_service_thr_conf *tc = &conf->psc_thr;
	unsigned init;
	unsigned total;
	unsigned nthrs;
	int weight;

	/*
	 * Common code for estimating & validating threads number.
	 * CPT affinity service could have percpt thread-pool instead
	 * of a global thread-pool, which means user might not always
	 * get the threads number they give it in conf::tc_nthrs_user
	 * even they did set. It's because we need to validate threads
	 * number for each CPT to guarantee each pool will have enough
	 * threads to keep the service healthy.
	 */
	/* one more initial thread when a high-priority handler is set */
	init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL);
	init = max_t(int, init, tc->tc_nthrs_init);

	/* NB: please see comments in lustre_lnet.h for definition
	 * details of these members */
	LASSERT(tc->tc_nthrs_max != 0);

	if (tc->tc_nthrs_user != 0) {
		/* In case there is a reason to test a service with many
		 * threads, we give a less strict check here, it can
		 * be up to 8 * nthrs_max */
		total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user);
		nthrs = total / svc->srv_ncpts;
		init = max(init, nthrs);
		goto out;
	}

	total = tc->tc_nthrs_max;
	if (tc->tc_nthrs_base == 0) {
		/* don't care about base threads number per partition,
		 * this is most for non-affinity service */
		nthrs = total / svc->srv_ncpts;
		goto out;
	}

	nthrs = tc->tc_nthrs_base;
	if (svc->srv_ncpts == 1) {
		int i;

		/* NB: Increase the base number if it's single partition
		 * and total number of cores/HTs is larger or equal to 4.
		 * result will always < 2 * nthrs_base */
		weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY);
		for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */
			    (tc->tc_nthrs_base >> i) != 0; i++)
			nthrs += tc->tc_nthrs_base >> i;
	}

	if (tc->tc_thr_factor != 0) {
		int factor = tc->tc_thr_factor;
		const int fade = 4;

		/*
		 * User wants to increase the number of threads for
		 * each CPU core/HT, most likely the factor is larger than
		 * one thread/core because service threads are supposed to
		 * be blocked by lock or wait for IO.
		 */
		/*
		 * Amdahl's law says that adding processors wouldn't give
		 * a linear increasing of parallelism, so it's nonsense to
		 * have too many threads no matter how many cores/HTs
		 * there are.
		 */
		/* weight is # of HTs */
		if (cpumask_weight(topology_sibling_cpumask(0)) > 1) {
			/* depress thread factor for hyper-thread */
			factor = factor - (factor >> 1) + (factor >> 3);
		}

		weight = cfs_cpt_weight(svc->srv_cptable, 0);
		LASSERT(weight > 0);

		/*
		 * Each group of up to "fade" cores/HTs adds threads with a
		 * factor one lower than the previous group.
		 */
		for (; factor > 0 && weight > 0; factor--, weight -= fade)
			nthrs += min(weight, fade) * factor;
	}

	/* over the soft limit: fall back to an even share, but not below base */
	if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
		nthrs = max(tc->tc_nthrs_base,
			    tc->tc_nthrs_max / svc->srv_ncpts);
	}
 out:
	/* the per-CPT limit is never lower than the configured initial count */
	nthrs = max(nthrs, tc->tc_nthrs_init);
	svc->srv_nthrs_cpt_limit = nthrs;
	svc->srv_nthrs_cpt_init = init;

	if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
		CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n",
		       svc->srv_name, nthrs * svc->srv_ncpts,
		       tc->tc_nthrs_max);
	}
}
				589
				590	/**
				591	* Initialize percpt data for a service
				592	*/
				593	static int
				594	ptlrpc_service_part_init(struct ptlrpc_service *svc,
				595	struct ptlrpc_service_part *svcpt, int cpt)
				596	{
				597	struct ptlrpc_at_array *array;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	598	int size;
				599	int index;
				600	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	601
				602	svcpt->scp_cpt = cpt;
				603	INIT_LIST_HEAD(&svcpt->scp_threads);
				604
				605	/* rqbd and incoming request queue */
				606	spin_lock_init(&svcpt->scp_lock);
				607	INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
				608	INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
				609	INIT_LIST_HEAD(&svcpt->scp_req_incoming);
				610	init_waitqueue_head(&svcpt->scp_waitq);
				611	/* history request & rqbd list */
				612	INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
				613	INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
				614
Masanari Iida	369e5c9	2014-02-08 00:30:36 +0900	[diff] [blame]	615	/* active requests and hp requests */
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	616	spin_lock_init(&svcpt->scp_req_lock);
				617
				618	/* reply states */
				619	spin_lock_init(&svcpt->scp_rep_lock);
				620	INIT_LIST_HEAD(&svcpt->scp_rep_active);
				621	INIT_LIST_HEAD(&svcpt->scp_rep_idle);
				622	init_waitqueue_head(&svcpt->scp_rep_waitq);
				623	atomic_set(&svcpt->scp_nreps_difficult, 0);
				624
				625	/* adaptive timeout */
				626	spin_lock_init(&svcpt->scp_at_lock);
				627	array = &svcpt->scp_at_array;
				628
				629	size = at_est2timeout(at_max);
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	630	array->paa_size = size;
				631	array->paa_count = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	632	array->paa_deadline = -1;
				633
				634	/* allocate memory for scp_at_array (ptlrpc_at_array) */
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	635	array->paa_reqs_array =
				636	kzalloc_node(sizeof(struct list_head) * size, GFP_NOFS,
				637	cfs_cpt_spread_node(svc->srv_cptable, cpt));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	638	if (array->paa_reqs_array == NULL)
				639	return -ENOMEM;
				640
				641	for (index = 0; index < size; index++)
				642	INIT_LIST_HEAD(&array->paa_reqs_array[index]);
				643
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	644	array->paa_reqs_count =
				645	kzalloc_node(sizeof(__u32) * size, GFP_NOFS,
				646	cfs_cpt_spread_node(svc->srv_cptable, cpt));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	647	if (array->paa_reqs_count == NULL)
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	648	goto free_reqs_array;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	649
				650	cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
				651	/* At SOW, service time should be quick; 10s seems generous. If client
				652	* timeout is less than this, we'll be sending an early reply. */
				653	at_init(&svcpt->scp_at_estimate, 10, 0);
				654
				655	/* assign this before call ptlrpc_grow_req_bufs */
				656	svcpt->scp_service = svc;
				657	/* Now allocate the request buffers, but don't post them now */
				658	rc = ptlrpc_grow_req_bufs(svcpt, 0);
				659	/* We shouldn't be under memory pressure at startup, so
				660	* fail if we can't allocate all our buffers at this time. */
				661	if (rc != 0)
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	662	goto free_reqs_count;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	663
				664	return 0;
				665
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	666	free_reqs_count:
				667	kfree(array->paa_reqs_count);
				668	array->paa_reqs_count = NULL;
				669	free_reqs_array:
				670	kfree(array->paa_reqs_array);
				671	array->paa_reqs_array = NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	672
				673	return -ENOMEM;
				674	}
				675
				676	/**
				677	* Initialize service on a given portal.
				678	* This includes starting serving threads , allocating and posting rqbds and
				679	* so on.
				680	*/
				681	struct ptlrpc_service *
				682	ptlrpc_register_service(struct ptlrpc_service_conf *conf,
Oleg Drokin	328676f	2015-05-21 15:32:08 -0400	[diff] [blame]	683	struct kset *parent,
Dmitry Eremin	700815d	2015-05-21 15:32:11 -0400	[diff] [blame]	684	struct dentry *debugfs_entry)
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	685	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	686	struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt;
				687	struct ptlrpc_service *service;
				688	struct ptlrpc_service_part *svcpt;
				689	struct cfs_cpt_table *cptable;
				690	__u32 *cpts = NULL;
				691	int ncpts;
				692	int cpt;
				693	int rc;
				694	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	695
				696	LASSERT(conf->psc_buf.bc_nbufs > 0);
				697	LASSERT(conf->psc_buf.bc_buf_size >=
				698	conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD);
				699	LASSERT(conf->psc_thr.tc_ctx_tags != 0);
				700
				701	cptable = cconf->cc_cptable;
				702	if (cptable == NULL)
				703	cptable = cfs_cpt_table;
				704
				705	if (!conf->psc_thr.tc_cpu_affinity) {
				706	ncpts = 1;
				707	} else {
				708	ncpts = cfs_cpt_number(cptable);
				709	if (cconf->cc_pattern != NULL) {
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	710	struct cfs_expr_list *el;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	711
				712	rc = cfs_expr_list_parse(cconf->cc_pattern,
				713	strlen(cconf->cc_pattern),
				714	0, ncpts - 1, &el);
				715	if (rc != 0) {
				716	CERROR("%s: invalid CPT pattern string: %s",
				717	conf->psc_name, cconf->cc_pattern);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	718	return ERR_PTR(-EINVAL);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	719	}
				720
				721	rc = cfs_expr_list_values(el, ncpts, &cpts);
				722	cfs_expr_list_free(el);
				723	if (rc <= 0) {
				724	CERROR("%s: failed to parse CPT array %s: %d\n",
				725	conf->psc_name, cconf->cc_pattern, rc);
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	726	kfree(cpts);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	727	return ERR_PTR(rc < 0 ? rc : -EINVAL);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	728	}
				729	ncpts = rc;
				730	}
				731	}
				732
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	733	service = kzalloc(offsetof(struct ptlrpc_service, srv_parts[ncpts]),
				734	GFP_NOFS);
Julia Lawall	597851a	2015-06-20 18:59:10 +0200	[diff] [blame]	735	if (!service) {
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	736	kfree(cpts);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	737	return ERR_PTR(-ENOMEM);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	738	}
				739
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	740	service->srv_cptable = cptable;
				741	service->srv_cpts = cpts;
				742	service->srv_ncpts = ncpts;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	743
				744	service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
				745	while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
				746	service->srv_cpt_bits++;
				747
				748	/* public members */
				749	spin_lock_init(&service->srv_lock);
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	750	service->srv_name = conf->psc_name;
				751	service->srv_watchdog_factor = conf->psc_watchdog_factor;
Masanari Iida	b6da17f	2014-02-08 00:30:40 +0900	[diff] [blame]	752	INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	753
				754	/* buffer configuration */
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	755	service->srv_nbuf_per_group = test_req_buffer_pressure ?
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	756	1 : conf->psc_buf.bc_nbufs;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	757	service->srv_max_req_size = conf->psc_buf.bc_req_max_size +
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	758	SPTLRPC_MAX_PAYLOAD;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	759	service->srv_buf_size = conf->psc_buf.bc_buf_size;
				760	service->srv_rep_portal = conf->psc_buf.bc_rep_portal;
				761	service->srv_req_portal = conf->psc_buf.bc_req_portal;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	762
				763	/* Increase max reply size to next power of two */
				764	service->srv_max_reply_size = 1;
				765	while (service->srv_max_reply_size <
				766	conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
				767	service->srv_max_reply_size <<= 1;
				768
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	769	service->srv_thread_name = conf->psc_thr.tc_thr_name;
				770	service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags;
				771	service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO;
				772	service->srv_ops = conf->psc_ops;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	773
				774	for (i = 0; i < ncpts; i++) {
				775	if (!conf->psc_thr.tc_cpu_affinity)
				776	cpt = CFS_CPT_ANY;
				777	else
				778	cpt = cpts != NULL ? cpts[i] : i;
				779
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	780	svcpt = kzalloc_node(sizeof(*svcpt), GFP_NOFS,
				781	cfs_cpt_spread_node(cptable, cpt));
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	782	if (svcpt == NULL) {
				783	rc = -ENOMEM;
				784	goto failed;
				785	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	786
				787	service->srv_parts[i] = svcpt;
				788	rc = ptlrpc_service_part_init(service, svcpt, cpt);
				789	if (rc != 0)
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	790	goto failed;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	791	}
				792
				793	ptlrpc_server_nthreads_check(service, conf);
				794
				795	rc = LNetSetLazyPortal(service->srv_req_portal);
				796	LASSERT(rc == 0);
				797
				798	mutex_lock(&ptlrpc_all_services_mutex);
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	799	list_add(&service->srv_list, &ptlrpc_all_services);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	800	mutex_unlock(&ptlrpc_all_services_mutex);
				801
Oleg Drokin	328676f	2015-05-21 15:32:08 -0400	[diff] [blame]	802	if (parent) {
				803	rc = ptlrpc_sysfs_register_service(parent, service);
				804	if (rc)
				805	goto failed;
				806	}
				807
Dmitry Eremin	700815d	2015-05-21 15:32:11 -0400	[diff] [blame]	808	if (!IS_ERR_OR_NULL(debugfs_entry))
				809	ptlrpc_ldebugfs_register_service(debugfs_entry, service);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	810
				811	rc = ptlrpc_service_nrs_setup(service);
				812	if (rc != 0)
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	813	goto failed;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	814
				815	CDEBUG(D_NET, "%s: Started, listening on portal %d\n",
				816	service->srv_name, service->srv_req_portal);
				817
				818	rc = ptlrpc_start_threads(service);
				819	if (rc != 0) {
				820	CERROR("Failed to start threads for service %s: %d\n",
				821	service->srv_name, rc);
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	822	goto failed;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	823	}
				824
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	825	return service;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	826	failed:
				827	ptlrpc_unregister_service(service);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	828	return ERR_PTR(rc);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	829	}
				830	EXPORT_SYMBOL(ptlrpc_register_service);
				831
				832	/**
				833	* to actually free the request, must be called without holding svc_lock.
				834	* note it's caller's responsibility to unlink req->rq_list.
				835	*/
				836	static void ptlrpc_server_free_request(struct ptlrpc_request *req)
				837	{
				838	LASSERT(atomic_read(&req->rq_refcount) == 0);
				839	LASSERT(list_empty(&req->rq_timed_list));
				840
				841	/* DEBUG_REQ() assumes the reply state of a request with a valid
				842	* ref will not be destroyed until that reference is dropped. */
				843	ptlrpc_req_drop_rs(req);
				844
				845	sptlrpc_svc_ctx_decref(req);
				846
				847	if (req != &req->rq_rqbd->rqbd_req) {
				848	/* NB request buffers use an embedded
				849	* req if the incoming req unlinked the
				850	* MD; this isn't one of them! */
Andriy Skulysh	35b2e1b	2014-04-27 13:06:35 -0400	[diff] [blame]	851	ptlrpc_request_cache_free(req);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	852	}
				853	}
				854
				855	/**
				856	* drop a reference count of the request. if it reaches 0, we either
				857	* put it into history list, or free it immediately.
				858	*/
				859	void ptlrpc_server_drop_request(struct ptlrpc_request *req)
				860	{
				861	struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	862	struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
				863	struct ptlrpc_service *svc = svcpt->scp_service;
				864	int refcount;
				865	struct list_head *tmp;
				866	struct list_head *nxt;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	867
				868	if (!atomic_dec_and_test(&req->rq_refcount))
				869	return;
				870
				871	if (req->rq_at_linked) {
				872	spin_lock(&svcpt->scp_at_lock);
				873	/* recheck with lock, in case it's unlinked by
				874	* ptlrpc_at_check_timed() */
				875	if (likely(req->rq_at_linked))
				876	ptlrpc_at_remove_timed(req);
				877	spin_unlock(&svcpt->scp_at_lock);
				878	}
				879
				880	LASSERT(list_empty(&req->rq_timed_list));
				881
				882	/* finalize request */
				883	if (req->rq_export) {
				884	class_export_put(req->rq_export);
				885	req->rq_export = NULL;
				886	}
				887
				888	spin_lock(&svcpt->scp_lock);
				889
				890	list_add(&req->rq_list, &rqbd->rqbd_reqs);
				891
				892	refcount = --(rqbd->rqbd_refcount);
				893	if (refcount == 0) {
				894	/* request buffer is now idle: add to history */
				895	list_del(&rqbd->rqbd_list);
				896
				897	list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
				898	svcpt->scp_hist_nrqbds++;
				899
				900	/* cull some history?
				901	* I expect only about 1 or 2 rqbds need to be recycled here */
				902	while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) {
				903	rqbd = list_entry(svcpt->scp_hist_rqbds.next,
				904	struct ptlrpc_request_buffer_desc,
				905	rqbd_list);
				906
				907	list_del(&rqbd->rqbd_list);
				908	svcpt->scp_hist_nrqbds--;
				909
				910	/* remove rqbd's reqs from svc's req history while
				911	* I've got the service lock */
				912	list_for_each(tmp, &rqbd->rqbd_reqs) {
				913	req = list_entry(tmp, struct ptlrpc_request,
				914	rq_list);
				915	/* Track the highest culled req seq */
				916	if (req->rq_history_seq >
				917	svcpt->scp_hist_seq_culled) {
				918	svcpt->scp_hist_seq_culled =
				919	req->rq_history_seq;
				920	}
				921	list_del(&req->rq_history_list);
				922	}
				923
				924	spin_unlock(&svcpt->scp_lock);
				925
				926	list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) {
				927	req = list_entry(rqbd->rqbd_reqs.next,
				928	struct ptlrpc_request,
				929	rq_list);
				930	list_del(&req->rq_list);
				931	ptlrpc_server_free_request(req);
				932	}
				933
				934	spin_lock(&svcpt->scp_lock);
				935	/*
				936	* now all reqs including the embedded req has been
				937	* disposed, schedule request buffer for re-use.
				938	*/
				939	LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) ==
				940	0);
				941	list_add_tail(&rqbd->rqbd_list,
				942	&svcpt->scp_rqbd_idle);
				943	}
				944
				945	spin_unlock(&svcpt->scp_lock);
				946	} else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
				947	/* If we are low on memory, we are not interested in history */
				948	list_del(&req->rq_list);
				949	list_del_init(&req->rq_history_list);
				950
				951	/* Track the highest culled req seq */
				952	if (req->rq_history_seq > svcpt->scp_hist_seq_culled)
				953	svcpt->scp_hist_seq_culled = req->rq_history_seq;
				954
				955	spin_unlock(&svcpt->scp_lock);
				956
				957	ptlrpc_server_free_request(req);
				958	} else {
				959	spin_unlock(&svcpt->scp_lock);
				960	}
				961	}
				962
				963	/** Change request export and move hp request from old export to new */
				964	void ptlrpc_request_change_export(struct ptlrpc_request *req,
				965	struct obd_export *export)
				966	{
				967	if (req->rq_export != NULL) {
				968	if (!list_empty(&req->rq_exp_list)) {
				969	/* remove rq_exp_list from last export */
				970	spin_lock_bh(&req->rq_export->exp_rpc_lock);
				971	list_del_init(&req->rq_exp_list);
				972	spin_unlock_bh(&req->rq_export->exp_rpc_lock);
				973
				974	/* export has one reference already, so it`s safe to
				975	* add req to export queue here and get another
				976	* reference for request later */
				977	spin_lock_bh(&export->exp_rpc_lock);
				978	list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
				979	spin_unlock_bh(&export->exp_rpc_lock);
				980	}
				981	class_export_rpc_dec(req->rq_export);
				982	class_export_put(req->rq_export);
				983	}
				984
				985	/* request takes one export refcount */
				986	req->rq_export = class_export_get(export);
				987	class_export_rpc_inc(export);
				988
				989	return;
				990	}
				991
				992	/**
				993	* to finish a request: stop sending more early replies, and release
				994	* the request.
				995	*/
				996	static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
				997	struct ptlrpc_request *req)
				998	{
				999	ptlrpc_server_hpreq_fini(req);
				1000
				1001	ptlrpc_server_drop_request(req);
				1002	}
				1003
				1004	/**
				1005	* to finish a active request: stop sending more early replies, and release
				1006	* the request. should be called after we finished handling the request.
				1007	*/
				1008	static void ptlrpc_server_finish_active_request(
				1009	struct ptlrpc_service_part *svcpt,
				1010	struct ptlrpc_request *req)
				1011	{
				1012	spin_lock(&svcpt->scp_req_lock);
				1013	ptlrpc_nrs_req_stop_nolock(req);
				1014	svcpt->scp_nreqs_active--;
				1015	if (req->rq_hp)
				1016	svcpt->scp_nhreqs_active--;
				1017	spin_unlock(&svcpt->scp_req_lock);
				1018
				1019	ptlrpc_nrs_req_finalize(req);
				1020
				1021	if (req->rq_export != NULL)
				1022	class_export_rpc_dec(req->rq_export);
				1023
				1024	ptlrpc_server_finish_request(svcpt, req);
				1025	}
				1026
				1027	/**
				1028	* This function makes sure dead exports are evicted in a timely manner.
				1029	* This function is only called when some export receives a message (i.e.,
				1030	* the network is up.)
				1031	*/
				1032	static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
				1033	{
				1034	struct obd_export *oldest_exp;
				1035	time_t oldest_time, new_time;
				1036
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1037	LASSERT(exp);
				1038
				1039	/* Compensate for slow machines, etc, by faking our request time
				1040	into the future. Although this can break the strict time-ordering
				1041	of the list, we can be really lazy here - we don't have to evict
				1042	at the exact right moment. Eventually, all silent exports
				1043	will make it to the top of the list. */
				1044
				1045	/* Do not pay attention on 1sec or smaller renewals. */
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1046	new_time = get_seconds() + extra_delay;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1047	if (exp->exp_last_request_time + 1 /second / >= new_time)
Greg Kroah-Hartman	e05e02e	2013-08-02 18:20:34 +0800	[diff] [blame]	1048	return;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1049
				1050	exp->exp_last_request_time = new_time;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1051
				1052	/* exports may get disconnected from the chain even though the
				1053	export has references, so we must keep the spin lock while
				1054	manipulating the lists */
				1055	spin_lock(&exp->exp_obd->obd_dev_lock);
				1056
				1057	if (list_empty(&exp->exp_obd_chain_timed)) {
				1058	/* this one is not timed */
				1059	spin_unlock(&exp->exp_obd->obd_dev_lock);
Greg Kroah-Hartman	e05e02e	2013-08-02 18:20:34 +0800	[diff] [blame]	1060	return;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1061	}
				1062
				1063	list_move_tail(&exp->exp_obd_chain_timed,
				1064	&exp->exp_obd->obd_exports_timed);
				1065
				1066	oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
				1067	struct obd_export, exp_obd_chain_timed);
				1068	oldest_time = oldest_exp->exp_last_request_time;
				1069	spin_unlock(&exp->exp_obd->obd_dev_lock);
				1070
				1071	if (exp->exp_obd->obd_recovering) {
				1072	/* be nice to everyone during recovery */
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1073	return;
				1074	}
				1075
				1076	/* Note - racing to start/reset the obd_eviction timer is safe */
				1077	if (exp->exp_obd->obd_eviction_timer == 0) {
				1078	/* Check if the oldest entry is expired. */
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1079	if (get_seconds() > (oldest_time + PING_EVICT_TIMEOUT +
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1080	extra_delay)) {
				1081	/* We need a second timer, in case the net was down and
				1082	* it just came back. Since the pinger may skip every
				1083	* other PING_INTERVAL (see note in ptlrpc_pinger_main),
				1084	* we better wait for 3. */
				1085	exp->exp_obd->obd_eviction_timer =
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1086	get_seconds() + 3 * PING_INTERVAL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1087	CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n",
				1088	exp->exp_obd->obd_name,
				1089	obd_export_nid2str(oldest_exp), oldest_time);
				1090	}
				1091	} else {
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1092	if (get_seconds() >
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1093	(exp->exp_obd->obd_eviction_timer + extra_delay)) {
				1094	/* The evictor won't evict anyone who we've heard from
				1095	* recently, so we don't have to check before we start
				1096	* it. */
				1097	if (!ping_evictor_wake(exp))
				1098	exp->exp_obd->obd_eviction_timer = 0;
				1099	}
				1100	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1101	}
				1102
				1103	/**
				1104	* Sanity check request \a req.
				1105	* Return 0 if all is ok, error code otherwise.
				1106	*/
				1107	static int ptlrpc_check_req(struct ptlrpc_request *req)
				1108	{
Dmitry Eremin	f60d7c3	2014-06-22 21:32:09 -0400	[diff] [blame]	1109	struct obd_device *obd = req->rq_export->exp_obd;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1110	int rc = 0;
				1111
				1112	if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
				1113	req->rq_export->exp_conn_cnt)) {
				1114	DEBUG_REQ(D_RPCTRACE, req,
				1115	"DROPPING req from old connection %d < %d",
				1116	lustre_msg_get_conn_cnt(req->rq_reqmsg),
				1117	req->rq_export->exp_conn_cnt);
				1118	return -EEXIST;
				1119	}
Dmitry Eremin	f60d7c3	2014-06-22 21:32:09 -0400	[diff] [blame]	1120	if (unlikely(obd == NULL \|\| obd->obd_fail)) {
Kristina Martsenko	532118c	2013-11-11 21:35:03 +0200	[diff] [blame]	1121	/*
				1122	* Failing over, don't handle any more reqs, send
				1123	* error response instead.
				1124	*/
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1125	CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
Dmitry Eremin	f60d7c3	2014-06-22 21:32:09 -0400	[diff] [blame]	1126	req, (obd != NULL) ? obd->obd_name : "unknown");
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1127	rc = -ENODEV;
				1128	} else if (lustre_msg_get_flags(req->rq_reqmsg) &
				1129	(MSG_REPLAY \| MSG_REQ_REPLAY_DONE) &&
Dmitry Eremin	f60d7c3	2014-06-22 21:32:09 -0400	[diff] [blame]	1130	!obd->obd_recovering) {
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1131	DEBUG_REQ(D_ERROR, req,
				1132	"Invalid replay without recovery");
				1133	class_fail_export(req->rq_export);
				1134	rc = -ENODEV;
				1135	} else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
Dmitry Eremin	f60d7c3	2014-06-22 21:32:09 -0400	[diff] [blame]	1136	!obd->obd_recovering) {
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1137	DEBUG_REQ(D_ERROR, req, "Invalid req with transno %llu without recovery",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1138	lustre_msg_get_transno(req->rq_reqmsg));
				1139	class_fail_export(req->rq_export);
				1140	rc = -ENODEV;
				1141	}
				1142
				1143	if (unlikely(rc < 0)) {
				1144	req->rq_status = rc;
				1145	ptlrpc_error(req);
				1146	}
				1147	return rc;
				1148	}
				1149
				1150	static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
				1151	{
				1152	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
				1153	__s32 next;
				1154
				1155	if (array->paa_count == 0) {
				1156	cfs_timer_disarm(&svcpt->scp_at_timer);
				1157	return;
				1158	}
				1159
				1160	/* Set timer for closest deadline */
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1161	next = (__s32)(array->paa_deadline - get_seconds() -
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1162	at_early_margin);
				1163	if (next <= 0) {
				1164	ptlrpc_at_timer((unsigned long)svcpt);
				1165	} else {
				1166	cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
				1167	CDEBUG(D_INFO, "armed %s at %+ds\n",
				1168	svcpt->scp_service->srv_name, next);
				1169	}
				1170	}
				1171
				1172	/* Add rpc to early reply check list */
				1173	static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
				1174	{
				1175	struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
				1176	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
				1177	struct ptlrpc_request *rq = NULL;
				1178	__u32 index;
				1179
				1180	if (AT_OFF)
Julia Lawall	fbe7c6c	2014-08-26 22:00:33 +0200	[diff] [blame]	1181	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1182
				1183	if (req->rq_no_reply)
				1184	return 0;
				1185
				1186	if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
Julia Lawall	fbe7c6c	2014-08-26 22:00:33 +0200	[diff] [blame]	1187	return -ENOSYS;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1188
				1189	spin_lock(&svcpt->scp_at_lock);
				1190	LASSERT(list_empty(&req->rq_timed_list));
				1191
				1192	index = (unsigned long)req->rq_deadline % array->paa_size;
				1193	if (array->paa_reqs_count[index] > 0) {
				1194	/* latest rpcs will have the latest deadlines in the list,
				1195	* so search backward. */
				1196	list_for_each_entry_reverse(rq,
				1197	&array->paa_reqs_array[index],
				1198	rq_timed_list) {
				1199	if (req->rq_deadline >= rq->rq_deadline) {
				1200	list_add(&req->rq_timed_list,
				1201	&rq->rq_timed_list);
				1202	break;
				1203	}
				1204	}
				1205	}
				1206
				1207	/* Add the request at the head of the list */
				1208	if (list_empty(&req->rq_timed_list))
				1209	list_add(&req->rq_timed_list,
				1210	&array->paa_reqs_array[index]);
				1211
				1212	spin_lock(&req->rq_lock);
				1213	req->rq_at_linked = 1;
				1214	spin_unlock(&req->rq_lock);
				1215	req->rq_at_index = index;
				1216	array->paa_reqs_count[index]++;
				1217	array->paa_count++;
				1218	if (array->paa_count == 1 \|\| array->paa_deadline > req->rq_deadline) {
				1219	array->paa_deadline = req->rq_deadline;
				1220	ptlrpc_at_set_timer(svcpt);
				1221	}
				1222	spin_unlock(&svcpt->scp_at_lock);
				1223
				1224	return 0;
				1225	}
				1226
				1227	static void
				1228	ptlrpc_at_remove_timed(struct ptlrpc_request *req)
				1229	{
				1230	struct ptlrpc_at_array *array;
				1231
				1232	array = &req->rq_rqbd->rqbd_svcpt->scp_at_array;
				1233
				1234	/* NB: must call with hold svcpt::scp_at_lock */
				1235	LASSERT(!list_empty(&req->rq_timed_list));
				1236	list_del_init(&req->rq_timed_list);
				1237
				1238	spin_lock(&req->rq_lock);
				1239	req->rq_at_linked = 0;
				1240	spin_unlock(&req->rq_lock);
				1241
				1242	array->paa_reqs_count[req->rq_at_index]--;
				1243	array->paa_count--;
				1244	}
				1245
				1246	static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
				1247	{
				1248	struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
				1249	struct ptlrpc_request *reqcopy;
				1250	struct lustre_msg *reqmsg;
Greg Kroah-Hartman	b2d201b	2014-07-12 00:45:51 -0700	[diff] [blame]	1251	long olddl = req->rq_deadline - get_seconds();
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1252	time_t newdl;
				1253	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1254
				1255	/* deadline is when the client expects us to reply, margin is the
				1256	difference between clients' and servers' expectations */
				1257	DEBUG_REQ(D_ADAPTTO, req,
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1258	"%ssending early reply (deadline %+lds, margin %+lds) for %d+%d",
				1259	AT_OFF ? "AT off - not " : "",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1260	olddl, olddl - at_get(&svcpt->scp_at_estimate),
				1261	at_get(&svcpt->scp_at_estimate), at_extra);
				1262
				1263	if (AT_OFF)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1264	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1265
				1266	if (olddl < 0) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1267	DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?",
				1268	olddl, at_early_margin);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1269
				1270	/* Return an error so we're not re-added to the timed list. */
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1271	return -ETIMEDOUT;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1272	}
				1273
Kristina Martsenko	cb68dd2	2013-11-11 21:34:59 +0200	[diff] [blame]	1274	if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1275	DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support");
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1276	return -ENOSYS;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1277	}
				1278
				1279	if (req->rq_export &&
				1280	lustre_msg_get_flags(req->rq_reqmsg) &
				1281	(MSG_REPLAY \| MSG_REQ_REPLAY_DONE \| MSG_LOCK_REPLAY_DONE)) {
				1282	/* During recovery, we don't want to send too many early
				1283	* replies, but on the other hand we want to make sure the
				1284	* client has enough time to resend if the rpc is lost. So
				1285	* during the recovery period send at least 4 early replies,
				1286	* spacing them every at_extra if we can. at_estimate should
				1287	* always equal this fixed value during recovery. */
				1288	at_measured(&svcpt->scp_at_estimate, min(at_extra,
				1289	req->rq_export->exp_obd->obd_recovery_timeout / 4));
				1290	} else {
				1291	/* Fake our processing time into the future to ask the clients
				1292	* for some extra amount of time */
				1293	at_measured(&svcpt->scp_at_estimate, at_extra +
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1294	get_seconds() -
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1295	req->rq_arrival_time.tv_sec);
				1296
				1297	/* Check to see if we've actually increased the deadline -
				1298	* we may be past adaptive_max */
				1299	if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
				1300	at_get(&svcpt->scp_at_estimate)) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1301	DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%ld), not sending early reply\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1302	olddl, req->rq_arrival_time.tv_sec +
				1303	at_get(&svcpt->scp_at_estimate) -
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1304	get_seconds());
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1305	return -ETIMEDOUT;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1306	}
				1307	}
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1308	newdl = get_seconds() + at_get(&svcpt->scp_at_estimate);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1309
Ann Koehler	0be19af	2014-04-27 13:06:36 -0400	[diff] [blame]	1310	reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1311	if (reqcopy == NULL)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1312	return -ENOMEM;
Julia Lawall	ee0ec19	2015-06-11 14:02:58 +0200	[diff] [blame]	1313	reqmsg = libcfs_kvzalloc(req->rq_reqlen, GFP_NOFS);
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1314	if (!reqmsg) {
				1315	rc = -ENOMEM;
				1316	goto out_free;
				1317	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1318
				1319	reqcopy = req;
				1320	reqcopy->rq_reply_state = NULL;
				1321	reqcopy->rq_rep_swab_mask = 0;
				1322	reqcopy->rq_pack_bulk = 0;
				1323	reqcopy->rq_pack_udesc = 0;
				1324	reqcopy->rq_packed_final = 0;
				1325	sptlrpc_svc_ctx_addref(reqcopy);
				1326	/* We only need the reqmsg for the magic */
				1327	reqcopy->rq_reqmsg = reqmsg;
				1328	memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
				1329
				1330	LASSERT(atomic_read(&req->rq_refcount));
				1331	/** if it is last refcount then early reply isn't needed */
				1332	if (atomic_read(&req->rq_refcount) == 1) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1333	DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n");
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1334	rc = -EINVAL;
				1335	goto out;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1336	}
				1337
				1338	/* Connection ref */
				1339	reqcopy->rq_export = class_conn2export(
				1340	lustre_msg_get_handle(reqcopy->rq_reqmsg));
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1341	if (reqcopy->rq_export == NULL) {
				1342	rc = -ENODEV;
				1343	goto out;
				1344	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1345
				1346	/* RPC ref */
				1347	class_export_rpc_inc(reqcopy->rq_export);
				1348	if (reqcopy->rq_export->exp_obd &&
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1349	reqcopy->rq_export->exp_obd->obd_fail) {
				1350	rc = -ENODEV;
				1351	goto out_put;
				1352	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1353
				1354	rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY);
				1355	if (rc)
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1356	goto out_put;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1357
				1358	rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
				1359
				1360	if (!rc) {
				1361	/* Adjust our own deadline to what we told the client */
				1362	req->rq_deadline = newdl;
				1363	req->rq_early_count++; /* number sent, server side */
				1364	} else {
				1365	DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
				1366	}
				1367
				1368	/* Free the (early) reply state from lustre_pack_reply.
				1369	(ptlrpc_send_reply takes it's own rs ref, so this is safe here) */
				1370	ptlrpc_req_drop_rs(reqcopy);
				1371
				1372	out_put:
				1373	class_export_rpc_dec(reqcopy->rq_export);
				1374	class_export_put(reqcopy->rq_export);
				1375	out:
				1376	sptlrpc_svc_ctx_decref(reqcopy);
Julia Lawall	ee0ec19	2015-06-11 14:02:58 +0200	[diff] [blame]	1377	kvfree(reqmsg);
Andriy Skulysh	35b2e1b	2014-04-27 13:06:35 -0400	[diff] [blame]	1378	out_free:
				1379	ptlrpc_request_cache_free(reqcopy);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1380	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1381	}
				1382
				1383	/* Send early replies to everybody expiring within at_early_margin
				1384	asking for at_extra time */
				1385	static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
				1386	{
				1387	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
				1388	struct ptlrpc_request rq, n;
				1389	struct list_head work_list;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	1390	__u32 index, count;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1391	time_t deadline;
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1392	time_t now = get_seconds();
Greg Kroah-Hartman	b2d201b	2014-07-12 00:45:51 -0700	[diff] [blame]	1393	long delay;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1394	int first, counter = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1395
				1396	spin_lock(&svcpt->scp_at_lock);
				1397	if (svcpt->scp_at_check == 0) {
				1398	spin_unlock(&svcpt->scp_at_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1399	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1400	}
				1401	delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime);
				1402	svcpt->scp_at_check = 0;
				1403
				1404	if (array->paa_count == 0) {
				1405	spin_unlock(&svcpt->scp_at_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1406	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1407	}
				1408
				1409	/* The timer went off, but maybe the nearest rpc already completed. */
				1410	first = array->paa_deadline - now;
				1411	if (first > at_early_margin) {
				1412	/* We've still got plenty of time. Reset the timer. */
				1413	ptlrpc_at_set_timer(svcpt);
				1414	spin_unlock(&svcpt->scp_at_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1415	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1416	}
				1417
				1418	/* We're close to a timeout, and we don't know how much longer the
				1419	server will take. Send early replies to everyone expiring soon. */
				1420	INIT_LIST_HEAD(&work_list);
				1421	deadline = -1;
				1422	index = (unsigned long)array->paa_deadline % array->paa_size;
				1423	count = array->paa_count;
				1424	while (count > 0) {
				1425	count -= array->paa_reqs_count[index];
				1426	list_for_each_entry_safe(rq, n,
				1427	&array->paa_reqs_array[index],
				1428	rq_timed_list) {
				1429	if (rq->rq_deadline > now + at_early_margin) {
				1430	/* update the earliest deadline */
				1431	if (deadline == -1 \|\|
				1432	rq->rq_deadline < deadline)
				1433	deadline = rq->rq_deadline;
				1434	break;
				1435	}
				1436
				1437	ptlrpc_at_remove_timed(rq);
				1438	/**
				1439	* ptlrpc_server_drop_request() may drop
				1440	* refcount to 0 already. Let's check this and
				1441	* don't add entry to work_list
				1442	*/
				1443	if (likely(atomic_inc_not_zero(&rq->rq_refcount)))
				1444	list_add(&rq->rq_timed_list, &work_list);
				1445	counter++;
				1446	}
				1447
				1448	if (++index >= array->paa_size)
				1449	index = 0;
				1450	}
				1451	array->paa_deadline = deadline;
				1452	/* we have a new earliest deadline, restart the timer */
				1453	ptlrpc_at_set_timer(svcpt);
				1454
				1455	spin_unlock(&svcpt->scp_at_lock);
				1456
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1457	CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n",
				1458	first, at_extra, counter);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1459	if (first < 0) {
				1460	/* We're already past request deadlines before we even get a
				1461	chance to send early replies */
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1462	LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1463	svcpt->scp_service->srv_name);
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1464	CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=" CFS_DURATION_T "(jiff)\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1465	counter, svcpt->scp_nreqs_incoming,
				1466	svcpt->scp_nreqs_active,
				1467	at_get(&svcpt->scp_at_estimate), delay);
				1468	}
				1469
				1470	/* we took additional refcount so entries can't be deleted from list, no
				1471	* locking is needed */
				1472	while (!list_empty(&work_list)) {
				1473	rq = list_entry(work_list.next, struct ptlrpc_request,
				1474	rq_timed_list);
				1475	list_del_init(&rq->rq_timed_list);
				1476
				1477	if (ptlrpc_at_send_early_reply(rq) == 0)
				1478	ptlrpc_at_add_timed(rq);
				1479
				1480	ptlrpc_server_drop_request(rq);
				1481	}
				1482
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1483	return 1; /* return "did_something" for liblustre */
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1484	}
				1485
				1486	/**
				1487	* Put the request to the export list if the request may become
				1488	* a high priority one.
				1489	*/
				1490	static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
				1491	struct ptlrpc_request *req)
				1492	{
				1493	int rc = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1494
				1495	if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
				1496	rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
				1497	if (rc < 0)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1498	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1499	LASSERT(rc == 0);
				1500	}
				1501	if (req->rq_export && req->rq_ops) {
				1502	/* Perform request specific check. We should do this check
				1503	* before the request is added into exp_hp_rpcs list otherwise
				1504	* it may hit swab race at LU-1044. */
				1505	if (req->rq_ops->hpreq_check) {
				1506	rc = req->rq_ops->hpreq_check(req);
				1507	/**
				1508	* XXX: Out of all current
				1509	* ptlrpc_hpreq_ops::hpreq_check(), only
				1510	* ldlm_cancel_hpreq_check() can return an error code;
				1511	* other functions assert in similar places, which seems
				1512	* odd. What also does not seem right is that handlers
				1513	* for those RPCs do not assert on the same checks, but
				1514	* rather handle the error cases. e.g. see
				1515	* ost_rw_hpreq_check(), and ost_brw_read(),
				1516	* ost_brw_write().
				1517	*/
				1518	if (rc < 0)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1519	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1520	LASSERT(rc == 0 \|\| rc == 1);
				1521	}
				1522
				1523	spin_lock_bh(&req->rq_export->exp_rpc_lock);
				1524	list_add(&req->rq_exp_list,
				1525	&req->rq_export->exp_hp_rpcs);
				1526	spin_unlock_bh(&req->rq_export->exp_rpc_lock);
				1527	}
				1528
				1529	ptlrpc_nrs_req_initialize(svcpt, req, rc);
				1530
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1531	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1532	}
				1533
				1534	/** Remove the request from the export list. */
				1535	static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
				1536	{
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1537	if (req->rq_export && req->rq_ops) {
				1538	/* refresh lock timeout again so that client has more
				1539	* room to send lock cancel RPC. */
				1540	if (req->rq_ops->hpreq_fini)
				1541	req->rq_ops->hpreq_fini(req);
				1542
				1543	spin_lock_bh(&req->rq_export->exp_rpc_lock);
				1544	list_del_init(&req->rq_exp_list);
				1545	spin_unlock_bh(&req->rq_export->exp_rpc_lock);
				1546	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1547	}
				1548
				1549	static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
				1550	{
				1551	return 1;
				1552	}
				1553
				1554	static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
				1555	.hpreq_check = ptlrpc_hpreq_check,
				1556	};
				1557
				1558	/* Hi-Priority RPC check by RPC operation code. */
				1559	int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
				1560	{
				1561	int opc = lustre_msg_get_opc(req->rq_reqmsg);
				1562
				1563	/* Check for export to let only reconnects for not yet evicted
				1564	* export to become a HP rpc. */
				1565	if ((req->rq_export != NULL) &&
				1566	(opc == OBD_PING \|\| opc == MDS_CONNECT \|\| opc == OST_CONNECT))
				1567	req->rq_ops = &ptlrpc_hpreq_common;
				1568
				1569	return 0;
				1570	}
				1571	EXPORT_SYMBOL(ptlrpc_hpreq_handler);
				1572
				1573	static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
				1574	struct ptlrpc_request *req)
				1575	{
				1576	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1577
				1578	rc = ptlrpc_server_hpreq_init(svcpt, req);
				1579	if (rc < 0)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1580	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1581
				1582	ptlrpc_nrs_req_add(svcpt, req, !!rc);
				1583
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1584	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1585	}
				1586
				1587	/**
				1588	* Allow to handle high priority request
				1589	* User can call it w/o any lock but need to hold
				1590	* ptlrpc_service_part::scp_req_lock to get reliable result
				1591	*/
				1592	static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
				1593	bool force)
				1594	{
				1595	int running = svcpt->scp_nthrs_running;
				1596
				1597	if (!nrs_svcpt_has_hp(svcpt))
				1598	return false;
				1599
				1600	if (force)
				1601	return true;
				1602
				1603	if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
				1604	CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
				1605	/* leave just 1 thread for normal RPCs */
				1606	running = PTLRPC_NTHRS_INIT;
				1607	if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
				1608	running += 1;
				1609	}
				1610
				1611	if (svcpt->scp_nreqs_active >= running - 1)
				1612	return false;
				1613
				1614	if (svcpt->scp_nhreqs_active == 0)
				1615	return true;
				1616
				1617	return !ptlrpc_nrs_req_pending_nolock(svcpt, false) \|\|
				1618	svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
				1619	}
				1620
				1621	static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
				1622	bool force)
				1623	{
				1624	return ptlrpc_server_allow_high(svcpt, force) &&
				1625	ptlrpc_nrs_req_pending_nolock(svcpt, true);
				1626	}
				1627
				1628	/**
				1629	* Only allow normal priority requests on a service that has a high-priority
				1630	* queue if forced (i.e. cleanup), if there are other high priority requests
				1631	* already being processed (i.e. those threads can service more high-priority
				1632	* requests), or if there are enough idle threads that a later thread can do
				1633	* a high priority request.
				1634	* User can call it w/o any lock but need to hold
				1635	* ptlrpc_service_part::scp_req_lock to get reliable result
				1636	*/
				1637	static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
				1638	bool force)
				1639	{
				1640	int running = svcpt->scp_nthrs_running;
				1641	if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
				1642	CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
				1643	/* leave just 1 thread for normal RPCs */
				1644	running = PTLRPC_NTHRS_INIT;
				1645	if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
				1646	running += 1;
				1647	}
				1648
				1649	if (force \|\|
				1650	svcpt->scp_nreqs_active < running - 2)
				1651	return true;
				1652
				1653	if (svcpt->scp_nreqs_active >= running - 1)
				1654	return false;
				1655
				1656	return svcpt->scp_nhreqs_active > 0 \|\| !nrs_svcpt_has_hp(svcpt);
				1657	}
				1658
				1659	static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
				1660	bool force)
				1661	{
				1662	return ptlrpc_server_allow_normal(svcpt, force) &&
				1663	ptlrpc_nrs_req_pending_nolock(svcpt, false);
				1664	}
				1665
				1666	/**
				1667	* Returns true if there are requests available in incoming
				1668	* request queue for processing and it is allowed to fetch them.
				1669	* User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock
				1670	* to get reliable result
				1671	* \see ptlrpc_server_allow_normal
				1672	* \see ptlrpc_server_allow high
				1673	*/
				1674	static inline bool
				1675	ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force)
				1676	{
				1677	return ptlrpc_server_high_pending(svcpt, force) \|\|
				1678	ptlrpc_server_normal_pending(svcpt, force);
				1679	}
				1680
				1681	/**
				1682	* Fetch a request for processing from queue of unprocessed requests.
				1683	* Favors high-priority requests.
				1684	* Returns a pointer to fetched request.
				1685	*/
				1686	static struct ptlrpc_request *
				1687	ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force)
				1688	{
				1689	struct ptlrpc_request *req = NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1690
				1691	spin_lock(&svcpt->scp_req_lock);
				1692
				1693	if (ptlrpc_server_high_pending(svcpt, force)) {
				1694	req = ptlrpc_nrs_req_get_nolock(svcpt, true, force);
				1695	if (req != NULL) {
				1696	svcpt->scp_hreq_count++;
				1697	goto got_request;
				1698	}
				1699	}
				1700
				1701	if (ptlrpc_server_normal_pending(svcpt, force)) {
				1702	req = ptlrpc_nrs_req_get_nolock(svcpt, false, force);
				1703	if (req != NULL) {
				1704	svcpt->scp_hreq_count = 0;
				1705	goto got_request;
				1706	}
				1707	}
				1708
				1709	spin_unlock(&svcpt->scp_req_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1710	return NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1711
				1712	got_request:
				1713	svcpt->scp_nreqs_active++;
				1714	if (req->rq_hp)
				1715	svcpt->scp_nhreqs_active++;
				1716
				1717	spin_unlock(&svcpt->scp_req_lock);
				1718
				1719	if (likely(req->rq_export))
				1720	class_export_rpc_inc(req->rq_export);
				1721
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1722	return req;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1723	}
				1724
				1725	/**
				1726	* Handle freshly incoming reqs, add to timed early reply list,
				1727	* pass on to regular request queue.
				1728	* All incoming requests pass through here before getting into
				1729	* ptlrpc_server_handle_req later on.
				1730	*/
				1731	static int
				1732	ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
				1733	struct ptlrpc_thread *thread)
				1734	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	1735	struct ptlrpc_service *svc = svcpt->scp_service;
				1736	struct ptlrpc_request *req;
				1737	__u32 deadline;
				1738	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1739
				1740	spin_lock(&svcpt->scp_lock);
				1741	if (list_empty(&svcpt->scp_req_incoming)) {
				1742	spin_unlock(&svcpt->scp_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1743	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1744	}
				1745
				1746	req = list_entry(svcpt->scp_req_incoming.next,
				1747	struct ptlrpc_request, rq_list);
				1748	list_del_init(&req->rq_list);
				1749	svcpt->scp_nreqs_incoming--;
				1750	/* Consider this still a "queued" request as far as stats are
				1751	* concerned */
				1752	spin_unlock(&svcpt->scp_lock);
				1753
				1754	/* go through security check/transform */
				1755	rc = sptlrpc_svc_unwrap_request(req);
				1756	switch (rc) {
				1757	case SECSVC_OK:
				1758	break;
				1759	case SECSVC_COMPLETE:
				1760	target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
				1761	goto err_req;
				1762	case SECSVC_DROP:
				1763	goto err_req;
				1764	default:
				1765	LBUG();
				1766	}
				1767
				1768	/*
				1769	* for null-flavored rpc, msg has been unpacked by sptlrpc, although
				1770	* redo it wouldn't be harmful.
				1771	*/
				1772	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
				1773	rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
				1774	if (rc != 0) {
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1775	CERROR("error unpacking request: ptl %d from %s x%llu\n",
				1776	svc->srv_req_portal, libcfs_id2str(req->rq_peer),
				1777	req->rq_xid);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1778	goto err_req;
				1779	}
				1780	}
				1781
				1782	rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
				1783	if (rc) {
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1784	CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n",
				1785	svc->srv_req_portal, libcfs_id2str(req->rq_peer),
				1786	req->rq_xid);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1787	goto err_req;
				1788	}
				1789
				1790	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
				1791	lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1792	CERROR("drop incoming rpc opc %u, x%llu\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1793	cfs_fail_val, req->rq_xid);
				1794	goto err_req;
				1795	}
				1796
				1797	rc = -EINVAL;
				1798	if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
				1799	CERROR("wrong packet type received (type=%u) from %s\n",
				1800	lustre_msg_get_type(req->rq_reqmsg),
				1801	libcfs_id2str(req->rq_peer));
				1802	goto err_req;
				1803	}
				1804
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	1805	switch (lustre_msg_get_opc(req->rq_reqmsg)) {
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1806	case MDS_WRITEPAGE:
				1807	case OST_WRITE:
				1808	req->rq_bulk_write = 1;
				1809	break;
				1810	case MDS_READPAGE:
				1811	case OST_READ:
				1812	case MGS_CONFIG_READ:
				1813	req->rq_bulk_read = 1;
				1814	break;
				1815	}
				1816
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1817	CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1818
				1819	req->rq_export = class_conn2export(
				1820	lustre_msg_get_handle(req->rq_reqmsg));
				1821	if (req->rq_export) {
				1822	rc = ptlrpc_check_req(req);
				1823	if (rc == 0) {
				1824	rc = sptlrpc_target_export_check(req->rq_export, req);
				1825	if (rc)
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1826	DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,");
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1827	}
				1828
				1829	if (rc)
				1830	goto err_req;
				1831	ptlrpc_update_export_timer(req->rq_export, 0);
				1832	}
				1833
				1834	/* req_in handling should/must be fast */
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1835	if (get_seconds() - req->rq_arrival_time.tv_sec > 5)
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1836	DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s",
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1837	cfs_time_sub(get_seconds(),
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1838	req->rq_arrival_time.tv_sec));
				1839
				1840	/* Set rpc server deadline and add it to the timed list */
				1841	deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
				1842	MSGHDR_AT_SUPPORT) ?
				1843	/* The max time the client expects us to take */
				1844	lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
				1845	req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
				1846	if (unlikely(deadline == 0)) {
				1847	DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
				1848	goto err_req;
				1849	}
				1850
				1851	req->rq_svc_thread = thread;
				1852
				1853	ptlrpc_at_add_timed(req);
				1854
				1855	/* Move it over to the request processing queue */
				1856	rc = ptlrpc_server_request_add(svcpt, req);
				1857	if (rc)
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	1858	goto err_req;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1859
				1860	wake_up(&svcpt->scp_waitq);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1861	return 1;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1862
				1863	err_req:
				1864	ptlrpc_server_finish_request(svcpt, req);
				1865
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1866	return 1;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1867	}
				1868
				1869	/**
				1870	* Main incoming request handling logic.
				1871	* Calls handler function from service to do actual processing.
				1872	*/
				1873	static int
				1874	ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
				1875	struct ptlrpc_thread *thread)
				1876	{
				1877	struct ptlrpc_service *svc = svcpt->scp_service;
				1878	struct ptlrpc_request *request;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	1879	struct timeval work_start;
				1880	struct timeval work_end;
				1881	long timediff;
				1882	int rc;
				1883	int fail_opc = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1884
				1885	request = ptlrpc_server_request_get(svcpt, false);
				1886	if (request == NULL)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	1887	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1888
				1889	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
				1890	fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
				1891	else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
				1892	fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT;
				1893
				1894	if (unlikely(fail_opc)) {
				1895	if (request->rq_export && request->rq_ops)
				1896	OBD_FAIL_TIMEOUT(fail_opc, 4);
				1897	}
				1898
				1899	ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
				1900
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	1901	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1902	libcfs_debug_dumplog();
				1903
				1904	do_gettimeofday(&work_start);
Greg Donald	1d8cb70	2014-08-25 20:07:19 -0500	[diff] [blame]	1905	timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,
				1906	NULL);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1907	if (likely(svc->srv_stats != NULL)) {
				1908	lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
				1909	timediff);
				1910	lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
				1911	svcpt->scp_nreqs_incoming);
				1912	lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
				1913	svcpt->scp_nreqs_active);
				1914	lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
				1915	at_get(&svcpt->scp_at_estimate));
				1916	}
				1917
				1918	rc = lu_context_init(&request->rq_session, LCT_SESSION \| LCT_NOREF);
				1919	if (rc) {
				1920	CERROR("Failure to initialize session: %d\n", rc);
				1921	goto out_req;
				1922	}
				1923	request->rq_session.lc_thread = thread;
				1924	request->rq_session.lc_cookie = 0x5;
				1925	lu_context_enter(&request->rq_session);
				1926
Greg Kroah-Hartman	b0f5aad	2014-07-12 20:06:04 -0700	[diff] [blame]	1927	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1928
				1929	request->rq_svc_thread = thread;
				1930	if (thread)
				1931	request->rq_svc_thread->t_env->le_ses = &request->rq_session;
				1932
				1933	if (likely(request->rq_export)) {
				1934	if (unlikely(ptlrpc_check_req(request)))
				1935	goto put_conn;
				1936	ptlrpc_update_export_timer(request->rq_export, timediff >> 19);
				1937	}
				1938
				1939	/* Discard requests queued for longer than the deadline.
				1940	The deadline is increased if we send an early reply. */
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1941	if (get_seconds() > request->rq_deadline) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1942	DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline " CFS_DURATION_T ":" CFS_DURATION_T "s ago\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1943	libcfs_id2str(request->rq_peer),
				1944	cfs_time_sub(request->rq_deadline,
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1945	request->rq_arrival_time.tv_sec),
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1946	cfs_time_sub(get_seconds(),
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1947	request->rq_deadline));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1948	goto put_conn;
				1949	}
				1950
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1951	CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n",
				1952	current_comm(),
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1953	(request->rq_export ?
				1954	(char *)request->rq_export->exp_client_uuid.uuid : "0"),
				1955	(request->rq_export ?
				1956	atomic_read(&request->rq_export->exp_refcount) : -99),
				1957	lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
				1958	libcfs_id2str(request->rq_peer),
				1959	lustre_msg_get_opc(request->rq_reqmsg));
				1960
				1961	if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
				1962	CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
				1963
				1964	rc = svc->srv_ops.so_req_handler(request);
				1965
				1966	ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
				1967
				1968	put_conn:
				1969	lu_context_exit(&request->rq_session);
				1970	lu_context_fini(&request->rq_session);
				1971
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1972	if (unlikely(get_seconds() > request->rq_deadline)) {
Kristina Martsenko	532118c	2013-11-11 21:35:03 +0200	[diff] [blame]	1973	DEBUG_REQ(D_WARNING, request,
				1974	"Request took longer than estimated ("
				1975	CFS_DURATION_T":"CFS_DURATION_T
				1976	"s); client may timeout.",
				1977	cfs_time_sub(request->rq_deadline,
				1978	request->rq_arrival_time.tv_sec),
Greg Kroah-Hartman	7264b8a	2014-07-12 00:38:48 -0700	[diff] [blame]	1979	cfs_time_sub(get_seconds(),
Kristina Martsenko	532118c	2013-11-11 21:35:03 +0200	[diff] [blame]	1980	request->rq_deadline));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	1981	}
				1982
				1983	do_gettimeofday(&work_end);
				1984	timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	1985	CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n",
				1986	current_comm(),
				1987	(request->rq_export ?
				1988	(char *)request->rq_export->exp_client_uuid.uuid : "0"),
				1989	(request->rq_export ?
				1990	atomic_read(&request->rq_export->exp_refcount) : -99),
				1991	lustre_msg_get_status(request->rq_reqmsg),
				1992	request->rq_xid,
				1993	libcfs_id2str(request->rq_peer),
				1994	lustre_msg_get_opc(request->rq_reqmsg),
				1995	timediff,
				1996	cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL),
				1997	(request->rq_repmsg ?
				1998	lustre_msg_get_transno(request->rq_repmsg) :
				1999	request->rq_transno),
				2000	request->rq_status,
				2001	(request->rq_repmsg ?
				2002	lustre_msg_get_status(request->rq_repmsg) : -999));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2003	if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
				2004	__u32 op = lustre_msg_get_opc(request->rq_reqmsg);
				2005	int opc = opcode_offset(op);
				2006	if (opc > 0 && !(op == LDLM_ENQUEUE \|\| op == MDS_REINT)) {
				2007	LASSERT(opc < LUSTRE_MAX_OPCODES);
				2008	lprocfs_counter_add(svc->srv_stats,
				2009	opc + EXTRA_MAX_OPCODES,
				2010	timediff);
				2011	}
				2012	}
				2013	if (unlikely(request->rq_early_count)) {
				2014	DEBUG_REQ(D_ADAPTTO, request,
				2015	"sent %d early replies before finishing in "
				2016	CFS_DURATION_T"s",
				2017	request->rq_early_count,
				2018	cfs_time_sub(work_end.tv_sec,
				2019	request->rq_arrival_time.tv_sec));
				2020	}
				2021
				2022	out_req:
				2023	ptlrpc_server_finish_active_request(svcpt, request);
				2024
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2025	return 1;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2026	}
				2027
				2028	/**
				2029	* An internal function to process a single reply state object.
				2030	*/
				2031	static int
				2032	ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
				2033	{
				2034	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2035	struct ptlrpc_service *svc = svcpt->scp_service;
				2036	struct obd_export *exp;
				2037	int nlocks;
				2038	int been_handled;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2039
				2040	exp = rs->rs_export;
				2041
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	2042	LASSERT(rs->rs_difficult);
				2043	LASSERT(rs->rs_scheduled);
				2044	LASSERT(list_empty(&rs->rs_list));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2045
				2046	spin_lock(&exp->exp_lock);
				2047	/* Noop if removed already */
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	2048	list_del_init(&rs->rs_exp_list);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2049	spin_unlock(&exp->exp_lock);
				2050
				2051	/* The disk commit callback holds exp_uncommitted_replies_lock while it
				2052	* iterates over newly committed replies, removing them from
				2053	* exp_uncommitted_replies. It then drops this lock and schedules the
				2054	* replies it found for handling here.
				2055	*
				2056	* We can avoid contention for exp_uncommitted_replies_lock between the
				2057	* HRT threads and further commit callbacks by checking rs_committed
				2058	* which is set in the commit callback while it holds both
				2059	* rs_lock and exp_uncommitted_reples.
				2060	*
				2061	* If we see rs_committed clear, the commit callback _may_ not have
				2062	* handled this reply yet and we race with it to grab
				2063	* exp_uncommitted_replies_lock before removing the reply from
				2064	* exp_uncommitted_replies. Note that if we lose the race and the
				2065	* reply has already been removed, list_del_init() is a noop.
				2066	*
				2067	* If we see rs_committed set, we know the commit callback is handling,
				2068	* or has handled this reply since store reordering might allow us to
				2069	* see rs_committed set out of sequence. But since this is done
				2070	* holding rs_lock, we can be sure it has all completed once we hold
				2071	* rs_lock, which we do right next.
				2072	*/
				2073	if (!rs->rs_committed) {
				2074	spin_lock(&exp->exp_uncommitted_replies_lock);
				2075	list_del_init(&rs->rs_obd_list);
				2076	spin_unlock(&exp->exp_uncommitted_replies_lock);
				2077	}
				2078
				2079	spin_lock(&rs->rs_lock);
				2080
				2081	been_handled = rs->rs_handled;
				2082	rs->rs_handled = 1;
				2083
				2084	nlocks = rs->rs_nlocks; /* atomic "steal", but */
				2085	rs->rs_nlocks = 0; /* locks still on rs_locks! */
				2086
				2087	if (nlocks == 0 && !been_handled) {
				2088	/* If we see this, we should already have seen the warning
				2089	* in mds_steal_ack_locks() */
Greg Kroah-Hartman	f537dd2	2014-07-12 18:41:09 -0700	[diff] [blame]	2090	CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2091	rs,
				2092	rs->rs_xid, rs->rs_transno, rs->rs_opc,
				2093	libcfs_nid2str(exp->exp_connection->c_peer.nid));
				2094	}
				2095
				2096	if ((!been_handled && rs->rs_on_net) \|\| nlocks > 0) {
				2097	spin_unlock(&rs->rs_lock);
				2098
				2099	if (!been_handled && rs->rs_on_net) {
				2100	LNetMDUnlink(rs->rs_md_h);
				2101	/* Ignore return code; we're racing with completion */
				2102	}
				2103
				2104	while (nlocks-- > 0)
				2105	ldlm_lock_decref(&rs->rs_locks[nlocks],
				2106	rs->rs_modes[nlocks]);
				2107
				2108	spin_lock(&rs->rs_lock);
				2109	}
				2110
				2111	rs->rs_scheduled = 0;
				2112
				2113	if (!rs->rs_on_net) {
				2114	/* Off the net */
				2115	spin_unlock(&rs->rs_lock);
				2116
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	2117	class_export_put(exp);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2118	rs->rs_export = NULL;
Kristina Martsenko	3949015e	2013-11-11 21:34:58 +0200	[diff] [blame]	2119	ptlrpc_rs_decref(rs);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2120	if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
				2121	svc->srv_is_stopping)
				2122	wake_up_all(&svcpt->scp_waitq);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2123	return 1;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2124	}
				2125
				2126	/* still on the net; callback will schedule */
				2127	spin_unlock(&rs->rs_lock);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2128	return 1;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2129	}
				2130
				2131
				2132	static void
				2133	ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
				2134	{
				2135	int avail = svcpt->scp_nrqbds_posted;
				2136	int low_water = test_req_buffer_pressure ? 0 :
				2137	svcpt->scp_service->srv_nbuf_per_group / 2;
				2138
				2139	/* NB I'm not locking; just looking. */
				2140
				2141	/* CAVEAT EMPTOR: We might be allocating buffers here because we've
				2142	* allowed the request history to grow out of control. We could put a
				2143	* sanity check on that here and cull some history if we need the
				2144	* space. */
				2145
				2146	if (avail <= low_water)
				2147	ptlrpc_grow_req_bufs(svcpt, 1);
				2148
				2149	if (svcpt->scp_service->srv_stats) {
				2150	lprocfs_counter_add(svcpt->scp_service->srv_stats,
				2151	PTLRPC_REQBUF_AVAIL_CNTR, avail);
				2152	}
				2153	}
				2154
				2155	static int
				2156	ptlrpc_retry_rqbds(void *arg)
				2157	{
				2158	struct ptlrpc_service_part svcpt = (struct ptlrpc_service_part )arg;
				2159
				2160	svcpt->scp_rqbd_timeout = 0;
				2161	return -ETIMEDOUT;
				2162	}
				2163
				2164	static inline int
				2165	ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
				2166	{
				2167	return svcpt->scp_nreqs_active <
				2168	svcpt->scp_nthrs_running - 1 -
				2169	(svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
				2170	}
				2171
				2172	/**
				2173	* allowed to create more threads
				2174	* user can call it w/o any lock but need to hold
				2175	* ptlrpc_service_part::scp_lock to get reliable result
				2176	*/
				2177	static inline int
				2178	ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
				2179	{
				2180	return svcpt->scp_nthrs_running +
				2181	svcpt->scp_nthrs_starting <
				2182	svcpt->scp_service->srv_nthrs_cpt_limit;
				2183	}
				2184
				2185	/**
				2186	* too many requests and allowed to create more threads
				2187	*/
				2188	static inline int
				2189	ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
				2190	{
				2191	return !ptlrpc_threads_enough(svcpt) &&
				2192	ptlrpc_threads_increasable(svcpt);
				2193	}
				2194
				2195	static inline int
				2196	ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
				2197	{
				2198	return thread_is_stopping(thread) \|\|
				2199	thread->t_svcpt->scp_service->srv_is_stopping;
				2200	}
				2201
				2202	static inline int
				2203	ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
				2204	{
				2205	return !list_empty(&svcpt->scp_rqbd_idle) &&
				2206	svcpt->scp_rqbd_timeout == 0;
				2207	}
				2208
				2209	static inline int
				2210	ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
				2211	{
				2212	return svcpt->scp_at_check;
				2213	}
				2214
				2215	/**
				2216	* requests wait on preprocessing
				2217	* user can call it w/o any lock but need to hold
				2218	* ptlrpc_service_part::scp_lock to get reliable result
				2219	*/
				2220	static inline int
				2221	ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
				2222	{
				2223	return !list_empty(&svcpt->scp_req_incoming);
				2224	}
				2225
				2226	static __attribute__((__noinline__)) int
				2227	ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
				2228	struct ptlrpc_thread *thread)
				2229	{
				2230	/* Don't exit while there are replies to be handled */
				2231	struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
				2232	ptlrpc_retry_rqbds, svcpt);
				2233
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2234	/* XXX: Add this back when libcfs watchdog is merged upstream
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2235	lc_watchdog_disable(thread->t_watchdog);
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2236	*/
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2237
				2238	cond_resched();
				2239
				2240	l_wait_event_exclusive_head(svcpt->scp_waitq,
				2241	ptlrpc_thread_stopping(thread) \|\|
				2242	ptlrpc_server_request_incoming(svcpt) \|\|
				2243	ptlrpc_server_request_pending(svcpt, false) \|\|
				2244	ptlrpc_rqbd_pending(svcpt) \|\|
				2245	ptlrpc_at_check(svcpt), &lwi);
				2246
				2247	if (ptlrpc_thread_stopping(thread))
				2248	return -EINTR;
				2249
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2250	/*
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2251	lc_watchdog_touch(thread->t_watchdog,
				2252	ptlrpc_server_get_timeout(svcpt));
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2253	*/
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2254	return 0;
				2255	}
				2256
				2257	/**
				2258	* Main thread body for service threads.
				2259	* Waits in a loop waiting for new requests to process to appear.
				2260	* Every time an incoming requests is added to its queue, a waitq
				2261	* is woken up and one of the threads will handle it.
				2262	*/
				2263	static int ptlrpc_main(void *arg)
				2264	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2265	struct ptlrpc_thread thread = (struct ptlrpc_thread )arg;
				2266	struct ptlrpc_service_part *svcpt = thread->t_svcpt;
				2267	struct ptlrpc_service *svc = svcpt->scp_service;
				2268	struct ptlrpc_reply_state *rs;
Greg Kroah-Hartman	c88a6cb	2013-08-04 07:59:19 +0800	[diff] [blame]	2269	struct group_info *ginfo = NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2270	struct lu_env *env;
				2271	int counter = 0, rc = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2272
				2273	thread->t_pid = current_pid();
				2274	unshare_fs_struct();
				2275
				2276	/* NB: we will call cfs_cpt_bind() for all threads, because we
				2277	* might want to run lustre server only on a subset of system CPUs,
				2278	* in that case ->scp_cpt is CFS_CPT_ANY */
				2279	rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
				2280	if (rc != 0) {
				2281	CWARN("%s: failed to bind %s on CPT %d\n",
				2282	svc->srv_name, thread->t_name, svcpt->scp_cpt);
				2283	}
				2284
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2285	ginfo = groups_alloc(0);
				2286	if (!ginfo) {
				2287	rc = -ENOMEM;
				2288	goto out;
				2289	}
				2290
				2291	set_current_groups(ginfo);
				2292	put_group_info(ginfo);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2293
				2294	if (svc->srv_ops.so_thr_init != NULL) {
				2295	rc = svc->srv_ops.so_thr_init(thread);
				2296	if (rc)
				2297	goto out;
				2298	}
				2299
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2300	env = kzalloc(sizeof(*env), GFP_NOFS);
Julia Lawall	597851a	2015-06-20 18:59:10 +0200	[diff] [blame]	2301	if (!env) {
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2302	rc = -ENOMEM;
				2303	goto out_srv_fini;
				2304	}
				2305
				2306	rc = lu_context_init(&env->le_ctx,
				2307	svc->srv_ctx_tags\|LCT_REMEMBER\|LCT_NOREF);
				2308	if (rc)
				2309	goto out_srv_fini;
				2310
				2311	thread->t_env = env;
				2312	env->le_ctx.lc_thread = thread;
				2313	env->le_ctx.lc_cookie = 0x6;
				2314
				2315	while (!list_empty(&svcpt->scp_rqbd_idle)) {
				2316	rc = ptlrpc_server_post_idle_rqbds(svcpt);
				2317	if (rc >= 0)
				2318	continue;
				2319
				2320	CERROR("Failed to post rqbd for %s on CPT %d: %d\n",
				2321	svc->srv_name, svcpt->scp_cpt, rc);
				2322	goto out_srv_fini;
				2323	}
				2324
				2325	/* Alloc reply state structure for this one */
Julia Lawall	ee0ec19	2015-06-11 14:02:58 +0200	[diff] [blame]	2326	rs = libcfs_kvzalloc(svc->srv_max_reply_size, GFP_NOFS);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2327	if (!rs) {
				2328	rc = -ENOMEM;
				2329	goto out_srv_fini;
				2330	}
				2331
				2332	spin_lock(&svcpt->scp_lock);
				2333
				2334	LASSERT(thread_is_starting(thread));
				2335	thread_clear_flags(thread, SVC_STARTING);
				2336
				2337	LASSERT(svcpt->scp_nthrs_starting == 1);
				2338	svcpt->scp_nthrs_starting--;
				2339
				2340	/* SVC_STOPPING may already be set here if someone else is trying
				2341	* to stop the service while this new thread has been dynamically
				2342	* forked. We still set SVC_RUNNING to let our creator know that
				2343	* we are now running, however we will exit as soon as possible */
				2344	thread_add_flags(thread, SVC_RUNNING);
				2345	svcpt->scp_nthrs_running++;
				2346	spin_unlock(&svcpt->scp_lock);
				2347
				2348	/* wake up our creator in case he's still waiting. */
				2349	wake_up(&thread->t_ctl_waitq);
				2350
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2351	/*
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2352	thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
				2353	NULL, NULL);
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2354	*/
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2355
				2356	spin_lock(&svcpt->scp_rep_lock);
				2357	list_add(&rs->rs_list, &svcpt->scp_rep_idle);
				2358	wake_up(&svcpt->scp_rep_waitq);
				2359	spin_unlock(&svcpt->scp_rep_lock);
				2360
				2361	CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
				2362	svcpt->scp_nthrs_running);
				2363
				2364	/* XXX maintain a list of all managed devices: insert here */
				2365	while (!ptlrpc_thread_stopping(thread)) {
				2366	if (ptlrpc_wait_event(svcpt, thread))
				2367	break;
				2368
				2369	ptlrpc_check_rqbd_pool(svcpt);
				2370
				2371	if (ptlrpc_threads_need_create(svcpt)) {
				2372	/* Ignore return code - we tried... */
				2373	ptlrpc_start_thread(svcpt, 0);
				2374	}
				2375
				2376	/* Process all incoming reqs before handling any */
				2377	if (ptlrpc_server_request_incoming(svcpt)) {
				2378	lu_context_enter(&env->le_ctx);
wang di	4ee688d	2013-06-03 21:40:39 +0800	[diff] [blame]	2379	env->le_ses = NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2380	ptlrpc_server_handle_req_in(svcpt, thread);
				2381	lu_context_exit(&env->le_ctx);
				2382
				2383	/* but limit ourselves in case of flood */
				2384	if (counter++ < 100)
				2385	continue;
				2386	counter = 0;
				2387	}
				2388
				2389	if (ptlrpc_at_check(svcpt))
				2390	ptlrpc_at_check_timed(svcpt);
				2391
				2392	if (ptlrpc_server_request_pending(svcpt, false)) {
				2393	lu_context_enter(&env->le_ctx);
				2394	ptlrpc_server_handle_request(svcpt, thread);
				2395	lu_context_exit(&env->le_ctx);
				2396	}
				2397
				2398	if (ptlrpc_rqbd_pending(svcpt) &&
				2399	ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
				2400	/* I just failed to repost request buffers.
				2401	* Wait for a timeout (unless something else
				2402	* happens) before I try again */
				2403	svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
				2404	CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
				2405	svcpt->scp_nrqbds_posted);
				2406	}
				2407	}
				2408
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2409	/*
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2410	lc_watchdog_delete(thread->t_watchdog);
				2411	thread->t_watchdog = NULL;
Peng Tao	5d4450c	2013-07-15 22:27:15 +0800	[diff] [blame]	2412	*/
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2413
				2414	out_srv_fini:
				2415	/*
				2416	* deconstruct service specific state created by ptlrpc_start_thread()
				2417	*/
				2418	if (svc->srv_ops.so_thr_done != NULL)
				2419	svc->srv_ops.so_thr_done(thread);
				2420
				2421	if (env != NULL) {
				2422	lu_context_fini(&env->le_ctx);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2423	kfree(env);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2424	}
				2425	out:
				2426	CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
				2427	thread, thread->t_pid, thread->t_id, rc);
				2428
				2429	spin_lock(&svcpt->scp_lock);
				2430	if (thread_test_and_clear_flags(thread, SVC_STARTING))
				2431	svcpt->scp_nthrs_starting--;
				2432
				2433	if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
				2434	/* must know immediately */
				2435	svcpt->scp_nthrs_running--;
				2436	}
				2437
				2438	thread->t_id = rc;
				2439	thread_add_flags(thread, SVC_STOPPED);
				2440
				2441	wake_up(&thread->t_ctl_waitq);
				2442	spin_unlock(&svcpt->scp_lock);
				2443
				2444	return rc;
				2445	}
				2446
				2447	static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt,
				2448	struct list_head *replies)
				2449	{
				2450	int result;
				2451
				2452	spin_lock(&hrt->hrt_lock);
				2453
				2454	list_splice_init(&hrt->hrt_queue, replies);
				2455	result = ptlrpc_hr.hr_stopping \|\| !list_empty(replies);
				2456
				2457	spin_unlock(&hrt->hrt_lock);
				2458	return result;
				2459	}
				2460
				2461	/**
				2462	* Main body of "handle reply" function.
				2463	* It processes acked reply states
				2464	*/
				2465	static int ptlrpc_hr_main(void *arg)
				2466	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2467	struct ptlrpc_hr_thread hrt = (struct ptlrpc_hr_thread )arg;
				2468	struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
				2469	LIST_HEAD (replies);
				2470	char threadname[20];
				2471	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2472
				2473	snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
				2474	hrp->hrp_cpt, hrt->hrt_id);
				2475	unshare_fs_struct();
				2476
				2477	rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
				2478	if (rc != 0) {
				2479	CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
				2480	threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
				2481	}
				2482
				2483	atomic_inc(&hrp->hrp_nstarted);
				2484	wake_up(&ptlrpc_hr.hr_waitq);
				2485
				2486	while (!ptlrpc_hr.hr_stopping) {
				2487	l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies));
				2488
				2489	while (!list_empty(&replies)) {
				2490	struct ptlrpc_reply_state *rs;
				2491
				2492	rs = list_entry(replies.prev,
				2493	struct ptlrpc_reply_state,
				2494	rs_list);
				2495	list_del_init(&rs->rs_list);
				2496	ptlrpc_handle_rs(rs);
				2497	}
				2498	}
				2499
				2500	atomic_inc(&hrp->hrp_nstopped);
				2501	wake_up(&ptlrpc_hr.hr_waitq);
				2502
				2503	return 0;
				2504	}
				2505
				2506	static void ptlrpc_stop_hr_threads(void)
				2507	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2508	struct ptlrpc_hr_partition *hrp;
				2509	int i;
				2510	int j;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2511
				2512	ptlrpc_hr.hr_stopping = 1;
				2513
				2514	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
				2515	if (hrp->hrp_thrs == NULL)
				2516	continue; /* uninitialized */
				2517	for (j = 0; j < hrp->hrp_nthrs; j++)
				2518	wake_up_all(&hrp->hrp_thrs[j].hrt_waitq);
				2519	}
				2520
				2521	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
				2522	if (hrp->hrp_thrs == NULL)
				2523	continue; /* uninitialized */
				2524	wait_event(ptlrpc_hr.hr_waitq,
				2525	atomic_read(&hrp->hrp_nstopped) ==
				2526	atomic_read(&hrp->hrp_nstarted));
				2527	}
				2528	}
				2529
				2530	static int ptlrpc_start_hr_threads(void)
				2531	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2532	struct ptlrpc_hr_partition *hrp;
				2533	int i;
				2534	int j;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2535
				2536	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2537	int rc = 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2538
				2539	for (j = 0; j < hrp->hrp_nthrs; j++) {
				2540	struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j];
				2541	rc = PTR_ERR(kthread_run(ptlrpc_hr_main,
				2542	&hrp->hrp_thrs[j],
				2543	"ptlrpc_hr%02d_%03d",
				2544	hrp->hrp_cpt,
				2545	hrt->hrt_id));
				2546	if (IS_ERR_VALUE(rc))
				2547	break;
				2548	}
				2549	wait_event(ptlrpc_hr.hr_waitq,
				2550	atomic_read(&hrp->hrp_nstarted) == j);
				2551	if (!IS_ERR_VALUE(rc))
				2552	continue;
				2553
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	2554	CERROR("Reply handling thread %d:%d Failed on starting: rc = %d\n",
				2555	i, j, rc);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2556	ptlrpc_stop_hr_threads();
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2557	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2558	}
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2559	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2560	}
				2561
				2562	static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
				2563	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2564	struct l_wait_info lwi = { 0 };
				2565	struct ptlrpc_thread *thread;
				2566	LIST_HEAD (zombie);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2567
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2568	CDEBUG(D_INFO, "Stopping threads for service %s\n",
				2569	svcpt->scp_service->srv_name);
				2570
				2571	spin_lock(&svcpt->scp_lock);
				2572	/* let the thread know that we would like it to stop asap */
				2573	list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
				2574	CDEBUG(D_INFO, "Stopping thread %s #%u\n",
				2575	svcpt->scp_service->srv_thread_name, thread->t_id);
				2576	thread_add_flags(thread, SVC_STOPPING);
				2577	}
				2578
				2579	wake_up_all(&svcpt->scp_waitq);
				2580
				2581	while (!list_empty(&svcpt->scp_threads)) {
				2582	thread = list_entry(svcpt->scp_threads.next,
				2583	struct ptlrpc_thread, t_link);
				2584	if (thread_is_stopped(thread)) {
				2585	list_del(&thread->t_link);
				2586	list_add(&thread->t_link, &zombie);
				2587	continue;
				2588	}
				2589	spin_unlock(&svcpt->scp_lock);
				2590
				2591	CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
				2592	svcpt->scp_service->srv_thread_name, thread->t_id);
				2593	l_wait_event(thread->t_ctl_waitq,
				2594	thread_is_stopped(thread), &lwi);
				2595
				2596	spin_lock(&svcpt->scp_lock);
				2597	}
				2598
				2599	spin_unlock(&svcpt->scp_lock);
				2600
				2601	while (!list_empty(&zombie)) {
				2602	thread = list_entry(zombie.next,
				2603	struct ptlrpc_thread, t_link);
				2604	list_del(&thread->t_link);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2605	kfree(thread);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2606	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2607	}
				2608
				2609	/**
				2610	* Stops all threads of a particular service \a svc
				2611	*/
				2612	void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
				2613	{
				2614	struct ptlrpc_service_part *svcpt;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2615	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2616
				2617	ptlrpc_service_for_each_part(svcpt, i, svc) {
				2618	if (svcpt->scp_service != NULL)
				2619	ptlrpc_svcpt_stop_threads(svcpt);
				2620	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2621	}
				2622	EXPORT_SYMBOL(ptlrpc_stop_all_threads);
				2623
				2624	int ptlrpc_start_threads(struct ptlrpc_service *svc)
				2625	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2626	int rc = 0;
				2627	int i;
				2628	int j;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2629
				2630	/* We require 2 threads min, see note in ptlrpc_server_handle_request */
				2631	LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT);
				2632
				2633	for (i = 0; i < svc->srv_ncpts; i++) {
				2634	for (j = 0; j < svc->srv_nthrs_cpt_init; j++) {
				2635	rc = ptlrpc_start_thread(svc->srv_parts[i], 1);
				2636	if (rc == 0)
				2637	continue;
				2638
				2639	if (rc != -EMFILE)
				2640	goto failed;
				2641	/* We have enough threads, don't start more. b=15759 */
				2642	break;
				2643	}
				2644	}
				2645
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2646	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2647	failed:
				2648	CERROR("cannot start %s thread #%d_%d: rc %d\n",
				2649	svc->srv_thread_name, i, j, rc);
				2650	ptlrpc_stop_all_threads(svc);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2651	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2652	}
				2653	EXPORT_SYMBOL(ptlrpc_start_threads);
				2654
				2655	int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
				2656	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2657	struct l_wait_info lwi = { 0 };
				2658	struct ptlrpc_thread *thread;
				2659	struct ptlrpc_service *svc;
				2660	int rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2661
				2662	LASSERT(svcpt != NULL);
				2663
				2664	svc = svcpt->scp_service;
				2665
				2666	CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n",
				2667	svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running,
				2668	svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit);
				2669
				2670	again:
				2671	if (unlikely(svc->srv_is_stopping))
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2672	return -ESRCH;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2673
				2674	if (!ptlrpc_threads_increasable(svcpt) \|\|
				2675	(OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
				2676	svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1))
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2677	return -EMFILE;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2678
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	2679	thread = kzalloc_node(sizeof(*thread), GFP_NOFS,
				2680	cfs_cpt_spread_node(svc->srv_cptable,
				2681	svcpt->scp_cpt));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2682	if (thread == NULL)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2683	return -ENOMEM;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2684	init_waitqueue_head(&thread->t_ctl_waitq);
				2685
				2686	spin_lock(&svcpt->scp_lock);
				2687	if (!ptlrpc_threads_increasable(svcpt)) {
				2688	spin_unlock(&svcpt->scp_lock);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2689	kfree(thread);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2690	return -EMFILE;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2691	}
				2692
				2693	if (svcpt->scp_nthrs_starting != 0) {
				2694	/* serialize starting because some modules (obdfilter)
				2695	* might require unique and contiguous t_id */
				2696	LASSERT(svcpt->scp_nthrs_starting == 1);
				2697	spin_unlock(&svcpt->scp_lock);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2698	kfree(thread);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2699	if (wait) {
				2700	CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n",
				2701	svc->srv_thread_name, svcpt->scp_thr_nextid);
				2702	schedule();
				2703	goto again;
				2704	}
				2705
				2706	CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n",
				2707	svc->srv_thread_name, svcpt->scp_thr_nextid);
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2708	return -EAGAIN;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2709	}
				2710
				2711	svcpt->scp_nthrs_starting++;
				2712	thread->t_id = svcpt->scp_thr_nextid++;
				2713	thread_add_flags(thread, SVC_STARTING);
				2714	thread->t_svcpt = svcpt;
				2715
				2716	list_add(&thread->t_link, &svcpt->scp_threads);
				2717	spin_unlock(&svcpt->scp_lock);
				2718
				2719	if (svcpt->scp_cpt >= 0) {
Kees Cook	9edf0f6	2013-09-10 21:37:19 -0700	[diff] [blame]	2720	snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2721	svc->srv_thread_name, svcpt->scp_cpt, thread->t_id);
				2722	} else {
Kees Cook	9edf0f6	2013-09-10 21:37:19 -0700	[diff] [blame]	2723	snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2724	svc->srv_thread_name, thread->t_id);
				2725	}
				2726
				2727	CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name);
Kees Cook	9edf0f6	2013-09-10 21:37:19 -0700	[diff] [blame]	2728	rc = PTR_ERR(kthread_run(ptlrpc_main, thread, "%s", thread->t_name));
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2729	if (IS_ERR_VALUE(rc)) {
				2730	CERROR("cannot start thread '%s': rc %d\n",
				2731	thread->t_name, rc);
				2732	spin_lock(&svcpt->scp_lock);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2733	--svcpt->scp_nthrs_starting;
Hiroya Nozaki	5be8e07	2013-07-23 00:06:48 +0800	[diff] [blame]	2734	if (thread_is_stopping(thread)) {
Masanari Iida	369e5c9	2014-02-08 00:30:36 +0900	[diff] [blame]	2735	/* this ptlrpc_thread is being handled
Hiroya Nozaki	5be8e07	2013-07-23 00:06:48 +0800	[diff] [blame]	2736	* by ptlrpc_svcpt_stop_threads now
				2737	*/
				2738	thread_add_flags(thread, SVC_STOPPED);
				2739	wake_up(&thread->t_ctl_waitq);
				2740	spin_unlock(&svcpt->scp_lock);
				2741	} else {
				2742	list_del(&thread->t_link);
				2743	spin_unlock(&svcpt->scp_lock);
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	2744	kfree(thread);
Hiroya Nozaki	5be8e07	2013-07-23 00:06:48 +0800	[diff] [blame]	2745	}
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2746	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2747	}
				2748
				2749	if (!wait)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2750	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2751
				2752	l_wait_event(thread->t_ctl_waitq,
				2753	thread_is_running(thread) \|\| thread_is_stopped(thread),
				2754	&lwi);
				2755
				2756	rc = thread_is_stopped(thread) ? thread->t_id : 0;
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2757	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2758	}
				2759
				2760	int ptlrpc_hr_init(void)
				2761	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2762	struct ptlrpc_hr_partition *hrp;
				2763	struct ptlrpc_hr_thread *hrt;
				2764	int rc;
				2765	int i;
				2766	int j;
				2767	int weight;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2768
				2769	memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
				2770	ptlrpc_hr.hr_cpt_table = cfs_cpt_table;
				2771
				2772	ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table,
				2773	sizeof(*hrp));
				2774	if (ptlrpc_hr.hr_partitions == NULL)
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2775	return -ENOMEM;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2776
				2777	init_waitqueue_head(&ptlrpc_hr.hr_waitq);
				2778
Bartosz Golaszewski	06931e6	2015-05-26 15:11:28 +0200	[diff] [blame]	2779	weight = cpumask_weight(topology_sibling_cpumask(0));
Peng Tao	3867ea5	2013-07-15 22:27:10 +0800	[diff] [blame]	2780
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2781	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
				2782	hrp->hrp_cpt = i;
				2783
				2784	atomic_set(&hrp->hrp_nstarted, 0);
				2785	atomic_set(&hrp->hrp_nstopped, 0);
				2786
				2787	hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
Peng Tao	3867ea5	2013-07-15 22:27:10 +0800	[diff] [blame]	2788	hrp->hrp_nthrs /= weight;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2789
				2790	LASSERT(hrp->hrp_nthrs > 0);
Julia Lawall	bae97e8	2015-05-03 15:21:44 +0200	[diff] [blame]	2791	hrp->hrp_thrs =
				2792	kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS,
				2793	cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
				2794	i));
Julia Lawall	a9b3e8f	2014-09-07 18:18:29 +0200	[diff] [blame]	2795	if (hrp->hrp_thrs == NULL) {
				2796	rc = -ENOMEM;
				2797	goto out;
				2798	}
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2799
				2800	for (j = 0; j < hrp->hrp_nthrs; j++) {
				2801	hrt = &hrp->hrp_thrs[j];
				2802
				2803	hrt->hrt_id = j;
				2804	hrt->hrt_partition = hrp;
				2805	init_waitqueue_head(&hrt->hrt_waitq);
				2806	spin_lock_init(&hrt->hrt_lock);
				2807	INIT_LIST_HEAD(&hrt->hrt_queue);
				2808	}
				2809	}
				2810
				2811	rc = ptlrpc_start_hr_threads();
				2812	out:
				2813	if (rc != 0)
				2814	ptlrpc_hr_fini();
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	2815	return rc;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2816	}
				2817
				2818	void ptlrpc_hr_fini(void)
				2819	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2820	struct ptlrpc_hr_partition *hrp;
				2821	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2822
				2823	if (ptlrpc_hr.hr_partitions == NULL)
				2824	return;
				2825
				2826	ptlrpc_stop_hr_threads();
				2827
				2828	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
Markus Elfring	a5cb888	2015-07-14 09:35:42 +0200	[diff] [blame]	2829	kfree(hrp->hrp_thrs);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2830	}
				2831
				2832	cfs_percpt_free(ptlrpc_hr.hr_partitions);
				2833	ptlrpc_hr.hr_partitions = NULL;
				2834	}
				2835
				2836
				2837	/**
				2838	* Wait until all already scheduled replies are processed.
				2839	*/
				2840	static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
				2841	{
				2842	while (1) {
				2843	int rc;
				2844	struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
				2845	NULL, NULL);
				2846
				2847	rc = l_wait_event(svcpt->scp_waitq,
				2848	atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
				2849	if (rc == 0)
				2850	break;
				2851	CWARN("Unexpectedly long timeout %s %p\n",
				2852	svcpt->scp_service->srv_name, svcpt->scp_service);
				2853	}
				2854	}
				2855
				2856	static void
				2857	ptlrpc_service_del_atimer(struct ptlrpc_service *svc)
				2858	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2859	struct ptlrpc_service_part *svcpt;
				2860	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2861
				2862	/* early disarm AT timer... */
				2863	ptlrpc_service_for_each_part(svcpt, i, svc) {
				2864	if (svcpt->scp_service != NULL)
				2865	cfs_timer_disarm(&svcpt->scp_at_timer);
				2866	}
				2867	}
				2868
				2869	static void
				2870	ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
				2871	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2872	struct ptlrpc_service_part *svcpt;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2873	struct ptlrpc_request_buffer_desc *rqbd;
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2874	struct l_wait_info lwi;
				2875	int rc;
				2876	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2877
				2878	/* All history will be culled when the next request buffer is
				2879	* freed in ptlrpc_service_purge_all() */
				2880	svc->srv_hist_nrqbds_cpt_max = 0;
				2881
				2882	rc = LNetClearLazyPortal(svc->srv_req_portal);
				2883	LASSERT(rc == 0);
				2884
				2885	ptlrpc_service_for_each_part(svcpt, i, svc) {
				2886	if (svcpt->scp_service == NULL)
				2887	break;
				2888
				2889	/* Unlink all the request buffers. This forces a 'final'
				2890	* event with its 'unlink' flag set for each posted rqbd */
				2891	list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
				2892	rqbd_list) {
				2893	rc = LNetMDUnlink(rqbd->rqbd_md_h);
				2894	LASSERT(rc == 0 \|\| rc == -ENOENT);
				2895	}
				2896	}
				2897
				2898	ptlrpc_service_for_each_part(svcpt, i, svc) {
				2899	if (svcpt->scp_service == NULL)
				2900	break;
				2901
				2902	/* Wait for the network to release any buffers
				2903	* it's currently filling */
				2904	spin_lock(&svcpt->scp_lock);
				2905	while (svcpt->scp_nrqbds_posted != 0) {
				2906	spin_unlock(&svcpt->scp_lock);
				2907	/* Network access will complete in finite time but
				2908	* the HUGE timeout lets us CWARN for visibility
				2909	* of sluggish NALs */
				2910	lwi = LWI_TIMEOUT_INTERVAL(
				2911	cfs_time_seconds(LONG_UNLINK),
				2912	cfs_time_seconds(1), NULL, NULL);
				2913	rc = l_wait_event(svcpt->scp_waitq,
				2914	svcpt->scp_nrqbds_posted == 0, &lwi);
				2915	if (rc == -ETIMEDOUT) {
Joe Perches	2d00bd1	2014-11-23 11:28:50 -0800	[diff] [blame]	2916	CWARN("Service %s waiting for request buffers\n",
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2917	svcpt->scp_service->srv_name);
				2918	}
				2919	spin_lock(&svcpt->scp_lock);
				2920	}
				2921	spin_unlock(&svcpt->scp_lock);
				2922	}
				2923	}
				2924
				2925	static void
				2926	ptlrpc_service_purge_all(struct ptlrpc_service *svc)
				2927	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2928	struct ptlrpc_service_part *svcpt;
				2929	struct ptlrpc_request_buffer_desc *rqbd;
				2930	struct ptlrpc_request *req;
				2931	struct ptlrpc_reply_state *rs;
				2932	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2933
				2934	ptlrpc_service_for_each_part(svcpt, i, svc) {
				2935	if (svcpt->scp_service == NULL)
				2936	break;
				2937
				2938	spin_lock(&svcpt->scp_rep_lock);
				2939	while (!list_empty(&svcpt->scp_rep_active)) {
				2940	rs = list_entry(svcpt->scp_rep_active.next,
				2941	struct ptlrpc_reply_state, rs_list);
				2942	spin_lock(&rs->rs_lock);
				2943	ptlrpc_schedule_difficult_reply(rs);
				2944	spin_unlock(&rs->rs_lock);
				2945	}
				2946	spin_unlock(&svcpt->scp_rep_lock);
				2947
				2948	/* purge the request queue. NB No new replies (rqbds
				2949	* all unlinked) and no service threads, so I'm the only
				2950	* thread noodling the request queue now */
				2951	while (!list_empty(&svcpt->scp_req_incoming)) {
				2952	req = list_entry(svcpt->scp_req_incoming.next,
				2953	struct ptlrpc_request, rq_list);
				2954
				2955	list_del(&req->rq_list);
				2956	svcpt->scp_nreqs_incoming--;
				2957	ptlrpc_server_finish_request(svcpt, req);
				2958	}
				2959
				2960	while (ptlrpc_server_request_pending(svcpt, true)) {
				2961	req = ptlrpc_server_request_get(svcpt, true);
				2962	ptlrpc_server_finish_active_request(svcpt, req);
				2963	}
				2964
				2965	LASSERT(list_empty(&svcpt->scp_rqbd_posted));
				2966	LASSERT(svcpt->scp_nreqs_incoming == 0);
				2967	LASSERT(svcpt->scp_nreqs_active == 0);
				2968	/* history should have been culled by
				2969	* ptlrpc_server_finish_request */
				2970	LASSERT(svcpt->scp_hist_nrqbds == 0);
				2971
				2972	/* Now free all the request buffers since nothing
				2973	* references them any more... */
				2974
				2975	while (!list_empty(&svcpt->scp_rqbd_idle)) {
				2976	rqbd = list_entry(svcpt->scp_rqbd_idle.next,
				2977	struct ptlrpc_request_buffer_desc,
				2978	rqbd_list);
				2979	ptlrpc_free_rqbd(rqbd);
				2980	}
				2981	ptlrpc_wait_replies(svcpt);
				2982
				2983	while (!list_empty(&svcpt->scp_rep_idle)) {
				2984	rs = list_entry(svcpt->scp_rep_idle.next,
				2985	struct ptlrpc_reply_state,
				2986	rs_list);
				2987	list_del(&rs->rs_list);
Julia Lawall	ee0ec19	2015-06-11 14:02:58 +0200	[diff] [blame]	2988	kvfree(rs);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2989	}
				2990	}
				2991	}
				2992
				2993	static void
				2994	ptlrpc_service_free(struct ptlrpc_service *svc)
				2995	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	2996	struct ptlrpc_service_part *svcpt;
				2997	struct ptlrpc_at_array *array;
				2998	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	2999
				3000	ptlrpc_service_for_each_part(svcpt, i, svc) {
				3001	if (svcpt->scp_service == NULL)
				3002	break;
				3003
				3004	/* In case somebody rearmed this in the meantime */
				3005	cfs_timer_disarm(&svcpt->scp_at_timer);
				3006	array = &svcpt->scp_at_array;
				3007
Julia Lawall	207e99c	2015-05-01 21:37:47 +0200	[diff] [blame]	3008	kfree(array->paa_reqs_array);
				3009	array->paa_reqs_array = NULL;
				3010	kfree(array->paa_reqs_count);
				3011	array->paa_reqs_count = NULL;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3012	}
				3013
				3014	ptlrpc_service_for_each_part(svcpt, i, svc)
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	3015	kfree(svcpt);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3016
				3017	if (svc->srv_cpts != NULL)
				3018	cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts);
				3019
Julia Lawall	9ae1059	2015-05-01 17:51:12 +0200	[diff] [blame]	3020	kfree(svc);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3021	}
				3022
				3023	int ptlrpc_unregister_service(struct ptlrpc_service *service)
				3024	{
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3025	CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
				3026
				3027	service->srv_is_stopping = 1;
				3028
				3029	mutex_lock(&ptlrpc_all_services_mutex);
				3030	list_del_init(&service->srv_list);
				3031	mutex_unlock(&ptlrpc_all_services_mutex);
				3032
				3033	ptlrpc_service_del_atimer(service);
				3034	ptlrpc_stop_all_threads(service);
				3035
				3036	ptlrpc_service_unlink_rqbd(service);
				3037	ptlrpc_service_purge_all(service);
				3038	ptlrpc_service_nrs_cleanup(service);
				3039
				3040	ptlrpc_lprocfs_unregister_service(service);
Oleg Drokin	328676f	2015-05-21 15:32:08 -0400	[diff] [blame]	3041	ptlrpc_sysfs_unregister_service(service);
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3042
				3043	ptlrpc_service_free(service);
				3044
Greg Kroah-Hartman	0a3bdb0	2013-08-03 10:35:28 +0800	[diff] [blame]	3045	return 0;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3046	}
				3047	EXPORT_SYMBOL(ptlrpc_unregister_service);
				3048
				3049	/**
				3050	* Returns 0 if the service is healthy.
				3051	*
				3052	* Right now, it just checks to make sure that requests aren't languishing
				3053	* in the queue. We'll use this health check to govern whether a node needs
				3054	* to be shot, so it's intentionally non-aggressive. */
Zoltán Lajos Kis	a96389d	2015-08-02 22:36:31 +0200	[diff] [blame]	3055	static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3056	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	3057	struct ptlrpc_request *request = NULL;
				3058	struct timeval right_now;
				3059	long timediff;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3060
				3061	do_gettimeofday(&right_now);
				3062
				3063	spin_lock(&svcpt->scp_req_lock);
				3064	/* How long has the next entry been waiting? */
				3065	if (ptlrpc_server_high_pending(svcpt, true))
				3066	request = ptlrpc_nrs_req_peek_nolock(svcpt, true);
				3067	else if (ptlrpc_server_normal_pending(svcpt, true))
				3068	request = ptlrpc_nrs_req_peek_nolock(svcpt, false);
				3069
				3070	if (request == NULL) {
				3071	spin_unlock(&svcpt->scp_req_lock);
				3072	return 0;
				3073	}
				3074
				3075	timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
				3076	spin_unlock(&svcpt->scp_req_lock);
				3077
				3078	if ((timediff / ONE_MILLION) >
				3079	(AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
				3080	CERROR("%s: unhealthy - request has been waiting %lds\n",
				3081	svcpt->scp_service->srv_name, timediff / ONE_MILLION);
				3082	return -1;
				3083	}
				3084
				3085	return 0;
				3086	}
				3087
				3088	int
				3089	ptlrpc_service_health_check(struct ptlrpc_service *svc)
				3090	{
Chris Hanna	d0bfef3	2015-06-03 10:28:26 -0400	[diff] [blame]	3091	struct ptlrpc_service_part *svcpt;
				3092	int i;
Peng Tao	d7e09d0	2013-05-02 16:46:55 +0800	[diff] [blame]	3093
				3094	if (svc == NULL)
				3095	return 0;
				3096
				3097	ptlrpc_service_for_each_part(svcpt, i, svc) {
				3098	int rc = ptlrpc_svcpt_health_check(svcpt);
				3099
				3100	if (rc != 0)
				3101	return rc;
				3102	}
				3103	return 0;
				3104	}
				3105	EXPORT_SYMBOL(ptlrpc_service_health_check);