Blame - drivers/block/drbd/drbd_req.c - kernel/msm-4.19

blob: 97a9e69dd2396b0dec294447c8996aba6eef3abb [file] [log] [blame]

Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1	/*
				2	drbd_req.c
				3
				4	This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
				5
				6	Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
				7	Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
				8	Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
				9
				10	drbd is free software; you can redistribute it and/or modify
				11	it under the terms of the GNU General Public License as published by
				12	the Free Software Foundation; either version 2, or (at your option)
				13	any later version.
				14
				15	drbd is distributed in the hope that it will be useful,
				16	but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				18	GNU General Public License for more details.
				19
				20	You should have received a copy of the GNU General Public License
				21	along with drbd; see the file COPYING. If not, write to
				22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
				23
				24	*/
				25
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	26	#include <linux/module.h>
				27
				28	#include <linux/slab.h>
				29	#include <linux/drbd.h>
				30	#include "drbd_int.h"
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	31	#include "drbd_req.h"
				32
				33
Philipp Reisner	57bcb6c	2011-12-03 11:18:56 +0100	[diff] [blame]	34	static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size);
				35
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	36	/* Update disk stats at start of I/O request */
				37	static void _drbd_start_io_acct(struct drbd_conf mdev, struct drbd_request req, struct bio *bio)
				38	{
				39	const int rw = bio_data_dir(bio);
				40	int cpu;
				41	cpu = part_stat_lock();
Philipp Reisner	72585d2	2012-02-23 12:56:26 +0100	[diff] [blame]	42	part_round_stats(cpu, &mdev->vdisk->part0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	43	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
				44	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
Philipp Reisner	376694a	2011-11-07 10:54:28 +0100	[diff] [blame]	45	(void) cpu; /* The macro invocations above want the cpu argument, I do not like
				46	the compiler warning about cpu only assigned but never used... */
Philipp Reisner	753c891	2009-11-18 15:52:51 +0100	[diff] [blame]	47	part_inc_in_flight(&mdev->vdisk->part0, rw);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	48	part_stat_unlock();
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	49	}
				50
				51	/* Update disk stats when completing request upwards */
				52	static void _drbd_end_io_acct(struct drbd_conf mdev, struct drbd_request req)
				53	{
				54	int rw = bio_data_dir(req->master_bio);
				55	unsigned long duration = jiffies - req->start_time;
				56	int cpu;
				57	cpu = part_stat_lock();
				58	part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
				59	part_round_stats(cpu, &mdev->vdisk->part0);
Philipp Reisner	753c891	2009-11-18 15:52:51 +0100	[diff] [blame]	60	part_dec_in_flight(&mdev->vdisk->part0, rw);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	61	part_stat_unlock();
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	62	}
				63
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	64	static struct drbd_request drbd_req_new(struct drbd_conf mdev,
				65	struct bio *bio_src)
				66	{
				67	struct drbd_request *req;
				68
				69	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
				70	if (!req)
				71	return NULL;
				72
				73	drbd_req_make_private_bio(req, bio_src);
				74	req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
Philipp Reisner	a21e929	2011-02-08 15:08:49 +0100	[diff] [blame]	75	req->w.mdev = mdev;
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	76	req->master_bio = bio_src;
				77	req->epoch = 0;
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	78
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	79	drbd_clear_interval(&req->i);
				80	req->i.sector = bio_src->bi_sector;
				81	req->i.size = bio_src->bi_size;
Andreas Gruenbacher	5e47226	2011-01-27 14:42:51 +0100	[diff] [blame]	82	req->i.local = true;
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	83	req->i.waiting = false;
				84
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	85	INIT_LIST_HEAD(&req->tl_requests);
				86	INIT_LIST_HEAD(&req->w.list);
				87
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	88	/* one reference to be put by __drbd_make_request */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	89	atomic_set(&req->completion_ref, 1);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	90	/* one kref as long as completion_ref > 0 */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	91	kref_init(&req->kref);
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	92	return req;
				93	}
				94
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	95	void drbd_req_destroy(struct kref *kref)
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	96	{
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	97	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
				98	struct drbd_conf *mdev = req->w.mdev;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	99	const unsigned s = req->rq_state;
				100
				101	if ((req->master_bio && !(s & RQ_POSTPONED)) \|\|
				102	atomic_read(&req->completion_ref) \|\|
				103	(s & RQ_LOCAL_PENDING) \|\|
				104	((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
				105	dev_err(DEV, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
				106	s, atomic_read(&req->completion_ref));
				107	return;
				108	}
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	109
				110	/* remove it from the transfer log.
				111	* well, only if it had been there in the first
				112	* place... if it had not (local only or conflicting
				113	* and never sent), it should still be "empty" as
				114	* initialized in drbd_req_new(), so we can list_del() it
				115	* here unconditionally */
Lars Ellenberg	2312f0b3	2011-11-24 10:36:25 +0100	[diff] [blame]	116	list_del_init(&req->tl_requests);
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	117
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	118	/* if it was a write, we may have to set the corresponding
				119	* bit(s) out-of-sync first. If it had a local part, we need to
				120	* release the reference to the activity log. */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	121	if (s & RQ_WRITE) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	122	/* Set out-of-sync unless both OK flags are set
				123	* (local only or remote failed).
				124	* Other places where we set out-of-sync:
				125	* READ with local io-error */
Lars Ellenberg	70f17b6	2012-09-03 14:08:35 +0200	[diff] [blame]	126
				127	/* There is a special case:
				128	* we may notice late that IO was suspended,
				129	* and postpone, or schedule for retry, a write,
				130	* before it even was submitted or sent.
				131	* In that case we do not want to touch the bitmap at all.
				132	*/
				133	if ((s & (RQ_POSTPONED\|RQ_LOCAL_MASK\|RQ_NET_MASK)) != RQ_POSTPONED) {
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	134	if (!(s & RQ_NET_OK) \|\| !(s & RQ_LOCAL_OK))
				135	drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	136
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	137	if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
				138	drbd_set_in_sync(mdev, req->i.sector, req->i.size);
				139	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	140
				141	/* one might be tempted to move the drbd_al_complete_io
Andreas Gruenbacher	fcefa62	2011-02-17 16:46:59 +0100	[diff] [blame]	142	* to the local io completion callback drbd_request_endio.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	143	* but, if this was a mirror write, we may only
				144	* drbd_al_complete_io after this is RQ_NET_DONE,
				145	* otherwise the extent could be dropped from the al
				146	* before it has actually been written on the peer.
				147	* if we crash before our peer knows about the request,
				148	* but after the extent has been dropped from the al,
				149	* we would forget to resync the corresponding extent.
				150	*/
Philipp Reisner	76590cd	2012-08-29 15:23:14 +0200	[diff] [blame]	151	if (s & RQ_IN_ACT_LOG) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	152	if (get_ldev_if_state(mdev, D_FAILED)) {
Philipp Reisner	76590cd	2012-08-29 15:23:14 +0200	[diff] [blame]	153	drbd_al_complete_io(mdev, &req->i);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	154	put_ldev(mdev);
				155	} else if (__ratelimit(&drbd_ratelimit_state)) {
Lars Ellenberg	181286a	2011-03-31 15:18:56 +0200	[diff] [blame]	156	dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), "
				157	"but my Disk seems to have failed :(\n",
				158	(unsigned long long) req->i.sector, req->i.size);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	159	}
				160	}
				161	}
				162
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	163	mempool_free(req, drbd_request_mempool);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	164	}
				165
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	166	static void wake_all_senders(struct drbd_tconn *tconn) {
				167	wake_up(&tconn->sender_work.q_wait);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	168	}
				169
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	170	/* must hold resource->req_lock */
				171	static void start_new_tl_epoch(struct drbd_tconn *tconn)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	172	{
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	173	/* no point closing an epoch, if it is empty, anyways. */
				174	if (tconn->current_tle_writes == 0)
				175	return;
				176
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	177	tconn->current_tle_writes = 0;
				178	atomic_inc(&tconn->current_tle_nr);
				179	wake_all_senders(tconn);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	180	}
				181
				182	void complete_master_bio(struct drbd_conf *mdev,
				183	struct bio_and_error *m)
				184	{
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	185	bio_endio(m->bio, m->error);
				186	dec_ap_bio(mdev);
				187	}
				188
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	189
				190	static void drbd_remove_request_interval(struct rb_root *root,
				191	struct drbd_request *req)
				192	{
Philipp Reisner	a21e929	2011-02-08 15:08:49 +0100	[diff] [blame]	193	struct drbd_conf *mdev = req->w.mdev;
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	194	struct drbd_interval *i = &req->i;
				195
				196	drbd_remove_interval(root, i);
				197
				198	/* Wake up any processes waiting for this request to complete. */
				199	if (i->waiting)
				200	wake_up(&mdev->misc_wait);
				201	}
				202
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	203	/* Helper for __req_mod().
				204	* Set m->bio to the master bio, if it is fit to be completed,
				205	* or leave it alone (it is initialized to NULL in __req_mod),
				206	* if it has already been completed, or cannot be completed yet.
				207	* If m->bio is set, the error status to be returned is placed in m->error.
				208	*/
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	209	static
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	210	void drbd_req_complete(struct drbd_request req, struct bio_and_error m)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	211	{
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	212	const unsigned s = req->rq_state;
Philipp Reisner	a21e929	2011-02-08 15:08:49 +0100	[diff] [blame]	213	struct drbd_conf *mdev = req->w.mdev;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	214	int rw;
				215	int error, ok;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	216
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	217	/* we must not complete the master bio, while it is
				218	* still being processed by _drbd_send_zc_bio (drbd_send_dblock)
				219	* not yet acknowledged by the peer
				220	* not yet completed by the local io subsystem
				221	* these flags may get cleared in any order by
				222	* the worker,
				223	* the receiver,
				224	* the bio_endio completion callbacks.
				225	*/
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	226	if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) \|\|
				227	(s & RQ_NET_QUEUED) \|\| (s & RQ_NET_PENDING) \|\|
				228	(s & RQ_COMPLETION_SUSP)) {
				229	dev_err(DEV, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	230	return;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	231	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	232
				233	if (!req->master_bio) {
				234	dev_err(DEV, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
				235	return;
				236	}
				237
				238	rw = bio_rw(req->master_bio);
				239
				240	/*
				241	* figure out whether to report success or failure.
				242	*
				243	* report success when at least one of the operations succeeded.
				244	* or, to put the other way,
				245	* only report failure, when both operations failed.
				246	*
				247	* what to do about the failures is handled elsewhere.
				248	* what we need to do here is just: complete the master_bio.
				249	*
				250	* local completion error, if any, has been stored as ERR_PTR
				251	* in private_bio within drbd_request_endio.
				252	*/
				253	ok = (s & RQ_LOCAL_OK) \|\| (s & RQ_NET_OK);
				254	error = PTR_ERR(req->private_bio);
				255
				256	/* remove the request from the conflict detection
				257	* respective block_id verification hash */
				258	if (!drbd_interval_empty(&req->i)) {
				259	struct rb_root *root;
				260
				261	if (rw == WRITE)
				262	root = &mdev->write_requests;
				263	else
				264	root = &mdev->read_requests;
				265	drbd_remove_request_interval(root, req);
				266	} else if (!(s & RQ_POSTPONED))
				267	D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
				268
				269	/* Before we can signal completion to the upper layers,
				270	* we may need to close the current transfer log epoch.
				271	* We are within the request lock, so we can simply compare
				272	* the request epoch number with the current transfer log
				273	* epoch number. If they match, increase the current_tle_nr,
				274	* and reset the transfer log epoch write_cnt.
				275	*/
				276	if (rw == WRITE &&
				277	req->epoch == atomic_read(&mdev->tconn->current_tle_nr))
				278	start_new_tl_epoch(mdev->tconn);
				279
				280	/* Update disk stats */
				281	_drbd_end_io_acct(mdev, req);
				282
				283	/* If READ failed,
				284	* have it be pushed back to the retry work queue,
				285	* so it will re-enter __drbd_make_request(),
				286	* and be re-assigned to a suitable local or remote path,
				287	* or failed if we do not have access to good data anymore.
				288	*
				289	* Unless it was failed early by __drbd_make_request(),
				290	* because no path was available, in which case
				291	* it was not even added to the transfer_log.
				292	*
				293	* READA may fail, and will not be retried.
				294	*
				295	* WRITE should have used all available paths already.
				296	*/
				297	if (!ok && rw == READ && !list_empty(&req->tl_requests))
				298	req->rq_state \|= RQ_POSTPONED;
				299
				300	if (!(req->rq_state & RQ_POSTPONED)) {
				301	m->error = ok ? 0 : (error ?: -EIO);
				302	m->bio = req->master_bio;
				303	req->master_bio = NULL;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	304	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	305	}
				306
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	307	static int drbd_req_put_completion_ref(struct drbd_request req, struct bio_and_error m, int put)
Philipp Reisner	cfa0341	2010-06-23 17:18:51 +0200	[diff] [blame]	308	{
Philipp Reisner	a21e929	2011-02-08 15:08:49 +0100	[diff] [blame]	309	struct drbd_conf *mdev = req->w.mdev;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	310	D_ASSERT(m \|\| (req->rq_state & RQ_POSTPONED));
Philipp Reisner	cfa0341	2010-06-23 17:18:51 +0200	[diff] [blame]	311
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	312	if (!atomic_sub_and_test(put, &req->completion_ref))
				313	return 0;
				314
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	315	drbd_req_complete(req, m);
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	316
				317	if (req->rq_state & RQ_POSTPONED) {
				318	/* don't destroy the req object just yet,
				319	* but queue it for retry */
				320	drbd_restart_request(req);
				321	return 0;
				322	}
				323
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	324	return 1;
				325	}
				326
				327	/* I'd like this to be the only place that manipulates
				328	* req->completion_ref and req->kref. */
				329	static void mod_rq_state(struct drbd_request req, struct bio_and_error m,
				330	int clear, int set)
				331	{
				332	struct drbd_conf *mdev = req->w.mdev;
				333	unsigned s = req->rq_state;
				334	int c_put = 0;
				335	int k_put = 0;
				336
Philipp Reisner	5af2e8c	2012-08-14 11:28:52 +0200	[diff] [blame]	337	if (drbd_suspended(mdev) && !((s \| clear) & RQ_COMPLETION_SUSP))
				338	set \|= RQ_COMPLETION_SUSP;
				339
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	340	/* apply */
				341
				342	req->rq_state &= ~clear;
				343	req->rq_state \|= set;
				344
				345	/* no change? */
				346	if (req->rq_state == s)
				347	return;
				348
				349	/* intent: get references */
				350
				351	if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
				352	atomic_inc(&req->completion_ref);
				353
				354	if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
				355	inc_ap_pending(mdev);
				356	atomic_inc(&req->completion_ref);
				357	}
				358
				359	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED))
				360	atomic_inc(&req->completion_ref);
				361
				362	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
				363	kref_get(&req->kref); /* wait for the DONE */
				364
				365	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT))
				366	atomic_add(req->i.size >> 9, &mdev->ap_in_flight);
				367
Philipp Reisner	5af2e8c	2012-08-14 11:28:52 +0200	[diff] [blame]	368	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
				369	atomic_inc(&req->completion_ref);
				370
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	371	/* progress: put references */
				372
				373	if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
				374	++c_put;
				375
				376	if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
				377	D_ASSERT(req->rq_state & RQ_LOCAL_PENDING);
				378	/* local completion may still come in later,
				379	* we need to keep the req object around. */
				380	kref_get(&req->kref);
				381	++c_put;
				382	}
				383
				384	if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
				385	if (req->rq_state & RQ_LOCAL_ABORTED)
				386	++k_put;
				387	else
				388	++c_put;
				389	}
				390
				391	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
				392	dec_ap_pending(mdev);
				393	++c_put;
				394	}
				395
				396	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
				397	++c_put;
				398
				399	if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
				400	if (req->rq_state & RQ_NET_SENT)
				401	atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
				402	++k_put;
				403	}
				404
				405	/* potentially complete and destroy */
				406
				407	if (k_put \|\| c_put) {
				408	/* Completion does it's own kref_put. If we are going to
				409	* kref_sub below, we need req to be still around then. */
				410	int at_least = k_put + !!c_put;
				411	int refcount = atomic_read(&req->kref.refcount);
				412	if (refcount < at_least)
				413	dev_err(DEV,
				414	"mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n",
				415	s, req->rq_state, refcount, at_least);
				416	}
				417
				418	/* If we made progress, retry conflicting peer requests, if any. */
				419	if (req->i.waiting)
				420	wake_up(&mdev->misc_wait);
				421
				422	if (c_put)
				423	k_put += drbd_req_put_completion_ref(req, m, c_put);
				424	if (k_put)
				425	kref_sub(&req->kref, k_put, drbd_req_destroy);
Philipp Reisner	cfa0341	2010-06-23 17:18:51 +0200	[diff] [blame]	426	}
				427
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	428	/* obviously this could be coded as many single functions
				429	* instead of one huge switch,
				430	* or by putting the code directly in the respective locations
				431	* (as it has been before).
				432	*
				433	* but having it this way
				434	* enforces that it is all in this one place, where it is easier to audit,
				435	* it makes it obvious that whatever "event" "happens" to a request should
				436	* happen "atomically" within the req_lock,
				437	* and it enforces that we have to think in a very structured manner
				438	* about the "events" that may happen to a request during its life time ...
				439	*/
Philipp Reisner	2a80699	2010-06-09 14:07:43 +0200	[diff] [blame]	440	int __req_mod(struct drbd_request *req, enum drbd_req_event what,
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	441	struct bio_and_error *m)
				442	{
Philipp Reisner	a21e929	2011-02-08 15:08:49 +0100	[diff] [blame]	443	struct drbd_conf *mdev = req->w.mdev;
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	444	struct net_conf *nc;
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	445	int p, rv = 0;
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	446
				447	if (m)
				448	m->bio = NULL;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	449
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	450	switch (what) {
				451	default:
				452	dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
				453	break;
				454
				455	/* does not happen...
				456	* initialization done in drbd_req_new
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	457	case CREATED:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	458	break;
				459	*/
				460
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	461	case TO_BE_SENT: /* via network */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	462	/* reached via __drbd_make_request
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	463	* and from w_read_retry_remote */
				464	D_ASSERT(!(req->rq_state & RQ_NET_MASK));
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	465	rcu_read_lock();
				466	nc = rcu_dereference(mdev->tconn->net_conf);
				467	p = nc->wire_protocol;
				468	rcu_read_unlock();
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	469	req->rq_state \|=
				470	p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
				471	p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	472	mod_rq_state(req, m, 0, RQ_NET_PENDING);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	473	break;
				474
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	475	case TO_BE_SUBMITTED: /* locally */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	476	/* reached via __drbd_make_request */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	477	D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	478	mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	479	break;
				480
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	481	case COMPLETED_OK:
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	482	if (req->rq_state & RQ_WRITE)
Andreas Gruenbacher	ace652a	2011-01-03 17:09:58 +0100	[diff] [blame]	483	mdev->writ_cnt += req->i.size >> 9;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	484	else
Andreas Gruenbacher	ace652a	2011-01-03 17:09:58 +0100	[diff] [blame]	485	mdev->read_cnt += req->i.size >> 9;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	486
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	487	mod_rq_state(req, m, RQ_LOCAL_PENDING,
				488	RQ_LOCAL_COMPLETED\|RQ_LOCAL_OK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	489	break;
				490
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	491	case ABORT_DISK_IO:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	492	mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	493	break;
				494
Lars Ellenberg	edc9f5e	2012-09-27 15:18:21 +0200	[diff] [blame^]	495	case WRITE_COMPLETED_WITH_ERROR:
				496	__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
				497	mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
				498	break;
				499
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	500	case READ_COMPLETED_WITH_ERROR:
Andreas Gruenbacher	ace652a	2011-01-03 17:09:58 +0100	[diff] [blame]	501	drbd_set_out_of_sync(mdev, req->i.sector, req->i.size);
Lars Ellenberg	edc9f5e	2012-09-27 15:18:21 +0200	[diff] [blame^]	502	__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	503	/* fall through. */
				504	case READ_AHEAD_COMPLETED_WITH_ERROR:
				505	/* it is legal to fail READA, no __drbd_chk_io_error in that case. */
				506	mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
Lars Ellenberg	4439c40	2012-03-26 17:29:30 +0200	[diff] [blame]	507	break;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	508
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	509	case QUEUE_FOR_NET_READ:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	510	/* READ or READA, and
				511	* no local disk,
				512	* or target area marked as invalid,
				513	* or just got an io-error. */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	514	/* from __drbd_make_request
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	515	* or from bio_endio during read io-error recovery */
				516
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	517	/* So we can verify the handle in the answer packet.
				518	* Corresponding drbd_remove_request_interval is in
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	519	* drbd_req_complete() */
Lars Ellenberg	97ddb68	2011-07-15 23:52:44 +0200	[diff] [blame]	520	D_ASSERT(drbd_interval_empty(&req->i));
Andreas Gruenbacher	dac1389	2011-01-21 17:18:39 +0100	[diff] [blame]	521	drbd_insert_interval(&mdev->read_requests, &req->i);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	522
Lars Ellenberg	83c3883	2009-11-03 02:22:06 +0100	[diff] [blame]	523	set_bit(UNPLUG_REMOTE, &mdev->flags);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	524
				525	D_ASSERT(req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	4439c40	2012-03-26 17:29:30 +0200	[diff] [blame]	526	D_ASSERT((req->rq_state & RQ_LOCAL_MASK) == 0);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	527	mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Lars Ellenberg	4439c40	2012-03-26 17:29:30 +0200	[diff] [blame]	528	req->w.cb = w_send_read_req;
Lars Ellenberg	d5b27b0	2011-11-14 15:42:37 +0100	[diff] [blame]	529	drbd_queue_work(&mdev->tconn->sender_work, &req->w);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	530	break;
				531
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	532	case QUEUE_FOR_NET_WRITE:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	533	/* assert something? */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	534	/* from __drbd_make_request only */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	535
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	536	/* Corresponding drbd_remove_request_interval is in
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	537	* drbd_req_complete() */
Lars Ellenberg	97ddb68	2011-07-15 23:52:44 +0200	[diff] [blame]	538	D_ASSERT(drbd_interval_empty(&req->i));
Andreas Gruenbacher	de69671	2011-01-20 15:00:24 +0100	[diff] [blame]	539	drbd_insert_interval(&mdev->write_requests, &req->i);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	540
				541	/* NOTE
				542	* In case the req ended up on the transfer log before being
				543	* queued on the worker, it could lead to this request being
				544	* missed during cleanup after connection loss.
				545	* So we have to do both operations here,
				546	* within the same lock that protects the transfer log.
				547	*
				548	* _req_add_to_epoch(req); this has to be after the
				549	* _maybe_start_new_epoch(req); which happened in
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	550	* __drbd_make_request, because we now may set the bit
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	551	* again ourselves to close the current epoch.
				552	*
				553	* Add req to the (now) current epoch (barrier). */
				554
Lars Ellenberg	83c3883	2009-11-03 02:22:06 +0100	[diff] [blame]	555	/* otherwise we may lose an unplug, which may cause some remote
				556	* io-scheduler timeout to expire, increasing maximum latency,
				557	* hurting performance. */
				558	set_bit(UNPLUG_REMOTE, &mdev->flags);
				559
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	560	/* queue work item to send data */
				561	D_ASSERT(req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	562	mod_rq_state(req, m, 0, RQ_NET_QUEUED\|RQ_EXP_BARR_ACK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	563	req->w.cb = w_send_dblock;
Lars Ellenberg	d5b27b0	2011-11-14 15:42:37 +0100	[diff] [blame]	564	drbd_queue_work(&mdev->tconn->sender_work, &req->w);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	565
				566	/* close the epoch, in case it outgrew the limit */
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	567	rcu_read_lock();
				568	nc = rcu_dereference(mdev->tconn->net_conf);
				569	p = nc->max_epoch_size;
				570	rcu_read_unlock();
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	571	if (mdev->tconn->current_tle_writes >= p)
				572	start_new_tl_epoch(mdev->tconn);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	573
				574	break;
				575
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	576	case QUEUE_FOR_SEND_OOS:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	577	mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Andreas Gruenbacher	8f7bed7	2010-12-19 23:53:14 +0100	[diff] [blame]	578	req->w.cb = w_send_out_of_sync;
Lars Ellenberg	d5b27b0	2011-11-14 15:42:37 +0100	[diff] [blame]	579	drbd_queue_work(&mdev->tconn->sender_work, &req->w);
Philipp Reisner	73a01a1	2010-10-27 14:33:00 +0200	[diff] [blame]	580	break;
				581
Lars Ellenberg	ea9d672	2012-03-26 16:46:39 +0200	[diff] [blame]	582	case READ_RETRY_REMOTE_CANCELED:
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	583	case SEND_CANCELED:
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	584	case SEND_FAILED:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	585	/* real cleanup will be done from tl_clear. just update flags
				586	* so it is no longer marked as on the worker queue */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	587	mod_rq_state(req, m, RQ_NET_QUEUED, 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	588	break;
				589
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	590	case HANDED_OVER_TO_NETWORK:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	591	/* assert something? */
				592	if (bio_data_dir(req->master_bio) == WRITE &&
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	593	!(req->rq_state & (RQ_EXP_RECEIVE_ACK \| RQ_EXP_WRITE_ACK))) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	594	/* this is what is dangerous about protocol A:
				595	* pretend it was successfully written on the peer. */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	596	if (req->rq_state & RQ_NET_PENDING)
				597	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
				598	/* else: neg-ack was faster... */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	599	/* it is still not yet RQ_NET_DONE until the
				600	* corresponding epoch barrier got acked as well,
				601	* so we know what to dirty on connection loss */
				602	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	603	mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
Lars Ellenberg	27a434f	2012-03-26 16:44:59 +0200	[diff] [blame]	604	break;
				605
				606	case OOS_HANDED_TO_NETWORK:
				607	/* Was not set PENDING, no longer QUEUED, so is now DONE
				608	* as far as this connection is concerned. */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	609	mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	610	break;
				611
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	612	case CONNECTION_LOST_WHILE_PENDING:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	613	/* transfer log cleanup after connection loss */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	614	mod_rq_state(req, m,
				615	RQ_NET_OK\|RQ_NET_PENDING\|RQ_COMPLETION_SUSP,
				616	RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	617	break;
				618
Lars Ellenberg	d4dabbe	2012-08-01 12:33:51 +0200	[diff] [blame]	619	case CONFLICT_RESOLVED:
				620	/* for superseded conflicting writes of multiple primaries,
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	621	* there is no need to keep anything in the tl, potential
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	622	* node crashes are covered by the activity log.
				623	*
				624	* If this request had been marked as RQ_POSTPONED before,
Lars Ellenberg	d4dabbe	2012-08-01 12:33:51 +0200	[diff] [blame]	625	* it will actually not be completed, but "restarted",
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	626	* resubmitted from the retry worker context. */
				627	D_ASSERT(req->rq_state & RQ_NET_PENDING);
				628	D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
				629	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE\|RQ_NET_OK);
				630	break;
				631
Lars Ellenberg	0afd569	2012-03-26 16:51:11 +0200	[diff] [blame]	632	case WRITE_ACKED_BY_PEER_AND_SIS:
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	633	req->rq_state \|= RQ_NET_SIS;
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	634	case WRITE_ACKED_BY_PEER:
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	635	D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	636	/* protocol C; successfully written on peer.
Lars Ellenberg	0afd569	2012-03-26 16:51:11 +0200	[diff] [blame]	637	* Nothing more to do here.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	638	* We want to keep the tl in place for all protocols, to cater
Lars Ellenberg	0afd569	2012-03-26 16:51:11 +0200	[diff] [blame]	639	* for volatile write-back caches on lower level devices. */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	640
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	641	goto ack_common;
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	642	case RECV_ACKED_BY_PEER:
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	643	D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	644	/* protocol B; pretends to be successfully written on peer.
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	645	* see also notes above in HANDED_OVER_TO_NETWORK about
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	646	* protocol != C */
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	647	ack_common:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	648	D_ASSERT(req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	649	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	650	break;
				651
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	652	case POSTPONE_WRITE:
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	653	D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK);
				654	/* If this node has already detected the write conflict, the
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	655	* worker will be waiting on misc_wait. Wake it up once this
				656	* request has completed locally.
				657	*/
				658	D_ASSERT(req->rq_state & RQ_NET_PENDING);
				659	req->rq_state \|= RQ_POSTPONED;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	660	if (req->i.waiting)
				661	wake_up(&mdev->misc_wait);
				662	/* Do not clear RQ_NET_PENDING. This request will make further
				663	* progress via restart_conflicting_writes() or
				664	* fail_postponed_requests(). Hopefully. */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	665	break;
				666
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	667	case NEG_ACKED:
Lars Ellenberg	46e21bb	2012-08-07 06:47:14 +0200	[diff] [blame]	668	mod_rq_state(req, m, RQ_NET_OK\|RQ_NET_PENDING, 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	669	break;
				670
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	671	case FAIL_FROZEN_DISK_IO:
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	672	if (!(req->rq_state & RQ_LOCAL_COMPLETED))
				673	break;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	674	mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	675	break;
				676
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	677	case RESTART_FROZEN_DISK_IO:
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	678	if (!(req->rq_state & RQ_LOCAL_COMPLETED))
				679	break;
				680
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	681	mod_rq_state(req, m,
				682	RQ_COMPLETION_SUSP\|RQ_LOCAL_COMPLETED,
				683	RQ_LOCAL_PENDING);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	684
				685	rv = MR_READ;
				686	if (bio_data_dir(req->master_bio) == WRITE)
				687	rv = MR_WRITE;
				688
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	689	get_ldev(mdev); /* always succeeds in this call path */
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	690	req->w.cb = w_restart_disk_io;
Lars Ellenberg	d5b27b0	2011-11-14 15:42:37 +0100	[diff] [blame]	691	drbd_queue_work(&mdev->tconn->sender_work, &req->w);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	692	break;
				693
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	694	case RESEND:
Philipp Reisner	8a0bab2	2012-08-07 13:28:00 +0200	[diff] [blame]	695	/* Simply complete (local only) READs. */
				696	if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
				697	mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
				698	break;
				699	}
				700
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	701	/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	702	before the connection loss (B&C only); only P_BARRIER_ACK
				703	(or the local completion?) was missing when we suspended.
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	704	Throwing them out of the TL here by pretending we got a BARRIER_ACK.
				705	During connection handshake, we ensure that the peer was not rebooted. */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	706	if (!(req->rq_state & RQ_NET_OK)) {
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	707	/* FIXME could this possibly be a req->w.cb == w_send_out_of_sync?
				708	* in that case we must not set RQ_NET_PENDING. */
				709
				710	mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED\|RQ_NET_PENDING);
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	711	if (req->w.cb) {
Lars Ellenberg	d5b27b0	2011-11-14 15:42:37 +0100	[diff] [blame]	712	drbd_queue_work(&mdev->tconn->sender_work, &req->w);
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	713	rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	714	} /* else: FIXME can this happen? */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	715	break;
				716	}
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	717	/* else, fall through to BARRIER_ACKED */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	718
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	719	case BARRIER_ACKED:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	720	/* barrier ack for READ requests does not make sense */
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	721	if (!(req->rq_state & RQ_WRITE))
				722	break;
				723
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	724	if (req->rq_state & RQ_NET_PENDING) {
Andreas Gruenbacher	a209b4a	2011-08-17 12:43:25 +0200	[diff] [blame]	725	/* barrier came in before all requests were acked.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	726	* this is bad, because if the connection is lost now,
				727	* we won't be able to clean them up... */
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	728	dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n");
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	729	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	730	/* Allowed to complete requests, even while suspended.
				731	* As this is called for all requests within a matching epoch,
				732	* we need to filter, and only set RQ_NET_DONE for those that
				733	* have actually been on the wire. */
				734	mod_rq_state(req, m, RQ_COMPLETION_SUSP,
				735	(req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	736	break;
				737
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	738	case DATA_RECEIVED:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	739	D_ASSERT(req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	740	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK\|RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	741	break;
				742	};
Philipp Reisner	2a80699	2010-06-09 14:07:43 +0200	[diff] [blame]	743
				744	return rv;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	745	}
				746
				747	/* we may do a local read if:
				748	* - we are consistent (of course),
				749	* - or we are generally inconsistent,
				750	* BUT we are still/already IN SYNC for this area.
				751	* since size may be bigger than BM_BLOCK_SIZE,
				752	* we may need to check several bits.
				753	*/
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	754	static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	755	{
				756	unsigned long sbnr, ebnr;
				757	sector_t esector, nr_sectors;
				758
				759	if (mdev->state.disk == D_UP_TO_DATE)
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	760	return true;
Lars Ellenberg	8c387de	2011-02-18 14:13:07 +0100	[diff] [blame]	761	if (mdev->state.disk != D_INCONSISTENT)
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	762	return false;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	763	esector = sector + (size >> 9) - 1;
Andreas Gruenbacher	8ca9844	2011-02-21 12:34:58 +0100	[diff] [blame]	764	nr_sectors = drbd_get_capacity(mdev->this_bdev);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	765	D_ASSERT(sector < nr_sectors);
				766	D_ASSERT(esector < nr_sectors);
				767
				768	sbnr = BM_SECT_TO_BIT(sector);
				769	ebnr = BM_SECT_TO_BIT(esector);
				770
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	771	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	772	}
				773
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	774	static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector,
				775	enum drbd_read_balancing rbm)
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	776	{
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	777	struct backing_dev_info *bdi;
Philipp Reisner	d60de03	2011-11-17 10:12:31 +0100	[diff] [blame]	778	int stripe_shift;
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	779
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	780	switch (rbm) {
				781	case RB_CONGESTED_REMOTE:
				782	bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
				783	return bdi_read_congested(bdi);
				784	case RB_LEAST_PENDING:
				785	return atomic_read(&mdev->local_cnt) >
				786	atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
Philipp Reisner	d60de03	2011-11-17 10:12:31 +0100	[diff] [blame]	787	case RB_32K_STRIPING: /* stripe_shift = 15 */
				788	case RB_64K_STRIPING:
				789	case RB_128K_STRIPING:
				790	case RB_256K_STRIPING:
				791	case RB_512K_STRIPING:
				792	case RB_1M_STRIPING: /* stripe_shift = 20 */
				793	stripe_shift = (rbm - RB_32K_STRIPING + 15);
				794	return (sector >> (stripe_shift - 9)) & 1;
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	795	case RB_ROUND_ROBIN:
				796	return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
				797	case RB_PREFER_REMOTE:
				798	return true;
				799	case RB_PREFER_LOCAL:
				800	default:
				801	return false;
				802	}
				803	}
				804
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	805	/*
				806	* complete_conflicting_writes - wait for any conflicting write requests
				807	*
				808	* The write_requests tree contains all active write requests which we
				809	* currently know about. Wait for any requests to complete which conflict with
				810	* the new one.
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	811	*
				812	* Only way out: remove the conflicting intervals from the tree.
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	813	*/
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	814	static void complete_conflicting_writes(struct drbd_request *req)
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	815	{
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	816	DEFINE_WAIT(wait);
				817	struct drbd_conf *mdev = req->w.mdev;
				818	struct drbd_interval *i;
				819	sector_t sector = req->i.sector;
				820	int size = req->i.size;
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	821
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	822	i = drbd_find_overlap(&mdev->write_requests, sector, size);
				823	if (!i)
				824	return;
				825
				826	for (;;) {
				827	prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	828	i = drbd_find_overlap(&mdev->write_requests, sector, size);
				829	if (!i)
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	830	break;
				831	/* Indicate to wake up device->misc_wait on progress. */
				832	i->waiting = true;
				833	spin_unlock_irq(&mdev->tconn->req_lock);
				834	schedule();
				835	spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	836	}
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	837	finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	838	}
				839
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	840	/* called within req_lock and rcu_read_lock() */
Lars Ellenberg	3b9ef85	2012-07-30 09:06:26 +0200	[diff] [blame]	841	static void maybe_pull_ahead(struct drbd_conf *mdev)
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	842	{
				843	struct drbd_tconn *tconn = mdev->tconn;
				844	struct net_conf *nc;
				845	bool congested = false;
				846	enum drbd_on_congestion on_congestion;
				847
				848	nc = rcu_dereference(tconn->net_conf);
				849	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
				850	if (on_congestion == OC_BLOCK \|\|
				851	tconn->agreed_pro_version < 96)
Lars Ellenberg	3b9ef85	2012-07-30 09:06:26 +0200	[diff] [blame]	852	return;
				853
				854	/* If I don't even have good local storage, we can not reasonably try
				855	* to pull ahead of the peer. We also need the local reference to make
				856	* sure mdev->act_log is there.
				857	*/
				858	if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
				859	return;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	860
				861	if (nc->cong_fill &&
				862	atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) {
				863	dev_info(DEV, "Congestion-fill threshold reached\n");
				864	congested = true;
				865	}
				866
				867	if (mdev->act_log->used >= nc->cong_extents) {
				868	dev_info(DEV, "Congestion-extents threshold reached\n");
				869	congested = true;
				870	}
				871
				872	if (congested) {
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	873	/* start a new epoch for non-mirrored writes */
				874	start_new_tl_epoch(mdev->tconn);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	875
				876	if (on_congestion == OC_PULL_AHEAD)
				877	_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
				878	else /nc->on_congestion == OC_DISCONNECT /
				879	_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
				880	}
Lars Ellenberg	3b9ef85	2012-07-30 09:06:26 +0200	[diff] [blame]	881	put_ldev(mdev);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	882	}
				883
				884	/* If this returns false, and req->private_bio is still set,
				885	* this should be submitted locally.
				886	*
				887	* If it returns false, but req->private_bio is not set,
				888	* we do not have access to good data :(
				889	*
				890	* Otherwise, this destroys req->private_bio, if any,
				891	* and returns true.
				892	*/
				893	static bool do_remote_read(struct drbd_request *req)
				894	{
				895	struct drbd_conf *mdev = req->w.mdev;
				896	enum drbd_read_balancing rbm;
				897
				898	if (req->private_bio) {
				899	if (!drbd_may_do_local_read(mdev,
				900	req->i.sector, req->i.size)) {
				901	bio_put(req->private_bio);
				902	req->private_bio = NULL;
				903	put_ldev(mdev);
				904	}
				905	}
				906
				907	if (mdev->state.pdsk != D_UP_TO_DATE)
				908	return false;
				909
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	910	if (req->private_bio == NULL)
				911	return true;
				912
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	913	/* TODO: improve read balancing decisions, take into account drbd
				914	* protocol, pending requests etc. */
				915
				916	rcu_read_lock();
				917	rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing;
				918	rcu_read_unlock();
				919
				920	if (rbm == RB_PREFER_LOCAL && req->private_bio)
				921	return false; /* submit locally */
				922
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	923	if (remote_due_to_read_balancing(mdev, req->i.sector, rbm)) {
				924	if (req->private_bio) {
				925	bio_put(req->private_bio);
				926	req->private_bio = NULL;
				927	put_ldev(mdev);
				928	}
				929	return true;
				930	}
				931
				932	return false;
				933	}
				934
				935	/* returns number of connections (== 1, for drbd 8.4)
				936	* expected to actually write this data,
				937	* which does NOT include those that we are L_AHEAD for. */
				938	static int drbd_process_write_request(struct drbd_request *req)
				939	{
				940	struct drbd_conf *mdev = req->w.mdev;
				941	int remote, send_oos;
				942
				943	rcu_read_lock();
				944	remote = drbd_should_do_remote(mdev->state);
				945	if (remote) {
Lars Ellenberg	3b9ef85	2012-07-30 09:06:26 +0200	[diff] [blame]	946	maybe_pull_ahead(mdev);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	947	remote = drbd_should_do_remote(mdev->state);
				948	}
				949	send_oos = drbd_should_send_out_of_sync(mdev->state);
				950	rcu_read_unlock();
				951
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	952	/* Need to replicate writes. Unless it is an empty flush,
				953	* which is better mapped to a DRBD P_BARRIER packet,
				954	* also for drbd wire protocol compatibility reasons.
				955	* If this was a flush, just start a new epoch.
				956	* Unless the current epoch was empty anyways, or we are not currently
				957	* replicating, in which case there is no point. */
				958	if (unlikely(req->i.size == 0)) {
				959	/* The only size==0 bios we expect are empty flushes. */
				960	D_ASSERT(req->master_bio->bi_rw & REQ_FLUSH);
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	961	if (remote)
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	962	start_new_tl_epoch(mdev->tconn);
				963	return 0;
				964	}
				965
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	966	if (!remote && !send_oos)
				967	return 0;
				968
				969	D_ASSERT(!(remote && send_oos));
				970
				971	if (remote) {
				972	_req_mod(req, TO_BE_SENT);
				973	_req_mod(req, QUEUE_FOR_NET_WRITE);
				974	} else if (drbd_set_out_of_sync(mdev, req->i.sector, req->i.size))
				975	_req_mod(req, QUEUE_FOR_SEND_OOS);
				976
				977	return remote;
				978	}
				979
				980	static void
				981	drbd_submit_req_private_bio(struct drbd_request *req)
				982	{
				983	struct drbd_conf *mdev = req->w.mdev;
				984	struct bio *bio = req->private_bio;
				985	const int rw = bio_rw(bio);
				986
				987	bio->bi_bdev = mdev->ldev->backing_bdev;
				988
				989	/* State may have changed since we grabbed our reference on the
				990	* ->ldev member. Double check, and short-circuit to endio.
				991	* In case the last activity log transaction failed to get on
				992	* stable storage, and this is a WRITE, we may not even submit
				993	* this bio. */
				994	if (get_ldev(mdev)) {
				995	if (drbd_insert_fault(mdev,
				996	rw == WRITE ? DRBD_FAULT_DT_WR
				997	: rw == READ ? DRBD_FAULT_DT_RD
				998	: DRBD_FAULT_DT_RA))
				999	bio_endio(bio, -EIO);
				1000	else
				1001	generic_make_request(bio);
				1002	put_ldev(mdev);
				1003	} else
				1004	bio_endio(bio, -EIO);
				1005	}
				1006
Lars Ellenberg	5df69ec	2012-01-24 16:49:58 +0100	[diff] [blame]	1007	void __drbd_make_request(struct drbd_conf mdev, struct bio bio, unsigned long start_time)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1008	{
				1009	const int rw = bio_rw(bio);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1010	struct bio_and_error m = { NULL, };
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1011	struct drbd_request *req;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1012	bool no_remote = false;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1013
				1014	/* allocate outside of all locks; */
				1015	req = drbd_req_new(mdev, bio);
				1016	if (!req) {
				1017	dec_ap_bio(mdev);
				1018	/* only pass the error to the upper layers.
				1019	* if user cannot handle io errors, that's not our business. */
				1020	dev_err(DEV, "could not kmalloc() req\n");
				1021	bio_endio(bio, -ENOMEM);
Lars Ellenberg	5df69ec	2012-01-24 16:49:58 +0100	[diff] [blame]	1022	return;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1023	}
Philipp Reisner	aeda1cd6	2010-11-09 17:45:06 +0100	[diff] [blame]	1024	req->start_time = start_time;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1025
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1026	if (!get_ldev(mdev)) {
				1027	bio_put(req->private_bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1028	req->private_bio = NULL;
				1029	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1030
				1031	/* For WRITES going to the local disk, grab a reference on the target
				1032	* extent. This waits for any resync activity in the corresponding
				1033	* resync extent to finish, and, if necessary, pulls in the target
				1034	* extent into the activity log, which involves further disk io because
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1035	* of transactional on-disk meta data updates.
				1036	* Empty flushes don't need to go into the activity log, they can only
				1037	* flush data for pending writes which are already in there. */
				1038	if (rw == WRITE && req->private_bio && req->i.size
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1039	&& !test_bit(AL_SUSPENDED, &mdev->flags)) {
Philipp Reisner	0778286	2010-08-31 12:00:50 +0200	[diff] [blame]	1040	req->rq_state \|= RQ_IN_ACT_LOG;
Lars Ellenberg	181286a	2011-03-31 15:18:56 +0200	[diff] [blame]	1041	drbd_al_begin_io(mdev, &req->i);
Philipp Reisner	0778286	2010-08-31 12:00:50 +0200	[diff] [blame]	1042	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1043
Philipp Reisner	87eeee4	2011-01-19 14:16:30 +0100	[diff] [blame]	1044	spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	1045	if (rw == WRITE) {
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	1046	/* This may temporarily give up the req_lock,
				1047	* but will re-aquire it before it returns here.
				1048	* Needs to be before the check on drbd_suspended() */
				1049	complete_conflicting_writes(req);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	1050	}
				1051
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1052	/* no more giving up req_lock from now on! */
				1053
Philipp Reisner	2aebfab	2011-03-28 16:48:11 +0200	[diff] [blame]	1054	if (drbd_suspended(mdev)) {
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1055	/* push back and retry: */
				1056	req->rq_state \|= RQ_POSTPONED;
				1057	if (req->private_bio) {
				1058	bio_put(req->private_bio);
				1059	req->private_bio = NULL;
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	1060	put_ldev(mdev);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1061	}
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1062	goto out;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1063	}
				1064
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1065	/* Update disk stats */
				1066	_drbd_start_io_acct(mdev, req, bio);
				1067
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1068	/* We fail READ/READA early, if we can not serve it.
				1069	* We must do this before req is registered on any lists.
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1070	* Otherwise, drbd_req_complete() will queue failed READ for retry. */
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1071	if (rw != WRITE) {
				1072	if (!do_remote_read(req) && !req->private_bio)
				1073	goto nodata;
				1074	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1075
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1076	/* which transfer log epoch does this belong to? */
				1077	req->epoch = atomic_read(&mdev->tconn->current_tle_nr);
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1078
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1079	/* no point in adding empty flushes to the transfer log,
				1080	* they are mapped to drbd barriers already. */
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	1081	if (likely(req->i.size!=0)) {
				1082	if (rw == WRITE)
				1083	mdev->tconn->current_tle_writes++;
				1084
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1085	list_add_tail(&req->tl_requests, &mdev->tconn->transfer_log);
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	1086	}
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	1087
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1088	if (rw == WRITE) {
				1089	if (!drbd_process_write_request(req))
				1090	no_remote = true;
				1091	} else {
				1092	/* We either have a private_bio, or we can read from remote.
				1093	* Otherwise we had done the goto nodata above. */
				1094	if (req->private_bio == NULL) {
				1095	_req_mod(req, TO_BE_SENT);
				1096	_req_mod(req, QUEUE_FOR_NET_READ);
				1097	} else
				1098	no_remote = true;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1099	}
Philipp Reisner	6753171	2010-10-27 12:21:30 +0200	[diff] [blame]	1100
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1101	if (req->private_bio) {
				1102	/* needs to be marked within the same spinlock */
				1103	_req_mod(req, TO_BE_SUBMITTED);
				1104	/* but we need to give up the spinlock to submit */
				1105	spin_unlock_irq(&mdev->tconn->req_lock);
				1106	drbd_submit_req_private_bio(req);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1107	spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1108	} else if (no_remote) {
				1109	nodata:
				1110	if (__ratelimit(&drbd_ratelimit_state))
				1111	dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
				1112	/* A write may have been queued for send_oos, however.
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1113	* So we can not simply free it, we must go through drbd_req_put_completion_ref() */
Philipp Reisner	6753171	2010-10-27 12:21:30 +0200	[diff] [blame]	1114	}
				1115
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1116	out:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1117	if (drbd_req_put_completion_ref(req, &m, 1))
				1118	kref_put(&req->kref, drbd_req_destroy);
Philipp Reisner	87eeee4	2011-01-19 14:16:30 +0100	[diff] [blame]	1119	spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1120
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1121	if (m.bio)
				1122	complete_master_bio(mdev, &m);
Lars Ellenberg	5df69ec	2012-01-24 16:49:58 +0100	[diff] [blame]	1123	return;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1124	}
				1125
Andreas Gruenbacher	2f58dcf	2010-12-13 17:48:19 +0100	[diff] [blame]	1126	int drbd_make_request(struct request_queue q, struct bio bio)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1127	{
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1128	struct drbd_conf mdev = (struct drbd_conf ) q->queuedata;
Philipp Reisner	aeda1cd6	2010-11-09 17:45:06 +0100	[diff] [blame]	1129	unsigned long start_time;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1130
Philipp Reisner	aeda1cd6	2010-11-09 17:45:06 +0100	[diff] [blame]	1131	start_time = jiffies;
				1132
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1133	/*
				1134	* what we "blindly" assume:
				1135	*/
Andreas Gruenbacher	c670a39	2011-02-21 12:41:39 +0100	[diff] [blame]	1136	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1137
Lars Ellenberg	5df69ec	2012-01-24 16:49:58 +0100	[diff] [blame]	1138	inc_ap_bio(mdev);
				1139	__drbd_make_request(mdev, bio, start_time);
Philipp Reisner	69b6a3b	2011-12-20 11:49:58 +0100	[diff] [blame]	1140
				1141	return 0;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1142	}
				1143
Lars Ellenberg	23361cf	2011-03-31 16:36:43 +0200	[diff] [blame]	1144	/* This is called by bio_add_page().
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1145	*
Lars Ellenberg	23361cf	2011-03-31 16:36:43 +0200	[diff] [blame]	1146	* q->max_hw_sectors and other global limits are already enforced there.
				1147	*
				1148	* We need to call down to our lower level device,
				1149	* in case it has special restrictions.
				1150	*
				1151	* We also may need to enforce configured max-bio-bvecs limits.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1152	*
				1153	* As long as the BIO is empty we have to allow at least one bvec,
Lars Ellenberg	23361cf	2011-03-31 16:36:43 +0200	[diff] [blame]	1154	* regardless of size and offset, so no need to ask lower levels.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1155	*/
				1156	int drbd_merge_bvec(struct request_queue q, struct bvec_merge_data bvm, struct bio_vec *bvec)
				1157	{
				1158	struct drbd_conf mdev = (struct drbd_conf ) q->queuedata;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1159	unsigned int bio_size = bvm->bi_size;
Lars Ellenberg	23361cf	2011-03-31 16:36:43 +0200	[diff] [blame]	1160	int limit = DRBD_MAX_BIO_SIZE;
				1161	int backing_limit;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1162
Lars Ellenberg	23361cf	2011-03-31 16:36:43 +0200	[diff] [blame]	1163	if (bio_size && get_ldev(mdev)) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1164	struct request_queue * const b =
				1165	mdev->ldev->backing_bdev->bd_disk->queue;
Lars Ellenberg	a1c88d0	2010-05-14 19:16:41 +0200	[diff] [blame]	1166	if (b->merge_bvec_fn) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1167	backing_limit = b->merge_bvec_fn(b, bvm, bvec);
				1168	limit = min(limit, backing_limit);
				1169	}
				1170	put_ldev(mdev);
				1171	}
				1172	return limit;
				1173	}
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1174
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1175	struct drbd_request find_oldest_request(struct drbd_tconn tconn)
				1176	{
				1177	/* Walk the transfer log,
				1178	* and find the oldest not yet completed request */
				1179	struct drbd_request *r;
				1180	list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	1181	if (atomic_read(&r->completion_ref))
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1182	return r;
				1183	}
				1184	return NULL;
				1185	}
				1186
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1187	void request_timer_fn(unsigned long data)
				1188	{
				1189	struct drbd_conf mdev = (struct drbd_conf ) data;
Philipp Reisner	8b924f1	2011-03-01 11:08:28 +0100	[diff] [blame]	1190	struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1191	struct drbd_request req; / oldest request */
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1192	struct net_conf *nc;
Philipp Reisner	3b03ad5	2011-07-15 13:53:06 +0200	[diff] [blame]	1193	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1194	unsigned long now;
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1195
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1196	rcu_read_lock();
				1197	nc = rcu_dereference(tconn->net_conf);
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1198	if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
				1199	ent = nc->timeout * HZ/10 * nc->ko_count;
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	1200
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1201	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	1202	dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
				1203	put_ldev(mdev);
				1204	}
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1205	rcu_read_unlock();
				1206
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	1207	et = min_not_zero(dt, ent);
				1208
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1209	if (!et)
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1210	return; /* Recurring timer stopped */
				1211
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1212	now = jiffies;
				1213
Philipp Reisner	8b924f1	2011-03-01 11:08:28 +0100	[diff] [blame]	1214	spin_lock_irq(&tconn->req_lock);
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1215	req = find_oldest_request(tconn);
				1216	if (!req) {
Philipp Reisner	8b924f1	2011-03-01 11:08:28 +0100	[diff] [blame]	1217	spin_unlock_irq(&tconn->req_lock);
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1218	mod_timer(&mdev->request_timer, now + et);
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1219	return;
				1220	}
				1221
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1222	/* The request is considered timed out, if
				1223	* - we have some effective timeout from the configuration,
				1224	* with above state restrictions applied,
				1225	* - the oldest request is waiting for a response from the network
				1226	* resp. the local disk,
				1227	* - the oldest request is in fact older than the effective timeout,
				1228	* - the connection was established (resp. disk was attached)
				1229	* for longer than the timeout already.
				1230	* Note that for 32bit jiffies and very stable connections/disks,
				1231	* we may have a wrap around, which is catched by
				1232	* !time_in_range(now, last_..._jif, last_..._jif + timeout).
				1233	*
				1234	* Side effect: once per 32bit wrap-around interval, which means every
				1235	* ~198 days with 250 HZ, we have a window where the timeout would need
				1236	* to expire twice (worst case) to become effective. Good enough.
				1237	*/
				1238	if (ent && req->rq_state & RQ_NET_PENDING &&
				1239	time_after(now, req->start_time + ent) &&
				1240	!time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
				1241	dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
				1242	_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE \| CS_HARD, NULL);
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1243	}
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1244	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
				1245	time_after(now, req->start_time + dt) &&
				1246	!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
				1247	dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
Lars Ellenberg	0c84966	2012-07-30 09:07:28 +0200	[diff] [blame]	1248	__drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	1249	}
Lars Ellenberg	07be15b	2012-05-07 11:53:08 +0200	[diff] [blame]	1250	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
Philipp Reisner	8b924f1	2011-03-01 11:08:28 +0100	[diff] [blame]	1251	spin_unlock_irq(&tconn->req_lock);
Philipp Reisner	3b03ad5	2011-07-15 13:53:06 +0200	[diff] [blame]	1252	mod_timer(&mdev->request_timer, nt);
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1253	}