Blame - drivers/block/drbd/drbd_req.c - kernel/msm-4.19

blob: 3add7c5e97e0c96b62ba2601e6509a271d461511 [file] [log] [blame]

Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1	/*
				2	drbd_req.c
				3
				4	This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
				5
				6	Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
				7	Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
				8	Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
				9
				10	drbd is free software; you can redistribute it and/or modify
				11	it under the terms of the GNU General Public License as published by
				12	the Free Software Foundation; either version 2, or (at your option)
				13	any later version.
				14
				15	drbd is distributed in the hope that it will be useful,
				16	but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				18	GNU General Public License for more details.
				19
				20	You should have received a copy of the GNU General Public License
				21	along with drbd; see the file COPYING. If not, write to
				22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
				23
				24	*/
				25
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	26	#include <linux/module.h>
				27
				28	#include <linux/slab.h>
				29	#include <linux/drbd.h>
				30	#include "drbd_int.h"
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	31	#include "drbd_req.h"
				32
				33
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	34	static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size);
Philipp Reisner	57bcb6c	2011-12-03 11:18:56 +0100	[diff] [blame]	35
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	36	/* Update disk stats at start of I/O request */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	37	static void _drbd_start_io_acct(struct drbd_device device, struct drbd_request req)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	38	{
Gu Zheng	2448085	2014-11-24 11:05:25 +0800	[diff] [blame]	39	generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9,
				40	&device->vdisk->part0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	41	}
				42
				43	/* Update disk stats when completing request upwards */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	44	static void _drbd_end_io_acct(struct drbd_device device, struct drbd_request req)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	45	{
Gu Zheng	2448085	2014-11-24 11:05:25 +0800	[diff] [blame]	46	generic_end_io_acct(bio_data_dir(req->master_bio),
				47	&device->vdisk->part0, req->start_jif);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	48	}
				49
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	50	static struct drbd_request drbd_req_new(struct drbd_device device,
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	51	struct bio *bio_src)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	52	{
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	53	struct drbd_request *req;
				54
David Rientjes	23fe8f8	2015-03-24 16:22:32 -0700	[diff] [blame]	55	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	56	if (!req)
				57	return NULL;
David Rientjes	23fe8f8	2015-03-24 16:22:32 -0700	[diff] [blame]	58	memset(req, 0, sizeof(*req));
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	59
				60	drbd_req_make_private_bio(req, bio_src);
				61	req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	62	req->device = device;
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	63	req->master_bio = bio_src;
				64	req->epoch = 0;
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	65
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	66	drbd_clear_interval(&req->i);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	67	req->i.sector = bio_src->bi_iter.bi_sector;
				68	req->i.size = bio_src->bi_iter.bi_size;
Andreas Gruenbacher	5e47226	2011-01-27 14:42:51 +0100	[diff] [blame]	69	req->i.local = true;
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	70	req->i.waiting = false;
				71
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	72	INIT_LIST_HEAD(&req->tl_requests);
				73	INIT_LIST_HEAD(&req->w.list);
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	74	INIT_LIST_HEAD(&req->req_pending_master_completion);
				75	INIT_LIST_HEAD(&req->req_pending_local);
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	76
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	77	/* one reference to be put by __drbd_make_request */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	78	atomic_set(&req->completion_ref, 1);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	79	/* one kref as long as completion_ref > 0 */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	80	kref_init(&req->kref);
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	81	return req;
				82	}
				83
Lars Ellenberg	08d0dab	2014-03-20 11:19:22 +0100	[diff] [blame]	84	static void drbd_remove_request_interval(struct rb_root *root,
				85	struct drbd_request *req)
				86	{
				87	struct drbd_device *device = req->device;
				88	struct drbd_interval *i = &req->i;
				89
				90	drbd_remove_interval(root, i);
				91
				92	/* Wake up any processes waiting for this request to complete. */
				93	if (i->waiting)
				94	wake_up(&device->misc_wait);
				95	}
				96
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	97	void drbd_req_destroy(struct kref *kref)
Andreas Gruenbacher	9e204cd	2011-01-26 18:45:11 +0100	[diff] [blame]	98	{
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	99	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	100	struct drbd_device *device = req->device;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	101	const unsigned s = req->rq_state;
				102
				103	if ((req->master_bio && !(s & RQ_POSTPONED)) \|\|
				104	atomic_read(&req->completion_ref) \|\|
				105	(s & RQ_LOCAL_PENDING) \|\|
				106	((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	107	drbd_err(device, "drbd_req_destroy: Logic BUG rq_state = 0x%x, completion_ref = %d\n",
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	108	s, atomic_read(&req->completion_ref));
				109	return;
				110	}
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	111
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	112	/* If called from mod_rq_state (expected normal case) or
				113	* drbd_send_and_submit (the less likely normal path), this holds the
				114	* req_lock, and req->tl_requests will typicaly be on ->transfer_log,
				115	* though it may be still empty (never added to the transfer log).
				116	*
				117	* If called from do_retry(), we do NOT hold the req_lock, but we are
				118	* still allowed to unconditionally list_del(&req->tl_requests),
				119	* because it will be on a local on-stack list only. */
Lars Ellenberg	2312f0b3	2011-11-24 10:36:25 +0100	[diff] [blame]	120	list_del_init(&req->tl_requests);
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	121
Lars Ellenberg	08d0dab	2014-03-20 11:19:22 +0100	[diff] [blame]	122	/* finally remove the request from the conflict detection
				123	* respective block_id verification interval tree. */
				124	if (!drbd_interval_empty(&req->i)) {
				125	struct rb_root *root;
				126
				127	if (s & RQ_WRITE)
				128	root = &device->write_requests;
				129	else
				130	root = &device->read_requests;
				131	drbd_remove_request_interval(root, req);
				132	} else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
				133	drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
				134	s, (unsigned long long)req->i.sector, req->i.size);
				135
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	136	/* if it was a write, we may have to set the corresponding
				137	* bit(s) out-of-sync first. If it had a local part, we need to
				138	* release the reference to the activity log. */
Lars Ellenberg	b406777	2012-01-24 16:58:11 +0100	[diff] [blame]	139	if (s & RQ_WRITE) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	140	/* Set out-of-sync unless both OK flags are set
				141	* (local only or remote failed).
				142	* Other places where we set out-of-sync:
				143	* READ with local io-error */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	144
Lars Ellenberg	70f17b6	2012-09-03 14:08:35 +0200	[diff] [blame]	145	/* There is a special case:
				146	* we may notice late that IO was suspended,
				147	* and postpone, or schedule for retry, a write,
				148	* before it even was submitted or sent.
				149	* In that case we do not want to touch the bitmap at all.
				150	*/
				151	if ((s & (RQ_POSTPONED\|RQ_LOCAL_MASK\|RQ_NET_MASK)) != RQ_POSTPONED) {
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	152	if (!(s & RQ_NET_OK) \|\| !(s & RQ_LOCAL_OK))
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	153	drbd_set_out_of_sync(device, req->i.sector, req->i.size);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	154
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	155	if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	156	drbd_set_in_sync(device, req->i.sector, req->i.size);
Philipp Reisner	d764401	2012-08-28 14:39:44 +0200	[diff] [blame]	157	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	158
				159	/* one might be tempted to move the drbd_al_complete_io
Andreas Gruenbacher	fcefa62	2011-02-17 16:46:59 +0100	[diff] [blame]	160	* to the local io completion callback drbd_request_endio.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	161	* but, if this was a mirror write, we may only
				162	* drbd_al_complete_io after this is RQ_NET_DONE,
				163	* otherwise the extent could be dropped from the al
				164	* before it has actually been written on the peer.
				165	* if we crash before our peer knows about the request,
				166	* but after the extent has been dropped from the al,
				167	* we would forget to resync the corresponding extent.
				168	*/
Philipp Reisner	76590cd	2012-08-29 15:23:14 +0200	[diff] [blame]	169	if (s & RQ_IN_ACT_LOG) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	170	if (get_ldev_if_state(device, D_FAILED)) {
				171	drbd_al_complete_io(device, &req->i);
				172	put_ldev(device);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	173	} else if (__ratelimit(&drbd_ratelimit_state)) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	174	drbd_warn(device, "Should have called drbd_al_complete_io(, %llu, %u), "
Lars Ellenberg	181286a	2011-03-31 15:18:56 +0200	[diff] [blame]	175	"but my Disk seems to have failed :(\n",
				176	(unsigned long long) req->i.sector, req->i.size);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	177	}
				178	}
				179	}
				180
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	181	mempool_free(req, drbd_request_mempool);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	182	}
				183
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	184	static void wake_all_senders(struct drbd_connection *connection)
				185	{
				186	wake_up(&connection->sender_work.q_wait);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	187	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	188
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	189	/* must hold resource->req_lock */
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	190	void start_new_tl_epoch(struct drbd_connection *connection)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	191	{
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	192	/* no point closing an epoch, if it is empty, anyways. */
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	193	if (connection->current_tle_writes == 0)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	194	return;
				195
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	196	connection->current_tle_writes = 0;
				197	atomic_inc(&connection->current_tle_nr);
				198	wake_all_senders(connection);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	199	}
				200
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	201	void complete_master_bio(struct drbd_device *device,
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	202	struct bio_and_error *m)
				203	{
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	204	m->bio->bi_error = m->error;
				205	bio_endio(m->bio);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	206	dec_ap_bio(device);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	207	}
				208
Andreas Gruenbacher	5384064	2011-01-28 10:31:04 +0100	[diff] [blame]	209
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	210	/* Helper for __req_mod().
				211	* Set m->bio to the master bio, if it is fit to be completed,
				212	* or leave it alone (it is initialized to NULL in __req_mod),
				213	* if it has already been completed, or cannot be completed yet.
				214	* If m->bio is set, the error status to be returned is placed in m->error.
				215	*/
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	216	static
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	217	void drbd_req_complete(struct drbd_request req, struct bio_and_error m)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	218	{
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	219	const unsigned s = req->rq_state;
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	220	struct drbd_device *device = req->device;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	221	int rw;
				222	int error, ok;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	223
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	224	/* we must not complete the master bio, while it is
				225	* still being processed by _drbd_send_zc_bio (drbd_send_dblock)
				226	* not yet acknowledged by the peer
				227	* not yet completed by the local io subsystem
				228	* these flags may get cleared in any order by
				229	* the worker,
				230	* the receiver,
				231	* the bio_endio completion callbacks.
				232	*/
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	233	if ((s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) \|\|
				234	(s & RQ_NET_QUEUED) \|\| (s & RQ_NET_PENDING) \|\|
				235	(s & RQ_COMPLETION_SUSP)) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	236	drbd_err(device, "drbd_req_complete: Logic BUG rq_state = 0x%x\n", s);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	237	return;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	238	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	239
				240	if (!req->master_bio) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	241	drbd_err(device, "drbd_req_complete: Logic BUG, master_bio == NULL!\n");
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	242	return;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	243	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	244
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	245	rw = bio_rw(req->master_bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	246
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	247	/*
				248	* figure out whether to report success or failure.
				249	*
				250	* report success when at least one of the operations succeeded.
				251	* or, to put the other way,
				252	* only report failure, when both operations failed.
				253	*
				254	* what to do about the failures is handled elsewhere.
				255	* what we need to do here is just: complete the master_bio.
				256	*
				257	* local completion error, if any, has been stored as ERR_PTR
				258	* in private_bio within drbd_request_endio.
				259	*/
				260	ok = (s & RQ_LOCAL_OK) \|\| (s & RQ_NET_OK);
				261	error = PTR_ERR(req->private_bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	262
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	263	/* Before we can signal completion to the upper layers,
				264	* we may need to close the current transfer log epoch.
				265	* We are within the request lock, so we can simply compare
				266	* the request epoch number with the current transfer log
				267	* epoch number. If they match, increase the current_tle_nr,
				268	* and reset the transfer log epoch write_cnt.
				269	*/
				270	if (rw == WRITE &&
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	271	req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr))
				272	start_new_tl_epoch(first_peer_device(device)->connection);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	273
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	274	/* Update disk stats */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	275	_drbd_end_io_acct(device, req);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	276
				277	/* If READ failed,
				278	* have it be pushed back to the retry work queue,
				279	* so it will re-enter __drbd_make_request(),
				280	* and be re-assigned to a suitable local or remote path,
				281	* or failed if we do not have access to good data anymore.
				282	*
				283	* Unless it was failed early by __drbd_make_request(),
				284	* because no path was available, in which case
				285	* it was not even added to the transfer_log.
				286	*
				287	* READA may fail, and will not be retried.
				288	*
				289	* WRITE should have used all available paths already.
				290	*/
				291	if (!ok && rw == READ && !list_empty(&req->tl_requests))
				292	req->rq_state \|= RQ_POSTPONED;
				293
				294	if (!(req->rq_state & RQ_POSTPONED)) {
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	295	m->error = ok ? 0 : (error ?: -EIO);
				296	m->bio = req->master_bio;
				297	req->master_bio = NULL;
Lars Ellenberg	08d0dab	2014-03-20 11:19:22 +0100	[diff] [blame]	298	/* We leave it in the tree, to be able to verify later
				299	* write-acks in protocol != C during resync.
				300	* But we mark it as "complete", so it won't be counted as
				301	* conflict in a multi-primary setup. */
				302	req->i.completed = true;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	303	}
Lars Ellenberg	08d0dab	2014-03-20 11:19:22 +0100	[diff] [blame]	304
				305	if (req->i.waiting)
				306	wake_up(&device->misc_wait);
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	307
				308	/* Either we are about to complete to upper layers,
				309	* or we will restart this request.
				310	* In either case, the request object will be destroyed soon,
				311	* so better remove it from all lists. */
				312	list_del_init(&req->req_pending_master_completion);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	313	}
				314
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	315	/* still holds resource->req_lock */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	316	static int drbd_req_put_completion_ref(struct drbd_request req, struct bio_and_error m, int put)
Philipp Reisner	cfa0341	2010-06-23 17:18:51 +0200	[diff] [blame]	317	{
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	318	struct drbd_device *device = req->device;
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	319	D_ASSERT(device, m \|\| (req->rq_state & RQ_POSTPONED));
Philipp Reisner	cfa0341	2010-06-23 17:18:51 +0200	[diff] [blame]	320
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	321	if (!atomic_sub_and_test(put, &req->completion_ref))
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	322	return 0;
				323
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	324	drbd_req_complete(req, m);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	325
Lars Ellenberg	9a278a7	2012-07-24 10:12:36 +0200	[diff] [blame]	326	if (req->rq_state & RQ_POSTPONED) {
				327	/* don't destroy the req object just yet,
				328	* but queue it for retry */
				329	drbd_restart_request(req);
				330	return 0;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	331	}
				332
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	333	return 1;
				334	}
				335
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	336	static void set_if_null_req_next(struct drbd_peer_device peer_device, struct drbd_request req)
				337	{
				338	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				339	if (!connection)
				340	return;
				341	if (connection->req_next == NULL)
				342	connection->req_next = req;
				343	}
				344
				345	static void advance_conn_req_next(struct drbd_peer_device peer_device, struct drbd_request req)
				346	{
				347	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				348	if (!connection)
				349	return;
				350	if (connection->req_next != req)
				351	return;
				352	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
				353	const unsigned s = req->rq_state;
				354	if (s & RQ_NET_QUEUED)
				355	break;
				356	}
				357	if (&req->tl_requests == &connection->transfer_log)
				358	req = NULL;
				359	connection->req_next = req;
				360	}
				361
				362	static void set_if_null_req_ack_pending(struct drbd_peer_device peer_device, struct drbd_request req)
				363	{
				364	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				365	if (!connection)
				366	return;
				367	if (connection->req_ack_pending == NULL)
				368	connection->req_ack_pending = req;
				369	}
				370
				371	static void advance_conn_req_ack_pending(struct drbd_peer_device peer_device, struct drbd_request req)
				372	{
				373	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				374	if (!connection)
				375	return;
				376	if (connection->req_ack_pending != req)
				377	return;
				378	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
				379	const unsigned s = req->rq_state;
				380	if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
				381	break;
				382	}
				383	if (&req->tl_requests == &connection->transfer_log)
				384	req = NULL;
				385	connection->req_ack_pending = req;
				386	}
				387
				388	static void set_if_null_req_not_net_done(struct drbd_peer_device peer_device, struct drbd_request req)
				389	{
				390	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				391	if (!connection)
				392	return;
				393	if (connection->req_not_net_done == NULL)
				394	connection->req_not_net_done = req;
				395	}
				396
				397	static void advance_conn_req_not_net_done(struct drbd_peer_device peer_device, struct drbd_request req)
				398	{
				399	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
				400	if (!connection)
				401	return;
				402	if (connection->req_not_net_done != req)
				403	return;
				404	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
				405	const unsigned s = req->rq_state;
				406	if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
				407	break;
				408	}
				409	if (&req->tl_requests == &connection->transfer_log)
				410	req = NULL;
				411	connection->req_not_net_done = req;
				412	}
				413
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	414	/* I'd like this to be the only place that manipulates
				415	* req->completion_ref and req->kref. */
				416	static void mod_rq_state(struct drbd_request req, struct bio_and_error m,
				417	int clear, int set)
				418	{
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	419	struct drbd_device *device = req->device;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	420	struct drbd_peer_device *peer_device = first_peer_device(device);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	421	unsigned s = req->rq_state;
				422	int c_put = 0;
				423	int k_put = 0;
				424
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	425	if (drbd_suspended(device) && !((s \| clear) & RQ_COMPLETION_SUSP))
Philipp Reisner	5af2e8c	2012-08-14 11:28:52 +0200	[diff] [blame]	426	set \|= RQ_COMPLETION_SUSP;
				427
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	428	/* apply */
				429
				430	req->rq_state &= ~clear;
				431	req->rq_state \|= set;
				432
				433	/* no change? */
				434	if (req->rq_state == s)
				435	return;
				436
				437	/* intent: get references */
				438
				439	if (!(s & RQ_LOCAL_PENDING) && (set & RQ_LOCAL_PENDING))
				440	atomic_inc(&req->completion_ref);
				441
				442	if (!(s & RQ_NET_PENDING) && (set & RQ_NET_PENDING)) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	443	inc_ap_pending(device);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	444	atomic_inc(&req->completion_ref);
				445	}
				446
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	447	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	448	atomic_inc(&req->completion_ref);
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	449	set_if_null_req_next(peer_device, req);
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	450	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	451
				452	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
				453	kref_get(&req->kref); /* wait for the DONE */
				454
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	455	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
				456	/* potentially already completed in the asender thread */
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	457	if (!(s & RQ_NET_DONE)) {
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	458	atomic_add(req->i.size >> 9, &device->ap_in_flight);
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	459	set_if_null_req_not_net_done(peer_device, req);
				460	}
				461	if (s & RQ_NET_PENDING)
				462	set_if_null_req_ack_pending(peer_device, req);
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	463	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	464
Philipp Reisner	5af2e8c	2012-08-14 11:28:52 +0200	[diff] [blame]	465	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
				466	atomic_inc(&req->completion_ref);
				467
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	468	/* progress: put references */
				469
				470	if ((s & RQ_COMPLETION_SUSP) && (clear & RQ_COMPLETION_SUSP))
				471	++c_put;
				472
				473	if (!(s & RQ_LOCAL_ABORTED) && (set & RQ_LOCAL_ABORTED)) {
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	474	D_ASSERT(device, req->rq_state & RQ_LOCAL_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	475	/* local completion may still come in later,
				476	* we need to keep the req object around. */
				477	kref_get(&req->kref);
				478	++c_put;
				479	}
				480
				481	if ((s & RQ_LOCAL_PENDING) && (clear & RQ_LOCAL_PENDING)) {
				482	if (req->rq_state & RQ_LOCAL_ABORTED)
				483	++k_put;
				484	else
				485	++c_put;
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	486	list_del_init(&req->req_pending_local);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	487	}
				488
				489	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	490	dec_ap_pending(device);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	491	++c_put;
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	492	req->acked_jif = jiffies;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	493	advance_conn_req_ack_pending(peer_device, req);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	494	}
				495
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	496	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	497	++c_put;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	498	advance_conn_req_next(peer_device, req);
				499	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	500
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	501	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
				502	if (s & RQ_NET_SENT)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	503	atomic_sub(req->i.size >> 9, &device->ap_in_flight);
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	504	if (s & RQ_EXP_BARR_ACK)
				505	++k_put;
				506	req->net_done_jif = jiffies;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	507
				508	/* in ahead/behind mode, or just in case,
				509	* before we finally destroy this request,
				510	* the caching pointers must not reference it anymore */
				511	advance_conn_req_next(peer_device, req);
				512	advance_conn_req_ack_pending(peer_device, req);
				513	advance_conn_req_not_net_done(peer_device, req);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	514	}
				515
				516	/* potentially complete and destroy */
				517
				518	if (k_put \|\| c_put) {
				519	/* Completion does it's own kref_put. If we are going to
				520	* kref_sub below, we need req to be still around then. */
				521	int at_least = k_put + !!c_put;
				522	int refcount = atomic_read(&req->kref.refcount);
				523	if (refcount < at_least)
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	524	drbd_err(device,
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	525	"mod_rq_state: Logic BUG: %x -> %x: refcount = %d, should be >= %d\n",
				526	s, req->rq_state, refcount, at_least);
				527	}
				528
				529	/* If we made progress, retry conflicting peer requests, if any. */
				530	if (req->i.waiting)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	531	wake_up(&device->misc_wait);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	532
				533	if (c_put)
				534	k_put += drbd_req_put_completion_ref(req, m, c_put);
				535	if (k_put)
				536	kref_sub(&req->kref, k_put, drbd_req_destroy);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	537	}
				538
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	539	static void drbd_report_io_error(struct drbd_device device, struct drbd_request req)
Lars Ellenberg	ccae786	2012-09-26 14:07:04 +0200	[diff] [blame]	540	{
				541	char b[BDEVNAME_SIZE];
				542
Lars Ellenberg	42839f6	2012-09-27 15:19:38 +0200	[diff] [blame]	543	if (!__ratelimit(&drbd_ratelimit_state))
Lars Ellenberg	ccae786	2012-09-26 14:07:04 +0200	[diff] [blame]	544	return;
				545
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	546	drbd_warn(device, "local %s IO error sector %llu+%u on %s\n",
Lars Ellenberg	ccae786	2012-09-26 14:07:04 +0200	[diff] [blame]	547	(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
Lars Ellenberg	42839f6	2012-09-27 15:19:38 +0200	[diff] [blame]	548	(unsigned long long)req->i.sector,
				549	req->i.size >> 9,
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	550	bdevname(device->ldev->backing_bdev, b));
Lars Ellenberg	ccae786	2012-09-26 14:07:04 +0200	[diff] [blame]	551	}
				552
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	553	/* Helper for HANDED_OVER_TO_NETWORK.
				554	* Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
				555	* Is it also still "PENDING"?
				556	* --> If so, clear PENDING and set NET_OK below.
				557	* If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
				558	* (and we must not set RQ_NET_OK) */
				559	static inline bool is_pending_write_protocol_A(struct drbd_request *req)
				560	{
				561	return (req->rq_state &
				562	(RQ_WRITE\|RQ_NET_PENDING\|RQ_EXP_WRITE_ACK\|RQ_EXP_RECEIVE_ACK))
				563	== (RQ_WRITE\|RQ_NET_PENDING);
				564	}
				565
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	566	/* obviously this could be coded as many single functions
				567	* instead of one huge switch,
				568	* or by putting the code directly in the respective locations
				569	* (as it has been before).
				570	*
				571	* but having it this way
				572	* enforces that it is all in this one place, where it is easier to audit,
				573	* it makes it obvious that whatever "event" "happens" to a request should
				574	* happen "atomically" within the req_lock,
				575	* and it enforces that we have to think in a very structured manner
				576	* about the "events" that may happen to a request during its life time ...
				577	*/
Philipp Reisner	2a80699	2010-06-09 14:07:43 +0200	[diff] [blame]	578	int __req_mod(struct drbd_request *req, enum drbd_req_event what,
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	579	struct bio_and_error *m)
				580	{
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	581	struct drbd_device *const device = req->device;
				582	struct drbd_peer_device *const peer_device = first_peer_device(device);
				583	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	584	struct net_conf *nc;
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	585	int p, rv = 0;
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	586
				587	if (m)
				588	m->bio = NULL;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	589
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	590	switch (what) {
				591	default:
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	592	drbd_err(device, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	593	break;
				594
				595	/* does not happen...
				596	* initialization done in drbd_req_new
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	597	case CREATED:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	598	break;
				599	*/
				600
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	601	case TO_BE_SENT: /* via network */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	602	/* reached via __drbd_make_request
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	603	* and from w_read_retry_remote */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	604	D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	605	rcu_read_lock();
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	606	nc = rcu_dereference(connection->net_conf);
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	607	p = nc->wire_protocol;
				608	rcu_read_unlock();
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	609	req->rq_state \|=
				610	p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK :
				611	p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	612	mod_rq_state(req, m, 0, RQ_NET_PENDING);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	613	break;
				614
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	615	case TO_BE_SUBMITTED: /* locally */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	616	/* reached via __drbd_make_request */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	617	D_ASSERT(device, !(req->rq_state & RQ_LOCAL_MASK));
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	618	mod_rq_state(req, m, 0, RQ_LOCAL_PENDING);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	619	break;
				620
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	621	case COMPLETED_OK:
Philipp Reisner	2b4dd36	2011-03-14 13:01:50 +0100	[diff] [blame]	622	if (req->rq_state & RQ_WRITE)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	623	device->writ_cnt += req->i.size >> 9;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	624	else
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	625	device->read_cnt += req->i.size >> 9;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	626
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	627	mod_rq_state(req, m, RQ_LOCAL_PENDING,
				628	RQ_LOCAL_COMPLETED\|RQ_LOCAL_OK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	629	break;
				630
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	631	case ABORT_DISK_IO:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	632	mod_rq_state(req, m, 0, RQ_LOCAL_ABORTED);
Philipp Reisner	2b4dd36	2011-03-14 13:01:50 +0100	[diff] [blame]	633	break;
				634
Lars Ellenberg	edc9f5e	2012-09-27 15:18:21 +0200	[diff] [blame]	635	case WRITE_COMPLETED_WITH_ERROR:
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	636	drbd_report_io_error(device, req);
				637	__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
Lars Ellenberg	edc9f5e	2012-09-27 15:18:21 +0200	[diff] [blame]	638	mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	639	break;
				640
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	641	case READ_COMPLETED_WITH_ERROR:
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	642	drbd_set_out_of_sync(device, req->i.sector, req->i.size);
				643	drbd_report_io_error(device, req);
				644	__drbd_chk_io_error(device, DRBD_READ_ERROR);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	645	/* fall through. */
				646	case READ_AHEAD_COMPLETED_WITH_ERROR:
				647	/* it is legal to fail READA, no __drbd_chk_io_error in that case. */
				648	mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
Lars Ellenberg	4439c40	2012-03-26 17:29:30 +0200	[diff] [blame]	649	break;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	650
Lars Ellenberg	2f632ae	2014-04-28 18:43:24 +0200	[diff] [blame]	651	case DISCARD_COMPLETED_NOTSUPP:
				652	case DISCARD_COMPLETED_WITH_ERROR:
				653	/* I'd rather not detach from local disk just because it
				654	* failed a REQ_DISCARD. */
				655	mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
				656	break;
				657
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	658	case QUEUE_FOR_NET_READ:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	659	/* READ or READA, and
				660	* no local disk,
				661	* or target area marked as invalid,
				662	* or just got an io-error. */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	663	/* from __drbd_make_request
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	664	* or from bio_endio during read io-error recovery */
				665
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	666	/* So we can verify the handle in the answer packet.
				667	* Corresponding drbd_remove_request_interval is in
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	668	* drbd_req_complete() */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	669	D_ASSERT(device, drbd_interval_empty(&req->i));
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	670	drbd_insert_interval(&device->read_requests, &req->i);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	671
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	672	set_bit(UNPLUG_REMOTE, &device->flags);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	673
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	674	D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
				675	D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	676	mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Lars Ellenberg	4439c40	2012-03-26 17:29:30 +0200	[diff] [blame]	677	req->w.cb = w_send_read_req;
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	678	drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	679	&req->w);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	680	break;
				681
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	682	case QUEUE_FOR_NET_WRITE:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	683	/* assert something? */
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	684	/* from __drbd_make_request only */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	685
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	686	/* Corresponding drbd_remove_request_interval is in
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	687	* drbd_req_complete() */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	688	D_ASSERT(device, drbd_interval_empty(&req->i));
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	689	drbd_insert_interval(&device->write_requests, &req->i);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	690
				691	/* NOTE
				692	* In case the req ended up on the transfer log before being
				693	* queued on the worker, it could lead to this request being
				694	* missed during cleanup after connection loss.
				695	* So we have to do both operations here,
				696	* within the same lock that protects the transfer log.
				697	*
				698	* _req_add_to_epoch(req); this has to be after the
				699	* _maybe_start_new_epoch(req); which happened in
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	700	* __drbd_make_request, because we now may set the bit
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	701	* again ourselves to close the current epoch.
				702	*
				703	* Add req to the (now) current epoch (barrier). */
				704
Lars Ellenberg	83c3883	2009-11-03 02:22:06 +0100	[diff] [blame]	705	/* otherwise we may lose an unplug, which may cause some remote
				706	* io-scheduler timeout to expire, increasing maximum latency,
				707	* hurting performance. */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	708	set_bit(UNPLUG_REMOTE, &device->flags);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	709
				710	/* queue work item to send data */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	711	D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	712	mod_rq_state(req, m, 0, RQ_NET_QUEUED\|RQ_EXP_BARR_ACK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	713	req->w.cb = w_send_dblock;
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	714	drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	715	&req->w);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	716
				717	/* close the epoch, in case it outgrew the limit */
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	718	rcu_read_lock();
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	719	nc = rcu_dereference(connection->net_conf);
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	720	p = nc->max_epoch_size;
				721	rcu_read_unlock();
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	722	if (connection->current_tle_writes >= p)
				723	start_new_tl_epoch(connection);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	724
				725	break;
				726
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	727	case QUEUE_FOR_SEND_OOS:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	728	mod_rq_state(req, m, 0, RQ_NET_QUEUED);
Andreas Gruenbacher	8f7bed7	2010-12-19 23:53:14 +0100	[diff] [blame]	729	req->w.cb = w_send_out_of_sync;
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	730	drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	731	&req->w);
Philipp Reisner	73a01a1	2010-10-27 14:33:00 +0200	[diff] [blame]	732	break;
				733
Lars Ellenberg	ea9d672	2012-03-26 16:46:39 +0200	[diff] [blame]	734	case READ_RETRY_REMOTE_CANCELED:
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	735	case SEND_CANCELED:
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	736	case SEND_FAILED:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	737	/* real cleanup will be done from tl_clear. just update flags
				738	* so it is no longer marked as on the worker queue */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	739	mod_rq_state(req, m, RQ_NET_QUEUED, 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	740	break;
				741
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	742	case HANDED_OVER_TO_NETWORK:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	743	/* assert something? */
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	744	if (is_pending_write_protocol_A(req))
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	745	/* this is what is dangerous about protocol A:
				746	* pretend it was successfully written on the peer. */
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	747	mod_rq_state(req, m, RQ_NET_QUEUED\|RQ_NET_PENDING,
				748	RQ_NET_SENT\|RQ_NET_OK);
				749	else
				750	mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
				751	/* It is still not yet RQ_NET_DONE until the
				752	* corresponding epoch barrier got acked as well,
				753	* so we know what to dirty on connection loss. */
Lars Ellenberg	6d49e10	2012-01-11 09:43:25 +0100	[diff] [blame]	754	break;
				755
Lars Ellenberg	27a434f	2012-03-26 16:44:59 +0200	[diff] [blame]	756	case OOS_HANDED_TO_NETWORK:
Lars Ellenberg	6d49e10	2012-01-11 09:43:25 +0100	[diff] [blame]	757	/* Was not set PENDING, no longer QUEUED, so is now DONE
				758	* as far as this connection is concerned. */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	759	mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	760	break;
				761
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	762	case CONNECTION_LOST_WHILE_PENDING:
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	763	/* transfer log cleanup after connection loss */
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	764	mod_rq_state(req, m,
				765	RQ_NET_OK\|RQ_NET_PENDING\|RQ_COMPLETION_SUSP,
				766	RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	767	break;
				768
Lars Ellenberg	d4dabbe	2012-08-01 12:33:51 +0200	[diff] [blame]	769	case CONFLICT_RESOLVED:
				770	/* for superseded conflicting writes of multiple primaries,
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	771	* there is no need to keep anything in the tl, potential
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	772	* node crashes are covered by the activity log.
				773	*
				774	* If this request had been marked as RQ_POSTPONED before,
Lars Ellenberg	d4dabbe	2012-08-01 12:33:51 +0200	[diff] [blame]	775	* it will actually not be completed, but "restarted",
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	776	* resubmitted from the retry worker context. */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	777	D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
				778	D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	779	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_DONE\|RQ_NET_OK);
				780	break;
				781
Lars Ellenberg	0afd569	2012-03-26 16:51:11 +0200	[diff] [blame]	782	case WRITE_ACKED_BY_PEER_AND_SIS:
Lars Ellenberg	934722a	2012-07-24 09:31:18 +0200	[diff] [blame]	783	req->rq_state \|= RQ_NET_SIS;
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	784	case WRITE_ACKED_BY_PEER:
Lars Ellenberg	08d0dab	2014-03-20 11:19:22 +0100	[diff] [blame]	785	/* Normal operation protocol C: successfully written on peer.
				786	* During resync, even in protocol != C,
				787	* we requested an explicit write ack anyways.
				788	* Which means we cannot even assert anything here.
Lars Ellenberg	d64957c	2012-03-23 14:42:19 +0100	[diff] [blame]	789	* Nothing more to do here.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	790	* We want to keep the tl in place for all protocols, to cater
Lars Ellenberg	d64957c	2012-03-23 14:42:19 +0100	[diff] [blame]	791	* for volatile write-back caches on lower level devices. */
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	792	goto ack_common;
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	793	case RECV_ACKED_BY_PEER:
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	794	D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	795	/* protocol B; pretends to be successfully written on peer.
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	796	* see also notes above in HANDED_OVER_TO_NETWORK about
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	797	* protocol != C */
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	798	ack_common:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	799	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	800	break;
				801
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	802	case POSTPONE_WRITE:
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	803	D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
Philipp Reisner	303d144	2011-04-13 16:24:47 -0700	[diff] [blame]	804	/* If this node has already detected the write conflict, the
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	805	* worker will be waiting on misc_wait. Wake it up once this
				806	* request has completed locally.
				807	*/
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	808	D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Andreas Gruenbacher	7be8da0	2011-02-22 02:15:32 +0100	[diff] [blame]	809	req->rq_state \|= RQ_POSTPONED;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	810	if (req->i.waiting)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	811	wake_up(&device->misc_wait);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	812	/* Do not clear RQ_NET_PENDING. This request will make further
				813	* progress via restart_conflicting_writes() or
				814	* fail_postponed_requests(). Hopefully. */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	815	break;
				816
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	817	case NEG_ACKED:
Lars Ellenberg	46e21bb	2012-08-07 06:47:14 +0200	[diff] [blame]	818	mod_rq_state(req, m, RQ_NET_OK\|RQ_NET_PENDING, 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	819	break;
				820
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	821	case FAIL_FROZEN_DISK_IO:
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	822	if (!(req->rq_state & RQ_LOCAL_COMPLETED))
				823	break;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	824	mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	825	break;
				826
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	827	case RESTART_FROZEN_DISK_IO:
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	828	if (!(req->rq_state & RQ_LOCAL_COMPLETED))
				829	break;
				830
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	831	mod_rq_state(req, m,
				832	RQ_COMPLETION_SUSP\|RQ_LOCAL_COMPLETED,
				833	RQ_LOCAL_PENDING);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	834
				835	rv = MR_READ;
				836	if (bio_data_dir(req->master_bio) == WRITE)
				837	rv = MR_WRITE;
				838
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	839	get_ldev(device); /* always succeeds in this call path */
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	840	req->w.cb = w_restart_disk_io;
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	841	drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	842	&req->w);
Philipp Reisner	265be2d	2010-05-31 10:14:17 +0200	[diff] [blame]	843	break;
				844
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	845	case RESEND:
Philipp Reisner	509fc01	2012-07-31 11:22:58 +0200	[diff] [blame]	846	/* Simply complete (local only) READs. */
				847	if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
Philipp Reisner	8a0bab2	2012-08-07 13:28:00 +0200	[diff] [blame]	848	mod_rq_state(req, m, RQ_COMPLETION_SUSP, 0);
Philipp Reisner	509fc01	2012-07-31 11:22:58 +0200	[diff] [blame]	849	break;
				850	}
				851
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	852	/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	853	before the connection loss (B&C only); only P_BARRIER_ACK
				854	(or the local completion?) was missing when we suspended.
Lars Ellenberg	6870ca6	2012-03-26 17:02:45 +0200	[diff] [blame]	855	Throwing them out of the TL here by pretending we got a BARRIER_ACK.
				856	During connection handshake, we ensure that the peer was not rebooted. */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	857	if (!(req->rq_state & RQ_NET_OK)) {
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	858	/* FIXME could this possibly be a req->dw.cb == w_send_out_of_sync?
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	859	* in that case we must not set RQ_NET_PENDING. */
				860
				861	mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED\|RQ_NET_PENDING);
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	862	if (req->w.cb) {
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	863	/* w.cb expected to be w_send_dblock, or w_send_read_req */
				864	drbd_queue_work(&connection->sender_work,
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	865	&req->w);
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	866	rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	867	} /* else: FIXME can this happen? */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	868	break;
				869	}
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	870	/* else, fall through to BARRIER_ACKED */
Philipp Reisner	11b58e7	2010-05-12 17:08:26 +0200	[diff] [blame]	871
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	872	case BARRIER_ACKED:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	873	/* barrier ack for READ requests does not make sense */
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	874	if (!(req->rq_state & RQ_WRITE))
				875	break;
				876
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	877	if (req->rq_state & RQ_NET_PENDING) {
Andreas Gruenbacher	a209b4a	2011-08-17 12:43:25 +0200	[diff] [blame]	878	/* barrier came in before all requests were acked.
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	879	* this is bad, because if the connection is lost now,
				880	* we won't be able to clean them up... */
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	881	drbd_err(device, "FIXME (BARRIER_ACKED but pending)\n");
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	882	}
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	883	/* Allowed to complete requests, even while suspended.
				884	* As this is called for all requests within a matching epoch,
				885	* we need to filter, and only set RQ_NET_DONE for those that
				886	* have actually been on the wire. */
				887	mod_rq_state(req, m, RQ_COMPLETION_SUSP,
				888	(req->rq_state & RQ_NET_MASK) ? RQ_NET_DONE : 0);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	889	break;
				890
Andreas Gruenbacher	8554df1	2011-01-25 15:37:43 +0100	[diff] [blame]	891	case DATA_RECEIVED:
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	892	D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	893	mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK\|RQ_NET_DONE);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	894	break;
Lars Ellenberg	7074e4a	2013-03-27 14:08:41 +0100	[diff] [blame]	895
				896	case QUEUE_AS_DRBD_BARRIER:
Lars Ellenberg	44a4d55	2013-11-22 12:40:58 +0100	[diff] [blame]	897	start_new_tl_epoch(connection);
Lars Ellenberg	7074e4a	2013-03-27 14:08:41 +0100	[diff] [blame]	898	mod_rq_state(req, m, 0, RQ_NET_OK\|RQ_NET_DONE);
				899	break;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	900	};
Philipp Reisner	2a80699	2010-06-09 14:07:43 +0200	[diff] [blame]	901
				902	return rv;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	903	}
				904
				905	/* we may do a local read if:
				906	* - we are consistent (of course),
				907	* - or we are generally inconsistent,
				908	* BUT we are still/already IN SYNC for this area.
				909	* since size may be bigger than BM_BLOCK_SIZE,
				910	* we may need to check several bits.
				911	*/
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	912	static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector, int size)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	913	{
				914	unsigned long sbnr, ebnr;
				915	sector_t esector, nr_sectors;
				916
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	917	if (device->state.disk == D_UP_TO_DATE)
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	918	return true;
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	919	if (device->state.disk != D_INCONSISTENT)
Andreas Gruenbacher	0da34df	2010-12-19 20:48:29 +0100	[diff] [blame]	920	return false;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	921	esector = sector + (size >> 9) - 1;
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	922	nr_sectors = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	923	D_ASSERT(device, sector < nr_sectors);
				924	D_ASSERT(device, esector < nr_sectors);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	925
				926	sbnr = BM_SECT_TO_BIT(sector);
				927	ebnr = BM_SECT_TO_BIT(esector);
				928
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	929	return drbd_bm_count_bits(device, sbnr, ebnr) == 0;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	930	}
				931
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	932	static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t sector,
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	933	enum drbd_read_balancing rbm)
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	934	{
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	935	struct backing_dev_info *bdi;
Philipp Reisner	d60de03	2011-11-17 10:12:31 +0100	[diff] [blame]	936	int stripe_shift;
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	937
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	938	switch (rbm) {
				939	case RB_CONGESTED_REMOTE:
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	940	bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	941	return bdi_read_congested(bdi);
				942	case RB_LEAST_PENDING:
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	943	return atomic_read(&device->local_cnt) >
				944	atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
Philipp Reisner	d60de03	2011-11-17 10:12:31 +0100	[diff] [blame]	945	case RB_32K_STRIPING: /* stripe_shift = 15 */
				946	case RB_64K_STRIPING:
				947	case RB_128K_STRIPING:
				948	case RB_256K_STRIPING:
				949	case RB_512K_STRIPING:
				950	case RB_1M_STRIPING: /* stripe_shift = 20 */
				951	stripe_shift = (rbm - RB_32K_STRIPING + 15);
				952	return (sector >> (stripe_shift - 9)) & 1;
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	953	case RB_ROUND_ROBIN:
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	954	return test_and_change_bit(READ_BALANCE_RR, &device->flags);
Philipp Reisner	380207d	2011-11-11 12:31:20 +0100	[diff] [blame]	955	case RB_PREFER_REMOTE:
				956	return true;
				957	case RB_PREFER_LOCAL:
				958	default:
				959	return false;
				960	}
				961	}
				962
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	963	/*
				964	* complete_conflicting_writes - wait for any conflicting write requests
				965	*
				966	* The write_requests tree contains all active write requests which we
				967	* currently know about. Wait for any requests to complete which conflict with
				968	* the new one.
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	969	*
				970	* Only way out: remove the conflicting intervals from the tree.
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	971	*/
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	972	static void complete_conflicting_writes(struct drbd_request *req)
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	973	{
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	974	DEFINE_WAIT(wait);
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	975	struct drbd_device *device = req->device;
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	976	struct drbd_interval *i;
				977	sector_t sector = req->i.sector;
				978	int size = req->i.size;
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	979
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	980	i = drbd_find_overlap(&device->write_requests, sector, size);
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	981	if (!i)
				982	return;
				983
				984	for (;;) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	985	prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
				986	i = drbd_find_overlap(&device->write_requests, sector, size);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	987	if (!i)
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	988	break;
				989	/* Indicate to wake up device->misc_wait on progress. */
				990	i->waiting = true;
Andreas Gruenbacher	0500813	2011-07-07 14:19:42 +0200	[diff] [blame]	991	spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	992	schedule();
Andreas Gruenbacher	0500813	2011-07-07 14:19:42 +0200	[diff] [blame]	993	spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	994	}
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	995	finish_wait(&device->misc_wait, &wait);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	996	}
				997
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	998	/* called within req_lock and rcu_read_lock() */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	999	static void maybe_pull_ahead(struct drbd_device *device)
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1000	{
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1001	struct drbd_connection *connection = first_peer_device(device)->connection;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1002	struct net_conf *nc;
				1003	bool congested = false;
				1004	enum drbd_on_congestion on_congestion;
				1005
Lars Ellenberg	607f25e	2013-03-27 14:08:45 +0100	[diff] [blame]	1006	rcu_read_lock();
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	1007	nc = rcu_dereference(connection->net_conf);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1008	on_congestion = nc ? nc->on_congestion : OC_BLOCK;
Lars Ellenberg	607f25e	2013-03-27 14:08:45 +0100	[diff] [blame]	1009	rcu_read_unlock();
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1010	if (on_congestion == OC_BLOCK \|\|
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	1011	connection->agreed_pro_version < 96)
Lars Ellenberg	3b9ef85	2012-07-30 09:06:26 +0200	[diff] [blame]	1012	return;
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1013
Lars Ellenberg	0c066bc	2014-03-20 14:04:35 +0100	[diff] [blame]	1014	if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
				1015	return; /* nothing to do ... */
				1016
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1017	/* If I don't even have good local storage, we can not reasonably try
				1018	* to pull ahead of the peer. We also need the local reference to make
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1019	* sure device->act_log is there.
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1020	*/
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1021	if (!get_ldev_if_state(device, D_UP_TO_DATE))
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1022	return;
				1023
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1024	if (nc->cong_fill &&
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1025	atomic_read(&device->ap_in_flight) >= nc->cong_fill) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	1026	drbd_info(device, "Congestion-fill threshold reached\n");
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1027	congested = true;
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1028	}
				1029
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1030	if (device->act_log->used >= nc->cong_extents) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	1031	drbd_info(device, "Congestion-extents threshold reached\n");
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1032	congested = true;
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1033	}
				1034
				1035	if (congested) {
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	1036	/* start a new epoch for non-mirrored writes */
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1037	start_new_tl_epoch(first_peer_device(device)->connection);
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1038
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1039	if (on_congestion == OC_PULL_AHEAD)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1040	_drbd_set_state(_NS(device, conn, C_AHEAD), 0, NULL);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1041	else /nc->on_congestion == OC_DISCONNECT /
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1042	_drbd_set_state(_NS(device, conn, C_DISCONNECTING), 0, NULL);
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1043	}
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1044	put_ldev(device);
Lars Ellenberg	0d5934e	2012-06-08 14:17:36 +0200	[diff] [blame]	1045	}
				1046
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1047	/* If this returns false, and req->private_bio is still set,
				1048	* this should be submitted locally.
				1049	*
				1050	* If it returns false, but req->private_bio is not set,
				1051	* we do not have access to good data :(
				1052	*
				1053	* Otherwise, this destroys req->private_bio, if any,
				1054	* and returns true.
				1055	*/
				1056	static bool do_remote_read(struct drbd_request *req)
				1057	{
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	1058	struct drbd_device *device = req->device;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1059	enum drbd_read_balancing rbm;
				1060
				1061	if (req->private_bio) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1062	if (!drbd_may_do_local_read(device,
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1063	req->i.sector, req->i.size)) {
				1064	bio_put(req->private_bio);
				1065	req->private_bio = NULL;
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1066	put_ldev(device);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1067	}
				1068	}
				1069
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1070	if (device->state.pdsk != D_UP_TO_DATE)
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1071	return false;
				1072
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1073	if (req->private_bio == NULL)
				1074	return true;
				1075
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1076	/* TODO: improve read balancing decisions, take into account drbd
				1077	* protocol, pending requests etc. */
				1078
				1079	rcu_read_lock();
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1080	rbm = rcu_dereference(device->ldev->disk_conf)->read_balancing;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1081	rcu_read_unlock();
				1082
				1083	if (rbm == RB_PREFER_LOCAL && req->private_bio)
				1084	return false; /* submit locally */
				1085
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1086	if (remote_due_to_read_balancing(device, req->i.sector, rbm)) {
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1087	if (req->private_bio) {
				1088	bio_put(req->private_bio);
				1089	req->private_bio = NULL;
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1090	put_ldev(device);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1091	}
				1092	return true;
				1093	}
				1094
				1095	return false;
				1096	}
				1097
Andreas Gruenbacher	2e9ffde	2014-08-08 17:48:00 +0200	[diff] [blame]	1098	bool drbd_should_do_remote(union drbd_dev_state s)
				1099	{
				1100	return s.pdsk == D_UP_TO_DATE \|\|
				1101	(s.pdsk >= D_INCONSISTENT &&
				1102	s.conn >= C_WF_BITMAP_T &&
				1103	s.conn < C_AHEAD);
				1104	/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
				1105	That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
				1106	states. */
				1107	}
				1108
				1109	static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
				1110	{
				1111	return s.conn == C_AHEAD \|\| s.conn == C_WF_BITMAP_S;
				1112	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
				1113	since we enter state C_AHEAD only if proto >= 96 */
				1114	}
				1115
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1116	/* returns number of connections (== 1, for drbd 8.4)
				1117	* expected to actually write this data,
				1118	* which does NOT include those that we are L_AHEAD for. */
				1119	static int drbd_process_write_request(struct drbd_request *req)
				1120	{
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	1121	struct drbd_device *device = req->device;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1122	int remote, send_oos;
				1123
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1124	remote = drbd_should_do_remote(device->state);
				1125	send_oos = drbd_should_send_out_of_sync(device->state);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1126
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1127	/* Need to replicate writes. Unless it is an empty flush,
				1128	* which is better mapped to a DRBD P_BARRIER packet,
				1129	* also for drbd wire protocol compatibility reasons.
				1130	* If this was a flush, just start a new epoch.
				1131	* Unless the current epoch was empty anyways, or we are not currently
				1132	* replicating, in which case there is no point. */
				1133	if (unlikely(req->i.size == 0)) {
				1134	/* The only size==0 bios we expect are empty flushes. */
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	1135	D_ASSERT(device, req->master_bio->bi_rw & REQ_FLUSH);
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	1136	if (remote)
Lars Ellenberg	7074e4a	2013-03-27 14:08:41 +0100	[diff] [blame]	1137	_req_mod(req, QUEUE_AS_DRBD_BARRIER);
				1138	return remote;
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1139	}
				1140
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1141	if (!remote && !send_oos)
				1142	return 0;
				1143
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	1144	D_ASSERT(device, !(remote && send_oos));
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1145
				1146	if (remote) {
				1147	_req_mod(req, TO_BE_SENT);
				1148	_req_mod(req, QUEUE_FOR_NET_WRITE);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1149	} else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1150	_req_mod(req, QUEUE_FOR_SEND_OOS);
				1151
				1152	return remote;
				1153	}
				1154
				1155	static void
				1156	drbd_submit_req_private_bio(struct drbd_request *req)
				1157	{
Andreas Gruenbacher	84b8c06	2011-07-28 15:27:51 +0200	[diff] [blame]	1158	struct drbd_device *device = req->device;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1159	struct bio *bio = req->private_bio;
				1160	const int rw = bio_rw(bio);
				1161
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1162	bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1163
				1164	/* State may have changed since we grabbed our reference on the
				1165	* ->ldev member. Double check, and short-circuit to endio.
				1166	* In case the last activity log transaction failed to get on
				1167	* stable storage, and this is a WRITE, we may not even submit
				1168	* this bio. */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1169	if (get_ldev(device)) {
				1170	if (drbd_insert_fault(device,
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1171	rw == WRITE ? DRBD_FAULT_DT_WR
				1172	: rw == READ ? DRBD_FAULT_DT_RD
				1173	: DRBD_FAULT_DT_RA))
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1174	bio_io_error(bio);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1175	else
				1176	generic_make_request(bio);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1177	put_ldev(device);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1178	} else
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1179	bio_io_error(bio);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1180	}
				1181
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1182	static void drbd_queue_write(struct drbd_device device, struct drbd_request req)
Lars Ellenberg	779b3fe	2013-03-19 18:16:54 +0100	[diff] [blame]	1183	{
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1184	spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1185	list_add_tail(&req->tl_requests, &device->submit.writes);
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1186	list_add_tail(&req->req_pending_master_completion,
				1187	&device->pending_master_completion[1 /* WRITE */]);
				1188	spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1189	queue_work(device->submit.wq, &device->submit.worker);
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1190	/* do_submit() may sleep internally on al_wait, too */
				1191	wake_up(&device->al_wait);
Lars Ellenberg	779b3fe	2013-03-19 18:16:54 +0100	[diff] [blame]	1192	}
				1193
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1194	/* returns the new drbd_request pointer, if the caller is expected to
				1195	* drbd_send_and_submit() it (to save latency), or NULL if we queued the
				1196	* request on the submitter thread.
				1197	* Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
				1198	*/
Rashika Kheria	01cd263	2013-12-19 15:12:27 +0530	[diff] [blame]	1199	static struct drbd_request *
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1200	drbd_request_prepare(struct drbd_device device, struct bio bio, unsigned long start_jif)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1201	{
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1202	const int rw = bio_data_dir(bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1203	struct drbd_request *req;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1204
				1205	/* allocate outside of all locks; */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1206	req = drbd_req_new(device, bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1207	if (!req) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1208	dec_ap_bio(device);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1209	/* only pass the error to the upper layers.
				1210	* if user cannot handle io errors, that's not our business. */
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	1211	drbd_err(device, "could not kmalloc() req\n");
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1212	bio->bi_error = -ENOMEM;
				1213	bio_endio(bio);
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1214	return ERR_PTR(-ENOMEM);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1215	}
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1216	req->start_jif = start_jif;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1217
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1218	if (!get_ldev(device)) {
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1219	bio_put(req->private_bio);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1220	req->private_bio = NULL;
				1221	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1222
Lars Ellenberg	7e8c288	2013-03-19 18:16:57 +0100	[diff] [blame]	1223	/* Update disk stats */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1224	_drbd_start_io_acct(device, req);
Lars Ellenberg	7e8c288	2013-03-19 18:16:57 +0100	[diff] [blame]	1225
Lars Ellenberg	519b6d3	2012-08-03 02:19:09 +0200	[diff] [blame]	1226	if (rw == WRITE && req->private_bio && req->i.size
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1227	&& !test_bit(AL_SUSPENDED, &device->flags)) {
				1228	if (!drbd_al_begin_io_fastpath(device, &req->i)) {
Lars Ellenberg	ad3fee7	2013-12-20 11:22:13 +0100	[diff] [blame]	1229	atomic_inc(&device->ap_actlog_cnt);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1230	drbd_queue_write(device, req);
Lars Ellenberg	779b3fe	2013-03-19 18:16:54 +0100	[diff] [blame]	1231	return NULL;
				1232	}
Philipp Reisner	0778286	2010-08-31 12:00:50 +0200	[diff] [blame]	1233	req->rq_state \|= RQ_IN_ACT_LOG;
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1234	req->in_actlog_jif = jiffies;
Philipp Reisner	0778286	2010-08-31 12:00:50 +0200	[diff] [blame]	1235	}
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1236
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1237	return req;
				1238	}
				1239
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1240	static void drbd_send_and_submit(struct drbd_device device, struct drbd_request req)
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1241	{
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1242	struct drbd_resource *resource = device->resource;
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1243	const int rw = bio_rw(req->master_bio);
				1244	struct bio_and_error m = { NULL, };
				1245	bool no_remote = false;
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1246	bool submit_private_bio = false;
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1247
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1248	spin_lock_irq(&resource->req_lock);
Andreas Gruenbacher	6024fec	2011-01-28 15:53:51 +0100	[diff] [blame]	1249	if (rw == WRITE) {
Lars Ellenberg	648e46b	2012-03-26 20:12:24 +0200	[diff] [blame]	1250	/* This may temporarily give up the req_lock,
				1251	* but will re-aquire it before it returns here.
				1252	* Needs to be before the check on drbd_suspended() */
				1253	complete_conflicting_writes(req);
Lars Ellenberg	607f25e	2013-03-27 14:08:45 +0100	[diff] [blame]	1254	/* no more giving up req_lock from now on! */
				1255
				1256	/* check for congestion, and potentially stop sending
				1257	* full data updates, but start sending "dirty bits" only. */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1258	maybe_pull_ahead(device);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1259	}
				1260
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1261
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1262	if (drbd_suspended(device)) {
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1263	/* push back and retry: */
				1264	req->rq_state \|= RQ_POSTPONED;
				1265	if (req->private_bio) {
				1266	bio_put(req->private_bio);
				1267	req->private_bio = NULL;
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1268	put_ldev(device);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1269	}
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1270	goto out;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1271	}
				1272
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1273	/* We fail READ/READA early, if we can not serve it.
				1274	* We must do this before req is registered on any lists.
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1275	* Otherwise, drbd_req_complete() will queue failed READ for retry. */
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1276	if (rw != WRITE) {
				1277	if (!do_remote_read(req) && !req->private_bio)
				1278	goto nodata;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1279	}
				1280
Lars Ellenberg	b6dd1a8	2011-11-28 15:04:49 +0100	[diff] [blame]	1281	/* which transfer log epoch does this belong to? */
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1282	req->epoch = atomic_read(&first_peer_device(device)->connection->current_tle_nr);
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	1283
Lars Ellenberg	227f052	2012-07-31 09:31:11 +0200	[diff] [blame]	1284	/* no point in adding empty flushes to the transfer log,
				1285	* they are mapped to drbd barriers already. */
Lars Ellenberg	99b4d8f	2012-08-07 06:42:09 +0200	[diff] [blame]	1286	if (likely(req->i.size!=0)) {
				1287	if (rw == WRITE)
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1288	first_peer_device(device)->connection->current_tle_writes++;
Philipp Reisner	288f422	2010-05-27 15:07:43 +0200	[diff] [blame]	1289
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1290	list_add_tail(&req->tl_requests, &first_peer_device(device)->connection->transfer_log);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1291	}
Philipp Reisner	6753171	2010-10-27 12:21:30 +0200	[diff] [blame]	1292
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1293	if (rw == WRITE) {
				1294	if (!drbd_process_write_request(req))
				1295	no_remote = true;
				1296	} else {
				1297	/* We either have a private_bio, or we can read from remote.
				1298	* Otherwise we had done the goto nodata above. */
				1299	if (req->private_bio == NULL) {
				1300	_req_mod(req, TO_BE_SENT);
				1301	_req_mod(req, QUEUE_FOR_NET_READ);
Lars Ellenberg	6719fb0	2010-10-18 23:04:07 +0200	[diff] [blame]	1302	} else
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1303	no_remote = true;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1304	}
				1305
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1306	/* If it took the fast path in drbd_request_prepare, add it here.
				1307	* The slow path has added it already. */
				1308	if (list_empty(&req->req_pending_master_completion))
				1309	list_add_tail(&req->req_pending_master_completion,
				1310	&device->pending_master_completion[rw == WRITE]);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1311	if (req->private_bio) {
				1312	/* needs to be marked within the same spinlock */
Lars Ellenberg	05cbbb3	2015-01-16 17:41:55 +0100	[diff] [blame]	1313	req->pre_submit_jif = jiffies;
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1314	list_add_tail(&req->req_pending_local,
				1315	&device->pending_completion[rw == WRITE]);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1316	_req_mod(req, TO_BE_SUBMITTED);
				1317	/* but we need to give up the spinlock to submit */
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1318	submit_private_bio = true;
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1319	} else if (no_remote) {
				1320	nodata:
				1321	if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	1322	drbd_err(device, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
Lars Ellenberg	42839f6	2012-09-27 15:19:38 +0200	[diff] [blame]	1323	(unsigned long long)req->i.sector, req->i.size >> 9);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1324	/* A write may have been queued for send_oos, however.
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1325	* So we can not simply free it, we must go through drbd_req_put_completion_ref() */
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1326	}
				1327
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1328	out:
Lars Ellenberg	a0d856d	2012-01-24 17:19:42 +0100	[diff] [blame]	1329	if (drbd_req_put_completion_ref(req, &m, 1))
				1330	kref_put(&req->kref, drbd_req_destroy);
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1331	spin_unlock_irq(&resource->req_lock);
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1332
Lars Ellenberg	35b5ed5	2013-12-04 12:07:09 +0100	[diff] [blame]	1333	/* Even though above is a kref_put(), this is safe.
				1334	* As long as we still need to submit our private bio,
				1335	* we hold a completion ref, and the request cannot disappear.
				1336	* If however this request did not even have a private bio to submit
				1337	* (e.g. remote read), req may already be invalid now.
				1338	* That's why we cannot check on req->private_bio. */
				1339	if (submit_private_bio)
				1340	drbd_submit_req_private_bio(req);
Lars Ellenberg	5da9c83	2012-03-29 17:04:14 +0200	[diff] [blame]	1341	if (m.bio)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1342	complete_master_bio(device, &m);
Lars Ellenberg	6d9febe	2013-03-19 18:16:50 +0100	[diff] [blame]	1343	}
				1344
/*
 * __drbd_make_request  -  prepare a request for @bio and, if possible,
 * send and submit it right away
 *
 * Nothing more to do here if preparation failed (the bio was completed
 * with an error already) or if the request was queued for the submit
 * work item (NULL return).
 */
void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
{
	struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);

	if (IS_ERR_OR_NULL(req))
		return;
	drbd_send_and_submit(device, req);
}
				1352
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1353	static void submit_fast_path(struct drbd_device device, struct list_head incoming)
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1354	{
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1355	struct drbd_request req, tmp;
				1356	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
				1357	const int rw = bio_data_dir(req->master_bio);
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1358
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1359	if (rw == WRITE /* rw != WRITE should not even end up here! */
				1360	&& req->private_bio && req->i.size
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1361	&& !test_bit(AL_SUSPENDED, &device->flags)) {
				1362	if (!drbd_al_begin_io_fastpath(device, &req->i))
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1363	continue;
				1364
				1365	req->rq_state \|= RQ_IN_ACT_LOG;
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1366	req->in_actlog_jif = jiffies;
Lars Ellenberg	ad3fee7	2013-12-20 11:22:13 +0100	[diff] [blame]	1367	atomic_dec(&device->ap_actlog_cnt);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1368	}
				1369
				1370	list_del_init(&req->tl_requests);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1371	drbd_send_and_submit(device, req);
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1372	}
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1373	}
				1374
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1375	static bool prepare_al_transaction_nonblock(struct drbd_device *device,
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1376	struct list_head *incoming,
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1377	struct list_head *pending,
				1378	struct list_head *later)
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1379	{
				1380	struct drbd_request req, tmp;
				1381	int wake = 0;
				1382	int err;
				1383
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1384	spin_lock_irq(&device->al_lock);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1385	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1386	err = drbd_al_begin_io_nonblock(device, &req->i);
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1387	if (err == -ENOBUFS)
				1388	break;
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1389	if (err == -EBUSY)
				1390	wake = 1;
				1391	if (err)
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1392	list_move_tail(&req->tl_requests, later);
				1393	else
				1394	list_move_tail(&req->tl_requests, pending);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1395	}
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1396	spin_unlock_irq(&device->al_lock);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1397	if (wake)
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1398	wake_up(&device->al_wait);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1399	return !list_empty(pending);
				1400	}
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1401
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1402	void send_and_submit_pending(struct drbd_device device, struct list_head pending)
				1403	{
				1404	struct drbd_request req, tmp;
				1405
				1406	list_for_each_entry_safe(req, tmp, pending, tl_requests) {
				1407	req->rq_state \|= RQ_IN_ACT_LOG;
				1408	req->in_actlog_jif = jiffies;
				1409	atomic_dec(&device->ap_actlog_cnt);
				1410	list_del_init(&req->tl_requests);
				1411	drbd_send_and_submit(device, req);
				1412	}
				1413	}
				1414
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1415	void do_submit(struct work_struct *ws)
				1416	{
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1417	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1418	LIST_HEAD(incoming); /* from drbd_make_request() */
				1419	LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */
				1420	LIST_HEAD(busy); /* blocked by resync requests */
				1421
				1422	/* grab new incoming requests */
				1423	spin_lock_irq(&device->resource->req_lock);
				1424	list_splice_tail_init(&device->submit.writes, &incoming);
				1425	spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1426
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1427	for (;;) {
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1428	DEFINE_WAIT(wait);
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1429
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1430	/* move used-to-be-busy back to front of incoming */
				1431	list_splice_init(&busy, &incoming);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1432	submit_fast_path(device, &incoming);
Lars Ellenberg	08a1dda	2013-03-19 18:16:56 +0100	[diff] [blame]	1433	if (list_empty(&incoming))
				1434	break;
				1435
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1436	for (;;) {
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1437	prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
				1438
				1439	list_splice_init(&busy, &incoming);
				1440	prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
				1441	if (!list_empty(&pending))
				1442	break;
				1443
				1444	schedule();
				1445
				1446	/* If all currently "hot" activity log extents are kept busy by
				1447	* incoming requests, we still must not totally starve new
				1448	* requests to "cold" extents.
				1449	* Something left on &incoming means there had not been
				1450	* enough update slots available, and the activity log
				1451	* has been marked as "starving".
				1452	*
				1453	* Try again now, without looking for new requests,
				1454	* effectively blocking all new requests until we made
				1455	* at least _some_ progress with what we currently have.
				1456	*/
				1457	if (!list_empty(&incoming))
				1458	continue;
				1459
				1460	/* Nothing moved to pending, but nothing left
				1461	* on incoming: all moved to busy!
				1462	* Grab new and iterate. */
				1463	spin_lock_irq(&device->resource->req_lock);
				1464	list_splice_tail_init(&device->submit.writes, &incoming);
				1465	spin_unlock_irq(&device->resource->req_lock);
				1466	}
				1467	finish_wait(&device->al_wait, &wait);
				1468
				1469	/* If the transaction was full, before all incoming requests
				1470	* had been processed, skip ahead to commit, and iterate
				1471	* without splicing in more incoming requests from upper layers.
				1472	*
				1473	* Else, if all incoming have been processed,
				1474	* they have become either "pending" (to be submitted after
				1475	* next transaction commit) or "busy" (blocked by resync).
				1476	*
				1477	* Maybe more was queued, while we prepared the transaction?
				1478	* Try to stuff those into this transaction as well.
				1479	* Be strictly non-blocking here,
				1480	* we already have something to commit.
				1481	*
				1482	* Commit if we don't make any more progres.
				1483	*/
				1484
				1485	while (list_empty(&incoming)) {
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1486	LIST_HEAD(more_pending);
				1487	LIST_HEAD(more_incoming);
				1488	bool made_progress;
				1489
				1490	/* It is ok to look outside the lock,
				1491	* it's only an optimization anyways */
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1492	if (list_empty(&device->submit.writes))
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1493	break;
				1494
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1495	spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1496	list_splice_tail_init(&device->submit.writes, &more_incoming);
Lars Ellenberg	844a6ae	2013-11-22 12:52:03 +0100	[diff] [blame]	1497	spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1498
				1499	if (list_empty(&more_incoming))
				1500	break;
				1501
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1502	made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1503
				1504	list_splice_tail_init(&more_pending, &pending);
				1505	list_splice_tail_init(&more_incoming, &incoming);
Lars Ellenberg	45ad07b	2013-03-19 18:16:58 +0100	[diff] [blame]	1506	if (!made_progress)
				1507	break;
				1508	}
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1509
Lars Ellenberg	4dd726f	2014-02-11 11:15:36 +0100	[diff] [blame]	1510	drbd_al_begin_io_commit(device);
Lars Ellenberg	f5b90b6	2014-05-07 22:41:28 +0200	[diff] [blame]	1511	send_and_submit_pending(device, &pending);
Lars Ellenberg	113fef9	2013-03-22 18:14:40 -0600	[diff] [blame]	1512	}
				1513	}
				1514
Jens Axboe	dece163	2015-11-05 10:41:16 -0700	[diff] [blame]	1515	blk_qc_t drbd_make_request(struct request_queue q, struct bio bio)
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1516	{
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1517	struct drbd_device device = (struct drbd_device ) q->queuedata;
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1518	unsigned long start_jif;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1519
Kent Overstreet	54efd50	2015-04-23 22:37:18 -0700	[diff] [blame]	1520	blk_queue_split(q, &bio, q->bio_split);
				1521
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1522	start_jif = jiffies;
Philipp Reisner	aeda1cd6	2010-11-09 17:45:06 +0100	[diff] [blame]	1523
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1524	/*
				1525	* what we "blindly" assume:
				1526	*/
Andreas Gruenbacher	0b0ba1e	2011-06-27 16:23:33 +0200	[diff] [blame]	1527	D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1528
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1529	inc_ap_bio(device);
Lars Ellenberg	e5f891b	2013-11-22 12:32:01 +0100	[diff] [blame]	1530	__drbd_make_request(device, bio, start_jif);
Jens Axboe	dece163	2015-11-05 10:41:16 -0700	[diff] [blame]	1531	return BLK_QC_T_NONE;
Philipp Reisner	b411b36	2009-09-25 16:07:19 -0700	[diff] [blame]	1532	}
				1533
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1534	static bool net_timeout_reached(struct drbd_request *net_req,
				1535	struct drbd_connection *connection,
				1536	unsigned long now, unsigned long ent,
				1537	unsigned int ko_count, unsigned int timeout)
				1538	{
				1539	struct drbd_device *device = net_req->device;
				1540
				1541	if (!time_after(now, net_req->pre_send_jif + ent))
				1542	return false;
				1543
				1544	if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
				1545	return false;
				1546
				1547	if (net_req->rq_state & RQ_NET_PENDING) {
				1548	drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
				1549	jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
				1550	return true;
				1551	}
				1552
				1553	/* We received an ACK already (or are using protocol A),
				1554	* but are waiting for the epoch closing barrier ack.
				1555	* Check if we sent the barrier already. We should not blame the peer
				1556	* for being unresponsive, if we did not even ask it yet. */
				1557	if (net_req->epoch == connection->send.current_epoch_nr) {
				1558	drbd_warn(device,
				1559	"We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
				1560	jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
				1561	return false;
				1562	}
				1563
				1564	/* Worst case: we may have been blocked for whatever reason, then
				1565	* suddenly are able to send a lot of requests (and epoch separating
				1566	* barriers) in quick succession.
				1567	* The timestamp of the net_req may be much too old and not correspond
				1568	* to the sending time of the relevant unack'ed barrier packet, so
				1569	* would trigger a spurious timeout. The latest barrier packet may
				1570	* have a too recent timestamp to trigger the timeout, potentially miss
				1571	* a timeout. Right now we don't have a place to conveniently store
				1572	* these timestamps.
				1573	* But in this particular situation, the application requests are still
				1574	* completed to upper layers, DRBD should still "feel" responsive.
				1575	* No need yet to kill this connection, it may still recover.
				1576	* If not, eventually we will have queued enough into the network for
				1577	* us to block. From that point of view, the timestamp of the last sent
				1578	* barrier packet is relevant enough.
				1579	*/
				1580	if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
				1581	drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
				1582	connection->send.last_sent_barrier_jif, now,
				1583	jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
				1584	return true;
				1585	}
				1586	return false;
				1587	}
				1588
				1589	/* A request is considered timed out, if
				1590	* - we have some effective timeout from the configuration,
				1591	* with some state restrictions applied,
				1592	* - the oldest request is waiting for a response from the network
				1593	* resp. the local disk,
				1594	* - the oldest request is in fact older than the effective timeout,
				1595	* - the connection was established (resp. disk was attached)
				1596	* for longer than the timeout already.
				1597	* Note that for 32bit jiffies and very stable connections/disks,
				1598	* we may have a wrap around, which is caught by
				1599	* !time_in_range(now, last_..._jif, last_..._jif + timeout).
				1600	*
				1601	* Side effect: once per 32bit wrap-around interval, which means every
				1602	* ~198 days with 250 HZ, we have a window where the timeout would need
				1603	* to expire twice (worst case) to become effective. Good enough.
				1604	*/
				1605
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1606	void request_timer_fn(unsigned long data)
				1607	{
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1608	struct drbd_device device = (struct drbd_device ) data;
Andreas Gruenbacher	a6b32bc	2011-05-31 14:33:49 +0200	[diff] [blame]	1609	struct drbd_connection *connection = first_peer_device(device)->connection;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1610	struct drbd_request req_read, req_write, req_peer; / oldest request */
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1611	struct net_conf *nc;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1612	unsigned long oldest_submit_jif;
Philipp Reisner	dfa8bed	2011-06-29 14:06:08 +0200	[diff] [blame]	1613	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
Lars Ellenberg	ba280c0	2012-04-25 11:46:14 +0200	[diff] [blame]	1614	unsigned long now;
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1615	unsigned int ko_count = 0, timeout = 0;
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1616
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1617	rcu_read_lock();
Andreas Gruenbacher	bde89a9	2011-05-30 16:32:41 +0200	[diff] [blame]	1618	nc = rcu_dereference(connection->net_conf);
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1619	if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
				1620	ko_count = nc->ko_count;
				1621	timeout = nc->timeout;
				1622	}
Philipp Reisner	cdfda63	2011-07-05 15:38:59 +0200	[diff] [blame]	1623
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1624	if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
				1625	dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
				1626	put_ldev(device);
Philipp Reisner	dfa8bed	2011-06-29 14:06:08 +0200	[diff] [blame]	1627	}
Philipp Reisner	44ed167	2011-04-19 17:10:19 +0200	[diff] [blame]	1628	rcu_read_unlock();
				1629
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1630
				1631	ent = timeout * HZ/10 * ko_count;
Philipp Reisner	dfa8bed	2011-06-29 14:06:08 +0200	[diff] [blame]	1632	et = min_not_zero(dt, ent);
				1633
Lars Ellenberg	ba280c0	2012-04-25 11:46:14 +0200	[diff] [blame]	1634	if (!et)
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1635	return; /* Recurring timer stopped */
				1636
Lars Ellenberg	ba280c0	2012-04-25 11:46:14 +0200	[diff] [blame]	1637	now = jiffies;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1638	nt = now + et;
Lars Ellenberg	ba280c0	2012-04-25 11:46:14 +0200	[diff] [blame]	1639
Andreas Gruenbacher	0500813	2011-07-07 14:19:42 +0200	[diff] [blame]	1640	spin_lock_irq(&device->resource->req_lock);
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1641	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
				1642	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1643
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1644	/* maybe the oldest request waiting for the peer is in fact still
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1645	* blocking in tcp sendmsg. That's ok, though, that's handled via the
				1646	* socket send timeout, requesting a ping, and bumping ko-count in
				1647	* we_should_drop_the_connection().
				1648	*/
				1649
				1650	/* check the oldest request we did successfully sent,
				1651	* but which is still waiting for an ACK. */
				1652	req_peer = connection->req_ack_pending;
				1653
				1654	/* if we don't have such request (e.g. protocoll A)
				1655	* check the oldest requests which is still waiting on its epoch
				1656	* closing barrier ack. */
				1657	if (!req_peer)
				1658	req_peer = connection->req_not_net_done;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1659
				1660	/* evaluate the oldest peer request only in one timer! */
				1661	if (req_peer && req_peer->device != device)
				1662	req_peer = NULL;
				1663
				1664	/* do we have something to evaluate? */
				1665	if (req_peer == NULL && req_write == NULL && req_read == NULL)
				1666	goto out;
				1667
				1668	oldest_submit_jif =
				1669	(req_write && req_read)
				1670	? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
				1671	? req_write->pre_submit_jif : req_read->pre_submit_jif )
				1672	: req_write ? req_write->pre_submit_jif
				1673	: req_read ? req_read->pre_submit_jif : now;
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1674
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1675	if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
Philipp Reisner	9581f97	2014-11-10 17:21:14 +0100	[diff] [blame]	1676	_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE \| CS_HARD);
Lars Ellenberg	84d34f2	2015-02-19 13:54:11 +0100	[diff] [blame^]	1677
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1678	if (dt && oldest_submit_jif != now &&
				1679	time_after(now, oldest_submit_jif + dt) &&
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1680	!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
Andreas Gruenbacher	d018017	2011-07-03 17:53:52 +0200	[diff] [blame]	1681	drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1682	__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
Philipp Reisner	dfa8bed	2011-06-29 14:06:08 +0200	[diff] [blame]	1683	}
Lars Ellenberg	0853546	2014-04-28 18:43:31 +0200	[diff] [blame]	1684
				1685	/* Reschedule timer for the nearest not already expired timeout.
				1686	* Fallback to now + min(effective network timeout, disk timeout). */
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1687	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
				1688	? req_peer->pre_send_jif + ent : now + et;
				1689	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
				1690	? oldest_submit_jif + dt : now + et;
Lars Ellenberg	0853546	2014-04-28 18:43:31 +0200	[diff] [blame]	1691	nt = time_before(ent, dt) ? ent : dt;
Lars Ellenberg	7753a4c1	2013-11-22 13:00:12 +0100	[diff] [blame]	1692	out:
Andreas Gruenbacher	8d4ba3f	2014-09-11 14:29:08 +0200	[diff] [blame]	1693	spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher	b30ab79	2011-07-03 13:26:43 +0200	[diff] [blame]	1694	mod_timer(&device->request_timer, nt);
Philipp Reisner	7fde2be	2011-03-01 11:08:28 +0100	[diff] [blame]	1695	}