/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING. If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_endio (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = bio->bi_error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage. */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);

	if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
			is_write ? (is_discard ? "discard" : "write")
				 : "read", bio->bi_error,
			(unsigned long long)peer_req->i.sector);

	if (bio->bi_error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions. In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_error)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_error)) {
		switch (bio_op(bio)) {
		case REQ_OP_DISCARD:
			if (bio->bi_error == -EOPNOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_rw & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(bio->bi_error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

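/* Compute a digest over all pages of a peer request. The ahash request
 * lives on the stack; all but the last page are hashed in full, the last
 * page only up to peer_req->i.size. Used e.g. by w_e_send_csum() for
 * checksum based resync. */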
void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	AHASH_REQUEST_ON_STACK(req, tfm);
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	ahash_request_set_tfm(req, tfm);
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_table(&sg, 1);
	crypto_ahash_init(req);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		ahash_request_set_crypt(req, &sg, NULL, sg.length);
		crypto_ahash_update(req);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	ahash_request_set_crypt(req, &sg, digest, sg.length);
	crypto_ahash_finup(req);
	ahash_request_zero(req);
}

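/* Same as drbd_csum_ee(), but hashes the segments of a bio; for
 * REQ_OP_WRITE_SAME the payload is checksummed only once. */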
void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest)
{
	AHASH_REQUEST_ON_STACK(req, tfm);
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	ahash_request_set_tfm(req, tfm);
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_table(&sg, 1);
	crypto_ahash_init(req);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		ahash_request_set_crypt(req, &sg, NULL, sg.length);
		crypto_ahash_update(req);
		/* REQ_OP_WRITE_SAME has only one segment,
		 * checksum the payload only once. */
		if (bio_op(bio) == REQ_OP_WRITE_SAME)
			break;
	}
	ahash_request_set_crypt(req, NULL, digest, 0);
	crypto_ahash_final(req);
	ahash_request_zero(req);
}

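/* Worker callback: checksum a locally read peer request and send the
 * digest to the peer as P_CSUM_RS_REQUEST. The peer request (and its
 * pages) is freed before sending, see the deadlock comment below. */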
/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

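/* Submit a local read of (sector, size) for checksum based resync.
 * On success the read completion eventually queues w_e_send_csum();
 * -EAGAIN means the allocation or submission failed and the caller
 * should retry later. */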
static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help. If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

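/* The fifo_buffer helpers below implement the history ring used by the
 * resync rate controller (drbd_rs_controller()): one slot per planning
 * step; fb->total is maintained by the caller to track the sum of all
 * slots. */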
static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

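/* Feedback controller for the resync rate. Given the number of sectors
 * that came in since the last turn (sect_in), compute how many sectors
 * to request next so that roughly "want" sectors stay in flight,
 * spreading the correction over "steps" planning slots of the fifo
 * above and capping the result at c_max_rate. */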
static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in-flight */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in-flight */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

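/* How many resync requests to issue in this turn: either from the
 * controller above (if a plan is configured) or derived from the static
 * resync_rate, in units of BM_BLOCK_SIZE, limited to max-buffers/2
 * in flight. */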
static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}

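/* Issue up to drbd_rs_number_requests() resync requests: walk the
 * out-of-sync bitmap, merge adjacent bits into larger aligned requests,
 * and either read the data locally for checksum based resync or send
 * P_RS_DATA_REQUEST (P_RS_THIN_REQ when the request size equals the
 * configured rs_discard_granularity) to the peer. Backs off while the
 * send buffer is more than half full. */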
static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device,
				 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
				 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received. sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

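/* Online verify counterpart of make_resync_request(): issue up to
 * drbd_rs_number_requests() verify requests via drbd_send_ov_request(),
 * one BM_BLOCK_SIZE block each, starting at device->ov_position and
 * honoring an optional stop sector. */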
static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

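/* Send a ping to the peer and wait until the ping ack arrives or the
 * connection state drops below C_CONNECTED. */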
static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

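/* Called when a resync or online verify run ends: flush the resync LRU,
 * log the statistics, update the disk/peer-disk states and UUIDs, and
 * possibly invoke the "out-of-sync", "after-resync-target" or
 * "unfence-peer" helpers. */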
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200853int drbd_resync_finished(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700854{
Lars Ellenberg26a96112016-06-14 00:26:25 +0200855 struct drbd_connection *connection = first_peer_device(device)->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700856 unsigned long db, dt, dbdt;
857 unsigned long n_oos;
858 union drbd_state os, ns;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200859 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700860 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100861 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700862
863 /* Remove all elements from the resync LRU. Since future actions
864 * might set bits in the (main) bitmap, then the entries in the
865 * resync LRU would be wrong. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200866 if (drbd_rs_del_all(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700867 /* In case this is not possible now, most probably because
868 * there are P_RS_DATA_REPLY Packets lingering on the worker's
869 * queue (or even the read operations for those packets
870 * is not finished by now). Retry in 100ms. */
871
Philipp Reisner20ee6392011-01-18 15:28:59 +0100872 schedule_timeout_interruptible(HZ / 10);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200873 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
874 if (dw) {
875 dw->w.cb = w_resync_finished;
876 dw->device = device;
Lars Ellenberg26a96112016-06-14 00:26:25 +0200877 drbd_queue_work(&connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700878 return 1;
879 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200880 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700881 }
882
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200883 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700884 if (dt <= 0)
885 dt = 1;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200886
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200887 db = device->rs_total;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200888 /* adjust for verify start and stop sectors, respective reached position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200889 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
890 db -= device->ov_left;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200891
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892 dbdt = Bit2KB(db/dt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200893 device->rs_paused /= HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200895 if (!get_ldev(device))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700896 goto out;
897
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200898 ping_peer(device);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200899
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200900 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200901 os = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700902
Lars Ellenberg26525612010-11-05 09:56:33 +0100903 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
904
Philipp Reisnerb411b362009-09-25 16:07:19 -0700905 /* This protects us against multiple calls (that can happen in the presence
906 of application IO), and against connectivity loss just before we arrive here. */
907 if (os.conn <= C_CONNECTED)
908 goto out_unlock;
909
910 ns = os;
911 ns.conn = C_CONNECTED;
912
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200913 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200914 verify_done ? "Online verify" : "Resync",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200915 dt + device->rs_paused, device->rs_paused, dbdt);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700916
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200917 n_oos = drbd_bm_total_weight(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700918
919 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
920 if (n_oos) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200921 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922 n_oos, Bit2KB(1));
923 khelper_cmd = "out-of-sync";
924 }
925 } else {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200926 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927
928 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
929 khelper_cmd = "after-resync-target";
930
Lars Ellenbergaaaba342014-03-18 12:30:09 +0100931 if (device->use_csums && device->rs_total) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200932 const unsigned long s = device->rs_same_csum;
933 const unsigned long t = device->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934 const int ratio =
935 (t == 0) ? 0 :
936 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200937 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700938 "transferred %luK total %luK\n",
939 ratio,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200940 Bit2KB(device->rs_same_csum),
941 Bit2KB(device->rs_total - device->rs_same_csum),
942 Bit2KB(device->rs_total));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943 }
944 }
945
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200946 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200947 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700948
949 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
950 ns.disk = D_INCONSISTENT;
951 ns.pdsk = D_UP_TO_DATE;
952 } else {
953 ns.disk = D_UP_TO_DATE;
954 ns.pdsk = D_INCONSISTENT;
955 }
956 } else {
957 ns.disk = D_UP_TO_DATE;
958 ns.pdsk = D_UP_TO_DATE;
959
960 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200961 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700962 int i;
963 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200964 _drbd_uuid_set(device, i, device->p_uuid[i]);
965 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
966 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700967 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200968 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969 }
970 }
971
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100972 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
973 /* for verify runs, we don't update uuids here,
974 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200975 drbd_uuid_set_bm(device, 0UL);
976 drbd_print_uuids(device, "updated UUIDs");
977 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100978 /* Now the two UUID sets are equal, update what we
979 * know of the peer. */
980 int i;
981 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200982 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100983 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984 }
985 }
986
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200987 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700988out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200989 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg26a96112016-06-14 00:26:25 +0200990
991 /* If we have been sync source, and have an effective fencing-policy,
992 * once *all* volumes are back in sync, call "unfence". */
993 if (os.conn == C_SYNC_SOURCE) {
994 enum drbd_disk_state disk_state = D_MASK;
995 enum drbd_disk_state pdsk_state = D_MASK;
996 enum drbd_fencing_p fp = FP_DONT_CARE;
997
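		/* D_MASK is larger than any real disk state, so the min_t()
		 * over all volumes below yields the worst state seen. */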
998 rcu_read_lock();
999 fp = rcu_dereference(device->ldev->disk_conf)->fencing;
1000 if (fp != FP_DONT_CARE) {
1001 struct drbd_peer_device *peer_device;
1002 int vnr;
1003 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1004 struct drbd_device *device = peer_device->device;
1005 disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
1006 pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
1007 }
1008 }
1009 rcu_read_unlock();
1010 if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
1011 conn_khelper(connection, "unfence-peer");
1012 }
1013
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001014 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001016 device->rs_total = 0;
1017 device->rs_failed = 0;
1018 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001019
1020 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001021 if (verify_done && device->ov_left == 0)
1022 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001024 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +02001025
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001027 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028
1029 return 1;
1030}
1031
1032/* helper: keep peer requests whose pages are still in flight on the net_ee list, otherwise free them right away */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001033static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +02001035 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001036 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001037 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001038 atomic_add(i, &device->pp_in_use_by_net);
1039 atomic_sub(i, &device->pp_in_use);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001040 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001041 list_add_tail(&peer_req->w.list, &device->net_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001042 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001043 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001045 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001046}
1047
1048/**
1049 * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 * @w: work object.
1051 * @cancel: The connection will be closed anyway.
1052 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001053int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001054{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001055 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001056 struct drbd_peer_device *peer_device = peer_req->peer_device;
1057 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001058 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059
1060 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001061 drbd_free_peer_req(device, peer_req);
1062 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001063 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001064 }
1065
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001066 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001067 err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068 } else {
1069 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001070 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001071 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001073 err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074 }
1075
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001076 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001077
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001078 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001079
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001080 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001081 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001082 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001083}
1084
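/* Scan the peer request's page chain one long at a time;
 * returns true only if no non-zero word is found. */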
Philipp Reisner700ca8c2016-06-14 00:26:13 +02001085static bool all_zero(struct drbd_peer_request *peer_req)
1086{
1087 struct page *page = peer_req->pages;
1088 unsigned int len = peer_req->i.size;
1089
1090 page_chain_for_each(page) {
1091 unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
1092 unsigned int i, words = l / sizeof(long);
1093 unsigned long *d;
1094
1095 d = kmap_atomic(page);
1096 for (i = 0; i < words; i++) {
1097 if (d[i]) {
1098 kunmap_atomic(d);
1099 return false;
1100 }
1101 }
1102 kunmap_atomic(d);
1103 len -= l;
1104 }
1105
1106 return true;
1107}
1108
Philipp Reisnerb411b362009-09-25 16:07:19 -07001109/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001110 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001111 * @w: work object.
1112 * @cancel: The connection will be closed anyway.
1113 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001114int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001115{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001116 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001117 struct drbd_peer_device *peer_device = peer_req->peer_device;
1118 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001119 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001120
1121 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001122 drbd_free_peer_req(device, peer_req);
1123 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001124 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 }
1126
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001127 if (get_ldev_if_state(device, D_FAILED)) {
1128 drbd_rs_complete_io(device, peer_req->i.sector);
1129 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130 }
1131
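	/* In Ahead mode we do not serve resync requests; tell the peer
	 * this one is cancelled instead of sending data. */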
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001132 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001133 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001134 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001135 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1136 inc_rs_pending(device);
Philipp Reisner700ca8c2016-06-14 00:26:13 +02001137 if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
1138 err = drbd_send_rs_deallocated(peer_device, peer_req);
1139 else
1140 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001141 } else {
1142 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001143 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001145 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146 }
1147 } else {
1148 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001149 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001150 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001151
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001152 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153
1154 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001155 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156 }
1157
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001158 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001159
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001160 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001161
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001162 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001163 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001164 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165}
1166
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001167int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001168{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001169 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001170 struct drbd_peer_device *peer_device = peer_req->peer_device;
1171 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001172 struct digest_info *di;
1173 int digest_size;
1174 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001175 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001176
1177 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001178 drbd_free_peer_req(device, peer_req);
1179 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001180 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181 }
1182
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001183 if (get_ldev(device)) {
1184 drbd_rs_complete_io(device, peer_req->i.sector);
1185 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001186 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001188 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001189
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001190 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 /* quick hack to try to avoid a race against reconfiguration.
1192 * a real fix would be much more involved,
1193 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001194 if (peer_device->connection->csums_tfm) {
Herbert Xu9534d672016-01-24 21:19:21 +08001195 digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001196 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 digest = kmalloc(digest_size, GFP_NOIO);
1198 }
1199 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001200 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201 eq = !memcmp(digest, di->digest, digest_size);
1202 kfree(digest);
1203 }
1204
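		/* Equal checksums: the block already matches, mark it in sync
		 * and acknowledge without transferring data.  Otherwise send
		 * the full block as a regular resync reply. */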
1205 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001206 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001207 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001208 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001209 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001211 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001212 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1213 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001214 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001215 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 }
1217 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001218 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001220 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221 }
1222
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001223 dec_unacked(device);
1224 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001226 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001227 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001228 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229}
1230
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001231int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001233 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001234 struct drbd_peer_device *peer_device = peer_req->peer_device;
1235 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001236 sector_t sector = peer_req->i.sector;
1237 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 int digest_size;
1239 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001240 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241
1242 if (unlikely(cancel))
1243 goto out;
1244
Herbert Xu9534d672016-01-24 21:19:21 +08001245 digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001247 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001248 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001249 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001250 }
1251
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001252 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001253 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001254 else
1255 memset(digest, 0, digest_size);
1256
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001257 /* Free peer_req and pages before send.
1258 * In case we block on congestion, we could otherwise run into
1259 * some distributed deadlock, if the other side blocks on
1260 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001261 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001262 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001263 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001264 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001265 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001266 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001267 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001268 kfree(digest);
1269
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001271 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001272 drbd_free_peer_req(device, peer_req);
1273 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001274 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275}
1276
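/* Coalesce adjacent out-of-sync findings into a single range for
 * reporting, then mark the area out of sync in the bitmap. */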
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001277void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001279 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1280 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001281 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001282 device->ov_last_oos_start = sector;
1283 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001285 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001286}
1287
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001288int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001289{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001290 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001291 struct drbd_peer_device *peer_device = peer_req->peer_device;
1292 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001293 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001295 sector_t sector = peer_req->i.sector;
1296 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001297 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001298 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001299 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300
1301 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001302 drbd_free_peer_req(device, peer_req);
1303 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001304 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001305 }
1306
1307 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1308 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001309 if (get_ldev(device)) {
1310 drbd_rs_complete_io(device, peer_req->i.sector);
1311 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001312 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001314 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001315
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001316 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Herbert Xu9534d672016-01-24 21:19:21 +08001317 digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318 digest = kmalloc(digest_size, GFP_NOIO);
1319 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001320 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001322 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323 eq = !memcmp(digest, di->digest, digest_size);
1324 kfree(digest);
1325 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001326 }
1327
Lars Ellenberg9676c762011-02-22 14:02:31 +01001328 /* Free peer_req and pages before send.
1329 * In case we block on congestion, we could otherwise run into
1330 * some distributed deadlock, if the other side blocks on
1331 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001332 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001333 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001334 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001335 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001336 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001337 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001338
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001339 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001340 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001341
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001342 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001343
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001344 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001345
1346 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001347 if ((device->ov_left & 0x200) == 0x200)
1348 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001349
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001350 stop_sector_reached = verify_can_do_stop_sector(device) &&
1351 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001352
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001353 if (device->ov_left == 0 || stop_sector_reached) {
1354 ov_out_of_sync_print(device);
1355 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001356 }
1357
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001358 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359}
1360
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001361/* FIXME
1362 * We need to track the number of pending barrier acks,
1363 * and to be able to wait for them.
1364 * See also comment in drbd_adm_attach before drbd_suspend_io.
1365 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001366static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001367{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001368 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001369 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001370
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001371 sock = &connection->data;
1372 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001373 if (!p)
1374 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001375 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001376 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001377 connection->send.current_epoch_writes = 0;
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001378 connection->send.last_sent_barrier_jif = jiffies;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001379
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001380 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381}
1382
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001383int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001384{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001385 struct drbd_device *device =
1386 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001387 struct drbd_socket *sock;
1388
Philipp Reisnerb411b362009-09-25 16:07:19 -07001389 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001390 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001391 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001392 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001393 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001394 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395}
1396
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001397static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001398{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001399 if (!connection->send.seen_any_write_yet) {
1400 connection->send.seen_any_write_yet = true;
1401 connection->send.current_epoch_nr = epoch;
1402 connection->send.current_epoch_writes = 0;
Lars Ellenberg84d34f22015-02-19 13:54:11 +01001403 connection->send.last_sent_barrier_jif = jiffies;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001404 }
1405}
1406
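/* A P_BARRIER closes the previous write epoch.  Send one only if this
 * request belongs to a new epoch and the previous epoch actually
 * contained writes. */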
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001407static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001408{
1409 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001410 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001411 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001412 if (connection->send.current_epoch_nr != epoch) {
1413 if (connection->send.current_epoch_writes)
1414 drbd_send_barrier(connection);
1415 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001416 }
1417}
1418
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001419int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001420{
1421 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001422 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001423 struct drbd_peer_device *const peer_device = first_peer_device(device);
1424 struct drbd_connection *const connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001425 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001426
1427 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001428 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001429 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001430 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001431 req->pre_send_jif = jiffies;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001432
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001433 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001434 * If it was sent, it was the closing barrier for the last
1435 * replicated epoch, before we went into AHEAD mode.
1436 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001437 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001438
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001439 err = drbd_send_out_of_sync(peer_device, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001440 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001441
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001442 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001443}
1444
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445/**
1446 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447 * @w: work object.
1448 * @cancel: The connection will be closed anyway.
1449 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001450int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451{
1452 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001453 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001454 struct drbd_peer_device *const peer_device = first_peer_device(device);
1455 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001456 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457
1458 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001459 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001460 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001462 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001464 re_init_if_first_write(connection, req->epoch);
1465 maybe_send_barrier(connection, req->epoch);
1466 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001467
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001468 err = drbd_send_dblock(peer_device, req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001469 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001471 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472}
1473
1474/**
1475 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 * @w: work object.
1477 * @cancel: The connection will be closed anyway.
1478 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001479int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480{
1481 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001482 struct drbd_device *device = req->device;
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001483 struct drbd_peer_device *const peer_device = first_peer_device(device);
1484 struct drbd_connection *connection = peer_device->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001485 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486
1487 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001488 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001489 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 }
Lars Ellenberge5f891b2013-11-22 12:32:01 +01001491 req->pre_send_jif = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001492
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001493 /* Even read requests may close a write epoch,
1494 * if there has been one yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001495 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001496
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001497 err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001498 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001500 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001502 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503}
1504
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001505int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001506{
1507 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001508 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001509
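	/* The write was accounted in the activity log before; re-acquire
	 * the AL extent before resubmitting to the local backing device. */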
Philipp Reisner07782862010-08-31 12:00:50 +02001510 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01001511 drbd_al_begin_io(device, &req->i);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001512
1513 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001514 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001515 generic_make_request(req->private_bio);
1516
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001517 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001518}
1519
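/* Walk the resync-after dependency chain; return 0 (may not sync now)
 * if any device we depend on is itself resyncing or paused. */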
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001520static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001522 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001523 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001524
1525 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001526 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001527 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001528 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001529 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001530 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001531 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001533 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001534 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001535 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536 if ((odev->state.conn >= C_SYNC_SOURCE &&
1537 odev->state.conn <= C_PAUSED_SYNC_T) ||
1538 odev->state.aftr_isp || odev->state.peer_isp ||
1539 odev->state.user_isp)
1540 return 0;
1541 }
1542}
1543
1544/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001545 * drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001546 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 *
1548 * Called from process context only (admin command and after_state_ch).
1549 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001550static bool drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001551{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001552 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001553 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001554 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555
Philipp Reisner695d08f2011-04-11 22:53:32 -07001556 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001557 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1559 continue;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001560 if (!_drbd_may_sync_now(odev) &&
1561 _drbd_set_state(_NS(odev, aftr_isp, 1),
1562 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1563 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001565 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001567 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568}
1569
1570/**
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001571 * drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001572 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573 *
1574 * Called from process context only (admin command and worker).
1575 */
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001576static bool drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001578 bool changed = false;
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001579 struct drbd_device *odev;
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001580 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581
Philipp Reisner695d08f2011-04-11 22:53:32 -07001582 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001583 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1585 continue;
1586 if (odev->state.aftr_isp) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001587 if (_drbd_may_sync_now(odev) &&
1588 _drbd_set_state(_NS(odev, aftr_isp, 0),
1589 CS_HARD, NULL) != SS_NOTHING_TO_DO)
1590 changed = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 }
1592 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001593 rcu_read_unlock();
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001594 return changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595}
1596
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001597void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001599 lock_all_resources();
1600 drbd_resume_next(device);
1601 unlock_all_resources();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602}
1603
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001604void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001606 lock_all_resources();
1607 drbd_pause_after(device);
1608 unlock_all_resources();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609}
1610
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001611/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001612enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001614 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001615 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001616
1617 if (o_minor == -1)
1618 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001619 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001620 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621
1622 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001623 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001625 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001626 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001628 /* You are free to depend on diskless, non-existing,
1629 * or not yet/no longer existing minors.
1630 * We only reject dependency loops.
1631 * We cannot follow the dependency chain beyond a detached or
1632 * missing minor.
1633 */
1634 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1635 return NO_ERROR;
1636
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001637 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001638 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001639 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001641 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642 return NO_ERROR;
1643
1644 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001645 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646 }
1647}
1648
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001649/* caller must lock_all_resources() */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001650void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001651{
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001652 int changed;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001653
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001654 do {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001655 changed = drbd_pause_after(device);
1656 changed |= drbd_resume_next(device);
1657 } while (changed);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658}
1659
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001660void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001661{
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001662 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Philipp Reisner813472c2011-05-03 16:47:02 +02001663 struct fifo_buffer *plan;
1664
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001665 atomic_set(&device->rs_sect_in, 0);
1666 atomic_set(&device->rs_sect_ev, 0);
1667 device->rs_in_flight = 0;
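	/* Snapshot the backing device's read+write sector counters;
	 * used later as a baseline to detect application IO activity. */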
Lars Ellenbergff8bd882014-11-10 17:21:12 +01001668 device->rs_last_events =
1669 (int)part_stat_read(&disk->part0, sectors[0]) +
1670 (int)part_stat_read(&disk->part0, sectors[1]);
Philipp Reisner813472c2011-05-03 16:47:02 +02001671
1672 /* Updating the RCU protected object in place is necessary since
1673 this function gets called from atomic context.
1674 It is valid since all other updates also lead to a completely
1675 empty fifo */
1676 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001677 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001678 plan->total = 0;
1679 fifo_set(plan, 0);
1680 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001681}
1682
Philipp Reisner1f04af32011-02-07 11:33:59 +01001683void start_resync_timer_fn(unsigned long data)
1684{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001685 struct drbd_device *device = (struct drbd_device *) data;
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001686 drbd_device_post_work(device, RS_START);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001687}
1688
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001689static void do_start_resync(struct drbd_device *device)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001690{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001691 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001692 drbd_warn(device, "postponing start_resync ...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001693 device->start_resync_timer.expires = jiffies + HZ/10;
1694 add_timer(&device->start_resync_timer);
Lars Ellenbergac0acb92014-02-11 09:47:58 +01001695 return;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001696 }
1697
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001698 drbd_start_resync(device, C_SYNC_SOURCE);
1699 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001700}
1701
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001702static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1703{
1704 bool csums_after_crash_only;
1705 rcu_read_lock();
1706 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1707 rcu_read_unlock();
1708 return connection->agreed_pro_version >= 89 && /* supported? */
1709 connection->csums_tfm && /* configured? */
Fabian Frederick7e5fec32016-06-14 00:26:35 +02001710 (csums_after_crash_only == false /* use for each resync? */
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001711 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1712}
1713
Philipp Reisnerb411b362009-09-25 16:07:19 -07001714/**
1715 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001716 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001717 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1718 *
1719 * This function might bring you directly into one of the
1720 * C_PAUSED_SYNC_* states.
1721 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001723{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001724 struct drbd_peer_device *peer_device = first_peer_device(device);
1725 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726 union drbd_state ns;
1727 int r;
1728
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001730 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731 return;
1732 }
1733
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001734 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001735 if (side == C_SYNC_TARGET) {
1736 /* Since application IO was locked out during C_WF_BITMAP_T and
1737 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1738 we ask the handler whether we may make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001739 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001740 r = (r >> 8) & 0xff;
1741 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001742 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001743 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001744 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001745 return;
1746 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001747 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001748 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001749 r = (r >> 8) & 0xff;
1750 if (r > 0) {
1751 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001752 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001753 "ignoring. Old userland tools?", r);
1754 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001755 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001756 "dropping connection.\n", r);
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001757 conn_request_state(connection,
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001758 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001759 return;
1760 }
1761 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001762 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763 }
1764
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001765 if (current == connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001766 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001767 as that can take a long time */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001768 if (!mutex_trylock(device->state_mutex)) {
1769 set_bit(B_RS_H_DONE, &device->flags);
1770 device->start_resync_timer.expires = jiffies + HZ/5;
1771 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001772 return;
1773 }
1774 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001775 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001776 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001778 lock_all_resources();
1779 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnera7004712013-03-27 14:08:35 +01001780 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001781 if (device->state.conn < C_CONNECTED
1782 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001783 unlock_all_resources();
1784 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785 }
1786
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001787 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001789 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790
1791 ns.conn = side;
1792
1793 if (side == C_SYNC_TARGET)
1794 ns.disk = D_INCONSISTENT;
1795 else /* side == C_SYNC_SOURCE */
1796 ns.pdsk = D_INCONSISTENT;
1797
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001798 r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001799 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800
1801 if (ns.conn < C_CONNECTED)
1802 r = SS_UNKNOWN_ERROR;
1803
1804 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001805 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001806 unsigned long now = jiffies;
1807 int i;
1808
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001809 device->rs_failed = 0;
1810 device->rs_paused = 0;
1811 device->rs_same_csum = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001812 device->rs_last_sect_ev = 0;
1813 device->rs_total = tw;
1814 device->rs_start = now;
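		/* Reset all DRBD_SYNC_MARKS progress marks to the full bitmap
		 * weight at the current time, so speed estimates start from a
		 * clean baseline. */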
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001815 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001816 device->rs_mark_left[i] = tw;
1817 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001818 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001819 drbd_pause_after(device);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001820 /* Forget potentially stale cached per resync extent bit-counts.
1821 * Open coded drbd_rs_cancel_all(device): we already have IRQs
1822 * disabled, and know the disk state is ok. */
1823 spin_lock(&device->al_lock);
1824 lc_reset(device->resync);
1825 device->resync_locked = 0;
1826 device->resync_wenr = LC_FREE;
1827 spin_unlock(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828 }
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001829 unlock_all_resources();
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001830
Philipp Reisnerb411b362009-09-25 16:07:19 -07001831 if (r == SS_SUCCESS) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +01001832 wake_up(&device->al_wait); /* for lc_reset() above */
Philipp Reisner328e0f122012-10-19 14:37:47 +02001833 /* reset rs_last_bcast when a resync or verify is started,
1834 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001835 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f122012-10-19 14:37:47 +02001836
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001837 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001838 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001839 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1840 (unsigned long) device->rs_total);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001841 if (side == C_SYNC_TARGET) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001842 device->bm_resync_fo = 0;
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001843 device->use_csums = use_checksum_based_resync(connection, device);
1844 } else {
Fabian Frederick7e5fec32016-06-14 00:26:35 +02001845 device->use_csums = false;
Lars Ellenbergaaaba342014-03-18 12:30:09 +01001846 }
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001847
1848 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1849 * with w_send_oos, or the sync target will get confused as to
1850 * how many bits to resync. We cannot do that always, because for an
1851 * empty resync and protocol < 95, we need to do it here, as we call
1852 * drbd_resync_finished from here in that case.
1853 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1854 * and from after_state_ch otherwise. */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001855 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1856 drbd_gen_and_send_sync_uuid(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001857
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001858 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001859 /* This still has a race (about when exactly the peers
1860 * detect connection loss) that can lead to a full sync
1861 * on next handshake. In 8.3.9 we fixed this with explicit
1862 * resync-finished notifications, but the fix
1863 * introduces a protocol change. Sleeping for some
1864 * time longer than the ping interval + timeout on the
1865 * SyncSource, to give the SyncTarget the chance to
1866 * detect connection loss, then waiting for a ping
1867 * response (implicit in drbd_resync_finished) reduces
1868 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001869 if (side == C_SYNC_SOURCE) {
1870 struct net_conf *nc;
1871 int timeo;
1872
1873 rcu_read_lock();
Lars Ellenberg44a4d552013-11-22 12:40:58 +01001874 nc = rcu_dereference(connection->net_conf);
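				/* ping_timeo is configured in tenths of a second;
				 * dividing by 9 instead of 10 waits slightly longer
				 * than the configured ping timeout. */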
Philipp Reisner44ed1672011-04-19 17:10:19 +02001875 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1876 rcu_read_unlock();
1877 schedule_timeout_interruptible(timeo);
1878 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001879 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880 }
1881
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001882 drbd_rs_controller_reset(device);
1883 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001884 * we may have been paused in between, or become paused until
1885 * the timer triggers.
1886 * No matter, that is handled in resync_timer_fn() */
1887 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001888 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001889
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001890 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001891 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001892 put_ldev(device);
Andreas Gruenbacher28bc3b82014-08-14 18:33:30 +02001893out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001894 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001895}
1896
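/* Lazily write out dirty bitmap pages and broadcast sync progress;
 * if the resync just completed, finish it up here as well. */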
static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
{
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
	device->rs_last_bcast = jiffies;

	if (!get_ldev(device))
		return;

	drbd_bm_write_lazy(device, 0);
	if (resync_done && is_sync_state(device->state.conn))
		drbd_resync_finished(device);

	drbd_bcast_event(device, &sib);
	/* update timestamp, in case it took a while to write out stuff */
	device->rs_last_bcast = jiffies;
	put_ldev(device);
}

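/* Final teardown of the local backing device: free the resync and activity
 * log LRU caches, release the backing device structure, and wake up waiters
 * on misc_wait. Runs from the worker via the DESTROY_DISK device work bit. */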
static void drbd_ldev_destroy(struct drbd_device *device)
{
	lc_destroy(device->resync);
	device->resync = NULL;
	lc_destroy(device->act_log);
	device->act_log = NULL;

	__acquire(local);
	drbd_backing_dev_free(device, device->ldev);
	device->ldev = NULL;
	__release(local);

	clear_bit(GOING_DISKLESS, &device->flags);
	wake_up(&device->misc_wait);
}

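/* Detach while the disk is D_FAILED: try to flush the bitmap one last time
 * (marking a full sync in the meta data if a read error was involved), then
 * force the disk state to D_DISKLESS. Runs from the worker via GO_DISKLESS. */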
static void go_diskless(struct drbd_device *device)
{
	D_ASSERT(device, device->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */

	/* Try to write changed bitmap pages; read errors may have just
	 * set some bits outside the area covered by the activity log.
	 *
	 * If we have an IO error during the bitmap writeout,
	 * we will want a full sync next time, just in case.
	 * (Do we want a specific meta data flag for this?)
	 *
	 * If that does not make it to stable storage either,
	 * we cannot do anything about that anymore.
	 *
	 * We still need to check if both bitmap and ldev are present; we may
	 * end up here after a failed attach, before ldev was even assigned.
	 */
	if (device->bitmap && device->ldev) {
		/* An interrupted resync or similar is allowed to recount bits
		 * while we detach.
		 * Any modifications would not be expected anymore, though.
		 */
		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
					"detach", BM_LOCKED_TEST_ALLOWED)) {
			if (test_bit(WAS_READ_ERROR, &device->flags)) {
				drbd_md_set_flag(device, MDF_FULL_SYNC);
				drbd_md_sync(device);
			}
		}
	}

	drbd_force_state(device, NS(disk, D_DISKLESS));
}

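/* Lazy meta data writeout: triggered via the MD_SYNC device work bit when the
 * md_sync_timer expires; the worker then writes out the meta data itself. */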
static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);
	return 0;
}

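/* Record one entry in the worker's timing history: which callback is about to
 * run, from which caller and line, and at what jiffies. The history is a ring
 * of DRBD_THREAD_DETAILS_HIST entries; the next slot is zeroed as an end
 * marker before the counter is advanced. */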
/* only called from drbd_worker thread, no locking */
void __update_timing_details(
		struct drbd_thread_timing_details *tdp,
		unsigned int *cb_nr,
		void *cb,
		const char *fn, const unsigned int line)
{
	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
	struct drbd_thread_timing_details *td = tdp + i;

	td->start_jif = jiffies;
	td->cb_addr = cb;
	td->caller_fn = fn;
	td->line = line;
	td->cb_nr = *cb_nr;

	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
	td = tdp + i;
	memset(td, 0, sizeof(*td));

	++(*cb_nr);
}

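/* Dispatch the per-device work bits harvested from device->flags. Each bit
 * maps to one handler; RS_PROGRESS and RS_DONE share update_on_disk_bitmap(),
 * with RS_DONE also indicating that the resync has finished. */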
static void do_device_work(struct drbd_device *device, const unsigned long todo)
{
	if (test_bit(MD_SYNC, &todo))
		do_md_sync(device);
	if (test_bit(RS_DONE, &todo) ||
	    test_bit(RS_PROGRESS, &todo))
		update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
	if (test_bit(GO_DISKLESS, &todo))
		go_diskless(device);
	if (test_bit(DESTROY_DISK, &todo))
		drbd_ldev_destroy(device);
	if (test_bit(RS_START, &todo))
		do_start_resync(device);
}

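/* All device work bits handled by do_device_work(). They live in device->flags
 * alongside the other device flags and are fetched and cleared atomically by
 * get_work_bits() below. */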
#define DRBD_DEVICE_WORK_MASK	\
	((1UL << GO_DISKLESS)	\
	|(1UL << DESTROY_DISK)	\
	|(1UL << MD_SYNC)	\
	|(1UL << RS_START)	\
	|(1UL << RS_PROGRESS)	\
	|(1UL << RS_DONE)	\
	)

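/* Atomically fetch and clear the device work bits from *flags, leaving all
 * other flag bits untouched. The cmpxchg loop retries if another context
 * changed *flags between the read and the compare-and-exchange. */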
static unsigned long get_work_bits(unsigned long *flags)
{
	unsigned long old, new;
	do {
		old = *flags;
		new = old & ~DRBD_DEVICE_WORK_MASK;
	} while (cmpxchg(flags, old, new) != old);
	return old & DRBD_DEVICE_WORK_MASK;
}

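/* Run pending device work for all volumes of this connection. A kref on the
 * device is held while its work runs, so the RCU read lock can be dropped
 * around handlers that may sleep or do I/O. */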
static void do_unqueued_work(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		unsigned long todo = get_work_bits(&device->flags);
		if (!todo)
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		do_device_work(device, todo);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

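/* Splice the whole work queue onto the caller's private list in one step, so
 * the queue lock is held only briefly. Returns true if any work was dequeued. */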
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
	spin_lock_irq(&queue->q_lock);
	list_splice_tail_init(&queue->q, work_list);
	spin_unlock_irq(&queue->q_lock);
	return !list_empty(work_list);
}

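/* Block until there is work for the worker thread. While idle, uncork the data
 * socket so queued packets get sent, send an epoch-separating barrier if the
 * transfer log epoch changed without new requests, and restore the cork state
 * before returning. Also returns early on pending device work, a signal, or
 * when the worker is asked to stop, possibly with an empty work_list. */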
static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
{
	DEFINE_WAIT(wait);
	struct net_conf *nc;
	int uncork, cork;

	dequeue_work_batch(&connection->sender_work, work_list);
	if (!list_empty(work_list))
		return;

	/* Still nothing to do?
	 * Maybe we still need to close the current epoch,
	 * even if no new requests are queued yet.
	 *
	 * Also, poke TCP, just in case.
	 * Then wait for new work (or signal). */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	uncork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	if (uncork) {
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			drbd_tcp_uncork(connection->data.socket);
		mutex_unlock(&connection->data.mutex);
	}

	for (;;) {
		int send_barrier;
		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
		spin_lock_irq(&connection->resource->req_lock);
		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(&connection->sender_work.q))
			list_splice_tail_init(&connection->sender_work.q, work_list);
		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(work_list) || signal_pending(current)) {
			spin_unlock_irq(&connection->resource->req_lock);
			break;
		}

		/* We found nothing new to do, no to-be-communicated request,
		 * no other work item.  We may still need to close the last
		 * epoch.  Next incoming request epoch will be connection ->
		 * current transfer log epoch number.  If that is different
		 * from the epoch of the last request we communicated, it is
		 * safe to send the epoch separating barrier now.
		 */
		send_barrier =
			atomic_read(&connection->current_tle_nr) !=
			connection->send.current_epoch_nr;
		spin_unlock_irq(&connection->resource->req_lock);

		if (send_barrier)
			maybe_send_barrier(connection,
					connection->send.current_epoch_nr + 1);

		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
			break;

		/* drbd_send() may have called flush_signals() */
		if (get_t_state(&connection->worker) != RUNNING)
			break;

		schedule();
		/* may be woken up for things other than new work, too,
		 * e.g. if the current epoch got closed.
		 * In which case we send the barrier above. */
	}
	finish_wait(&connection->sender_work.q_wait, &wait);

	/* someone may have changed the config while we have been waiting above. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	cork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	mutex_lock(&connection->data.mutex);
	if (connection->data.socket) {
		if (cork)
			drbd_tcp_cork(connection->data.socket);
		else if (!uncork)
			drbd_tcp_uncork(connection->data.socket);
	}
	mutex_unlock(&connection->data.mutex);
}

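/* Main loop of the per-connection worker thread: wait for batches of work from
 * sender_work, handle unqueued per-device work bits, and run each work item's
 * callback. The callbacks get "cancel" set once the connection state has
 * dropped below C_WF_REPORT_PARAMS. On shutdown, the remaining work is drained
 * with cancel = 1 and all volumes are cleaned up. */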
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct drbd_work *w = NULL;
	struct drbd_peer_device *peer_device;
	LIST_HEAD(work_list);
	int vnr;

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		if (list_empty(&work_list)) {
			update_worker_timing_details(connection, wait_for_work);
			wait_for_work(connection, &work_list);
		}

		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}

		if (signal_pending(current)) {
			flush_signals(current);
			if (get_t_state(thi) == RUNNING) {
				drbd_warn(connection, "Worker got an unexpected signal\n");
				continue;
			}
			break;
		}

		if (get_t_state(thi) != RUNNING)
			break;

		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
				continue;
			if (connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	do {
		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}
		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			w->cb(w, 1);
		} else
			dequeue_work_batch(&connection->sender_work, &work_list);
	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_device_cleanup(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}