/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_device *device;

	md_io = (struct drbd_md_io *)bio->bi_private;
	device = container_of(md_io, struct drbd_device, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	md_io->done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removal from the tree happens in "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	wake_asender(peer_device->connection);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
				is_write ? "write" : "read", error,
				(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
					is_write ? "write" : "read",
					(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		drbd_warn(device, "p %s: setting error to -EIO\n",
			  bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}


	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which no longer
	 * complete requests at all, not even error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!error)
			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	if (drbd_rs_should_slow_down(device, sector))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	if (list_empty(&device->resync_work.list))
		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
				&device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}
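
/* How the fifo_buffer is used by the resync controller below (a rough
 * sketch, for orientation only): each slot holds the correction planned
 * for one future SLEEP_TIME step.  fifo_add_val() spreads a newly computed
 * correction evenly over all planned steps, and fifo_push(fb, 0) retires
 * the correction that was planned for the current step (its return value)
 * while opening an empty slot for the step furthest in the future; the
 * caller keeps fb->total in sync with the sum of the slots. */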

static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in the proxy */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in the proxy */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}
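
/* Worked example for drbd_rs_controller() (numbers are made up, purely for
 * illustration): with steps = 10, c_fill_target = 1000 sectors in flight,
 * rs_in_flight = 600, plan->total = 200 and sect_in = 100, we get
 * correction = 1000 - 600 - 200 = 200 and cps = 200 / 10 = 20, which gets
 * added to every planned step.  curr_corr is whatever had been planned for
 * this step earlier, and req_sect = sect_in + curr_corr refills the pipeline
 * towards the fill target, clamped to c_max_rate worth of sectors per
 * SLEEP_TIME interval. */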

static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */
	if (mxb - device->rs_in_flight < number)
		number = mxb - device->rs_in_flight;

	return number;
}
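
/* Unit conversion note (assuming the usual SLEEP_TIME of HZ/10, i.e. a
 * 100ms interval, and a BM_BLOCK_SIZE of 4K): the controller thinks in
 * 512-byte sectors, while resync requests are issued per bitmap bit, hence
 * the ">> (BM_BLOCK_SHIFT - 9)" above.  c_sync_rate is kept in KiB/s; e.g.
 * number = 250 requests per interval corresponds to
 * 250 * 4 KiB * 10 intervals/s = 10000 KiB/s. */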

static int make_resync_request(struct drbd_device *device, int cancel)
{
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync, a
		   get_ldev_if_state(device, D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled */
		mutex_lock(&first_peer_device(device)->connection->data.mutex);
		if (first_peer_device(device)->connection->data.socket) {
			queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
			sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&first_peer_device(device)->connection->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit  = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_rs_should_slow_down(device, sector) ||
		    drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
		    first_peer_device(device)->connection->csums_tfm) {
			switch (read_for_csum(first_peer_device(device), sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}
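
/* Note on the merge loop in make_resync_request() (illustrative): a request
 * grows in BM_BLOCK_SIZE steps only while its start sector stays aligned to
 * the current request size, so e.g. a start sector that is 32K-aligned but
 * not 64K-aligned lets the request grow to at most 32K before
 * "sector & ((1 << (align + 3)) - 1)" stops it.  That keeps bigger resync
 * requests stripe-friendly for software RAIDs, in addition to the
 * max_bio_size and extent-boundary checks in the same loop. */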
739
Andreas Gruenbacherd448a2e2011-08-25 16:59:58 +0200740static int make_ov_request(struct drbd_device *device, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741{
742 int number, i, size;
743 sector_t sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200744 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200745 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700746
747 if (unlikely(cancel))
748 return 1;
749
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200750 number = drbd_rs_number_requests(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700751
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200752 sector = device->ov_position;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700753 for (i = 0; i < number; i++) {
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200754 if (sector >= capacity)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700755 return 1;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200756
757 /* We check for "finished" only in the reply path:
758 * w_e_end_ov_reply().
759 * We need to send at least one request out. */
760 stop_sector_reached = i > 0
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200761 && verify_can_do_stop_sector(device)
762 && sector >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200763 if (stop_sector_reached)
764 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700765
766 size = BM_BLOCK_SIZE;
767
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200768 if (drbd_rs_should_slow_down(device, sector) ||
769 drbd_try_rs_begin_io(device, sector)) {
770 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771 goto requeue;
772 }
773
774 if (sector + (size>>9) > capacity)
775 size = (capacity-sector)<<9;
776
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200777 inc_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200778 if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200779 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780 return 0;
781 }
782 sector += BM_SECT_PER_BIT;
783 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200784 device->ov_position = sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785
786 requeue:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200787 device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200788 if (i == 0 || !stop_sector_reached)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200789 mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700790 return 1;
791}
792
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100793int w_ov_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200795 struct drbd_device_work *dw =
796 container_of(w, struct drbd_device_work, w);
797 struct drbd_device *device = dw->device;
798 kfree(dw);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200799 ov_out_of_sync_print(device);
800 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700801
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100802 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700803}
804
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100805static int w_resync_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700806{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200807 struct drbd_device_work *dw =
808 container_of(w, struct drbd_device_work, w);
809 struct drbd_device *device = dw->device;
810 kfree(dw);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700811
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200812 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100814 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700815}
816
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200817static void ping_peer(struct drbd_device *device)
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200818{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200819 struct drbd_connection *connection = first_peer_device(device)->connection;
Philipp Reisner2a67d8b2011-02-09 14:10:32 +0100820
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200821 clear_bit(GOT_PING_ACK, &connection->flags);
822 request_ping(connection);
823 wait_event(connection->ping_wait,
824 test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200825}
826
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200827int drbd_resync_finished(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828{
829 unsigned long db, dt, dbdt;
830 unsigned long n_oos;
831 union drbd_state os, ns;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200832 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700833 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100834 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700835
836 /* Remove all elements from the resync LRU. Since future actions
837 * might set bits in the (main) bitmap, then the entries in the
838 * resync LRU would be wrong. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200839 if (drbd_rs_del_all(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700840 /* In case this is not possible now, most probably because
841 * there are P_RS_DATA_REPLY Packets lingering on the worker's
842 * queue (or even the read operations for those packets
843 * is not finished by now). Retry in 100ms. */
844
Philipp Reisner20ee6392011-01-18 15:28:59 +0100845 schedule_timeout_interruptible(HZ / 10);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200846 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
847 if (dw) {
848 dw->w.cb = w_resync_finished;
849 dw->device = device;
850 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
851 &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700852 return 1;
853 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200854 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700855 }
856
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200857 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700858 if (dt <= 0)
859 dt = 1;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200860
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200861 db = device->rs_total;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200862 /* adjust for verify start and stop sectors, respective reached position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200863 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
864 db -= device->ov_left;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200865
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866 dbdt = Bit2KB(db/dt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200867 device->rs_paused /= HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700868
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200869 if (!get_ldev(device))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 goto out;
871
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200872 ping_peer(device);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200873
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200874 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200875 os = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700876
Lars Ellenberg26525612010-11-05 09:56:33 +0100877 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
878
Philipp Reisnerb411b362009-09-25 16:07:19 -0700879 /* This protects us against multiple calls (that can happen in the presence
880 of application IO), and against connectivity loss just before we arrive here. */
881 if (os.conn <= C_CONNECTED)
882 goto out_unlock;
883
884 ns = os;
885 ns.conn = C_CONNECTED;
886
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200887 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200888 verify_done ? "Online verify" : "Resync",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200889 dt + device->rs_paused, device->rs_paused, dbdt);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700890
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200891 n_oos = drbd_bm_total_weight(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892
893 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
894 if (n_oos) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200895 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700896 n_oos, Bit2KB(1));
897 khelper_cmd = "out-of-sync";
898 }
899 } else {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200900 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700901
902 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
903 khelper_cmd = "after-resync-target";
904
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200905 if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200906 const unsigned long s = device->rs_same_csum;
907 const unsigned long t = device->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700908 const int ratio =
909 (t == 0) ? 0 :
910 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200911 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700912 "transferred %luK total %luK\n",
913 ratio,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200914 Bit2KB(device->rs_same_csum),
915 Bit2KB(device->rs_total - device->rs_same_csum),
916 Bit2KB(device->rs_total));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 }
918 }
919
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200920 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200921 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922
923 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
924 ns.disk = D_INCONSISTENT;
925 ns.pdsk = D_UP_TO_DATE;
926 } else {
927 ns.disk = D_UP_TO_DATE;
928 ns.pdsk = D_INCONSISTENT;
929 }
930 } else {
931 ns.disk = D_UP_TO_DATE;
932 ns.pdsk = D_UP_TO_DATE;
933
934 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200935 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936 int i;
937 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200938 _drbd_uuid_set(device, i, device->p_uuid[i]);
939 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
940 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700941 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200942 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943 }
944 }
945
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100946 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
947 /* for verify runs, we don't update uuids here,
948 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200949 drbd_uuid_set_bm(device, 0UL);
950 drbd_print_uuids(device, "updated UUIDs");
951 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100952 /* Now the two UUID sets are equal, update what we
953 * know of the peer. */
954 int i;
955 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200956 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100957 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700958 }
959 }
960
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200961 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700962out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200963 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200964 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700965out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200966 device->rs_total = 0;
967 device->rs_failed = 0;
968 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200969
970 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200971 if (verify_done && device->ov_left == 0)
972 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700973
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200974 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +0200975
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200977 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700978
979 return 1;
980}
981
982/* helper */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200983static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200985 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100987 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200988 atomic_add(i, &device->pp_in_use_by_net);
989 atomic_sub(i, &device->pp_in_use);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200990 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200991 list_add_tail(&peer_req->w.list, &device->net_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200992 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +0200993 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700994 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200995 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996}
997
998/**
999 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001000 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001001 * @w: work object.
1002 * @cancel: The connection will be closed anyways
1003 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001004int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001006 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001007 struct drbd_peer_device *peer_device = peer_req->peer_device;
1008 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001009 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010
1011 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001012 drbd_free_peer_req(device, peer_req);
1013 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001014 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015 }
1016
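	/* The local read has completed: on success ship the data back as a
	 * P_DATA_REPLY, on a local IO error send P_NEG_DREPLY instead so the
	 * peer is not left waiting for data that will never arrive. */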
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001017 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001018 err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001019 } else {
1020 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001021 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001022 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001024 err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001025 }
1026
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001027 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001029 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001030
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001031 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001032 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001033 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034}
1035
1036/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001037 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001038 * @w: work object.
1039 * @cancel: The connection will be closed anyway
1040 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001041int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001042{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001043 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001044 struct drbd_peer_device *peer_device = peer_req->peer_device;
1045 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001046 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001047
1048 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001049 drbd_free_peer_req(device, peer_req);
1050 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001051 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001052 }
1053
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001054 if (get_ldev_if_state(device, D_FAILED)) {
1055 drbd_rs_complete_io(device, peer_req->i.sector);
1056 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001057 }
1058
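	/* Three cases: while in Ahead/Behind mode the resync request is only
	 * cancelled (P_RS_CANCEL); if the local read succeeded and the peer
	 * still has a disk, the block is shipped as P_RS_DATA_REPLY; on a
	 * local read error a negative reply is sent and the range is
	 * accounted as failed resync. */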
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001059 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001060 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001061 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001062 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1063 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001064 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001065 } else {
1066 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001067 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001069 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001070 }
1071 } else {
1072 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001073 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001074 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001075
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001076 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001077
1078 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001079 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001080 }
1081
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001082 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001083
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001084 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001086 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001087 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001088 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089}
1090
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001091int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001093 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001094 struct drbd_peer_device *peer_device = peer_req->peer_device;
1095 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096 struct digest_info *di;
1097 int digest_size;
1098 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001099 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100
1101 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001102 drbd_free_peer_req(device, peer_req);
1103 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001104 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001105 }
1106
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001107 if (get_ldev(device)) {
1108 drbd_rs_complete_io(device, peer_req->i.sector);
1109 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001110 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001111
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001112 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001113
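	/* Checksum-based resync: recompute the digest over the local block
	 * and compare it with the digest the peer sent along.  If they match,
	 * the block is already in sync and a cheap P_RS_IS_IN_SYNC ack is
	 * enough; only on a mismatch is the full block transferred. */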
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001114 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001115 /* quick hack to try to avoid a race against reconfiguration.
1116 * a real fix would be much more involved,
1117 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001118 if (peer_device->connection->csums_tfm) {
1119 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001120 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001121 digest = kmalloc(digest_size, GFP_NOIO);
1122 }
1123 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001124 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 eq = !memcmp(digest, di->digest, digest_size);
1126 kfree(digest);
1127 }
1128
1129 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001130 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001131 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001132 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001133 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001134 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001135 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001136 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1137 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001138 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001139 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140 }
1141 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001142 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001144 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145 }
1146
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001147 dec_unacked(device);
1148 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001150 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001151 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001152 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153}
1154
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001155int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001157 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001158 struct drbd_peer_device *peer_device = peer_req->peer_device;
1159 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001160 sector_t sector = peer_req->i.sector;
1161 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001162 int digest_size;
1163 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001164 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165
1166 if (unlikely(cancel))
1167 goto out;
1168
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001169 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001171 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001172 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001173 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174 }
1175
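	/* If the local read failed, send an all-zero digest; the peer's
	 * comparison will then presumably fail, so the block ends up flagged
	 * as out of sync rather than silently skipped. */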
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001176 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001177 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001178 else
1179 memset(digest, 0, digest_size);
1180
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001181 /* Free e and pages before send.
1182 * In case we block on congestion, we could otherwise run into
1183 * some distributed deadlock, if the other side blocks on
1184 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001185 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001187 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001188 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001189 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001190 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001191 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001192 kfree(digest);
1193
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001195 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001196 drbd_free_peer_req(device, peer_req);
1197 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001198 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199}
1200
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001201void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001202{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001203 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1204 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001206 device->ov_last_oos_start = sector;
1207 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001209 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210}
1211
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001212int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001213{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001214 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001215 struct drbd_peer_device *peer_device = peer_req->peer_device;
1216 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001219 sector_t sector = peer_req->i.sector;
1220 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001221 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001222 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001223 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224
1225 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001226 drbd_free_peer_req(device, peer_req);
1227 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001228 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229 }
1230
1231 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1232 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001233 if (get_ldev(device)) {
1234 drbd_rs_complete_io(device, peer_req->i.sector);
1235 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001236 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001237
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001238 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001239
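	/* Online verify, reply side: recompute the digest over the local
	 * block, compare it with the one received from the peer, and report
	 * the result (in sync / out of sync) back via P_OV_RESULT. */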
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001240 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001241 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 digest = kmalloc(digest_size, GFP_NOIO);
1243 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001244 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001246 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001247 eq = !memcmp(digest, di->digest, digest_size);
1248 kfree(digest);
1249 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001250 }
1251
Lars Ellenberg9676c762011-02-22 14:02:31 +01001252 /* Free peer_req and pages before send.
1253 * In case we block on congestion, we could otherwise run into
1254 * some distributed deadlock, if the other side blocks on
1255 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001256 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001257 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001259 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001261 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001263 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001264 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001266 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001268 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001269
1270 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001271 if ((device->ov_left & 0x200) == 0x200)
1272 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001273
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001274 stop_sector_reached = verify_can_do_stop_sector(device) &&
1275 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001276
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001277 if (device->ov_left == 0 || stop_sector_reached) {
1278 ov_out_of_sync_print(device);
1279 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280 }
1281
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001282 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001283}
1284
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001285/* FIXME
1286 * We need to track the number of pending barrier acks,
1287 * and to be able to wait for them.
1288 * See also comment in drbd_adm_attach before drbd_suspend_io.
1289 */
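/* P_BARRIER carries the number of the epoch being closed; the peer is
 * expected to acknowledge it once it has dealt with the writes of that
 * epoch (hence the note above about tracking pending barrier acks). */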
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001290static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001292 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001293 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001295 sock = &connection->data;
1296 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001297 if (!p)
1298 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001299 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001300 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001301 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001302
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001303 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001304}
1305
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001306int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001307{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001308 struct drbd_device *device =
1309 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001310 struct drbd_socket *sock;
1311
Philipp Reisnerb411b362009-09-25 16:07:19 -07001312 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001313 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001314 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001315 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001316 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001317 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318}
1319
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001320static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001321{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001322 if (!connection->send.seen_any_write_yet) {
1323 connection->send.seen_any_write_yet = true;
1324 connection->send.current_epoch_nr = epoch;
1325 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001326 }
1327}
1328
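/* Send the barrier that closes the previous write epoch, but only when the
 * epoch number actually changed and the previous epoch contained at least
 * one write; empty epochs (e.g. during Ahead mode) are skipped. */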
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001329static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001330{
1331 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001332 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001333 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001334 if (connection->send.current_epoch_nr != epoch) {
1335 if (connection->send.current_epoch_writes)
1336 drbd_send_barrier(connection);
1337 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001338 }
1339}
1340
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001341int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001342{
1343 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001344 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001345 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001346 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001347
1348 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001349 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001350 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001351 }
1352
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001353 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001354 * If it was sent, it was the closing barrier for the last
1355 * replicated epoch, before we went into AHEAD mode.
1356 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001357 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001358
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001359 err = drbd_send_out_of_sync(first_peer_device(device), req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001360 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001361
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001362 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001363}
1364
Philipp Reisnerb411b362009-09-25 16:07:19 -07001365/**
1366 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001367 * @w: work object.
1368 * @cancel: The connection will be closed anyway
1369 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001370int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001371{
1372 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001373 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001374 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001375 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376
1377 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001378 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001379 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001380 }
1381
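	/* Mirror path for a write: set up epoch bookkeeping on the first write
	 * of this connection, close the previous epoch with a barrier if
	 * needed, count this write in the current epoch, and only then send
	 * the data block itself. */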
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001382 re_init_if_first_write(connection, req->epoch);
1383 maybe_send_barrier(connection, req->epoch);
1384 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001385
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001386 err = drbd_send_dblock(first_peer_device(device), req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001387 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001388
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001389 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001390}
1391
1392/**
1393 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001394 * @w: work object.
1395 * @cancel: The connection will be closed anyway
1396 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001397int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001398{
1399 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001400 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001401 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001402 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403
1404 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001405 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001406 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407 }
1408
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001409 /* Even read requests may close a write epoch,
1410	 * if one has been opened yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001411 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001412
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001413 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001414 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001415
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001416 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001417
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001418 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001419}
1420
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001421int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001422{
1423 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001424 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001425
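	/* Resubmit a request to the local disk, e.g. after IO was suspended
	 * and then resumed: writes that were in the activity log re-activate
	 * their AL extent first, then the private bio is rebuilt against the
	 * backing device and handed to the block layer again. */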
Philipp Reisner07782862010-08-31 12:00:50 +02001426 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001427 drbd_al_begin_io(device, &req->i, false);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001428
1429 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001430 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001431 generic_make_request(req->private_bio);
1432
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001433 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001434}
1435
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001436static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001438 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001439 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001440
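	/* Walk the resync-after dependency chain: this device may only resync
	 * if no device it depends on is itself syncing or has its sync paused;
	 * a chain ending in -1 (or at a missing/diskless minor) means there is
	 * no constraint. */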
1441 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001442 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001443 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001444 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001445 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001446 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001447 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001448 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001449 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001450 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001451 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 if ((odev->state.conn >= C_SYNC_SOURCE &&
1453 odev->state.conn <= C_PAUSED_SYNC_T) ||
1454 odev->state.aftr_isp || odev->state.peer_isp ||
1455 odev->state.user_isp)
1456 return 0;
1457 }
1458}
1459
1460/**
1461 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001462 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463 *
1464 * Called from process context only (admin command and after_state_ch).
1465 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001466static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001468 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469 int i, rv = 0;
1470
Philipp Reisner695d08f2011-04-11 22:53:32 -07001471 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001472 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1474 continue;
1475 if (!_drbd_may_sync_now(odev))
1476 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1477 != SS_NOTHING_TO_DO);
1478 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001479 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480
1481 return rv;
1482}
1483
1484/**
1485 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001486 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487 *
1488 * Called from process context only (admin command and worker).
1489 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001490static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001492 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001493 int i, rv = 0;
1494
Philipp Reisner695d08f2011-04-11 22:53:32 -07001495 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001496 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1498 continue;
1499 if (odev->state.aftr_isp) {
1500 if (_drbd_may_sync_now(odev))
1501 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1502 CS_HARD, NULL)
1503 != SS_NOTHING_TO_DO) ;
1504 }
1505 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001506 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507 return rv;
1508}
1509
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001510void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511{
1512 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001513 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514 write_unlock_irq(&global_state_lock);
1515}
1516
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001517void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518{
1519 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001520 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521 write_unlock_irq(&global_state_lock);
1522}
1523
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001524/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001525enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001527 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001528 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001529
1530 if (o_minor == -1)
1531 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001532 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001533 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534
1535 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001536 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001538 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001539 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001541 /* You are free to depend on diskless, non-existing,
1542 * or not yet/no longer existing minors.
1543 * We only reject dependency loops.
1544 * We cannot follow the dependency chain beyond a detached or
1545 * missing minor.
1546 */
1547 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1548 return NO_ERROR;
1549
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001550 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001551 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001552 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001554 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 return NO_ERROR;
1556
1557 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001558 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 }
1560}
1561
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001562/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001563void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564{
1565 int changes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001567 do {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001568 changes = _drbd_pause_after(device);
1569 changes |= _drbd_resume_next(device);
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001570 } while (changes);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571}
1572
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001573void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001574{
Philipp Reisner813472c2011-05-03 16:47:02 +02001575 struct fifo_buffer *plan;
1576
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001577 atomic_set(&device->rs_sect_in, 0);
1578 atomic_set(&device->rs_sect_ev, 0);
1579 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001580
1581 /* Updating the RCU protected object in place is necessary since
1582 this function gets called from atomic context.
1583	 It is valid since all other updates also lead to a completely
1584 empty fifo */
1585 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001586 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001587 plan->total = 0;
1588 fifo_set(plan, 0);
1589 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001590}
1591
Philipp Reisner1f04af32011-02-07 11:33:59 +01001592void start_resync_timer_fn(unsigned long data)
1593{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001594 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001595
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001596 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
1597 &device->start_resync_work);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001598}
1599
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001600int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001601{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001602 struct drbd_device *device =
1603 container_of(w, struct drbd_device, start_resync_work);
Philipp Reisner00d56942011-02-09 18:09:48 +01001604
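	/* Not safe to start yet while unacked or pending resync requests are
	 * still in flight; re-arm the timer and retry in 100 ms (HZ/10). */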
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001605 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001606 drbd_warn(device, "w_start_resync later...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001607 device->start_resync_timer.expires = jiffies + HZ/10;
1608 add_timer(&device->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001609 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001610 }
1611
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001612 drbd_start_resync(device, C_SYNC_SOURCE);
1613 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001614 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001615}
1616
Philipp Reisnerb411b362009-09-25 16:07:19 -07001617/**
1618 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001619 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1621 *
1622 * This function might bring you directly into one of the
1623 * C_PAUSED_SYNC_* states.
1624 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001625void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626{
1627 union drbd_state ns;
1628 int r;
1629
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001630 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001631 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632 return;
1633 }
1634
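	/* Unless the resync handshake already ran (B_RS_H_DONE), give the
	 * before-resync-target/-source user space handler a chance to veto:
	 * a non-zero exit status drops the connection, except exit code 3 on
	 * the source side, which is only warned about (old userland tools). */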
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001635 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001636 if (side == C_SYNC_TARGET) {
1637 /* Since application IO was locked out during C_WF_BITMAP_T and
1638 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1639	 we check whether we are allowed to make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001640 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001641 r = (r >> 8) & 0xff;
1642 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001643 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001644 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001645 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001646 return;
1647 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001648 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001649 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001650 r = (r >> 8) & 0xff;
1651 if (r > 0) {
1652 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001653 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001654 "ignoring. Old userland tools?", r);
1655 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001656 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001657 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001658 conn_request_state(first_peer_device(device)->connection,
1659 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001660 return;
1661 }
1662 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001663 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664 }
1665
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001666 if (current == first_peer_device(device)->connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001667		/* The worker should not sleep waiting for state_mutex;
Philipp Reisnere64a3292011-02-05 17:34:11 +01001668 that can take long */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001669 if (!mutex_trylock(device->state_mutex)) {
1670 set_bit(B_RS_H_DONE, &device->flags);
1671 device->start_resync_timer.expires = jiffies + HZ/5;
1672 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001673 return;
1674 }
1675 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001676 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001677 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001678 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001680 write_lock_irq(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001681 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001682 if (device->state.conn < C_CONNECTED
1683 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001684 write_unlock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001685 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686 return;
1687 }
1688
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001689 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001691 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692
1693 ns.conn = side;
1694
1695 if (side == C_SYNC_TARGET)
1696 ns.disk = D_INCONSISTENT;
1697 else /* side == C_SYNC_SOURCE */
1698 ns.pdsk = D_INCONSISTENT;
1699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1701 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001702
1703 if (ns.conn < C_CONNECTED)
1704 r = SS_UNKNOWN_ERROR;
1705
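	/* On success, reset the resync bookkeeping: rs_total is the number of
	 * out-of-sync bits taken from the bitmap, and the DRBD_SYNC_MARKS ring
	 * of (left, time) samples is primed with the current values so later
	 * progress estimates start from a clean slate. */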
1706 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001707 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001708 unsigned long now = jiffies;
1709 int i;
1710
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001711 device->rs_failed = 0;
1712 device->rs_paused = 0;
1713 device->rs_same_csum = 0;
1714 device->rs_last_events = 0;
1715 device->rs_last_sect_ev = 0;
1716 device->rs_total = tw;
1717 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001718 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001719 device->rs_mark_left[i] = tw;
1720 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001721 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001723 }
1724 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001725
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726 if (r == SS_SUCCESS) {
Philipp Reisner328e0f12012-10-19 14:37:47 +02001727 /* reset rs_last_bcast when a resync or verify is started,
1728 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001730
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001731 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001732 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001733 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1734 (unsigned long) device->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001735 if (side == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001736 device->bm_resync_fo = 0;
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001737
1738 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1739 * with w_send_oos, or the sync target will get confused as to
1740	 * how many bits to resync. We cannot do that always, because for an
1741 * empty resync and protocol < 95, we need to do it here, as we call
1742 * drbd_resync_finished from here in that case.
1743 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1744 * and from after_state_ch otherwise. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001745 if (side == C_SYNC_SOURCE &&
1746 first_peer_device(device)->connection->agreed_pro_version < 96)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001747 drbd_gen_and_send_sync_uuid(first_peer_device(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001748
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001749 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1750 device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001751 /* This still has a race (about when exactly the peers
1752 * detect connection loss) that can lead to a full sync
1753 * on next handshake. In 8.3.9 we fixed this with explicit
1754 * resync-finished notifications, but the fix
1755 * introduces a protocol change. Sleeping for some
1756 * time longer than the ping interval + timeout on the
1757 * SyncSource, to give the SyncTarget the chance to
1758 * detect connection loss, then waiting for a ping
1759 * response (implicit in drbd_resync_finished) reduces
1760 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001761 if (side == C_SYNC_SOURCE) {
1762 struct net_conf *nc;
1763 int timeo;
1764
1765 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001766 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001767 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1768 rcu_read_unlock();
1769 schedule_timeout_interruptible(timeo);
1770 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001771 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772 }
1773
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001774 drbd_rs_controller_reset(device);
1775 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 * we may have been paused in between, or become paused until
1777 * the timer triggers.
1778 * No matter, that is handled in resync_timer_fn() */
1779 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001780 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001781
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001782 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001784 put_ldev(device);
1785 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001786}
1787
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001788/* If the resource already closed the current epoch, but we did not
1789 * (because we have not yet seen new requests), we should send the
1790 * corresponding barrier now. Must be checked within the same spinlock
1791 * that is used to check for new requests. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001792static bool need_to_send_barrier(struct drbd_connection *connection)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001793{
1794 if (!connection->send.seen_any_write_yet)
1795 return false;
1796
1797 /* Skip barriers that do not contain any writes.
1798 * This may happen during AHEAD mode. */
1799 if (!connection->send.current_epoch_writes)
1800 return false;
1801
1802 /* ->req_lock is held when requests are queued on
1803 * connection->sender_work, and put into ->transfer_log.
1804 * It is also held when ->current_tle_nr is increased.
1805 * So either there are already new requests queued,
1806	 * and corresponding barriers will be sent there.
1807 * Or nothing new is queued yet, so the difference will be 1.
1808 */
1809 if (atomic_read(&connection->current_tle_nr) !=
1810 connection->send.current_epoch_nr + 1)
1811 return false;
1812
1813 return true;
1814}
1815
Rashika Kheriaa186e472013-12-19 15:06:10 +05301816static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001817{
1818 spin_lock_irq(&queue->q_lock);
1819 list_splice_init(&queue->q, work_list);
1820 spin_unlock_irq(&queue->q_lock);
1821 return !list_empty(work_list);
1822}
1823
Rashika Kheriaa186e472013-12-19 15:06:10 +05301824static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001825{
1826 spin_lock_irq(&queue->q_lock);
1827 if (!list_empty(&queue->q))
1828 list_move(queue->q.next, work_list);
1829 spin_unlock_irq(&queue->q_lock);
1830 return !list_empty(work_list);
1831}
1832
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001833static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001834{
1835 DEFINE_WAIT(wait);
1836 struct net_conf *nc;
1837 int uncork, cork;
1838
1839 dequeue_work_item(&connection->sender_work, work_list);
1840 if (!list_empty(work_list))
1841 return;
1842
1843 /* Still nothing to do?
1844 * Maybe we still need to close the current epoch,
1845 * even if no new requests are queued yet.
1846 *
1847 * Also, poke TCP, just in case.
1848 * Then wait for new work (or signal). */
1849 rcu_read_lock();
1850 nc = rcu_dereference(connection->net_conf);
1851 uncork = nc ? nc->tcp_cork : 0;
1852 rcu_read_unlock();
1853 if (uncork) {
1854 mutex_lock(&connection->data.mutex);
1855 if (connection->data.socket)
1856 drbd_tcp_uncork(connection->data.socket);
1857 mutex_unlock(&connection->data.mutex);
1858 }
1859
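	/* Wait loop: re-check the queue under req_lock, dequeue at most one
	 * item, and if the current epoch was closed while we were idle send
	 * the pending barrier before going back to sleep in schedule(). */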
1860 for (;;) {
1861 int send_barrier;
1862 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001863 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001864 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02001865 /* dequeue single item only,
1866 * we still use drbd_queue_work_front() in some places */
1867 if (!list_empty(&connection->sender_work.q))
1868 list_move(connection->sender_work.q.next, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001869 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1870 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001871 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001872 break;
1873 }
1874 send_barrier = need_to_send_barrier(connection);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001875 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001876 if (send_barrier) {
1877 drbd_send_barrier(connection);
1878 connection->send.current_epoch_nr++;
1879 }
1880 schedule();
1881		/* may be woken up for things other than new work, too,
1882		 * e.g. if the current epoch got closed,
1883		 * in which case we send the barrier above. */
1884 }
1885 finish_wait(&connection->sender_work.q_wait, &wait);
1886
1887 /* someone may have changed the config while we have been waiting above. */
1888 rcu_read_lock();
1889 nc = rcu_dereference(connection->net_conf);
1890 cork = nc ? nc->tcp_cork : 0;
1891 rcu_read_unlock();
1892 mutex_lock(&connection->data.mutex);
1893 if (connection->data.socket) {
1894 if (cork)
1895 drbd_tcp_cork(connection->data.socket);
1896 else if (!uncork)
1897 drbd_tcp_uncork(connection->data.socket);
1898 }
1899 mutex_unlock(&connection->data.mutex);
1900}
1901
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902int drbd_worker(struct drbd_thread *thi)
1903{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001904 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001905 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001906 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001907 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001908 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001910 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001911 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001912
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001913 /* as long as we use drbd_queue_work_front(),
1914 * we may only dequeue single work items here, not batches. */
1915 if (list_empty(&work_list))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001916 wait_for_work(connection, &work_list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001918 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001919 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001920 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001921 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001922 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001923 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924 break;
1925 }
1926
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001927 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001928 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001930 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001931 w = list_first_entry(&work_list, struct drbd_work, list);
1932 list_del_init(&w->list);
1933 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001934 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001935 if (connection->cstate >= C_WF_REPORT_PARAMS)
1936 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001937 }
1938 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939
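	/* Shutting down: drain whatever is still queued, calling each callback
	 * with cancel=1 so it can clean up without doing real work. */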
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001940 do {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001941 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001942 w = list_first_entry(&work_list, struct drbd_work, list);
1943 list_del_init(&w->list);
1944 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001946 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001947 } while (!list_empty(&work_list));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001948
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001949 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001950 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1951 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001952 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001953 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001954 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001955 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001956 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001957 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01001958 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001959 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960
1961 return 0;
1962}