/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
        struct drbd_md_io *md_io;
        struct drbd_device *device;

        md_io = (struct drbd_md_io *)bio->bi_private;
        device = container_of(md_io, struct drbd_device, md_io);

        md_io->error = error;

        /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
         * to timeout on the lower level device, and eventually detach from it.
         * If this io completion runs after that timeout expired, this
         * drbd_md_put_buffer() may allow us to finally try and re-attach.
         * During normal operation, this only puts that extra reference
         * down to 1 again.
         * Make sure we first drop the reference, and only then signal
         * completion, or we may (in drbd_al_read_log()) cycle so fast into the
         * next drbd_md_sync_page_io(), that we trigger the
         * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
         */
        drbd_md_put_buffer(device);
        md_io->done = 1;
        wake_up(&device->misc_wait);
        bio_put(bio);
        if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
                put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_device *device = peer_req->w.device;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->read_cnt += peer_req->i.size >> 9;
        list_del(&peer_req->w.list);
        if (list_empty(&device->read_ee))
                wake_up(&device->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(device, DRBD_READ_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        drbd_queue_work(&first_peer_device(device)->connection->sender_work, &peer_req->w);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_device *device = peer_req->w.device;
        struct drbd_interval i;
        int do_wake;
        u64 block_id;
        int do_al_complete_io;

        /* after we moved peer_req to done_ee,
         * we may no longer access it,
         * it may be freed/reused already!
         * (as soon as we release the req_lock) */
        i = peer_req->i;
        do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
        block_id = peer_req->block_id;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->writ_cnt += peer_req->i.size >> 9;
        list_move_tail(&peer_req->w.list, &device->done_ee);

        /*
         * Do not remove from the write_requests tree here: we did not send the
         * Ack yet and did not wake possibly waiting conflicting requests.
         * Removed from the tree from "drbd_process_done_ee" within the
         * appropriate w.cb (e_end_block/e_end_resync_block) or from
         * _drbd_clear_done_ee.
         */

        do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        if (block_id == ID_SYNCER)
                drbd_rs_complete_io(device, i.sector);

        if (do_wake)
                wake_up(&device->ee_wait);

        if (do_al_complete_io)
                drbd_al_complete_io(device, &i);

        wake_asender(first_peer_device(device)->connection);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
        struct drbd_peer_request *peer_req = bio->bi_private;
        struct drbd_device *device = peer_req->w.device;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;

        if (error && __ratelimit(&drbd_ratelimit_state))
                drbd_warn(device, "%s: error=%d s=%llus\n",
                        is_write ? "write" : "read", error,
                        (unsigned long long)peer_req->i.sector);
        if (!error && !uptodate) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
                                is_write ? "write" : "read",
                                (unsigned long long)peer_req->i.sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        if (error)
                set_bit(__EE_WAS_ERROR, &peer_req->flags);

        bio_put(bio); /* no need for the bio anymore */
        if (atomic_dec_and_test(&peer_req->pending_bios)) {
                if (is_write)
                        drbd_endio_write_sec_final(peer_req);
                else
                        drbd_endio_read_sec_final(peer_req);
        }
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
        unsigned long flags;
        struct drbd_request *req = bio->bi_private;
        struct drbd_device *device = req->w.device;
        struct bio_and_error m;
        enum drbd_req_event what;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);

        if (!error && !uptodate) {
                drbd_warn(device, "p %s: setting error to -EIO\n",
                        bio_data_dir(bio) == WRITE ? "write" : "read");
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }


        /* If this request was aborted locally before,
         * but now was completed "successfully",
         * chances are that this caused arbitrary data corruption.
         *
         * "aborting" requests, or force-detaching the disk, is intended for
         * completely blocked/hung local backing devices which no longer
         * complete requests at all, not even do error completions.  In this
         * situation, usually a hard-reset and failover is the only way out.
         *
         * By "aborting", basically faking a local error-completion,
         * we allow for a more graceful switchover by cleanly migrating services.
         * Still the affected node has to be rebooted "soon".
         *
         * By completing these requests, we allow the upper layers to re-use
         * the associated data pages.
         *
         * If later the local backing device "recovers", and now DMAs some data
         * from disk into the original request pages, in the best case it will
         * just put random data into unused pages; but typically it will corrupt
         * meanwhile completely unrelated data, causing all sorts of damage.
         *
         * Which means delayed successful completion,
         * especially for READ requests,
         * is a reason to panic().
         *
         * We assume that a delayed *error* completion is OK,
         * though we still will complain noisily about it.
         */
        if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

                if (!error)
                        panic("possible random memory corruption caused by delayed completion of aborted local request\n");
        }

        /* to avoid recursion in __req_mod */
        if (unlikely(error)) {
                what = (bio_data_dir(bio) == WRITE)
                        ? WRITE_COMPLETED_WITH_ERROR
                        : (bio_rw(bio) == READ)
                          ? READ_COMPLETED_WITH_ERROR
                          : READ_AHEAD_COMPLETED_WITH_ERROR;
        } else
                what = COMPLETED_OK;

        bio_put(req->private_bio);
        req->private_bio = ERR_PTR(error);

        /* not req_mod(), we need irqsave here! */
        spin_lock_irqsave(&device->resource->req_lock, flags);
        __req_mod(req, what, &m);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);
        put_ldev(device);

        if (m.bio)
                complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct page *page = peer_req->pages;
        struct page *tmp;
        unsigned len;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        while ((tmp = page_chain_next(page))) {
                /* all but the last page will be fully used */
                sg_set_page(&sg, page, PAGE_SIZE, 0);
                crypto_hash_update(&desc, &sg, sg.length);
                page = tmp;
        }
        /* and now the last, possibly only partially used page */
        len = peer_req->i.size & (PAGE_SIZE - 1);
        sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
        crypto_hash_update(&desc, &sg, sg.length);
        crypto_hash_final(&desc, digest);
}

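/* Worked example for drbd_csum_ee() above (illustration only, with assumed
 * numbers): given 4 KiB pages, a peer request with i.size == 9216 bytes is
 * backed by a chain of three pages.  The while loop hashes the first two
 * pages in full; then len = 9216 & (PAGE_SIZE - 1) == 1024, so only the
 * first 1024 bytes of the last page enter the digest.  For a request that
 * is an exact multiple of PAGE_SIZE, len is 0 and the "len ?: PAGE_SIZE"
 * fallback hashes the whole last page instead. */
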
void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct bio_vec bvec;
        struct bvec_iter iter;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        bio_for_each_segment(bvec, bio, iter) {
                sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                crypto_hash_update(&desc, &sg, sg.length);
        }
        crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_device *device = w->device;
        int digest_size;
        void *digest;
        int err = 0;

        if (unlikely(cancel))
                goto out;

        if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
                goto out;

        digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
        digest = kmalloc(digest_size, GFP_NOIO);
        if (digest) {
                sector_t sector = peer_req->i.sector;
                unsigned int size = peer_req->i.size;
                drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
                /* Free peer_req and pages before send.
                 * In case we block on congestion, we could otherwise run into
                 * some distributed deadlock, if the other side blocks on
                 * congestion as well, because our receiver blocks in
                 * drbd_alloc_pages due to pp_in_use > max_buffers. */
                drbd_free_peer_req(device, peer_req);
                peer_req = NULL;
                inc_rs_pending(device);
                err = drbd_send_drequest_csum(device, sector, size,
                                              digest, digest_size,
                                              P_CSUM_RS_REQUEST);
                kfree(digest);
        } else {
                drbd_err(device, "kmalloc() of digest failed.\n");
                err = -ENOMEM;
        }

out:
        if (peer_req)
                drbd_free_peer_req(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
        return err;
}

#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_device *device, sector_t sector, int size)
{
        struct drbd_peer_request *peer_req;

        if (!get_ldev(device))
                return -EIO;

        if (drbd_rs_should_slow_down(device, sector))
                goto defer;

        /* GFP_TRY, because if there is no memory available right now, this may
         * be rescheduled for later. It is "only" background resync, after all. */
        peer_req = drbd_alloc_peer_req(device, ID_SYNCER /* unused */, sector,
                                       size, GFP_TRY);
        if (!peer_req)
                goto defer;

        peer_req->w.cb = w_e_send_csum;
        spin_lock_irq(&device->resource->req_lock);
        list_add(&peer_req->w.list, &device->read_ee);
        spin_unlock_irq(&device->resource->req_lock);

        atomic_add(size >> 9, &device->rs_sect_ev);
        if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;

        /* If it failed because of ENOMEM, retry should help.  If it failed
         * because bio_add_page failed (probably broken lower level driver),
         * retry may or may not help.
         * If it does not, you may need to force disconnect. */
        spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
        spin_unlock_irq(&device->resource->req_lock);

        drbd_free_peer_req(device, peer_req);
defer:
        put_ldev(device);
        return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        switch (device->state.conn) {
        case C_VERIFY_S:
                w_make_ov_request(w, cancel);
                break;
        case C_SYNC_TARGET:
                w_make_resync_request(w, cancel);
                break;
        }

        return 0;
}

void resync_timer_fn(unsigned long data)
{
        struct drbd_device *device = (struct drbd_device *) data;

        if (list_empty(&device->resync_work.list))
                drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
        int ov;

        ov = fb->values[fb->head_index];
        fb->values[fb->head_index++] = value;

        if (fb->head_index >= fb->size)
                fb->head_index = 0;

        return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] += value;
}

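/* Example of the plan FIFO helpers above (illustration only, with assumed
 * numbers): with fb->size == 3, values {6, 0, 0} and head_index == 0,
 * fifo_push(fb, 0) returns the oldest entry 6, overwrites that slot with 0
 * and advances head_index to 1; three pushes cycle through the whole buffer.
 * fifo_add_val(fb, 2) turns {6, 0, 0} into {8, 2, 2}.  drbd_rs_controller()
 * below uses fifo_add_val() to spread a correction over all planned steps
 * and fifo_push() to consume the step that is due in the current turn. */
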
struct fifo_buffer *fifo_alloc(int fifo_size)
{
        struct fifo_buffer *fb;

        fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
        if (!fb)
                return NULL;

        fb->head_index = 0;
        fb->size = fifo_size;
        fb->total = 0;

        return fb;
}

static int drbd_rs_controller(struct drbd_device *device)
{
        struct disk_conf *dc;
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        unsigned int want;     /* The number of sectors we want in the proxy */
        int req_sect; /* Number of sectors to request in this turn */
        int correction; /* Number of sectors more we need in the proxy*/
        int cps; /* correction per invocation of drbd_rs_controller() */
        int steps; /* Number of time steps to plan ahead */
        int curr_corr;
        int max_sect;
        struct fifo_buffer *plan;

        sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
        device->rs_in_flight -= sect_in;

        dc = rcu_dereference(device->ldev->disk_conf);
        plan = rcu_dereference(device->rs_plan_s);

        steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

        if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
                want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
        } else { /* normal path */
                want = dc->c_fill_target ? dc->c_fill_target :
                        sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
        }

        correction = want - device->rs_in_flight - plan->total;

        /* Plan ahead */
        cps = correction / steps;
        fifo_add_val(plan, cps);
        plan->total += cps * steps;

        /* What we do in this step */
        curr_corr = fifo_push(plan, 0);
        plan->total -= curr_corr;

        req_sect = sect_in + curr_corr;
        if (req_sect < 0)
                req_sect = 0;

        max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
        if (req_sect > max_sect)
                req_sect = max_sect;

        /*
        drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
                 sect_in, device->rs_in_flight, want, correction,
                 steps, cps, device->rs_planed, curr_corr, req_sect);
        */

        return req_sect;
}

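/* One drbd_rs_controller() step with assumed example numbers (illustration
 * only, taking SLEEP_TIME == HZ/10, i.e. a 100 ms cycle): suppose the peer
 * acknowledged sect_in == 2000 sectors since the last turn, c_fill_target
 * is 0, c_delay_target == 10 (tenths of a second), steps == 10,
 * rs_in_flight == 18000 and plan->total == 1000.  Then
 * want == 2000 * 10 == 20000 sectors should be in flight,
 * correction == 20000 - 18000 - 1000 == 1000, and cps == 100 is spread over
 * the plan FIFO.  curr_corr is whatever correction became due this step, and
 * req_sect == sect_in + curr_corr, clamped to
 * (c_max_rate * 2 * SLEEP_TIME) / HZ, is returned to
 * drbd_rs_number_requests() below to be turned into a request count. */
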
static int drbd_rs_number_requests(struct drbd_device *device)
{
        int number;

        rcu_read_lock();
        if (rcu_dereference(device->rs_plan_s)->size) {
                number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
                device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
        } else {
                device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
                number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
        }
        rcu_read_unlock();

        /* ignore the amount of pending requests, the resync controller should
         * throttle down to incoming reply rate soon enough anyways. */
        return number;
}

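/* Unit check for drbd_rs_number_requests() above (illustration only): the
 * controller returns 512-byte sectors, and ">> (BM_BLOCK_SHIFT - 9)" turns
 * them into BM_BLOCK_SIZE sized requests, e.g. 2048 sectors >> 3 == 256
 * requests of 4 KiB each when BM_BLOCK_SHIFT == 12.  The resulting
 * c_sync_rate is kept in KiB/s: number requests of (BM_BLOCK_SIZE / 1024)
 * KiB per SLEEP_TIME, scaled by HZ / SLEEP_TIME cycles per second. */
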
int w_make_resync_request(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        unsigned long bit;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        int max_bio_size;
        int number, rollback_i, size;
        int align, queued, sndbuf;
        int i = 0;

        if (unlikely(cancel))
                return 0;

        if (device->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(device);
                return 0;
        }

        if (!get_ldev(device)) {
                /* Since we only need to access device->rsync a
                   get_ldev_if_state(device,D_FAILED) would be sufficient, but
                   to continue resync with a broken disk makes no sense at
                   all */
                drbd_err(device, "Disk broke down during resync!\n");
                return 0;
        }

        max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
        number = drbd_rs_number_requests(device);
        if (number == 0)
                goto requeue;

        for (i = 0; i < number; i++) {
                /* Stop generating RS requests, when half of the send buffer is filled */
                mutex_lock(&first_peer_device(device)->connection->data.mutex);
                if (first_peer_device(device)->connection->data.socket) {
                        queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
                        sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
                } else {
                        queued = 1;
                        sndbuf = 0;
                }
                mutex_unlock(&first_peer_device(device)->connection->data.mutex);
                if (queued > sndbuf / 2)
                        goto requeue;

next_sector:
                size = BM_BLOCK_SIZE;
                bit  = drbd_bm_find_next(device, device->bm_resync_fo);

                if (bit == DRBD_END_OF_BITMAP) {
                        device->bm_resync_fo = drbd_bm_bits(device);
                        put_ldev(device);
                        return 0;
                }

                sector = BM_BIT_TO_SECT(bit);

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->bm_resync_fo = bit;
                        goto requeue;
                }
                device->bm_resync_fo = bit + 1;

                if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
                        drbd_rs_complete_io(device, sector);
                        goto next_sector;
                }

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
                /* try to find some adjacent bits.
                 * we stop if we have already the maximum req size.
                 *
                 * Additionally always align bigger requests, in order to
                 * be prepared for all stripe sizes of software RAIDs.
                 */
                align = 1;
                rollback_i = i;
                for (;;) {
                        if (size + BM_BLOCK_SIZE > max_bio_size)
                                break;

                        /* Be always aligned */
                        if (sector & ((1<<(align+3))-1))
                                break;

                        /* do not cross extent boundaries */
                        if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
                                break;
                        /* now, is it actually dirty, after all?
                         * caution, drbd_bm_test_bit is tri-state for some
                         * obscure reason; ( b == 0 ) would get the out-of-band
                         * only accidentally right because of the "oddly sized"
                         * adjustment below */
                        if (drbd_bm_test_bit(device, bit+1) != 1)
                                break;
                        bit++;
                        size += BM_BLOCK_SIZE;
                        if ((BM_BLOCK_SIZE << align) <= size)
                                align++;
                        i++;
                }
                /* if we merged some,
                 * reset the offset to start the next drbd_bm_find_next from */
                if (size > BM_BLOCK_SIZE)
                        device->bm_resync_fo = bit + 1;
#endif

                /* adjust very last sectors, in case we are oddly sized */
                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;
                if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
                    first_peer_device(device)->connection->csums_tfm) {
                        switch (read_for_csum(device, sector, size)) {
                        case -EIO: /* Disk failure */
                                put_ldev(device);
                                return -EIO;
                        case -EAGAIN: /* allocation failed, or ldev busy */
                                drbd_rs_complete_io(device, sector);
                                device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                                i = rollback_i;
                                goto requeue;
                        case 0:
                                /* everything ok */
                                break;
                        default:
                                BUG();
                        }
                } else {
                        int err;

                        inc_rs_pending(device);
                        err = drbd_send_drequest(device, P_RS_DATA_REQUEST,
                                                 sector, size, ID_SYNCER);
                        if (err) {
                                drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
                                dec_rs_pending(device);
                                put_ldev(device);
                                return err;
                        }
                }
        }

        if (device->bm_resync_fo >= drbd_bm_bits(device)) {
                /* last syncer _request_ was sent,
                 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
                 * next sync group will resume), as soon as we receive the last
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
                put_ldev(device);
                return 0;
        }

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        put_ldev(device);
        return 0;
}

static int w_make_ov_request(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        int number, i, size;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        bool stop_sector_reached = false;

        if (unlikely(cancel))
                return 1;

        number = drbd_rs_number_requests(device);

        sector = device->ov_position;
        for (i = 0; i < number; i++) {
                if (sector >= capacity)
                        return 1;

                /* We check for "finished" only in the reply path:
                 * w_e_end_ov_reply().
                 * We need to send at least one request out. */
                stop_sector_reached = i > 0
                        && verify_can_do_stop_sector(device)
                        && sector >= device->ov_stop_sector;
                if (stop_sector_reached)
                        break;

                size = BM_BLOCK_SIZE;

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->ov_position = sector;
                        goto requeue;
                }

                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;

                inc_rs_pending(device);
                if (drbd_send_ov_request(device, sector, size)) {
                        dec_rs_pending(device);
                        return 0;
                }
                sector += BM_SECT_PER_BIT;
        }
        device->ov_position = sector;

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        if (i == 0 || !stop_sector_reached)
                mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        kfree(w);
        ov_out_of_sync_print(device);
        drbd_resync_finished(device);

        return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        kfree(w);

        drbd_resync_finished(device);

        return 0;
}

static void ping_peer(struct drbd_device *device)
{
        struct drbd_connection *connection = first_peer_device(device)->connection;

        clear_bit(GOT_PING_ACK, &connection->flags);
        request_ping(connection);
        wait_event(connection->ping_wait,
                   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_device *device)
{
        unsigned long db, dt, dbdt;
        unsigned long n_oos;
        union drbd_state os, ns;
        struct drbd_work *w;
        char *khelper_cmd = NULL;
        int verify_done = 0;

        /* Remove all elements from the resync LRU. Since future actions
         * might set bits in the (main) bitmap, then the entries in the
         * resync LRU would be wrong. */
        if (drbd_rs_del_all(device)) {
                /* In case this is not possible now, most probably because
                 * there are P_RS_DATA_REPLY Packets lingering on the worker's
                 * queue (or even the read operations for those packets
                 * is not finished by now). Retry in 100ms. */

                schedule_timeout_interruptible(HZ / 10);
                w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
                if (w) {
                        w->cb = w_resync_finished;
                        w->device = device;
                        drbd_queue_work(&first_peer_device(device)->connection->sender_work, w);
                        return 1;
                }
                drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
        }

        dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
        if (dt <= 0)
                dt = 1;

        db = device->rs_total;
        /* adjust for verify start and stop sectors, respective reached position */
        if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
                db -= device->ov_left;

        dbdt = Bit2KB(db/dt);
        device->rs_paused /= HZ;

        if (!get_ldev(device))
                goto out;

        ping_peer(device);

        spin_lock_irq(&device->resource->req_lock);
        os = drbd_read_state(device);

        verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

        /* This protects us against multiple calls (that can happen in the presence
           of application IO), and against connectivity loss just before we arrive here. */
        if (os.conn <= C_CONNECTED)
                goto out_unlock;

        ns = os;
        ns.conn = C_CONNECTED;

        drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
             verify_done ? "Online verify" : "Resync",
             dt + device->rs_paused, device->rs_paused, dbdt);

        n_oos = drbd_bm_total_weight(device);

        if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
                if (n_oos) {
                        drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
                              n_oos, Bit2KB(1));
                        khelper_cmd = "out-of-sync";
                }
        } else {
                D_ASSERT(device, (n_oos - device->rs_failed) == 0);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
                        khelper_cmd = "after-resync-target";

                if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
                        const unsigned long s = device->rs_same_csum;
                        const unsigned long t = device->rs_total;
                        const int ratio =
                                (t == 0)     ? 0 :
                                (t < 100000) ? ((s*100)/t) : (s/(t/100));
                        drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
                             "transferred %luK total %luK\n",
                             ratio,
                             Bit2KB(device->rs_same_csum),
                             Bit2KB(device->rs_total - device->rs_same_csum),
                             Bit2KB(device->rs_total));
                }
        }

        if (device->rs_failed) {
                drbd_info(device, " %lu failed blocks\n", device->rs_failed);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        ns.disk = D_INCONSISTENT;
                        ns.pdsk = D_UP_TO_DATE;
                } else {
                        ns.disk = D_UP_TO_DATE;
                        ns.pdsk = D_INCONSISTENT;
                }
        } else {
                ns.disk = D_UP_TO_DATE;
                ns.pdsk = D_UP_TO_DATE;

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        if (device->p_uuid) {
                                int i;
                                for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
                                        _drbd_uuid_set(device, i, device->p_uuid[i]);
                                drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
                                _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
                        } else {
                                drbd_err(device, "device->p_uuid is NULL! BUG\n");
                        }
                }

                if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
                        /* for verify runs, we don't update uuids here,
                         * so there would be nothing to report. */
                        drbd_uuid_set_bm(device, 0UL);
                        drbd_print_uuids(device, "updated UUIDs");
                        if (device->p_uuid) {
                                /* Now the two UUID sets are equal, update what we
                                 * know of the peer. */
                                int i;
                                for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
                                        device->p_uuid[i] = device->ldev->md.uuid[i];
                        }
                }
        }

        _drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
        spin_unlock_irq(&device->resource->req_lock);
        put_ldev(device);
out:
        device->rs_total  = 0;
        device->rs_failed = 0;
        device->rs_paused = 0;

        /* reset start sector, if we reached end of device */
        if (verify_done && device->ov_left == 0)
                device->ov_start_sector = 0;

        drbd_md_sync(device);

        if (khelper_cmd)
                drbd_khelper(device, khelper_cmd);

        return 1;
}

/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
        if (drbd_peer_req_has_active_page(peer_req)) {
                /* This might happen if sendpage() has not finished */
                int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
                atomic_add(i, &device->pp_in_use_by_net);
                atomic_sub(i, &device->pp_in_use);
                spin_lock_irq(&device->resource->req_lock);
                list_add_tail(&peer_req->w.list, &device->net_ee);
                spin_unlock_irq(&device->resource->req_lock);
                wake_up(&drbd_pp_wait);
        } else
                drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @device: DRBD device.
 * @w: work object.
 * @cancel: The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_device *device = w->device;
        int err;

        if (unlikely(cancel)) {
                drbd_free_peer_req(device, peer_req);
                dec_unacked(device);
                return 0;
        }

        if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
                err = drbd_send_block(device, P_DATA_REPLY, peer_req);
        } else {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_err(device, "Sending NegDReply. sector=%llus.\n",
                            (unsigned long long)peer_req->i.sector);

                err = drbd_send_ack(device, P_NEG_DREPLY, peer_req);
        }

        dec_unacked(device);

        move_to_net_ee_or_free(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_block() failed\n");
        return err;
}

1018/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001019 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021 * @w: work object.
1022 * @cancel: The connection will be closed anyway
1023 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001024int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001025{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001026 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001027 struct drbd_device *device = w->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001028 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029
1030 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001031 drbd_free_peer_req(device, peer_req);
1032 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001033 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001034 }
1035
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001036 if (get_ldev_if_state(device, D_FAILED)) {
1037 drbd_rs_complete_io(device, peer_req->i.sector);
1038 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001039 }
1040
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001041 if (device->state.conn == C_AHEAD) {
1042 err = drbd_send_ack(device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001043 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001044 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1045 inc_rs_pending(device);
1046 err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001047 } else {
1048 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001049 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001051 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001052 }
1053 } else {
1054 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001055 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001056 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001057
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001058 err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059
1060 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001061 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001062 }
1063
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001064 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001065
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001066 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001067
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001068 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001069 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001070 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001071}
1072
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001073int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001074{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001075 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001076 struct drbd_device *device = w->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001077 struct digest_info *di;
1078 int digest_size;
1079 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001080 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081
1082 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001083 drbd_free_peer_req(device, peer_req);
1084 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001085 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086 }
1087
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001088 if (get_ldev(device)) {
1089 drbd_rs_complete_io(device, peer_req->i.sector);
1090 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001091 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001093 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001094
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001095 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001096 /* quick hack to try to avoid a race against reconfiguration.
1097 * a real fix would be much more involved,
1098 * introducing more locking mechanisms */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001099 if (first_peer_device(device)->connection->csums_tfm) {
1100 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001101 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001102 digest = kmalloc(digest_size, GFP_NOIO);
1103 }
1104 if (digest) {
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001105 drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106 eq = !memcmp(digest, di->digest, digest_size);
1107 kfree(digest);
1108 }
1109
1110 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001111 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001112 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001113 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
1114 err = drbd_send_ack(device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001115 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001116 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001117 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1118 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001119 kfree(di);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001120 err = drbd_send_block(device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001121 }
1122 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001123 err = drbd_send_ack(device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001124 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001125 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001126 }
1127
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001128 dec_unacked(device);
1129 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001131 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001132 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001133 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001134}
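/*
 * Illustrative sketch (not driver code): the core of checksum-based resync
 * in w_e_end_csum_rs_req() is "compare the peer's digest with a digest of
 * the local data; only ship the block if they differ".  A stand-alone
 * model with hypothetical ex_* names, memcmp standing in for the crypto
 * hash comparison:
 */
#include <string.h>

enum ex_rs_reply { EX_RS_IS_IN_SYNC, EX_RS_DATA_REPLY };

static enum ex_rs_reply ex_csum_rs_decide(const unsigned char *peer_digest,
                                          const unsigned char *local_digest,
                                          size_t digest_size)
{
        /* equal digests: the peer already has identical data, skip the payload */
        if (memcmp(peer_digest, local_digest, digest_size) == 0)
                return EX_RS_IS_IN_SYNC;
        /* otherwise fall back to sending the full block */
        return EX_RS_DATA_REPLY;
}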
1135
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001136int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001137{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001138 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001139 struct drbd_device *device = w->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001140 sector_t sector = peer_req->i.sector;
1141 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142 int digest_size;
1143 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001144 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145
1146 if (unlikely(cancel))
1147 goto out;
1148
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001149 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001150 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001151 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001152 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001153 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154 }
1155
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001156 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001157 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001158 else
1159 memset(digest, 0, digest_size);
1160
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001161 /* Free peer_req and pages before send.
1162 * In case we block on congestion, we could otherwise run into
1163 * some distributed deadlock, if the other side blocks on
1164 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001165 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001166 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001167 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001168 inc_rs_pending(device);
1169 err = drbd_send_drequest_csum(device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001170 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001171 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001172 kfree(digest);
1173
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001175 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001176 drbd_free_peer_req(device, peer_req);
1177 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001178 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179}
1180
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001181void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001182{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001183 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1184 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 device->ov_last_oos_start = sector;
1187 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001188 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001189 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190}
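/*
 * Illustrative sketch (not driver code): drbd_ov_out_of_sync_found()
 * coalesces adjacent out-of-sync blocks into one run, so that only one
 * range is reported per contiguous stretch.  A user-space model with
 * hypothetical ex_* names, sizes given in bytes and converted to 512-byte
 * sectors as above:
 */
struct ex_oos_run {
        unsigned long long start;       /* first sector of the current run */
        unsigned long long sectors;     /* length of the current run in sectors */
};

static void ex_note_out_of_sync(struct ex_oos_run *run,
                                unsigned long long sector, unsigned int bytes)
{
        unsigned long long sectors = bytes >> 9;

        if (run->start + run->sectors == sector) {
                run->sectors += sectors;        /* block extends the previous run */
        } else {
                run->start = sector;            /* block starts a new run */
                run->sectors = sectors;
        }
}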
1191
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001192int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001194 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001195 struct drbd_device *device = w->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001198 sector_t sector = peer_req->i.sector;
1199 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001200 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001201 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001202 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001203
1204 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001205 drbd_free_peer_req(device, peer_req);
1206 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001207 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 }
1209
1210 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1211 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001212 if (get_ldev(device)) {
1213 drbd_rs_complete_io(device, peer_req->i.sector);
1214 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001215 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001217 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001219 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001220 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221 digest = kmalloc(digest_size, GFP_NOIO);
1222 if (digest) {
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001223 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001225 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001226 eq = !memcmp(digest, di->digest, digest_size);
1227 kfree(digest);
1228 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229 }
1230
Lars Ellenberg9676c762011-02-22 14:02:31 +01001231 /* Free peer_req and pages before send.
1232 * In case we block on congestion, we could otherwise run into
1233 * some distributed deadlock, if the other side blocks on
1234 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001235 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001236 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001237 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001238 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001239 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001240 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001242 err = drbd_send_ack_ex(device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001243 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001245 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001247 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001248
1249 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001250 if ((device->ov_left & 0x200) == 0x200)
1251 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001252
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001253 stop_sector_reached = verify_can_do_stop_sector(device) &&
1254 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001255
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001256 if (device->ov_left == 0 || stop_sector_reached) {
1257 ov_out_of_sync_print(device);
1258 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259 }
1260
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001261 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262}
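/*
 * Illustrative sketch (not driver code): the stop_sector_reached test in
 * w_e_end_ov_reply() converts the verified block's size from bytes to
 * 512-byte sectors and checks whether the end of the block reaches the
 * configured stop sector.  A stand-alone version of that arithmetic
 * (hypothetical ex_* names):
 */
#include <stdbool.h>

static bool ex_stop_sector_reached(unsigned long long sector, unsigned int bytes,
                                   unsigned long long stop_sector)
{
        /* the block covers sectors [sector, sector + bytes/512) */
        return sector + (bytes >> 9) >= stop_sector;
}

/* e.g. a 4096-byte block at sector 1016 ends at sector 1024 and therefore
 * reaches a stop sector of 1024 */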
1263
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001264int w_prev_work_done(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265{
1266 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001267
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 complete(&b->done);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001269 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270}
1271
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001272/* FIXME
1273 * We need to track the number of pending barrier acks,
1274 * and to be able to wait for them.
1275 * See also comment in drbd_adm_attach before drbd_suspend_io.
1276 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001277static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001279 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001280 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001281
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001282 sock = &connection->data;
1283 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001284 if (!p)
1285 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001286 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001287 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001288 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001289
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001290 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291}
1292
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001293int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001295 struct drbd_device *device = w->device;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001296 struct drbd_socket *sock;
1297
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001299 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001300 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001301 if (!drbd_prepare_command(device, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001302 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001303 return drbd_send_command(device, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001304}
1305
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001306static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001307{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001308 if (!connection->send.seen_any_write_yet) {
1309 connection->send.seen_any_write_yet = true;
1310 connection->send.current_epoch_nr = epoch;
1311 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001312 }
1313}
1314
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001315static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001316{
1317 /* no write seen yet on this connection: there is no epoch to close */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001318 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001319 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001320 if (connection->send.current_epoch_nr != epoch) {
1321 if (connection->send.current_epoch_writes)
1322 drbd_send_barrier(connection);
1323 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001324 }
1325}
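/*
 * Illustrative sketch (not driver code): re_init_if_first_write() and
 * maybe_send_barrier() together enforce one rule - a P_BARRIER is emitted
 * only when a new epoch starts and the epoch being closed actually
 * contained writes.  A stand-alone model with hypothetical ex_* names:
 */
#include <stdbool.h>
#include <stdio.h>

struct ex_send_state {
        bool seen_any_write_yet;
        unsigned int current_epoch_nr;
        unsigned int current_epoch_writes;
};

static void ex_maybe_send_barrier(struct ex_send_state *s, unsigned int epoch)
{
        if (!s->seen_any_write_yet)
                return;                         /* nothing sent yet, nothing to close */
        if (s->current_epoch_nr != epoch) {
                if (s->current_epoch_writes) {
                        printf("send P_BARRIER closing epoch %u\n", s->current_epoch_nr);
                        s->current_epoch_writes = 0;    /* drbd_send_barrier() resets this */
                }
                s->current_epoch_nr = epoch;    /* epochs without writes are skipped silently */
        }
}

static void ex_send_write(struct ex_send_state *s, unsigned int epoch)
{
        if (!s->seen_any_write_yet) {           /* mirrors re_init_if_first_write() */
                s->seen_any_write_yet = true;
                s->current_epoch_nr = epoch;
                s->current_epoch_writes = 0;
        }
        ex_maybe_send_barrier(s, epoch);
        s->current_epoch_writes++;
        printf("send P_DATA in epoch %u\n", epoch);
}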
1326
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001327int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001328{
1329 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001330 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001331 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001332 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001333
1334 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001335 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001336 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001337 }
1338
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001339 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001340 * If it was sent, it was the closing barrier for the last
1341 * replicated epoch, before we went into AHEAD mode.
1342 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001343 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001344
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001345 err = drbd_send_out_of_sync(device, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001346 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001347
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001348 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001349}
1350
Philipp Reisnerb411b362009-09-25 16:07:19 -07001351/**
1352 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001354 * @w: work object.
1355 * @cancel: The connection will be closed anyway
1356 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001357int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001358{
1359 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001360 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001361 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001362 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001363
1364 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001365 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001366 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001367 }
1368
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001369 re_init_if_first_write(connection, req->epoch);
1370 maybe_send_barrier(connection, req->epoch);
1371 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001372
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001373 err = drbd_send_dblock(device, req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001374 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001376 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001377}
1378
1379/**
1380 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001382 * @w: work object.
1383 * @cancel: The connection will be closed anyway
1384 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001385int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001386{
1387 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001388 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001389 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001390 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391
1392 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001393 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001394 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001395 }
1396
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001397 /* Even read requests may close a write epoch,
1398 * if one is still open. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001399 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001400
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001401 err = drbd_send_drequest(device, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001402 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001404 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001406 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407}
1408
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001409int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001410{
1411 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001412 struct drbd_device *device = w->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001413
Philipp Reisner07782862010-08-31 12:00:50 +02001414 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001415 drbd_al_begin_io(device, &req->i, false);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001416
1417 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001418 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001419 generic_make_request(req->private_bio);
1420
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001421 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001422}
1423
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001424static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001425{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001426 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001427 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001428
1429 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001430 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001431 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001432 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001433 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001434 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001435 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001436 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001437 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001438 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001439 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001440 if ((odev->state.conn >= C_SYNC_SOURCE &&
1441 odev->state.conn <= C_PAUSED_SYNC_T) ||
1442 odev->state.aftr_isp || odev->state.peer_isp ||
1443 odev->state.user_isp)
1444 return 0;
1445 }
1446}
1447
1448/**
1449 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001450 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451 *
1452 * Called from process context only (admin command and after_state_ch).
1453 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001454static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001456 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457 int i, rv = 0;
1458
Philipp Reisner695d08f2011-04-11 22:53:32 -07001459 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001460 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1462 continue;
1463 if (!_drbd_may_sync_now(odev))
1464 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1465 != SS_NOTHING_TO_DO);
1466 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001467 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468
1469 return rv;
1470}
1471
1472/**
1473 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001474 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475 *
1476 * Called from process context only (admin command and worker).
1477 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001478static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001480 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481 int i, rv = 0;
1482
Philipp Reisner695d08f2011-04-11 22:53:32 -07001483 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001484 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1486 continue;
1487 if (odev->state.aftr_isp) {
1488 if (_drbd_may_sync_now(odev))
1489 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1490 CS_HARD, NULL)
1491 != SS_NOTHING_TO_DO) ;
1492 }
1493 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001494 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495 return rv;
1496}
1497
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001498void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499{
1500 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001501 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502 write_unlock_irq(&global_state_lock);
1503}
1504
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001505void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506{
1507 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001508 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001509 write_unlock_irq(&global_state_lock);
1510}
1511
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001512/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001513enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001515 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001516 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517
1518 if (o_minor == -1)
1519 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001520 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001521 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522
1523 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001524 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001526 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001527 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001529 /* You are free to depend on diskless, non-existing,
1530 * or not yet/no longer existing minors.
1531 * We only reject dependency loops.
1532 * We cannot follow the dependency chain beyond a detached or
1533 * missing minor.
1534 */
1535 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1536 return NO_ERROR;
1537
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001538 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001539 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001540 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001542 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543 return NO_ERROR;
1544
1545 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001546 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 }
1548}
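/*
 * Illustrative sketch (not driver code): drbd_resync_after_valid() walks
 * the resync-after chain starting at the proposed minor and rejects the
 * configuration only if the chain leads back to the device itself.  A
 * user-space model with a small table of "resync after" minors (-1 means
 * "no dependency"); all ex_* names are hypothetical.
 */
#include <stdbool.h>

#define EX_MINORS 8

/* ex_resync_after[m] is the minor that device m resyncs after, or -1 */
static int ex_resync_after[EX_MINORS] = { [0 ... EX_MINORS - 1] = -1 };

static bool ex_resync_after_valid(int device_minor, int o_minor)
{
        int m = o_minor;
        int hops;

        if (m == -1)
                return true;                    /* no dependency requested */

        /* bounded walk; the driver relies on the invariant that already
         * accepted configurations are cycle free */
        for (hops = 0; hops < EX_MINORS && m >= 0 && m < EX_MINORS; hops++) {
                if (m == device_minor)
                        return false;           /* would close a dependency cycle */
                m = ex_resync_after[m];         /* follow the chain */
                if (m == -1)
                        return true;            /* chain ends cleanly */
        }
        return true;                            /* chain left the configured set */
}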
1549
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001550/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001551void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552{
1553 int changes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001555 do {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001556 changes = _drbd_pause_after(device);
1557 changes |= _drbd_resume_next(device);
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001558 } while (changes);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559}
1560
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001561void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001562{
Philipp Reisner813472c2011-05-03 16:47:02 +02001563 struct fifo_buffer *plan;
1564
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001565 atomic_set(&device->rs_sect_in, 0);
1566 atomic_set(&device->rs_sect_ev, 0);
1567 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001568
1569 /* Updating the RCU protected object in place is necessary since
1570 this function gets called from atomic context.
1571 It is valid since all other updates also lead to a completely
1572 empty fifo */
1573 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001574 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001575 plan->total = 0;
1576 fifo_set(plan, 0);
1577 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001578}
1579
Philipp Reisner1f04af32011-02-07 11:33:59 +01001580void start_resync_timer_fn(unsigned long data)
1581{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001582 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001583
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001584 drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->start_resync_work);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001585}
1586
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001587int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001588{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001589 struct drbd_device *device = w->device;
Philipp Reisner00d56942011-02-09 18:09:48 +01001590
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001591 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001592 drbd_warn(device, "w_start_resync later...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001593 device->start_resync_timer.expires = jiffies + HZ/10;
1594 add_timer(&device->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001595 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001596 }
1597
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001598 drbd_start_resync(device, C_SYNC_SOURCE);
1599 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001600 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001601}
1602
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603/**
1604 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001605 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001606 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1607 *
1608 * This function might bring you directly into one of the
1609 * C_PAUSED_SYNC_* states.
1610 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001611void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001612{
1613 union drbd_state ns;
1614 int r;
1615
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001616 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001617 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618 return;
1619 }
1620
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001621 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001622 if (side == C_SYNC_TARGET) {
1623 /* Since application IO was locked out during C_WF_BITMAP_T and
1624 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1625 we check whether we may make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001626 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001627 r = (r >> 8) & 0xff;
1628 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001629 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001630 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001631 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001632 return;
1633 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001634 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001635 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001636 r = (r >> 8) & 0xff;
1637 if (r > 0) {
1638 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001639 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001640 "ignoring. Old userland tools?\n", r);
1641 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001642 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001643 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001644 conn_request_state(first_peer_device(device)->connection,
1645 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001646 return;
1647 }
1648 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001649 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650 }
1651
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001652 if (current == first_peer_device(device)->connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001653 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001654 that can take long */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001655 if (!mutex_trylock(device->state_mutex)) {
1656 set_bit(B_RS_H_DONE, &device->flags);
1657 device->start_resync_timer.expires = jiffies + HZ/5;
1658 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001659 return;
1660 }
1661 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001662 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001663 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001664 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001666 write_lock_irq(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001667 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001668 if (device->state.conn < C_CONNECTED
1669 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001670 write_unlock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001671 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672 return;
1673 }
1674
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001675 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001677 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678
1679 ns.conn = side;
1680
1681 if (side == C_SYNC_TARGET)
1682 ns.disk = D_INCONSISTENT;
1683 else /* side == C_SYNC_SOURCE */
1684 ns.pdsk = D_INCONSISTENT;
1685
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001686 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1687 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001688
1689 if (ns.conn < C_CONNECTED)
1690 r = SS_UNKNOWN_ERROR;
1691
1692 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001693 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001694 unsigned long now = jiffies;
1695 int i;
1696
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001697 device->rs_failed = 0;
1698 device->rs_paused = 0;
1699 device->rs_same_csum = 0;
1700 device->rs_last_events = 0;
1701 device->rs_last_sect_ev = 0;
1702 device->rs_total = tw;
1703 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001704 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001705 device->rs_mark_left[i] = tw;
1706 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001707 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001708 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709 }
1710 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001711
Philipp Reisnerb411b362009-09-25 16:07:19 -07001712 if (r == SS_SUCCESS) {
Philipp Reisner328e0f12012-10-19 14:37:47 +02001713 /* reset rs_last_bcast when a resync or verify is started,
1714 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001715 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001716
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001717 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001718 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001719 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1720 (unsigned long) device->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001721 if (side == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 device->bm_resync_fo = 0;
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001723
1724 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1725 * with w_send_oos, or the sync target will get confused as to
1726 * how many bits to resync. We cannot do that always, because for an
1727 * empty resync and protocol < 95, we need to do it here, as we call
1728 * drbd_resync_finished from here in that case.
1729 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1730 * and from after_state_ch otherwise. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001731 if (side == C_SYNC_SOURCE &&
1732 first_peer_device(device)->connection->agreed_pro_version < 96)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001733 drbd_gen_and_send_sync_uuid(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001735 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1736 device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001737 /* This still has a race (about when exactly the peers
1738 * detect connection loss) that can lead to a full sync
1739 * on next handshake. In 8.3.9 we fixed this with explicit
1740 * resync-finished notifications, but the fix
1741 * introduces a protocol change. Sleeping for some
1742 * time longer than the ping interval + timeout on the
1743 * SyncSource, to give the SyncTarget the chance to
1744 * detect connection loss, then waiting for a ping
1745 * response (implicit in drbd_resync_finished) reduces
1746 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001747 if (side == C_SYNC_SOURCE) {
1748 struct net_conf *nc;
1749 int timeo;
1750
1751 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001752 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001753 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1754 rcu_read_unlock();
1755 schedule_timeout_interruptible(timeo);
1756 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001757 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758 }
1759
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001760 drbd_rs_controller_reset(device);
1761 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001762 * we may have been paused in between, or become paused until
1763 * the timer triggers.
1764 * No matter, that is handled in resync_timer_fn() */
1765 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001766 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001768 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001770 put_ldev(device);
1771 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772}
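/*
 * Illustrative arithmetic (not driver code): rs_total counts dirty bitmap
 * bits.  Assuming the usual 4 KiB bitmap granularity (BM_BLOCK_SHIFT == 12),
 * the "will sync %lu KB" figure above is rs_total << (12 - 10), i.e. four
 * KiB-sized units per bit.  Hypothetical ex_* name:
 */
static unsigned long ex_resync_kb(unsigned long dirty_bits)
{
        return dirty_bits << 2;         /* 1 bit = 4 KiB = 4 units of 1024 bytes */
}
/* e.g. 1000 dirty bits -> 4000 KB shown in the "Began resync" message */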
1773
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001774/* If the resource already closed the current epoch, but we did not
1775 * (because we have not yet seen new requests), we should send the
1776 * corresponding barrier now. Must be checked within the same spinlock
1777 * that is used to check for new requests. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001778static bool need_to_send_barrier(struct drbd_connection *connection)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001779{
1780 if (!connection->send.seen_any_write_yet)
1781 return false;
1782
1783 /* Skip barriers that do not contain any writes.
1784 * This may happen during AHEAD mode. */
1785 if (!connection->send.current_epoch_writes)
1786 return false;
1787
1788 /* ->req_lock is held when requests are queued on
1789 * connection->sender_work, and put into ->transfer_log.
1790 * It is also held when ->current_tle_nr is increased.
1791 * So either there are already new requests queued,
1792 * and corresponding barriers will be sent there.
1793 * Or nothing new is queued yet, so the difference will be 1.
1794 */
1795 if (atomic_read(&connection->current_tle_nr) !=
1796 connection->send.current_epoch_nr + 1)
1797 return false;
1798
1799 return true;
1800}
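/*
 * Illustrative sketch (not driver code): the decision above boils down to
 * "close the epoch from the idle path only if writes were seen, the current
 * epoch is non-empty, and no newer epoch has been queued meanwhile".  A
 * stand-alone predicate with the same shape (hypothetical ex_* names):
 */
#include <stdbool.h>

struct ex_barrier_state {
        bool seen_any_write_yet;
        unsigned int current_epoch_nr;
        unsigned int current_epoch_writes;
        unsigned int current_tle_nr;    /* epoch number new requests would use */
};

static bool ex_need_to_send_barrier(const struct ex_barrier_state *s)
{
        if (!s->seen_any_write_yet)
                return false;
        if (!s->current_epoch_writes)
                return false;           /* empty epochs need no barrier */
        /* newer requests already queued: their path will send the barrier */
        if (s->current_tle_nr != s->current_epoch_nr + 1)
                return false;
        return true;
}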
1801
Rashika Kheriaa186e472013-12-19 15:06:10 +05301802static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001803{
1804 spin_lock_irq(&queue->q_lock);
1805 list_splice_init(&queue->q, work_list);
1806 spin_unlock_irq(&queue->q_lock);
1807 return !list_empty(work_list);
1808}
1809
Rashika Kheriaa186e472013-12-19 15:06:10 +05301810static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001811{
1812 spin_lock_irq(&queue->q_lock);
1813 if (!list_empty(&queue->q))
1814 list_move(queue->q.next, work_list);
1815 spin_unlock_irq(&queue->q_lock);
1816 return !list_empty(work_list);
1817}
1818
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001819static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001820{
1821 DEFINE_WAIT(wait);
1822 struct net_conf *nc;
1823 int uncork, cork;
1824
1825 dequeue_work_item(&connection->sender_work, work_list);
1826 if (!list_empty(work_list))
1827 return;
1828
1829 /* Still nothing to do?
1830 * Maybe we still need to close the current epoch,
1831 * even if no new requests are queued yet.
1832 *
1833 * Also, poke TCP, just in case.
1834 * Then wait for new work (or signal). */
1835 rcu_read_lock();
1836 nc = rcu_dereference(connection->net_conf);
1837 uncork = nc ? nc->tcp_cork : 0;
1838 rcu_read_unlock();
1839 if (uncork) {
1840 mutex_lock(&connection->data.mutex);
1841 if (connection->data.socket)
1842 drbd_tcp_uncork(connection->data.socket);
1843 mutex_unlock(&connection->data.mutex);
1844 }
1845
1846 for (;;) {
1847 int send_barrier;
1848 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001849 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001850 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02001851 /* dequeue single item only,
1852 * we still use drbd_queue_work_front() in some places */
1853 if (!list_empty(&connection->sender_work.q))
1854 list_move(connection->sender_work.q.next, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001855 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1856 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001857 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001858 break;
1859 }
1860 send_barrier = need_to_send_barrier(connection);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001861 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001862 if (send_barrier) {
1863 drbd_send_barrier(connection);
1864 connection->send.current_epoch_nr++;
1865 }
1866 schedule();
1867 /* may be woken up for things other than new work, too,
1868 * e.g. if the current epoch got closed,
1869 * in which case we send the barrier above. */
1870 }
1871 finish_wait(&connection->sender_work.q_wait, &wait);
1872
1873 /* someone may have changed the config while we have been waiting above. */
1874 rcu_read_lock();
1875 nc = rcu_dereference(connection->net_conf);
1876 cork = nc ? nc->tcp_cork : 0;
1877 rcu_read_unlock();
1878 mutex_lock(&connection->data.mutex);
1879 if (connection->data.socket) {
1880 if (cork)
1881 drbd_tcp_cork(connection->data.socket);
1882 else if (!uncork)
1883 drbd_tcp_uncork(connection->data.socket);
1884 }
1885 mutex_unlock(&connection->data.mutex);
1886}
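/*
 * Illustrative sketch (not driver code): the cork/uncork handling in
 * wait_for_work() is the usual Linux TCP_CORK pattern - keep the socket
 * corked while a batch of packets is being queued, uncork when going idle
 * so partial frames are flushed.  A plain socket-level equivalent with a
 * hypothetical ex_* helper (error handling omitted):
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static void ex_tcp_cork(int fd, int on)
{
        /* on = 1: hold back partial frames; on = 0: push out whatever is pending */
        (void)setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
}

/* typical use around a batch of related sends:
 *
 *      ex_tcp_cork(fd, 1);
 *      send(fd, hdr, hdr_len, 0);
 *      send(fd, payload, payload_len, 0);
 *      ex_tcp_cork(fd, 0);
 */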
1887
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888int drbd_worker(struct drbd_thread *thi)
1889{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001890 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001891 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001892 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001893 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001894 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001895
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001896 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001897 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001898
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001899 /* as long as we use drbd_queue_work_front(),
1900 * we may only dequeue single work items here, not batches. */
1901 if (list_empty(&work_list))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001902 wait_for_work(connection, &work_list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001903
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001904 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001905 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001906 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001907 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001908 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001909 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001910 break;
1911 }
1912
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001913 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001914 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001916 while (!list_empty(&work_list)) {
1917 w = list_first_entry(&work_list, struct drbd_work, list);
1918 list_del_init(&w->list);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001919 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001920 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001921 if (connection->cstate >= C_WF_REPORT_PARAMS)
1922 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001923 }
1924 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001925
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001926 do {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001927 while (!list_empty(&work_list)) {
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001928 w = list_first_entry(&work_list, struct drbd_work, list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929 list_del_init(&w->list);
Philipp Reisner00d56942011-02-09 18:09:48 +01001930 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001931 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001932 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001933 } while (!list_empty(&work_list));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001935 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001936 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1937 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001938 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001939 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001940 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001941 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001942 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001943 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01001944 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001945 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001946
1947 return 0;
1948}