/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int w_make_ov_request(struct drbd_work *w, int cancel);


/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the resync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that.  */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
        struct drbd_md_io *md_io;
        struct drbd_device *device;

        md_io = (struct drbd_md_io *)bio->bi_private;
        device = container_of(md_io, struct drbd_device, md_io);

        md_io->error = error;

        /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
         * to timeout on the lower level device, and eventually detach from it.
         * If this io completion runs after that timeout expired, this
         * drbd_md_put_buffer() may allow us to finally try and re-attach.
         * During normal operation, this only puts that extra reference
         * down to 1 again.
         * Make sure we first drop the reference, and only then signal
         * completion, or we may (in drbd_al_read_log()) cycle so fast into the
         * next drbd_md_sync_page_io(), that we trigger the
         * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
         */
        drbd_md_put_buffer(device);
        md_io->done = 1;
        wake_up(&device->misc_wait);
        bio_put(bio);
        if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
                put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_device *device = peer_req->w.device;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->read_cnt += peer_req->i.size >> 9;
        list_del(&peer_req->w.list);
        if (list_empty(&device->read_ee))
                wake_up(&device->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(device, DRBD_READ_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        drbd_queue_work(&first_peer_device(device)->connection->sender_work, &peer_req->w);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
        unsigned long flags = 0;
        struct drbd_device *device = peer_req->w.device;
        struct drbd_interval i;
        int do_wake;
        u64 block_id;
        int do_al_complete_io;

        /* after we moved peer_req to done_ee,
         * we may no longer access it,
         * it may be freed/reused already!
         * (as soon as we release the req_lock) */
        i = peer_req->i;
        do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
        block_id = peer_req->block_id;

        spin_lock_irqsave(&device->resource->req_lock, flags);
        device->writ_cnt += peer_req->i.size >> 9;
        list_move_tail(&peer_req->w.list, &device->done_ee);

        /*
         * Do not remove from the write_requests tree here: we did not send the
         * Ack yet and did not wake possibly waiting conflicting requests.
         * Removed from the tree from "drbd_process_done_ee" within the
         * appropriate w.cb (e_end_block/e_end_resync_block) or from
         * _drbd_clear_done_ee.
         */

        do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

        if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
                __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);

        if (block_id == ID_SYNCER)
                drbd_rs_complete_io(device, i.sector);

        if (do_wake)
                wake_up(&device->ee_wait);

        if (do_al_complete_io)
                drbd_al_complete_io(device, &i);

        wake_asender(first_peer_device(device)->connection);
        put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
        struct drbd_peer_request *peer_req = bio->bi_private;
        struct drbd_device *device = peer_req->w.device;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;

        if (error && __ratelimit(&drbd_ratelimit_state))
                drbd_warn(device, "%s: error=%d s=%llus\n",
                                is_write ? "write" : "read", error,
                                (unsigned long long)peer_req->i.sector);
        if (!error && !uptodate) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
                                        is_write ? "write" : "read",
                                        (unsigned long long)peer_req->i.sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        if (error)
                set_bit(__EE_WAS_ERROR, &peer_req->flags);

        bio_put(bio); /* no need for the bio anymore */
        if (atomic_dec_and_test(&peer_req->pending_bios)) {
                if (is_write)
                        drbd_endio_write_sec_final(peer_req);
                else
                        drbd_endio_read_sec_final(peer_req);
        }
}

/* read, read-ahead or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
        unsigned long flags;
        struct drbd_request *req = bio->bi_private;
        struct drbd_device *device = req->w.device;
        struct bio_and_error m;
        enum drbd_req_event what;
        int uptodate = bio_flagged(bio, BIO_UPTODATE);

        if (!error && !uptodate) {
                drbd_warn(device, "p %s: setting error to -EIO\n",
                        bio_data_dir(bio) == WRITE ? "write" : "read");
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
                error = -EIO;
        }

        /* If this request was aborted locally before,
         * but now was completed "successfully",
         * chances are that this caused arbitrary data corruption.
         *
         * "aborting" requests, or force-detaching the disk, is intended for
         * completely blocked/hung local backing devices which no longer
         * complete requests at all, not even error completions.  In this
         * situation, usually a hard-reset and failover is the only way out.
         *
         * By "aborting", basically faking a local error-completion,
         * we allow for a more graceful switchover by cleanly migrating services.
         * Still the affected node has to be rebooted "soon".
         *
         * By completing these requests, we allow the upper layers to re-use
         * the associated data pages.
         *
         * If later the local backing device "recovers", and now DMAs some data
         * from disk into the original request pages, in the best case it will
         * just put random data into unused pages; but typically it will corrupt
         * meanwhile completely unrelated data, causing all sorts of damage.
         *
         * Which means delayed successful completion,
         * especially for READ requests,
         * is a reason to panic().
         *
         * We assume that a delayed *error* completion is OK,
         * though we still will complain noisily about it.
         */
        if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

                if (!error)
                        panic("possible random memory corruption caused by delayed completion of aborted local request\n");
        }

        /* to avoid recursion in __req_mod */
        if (unlikely(error)) {
                what = (bio_data_dir(bio) == WRITE)
                        ? WRITE_COMPLETED_WITH_ERROR
                        : (bio_rw(bio) == READ)
                          ? READ_COMPLETED_WITH_ERROR
                          : READ_AHEAD_COMPLETED_WITH_ERROR;
        } else
                what = COMPLETED_OK;

        bio_put(req->private_bio);
        req->private_bio = ERR_PTR(error);

        /* not req_mod(), we need irqsave here! */
        spin_lock_irqsave(&device->resource->req_lock, flags);
        __req_mod(req, what, &m);
        spin_unlock_irqrestore(&device->resource->req_lock, flags);
        put_ldev(device);

        if (m.bio)
                complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct page *page = peer_req->pages;
        struct page *tmp;
        unsigned len;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        while ((tmp = page_chain_next(page))) {
                /* all but the last page will be fully used */
                sg_set_page(&sg, page, PAGE_SIZE, 0);
                crypto_hash_update(&desc, &sg, sg.length);
                page = tmp;
        }
        /* and now the last, possibly only partially used page */
        len = peer_req->i.size & (PAGE_SIZE - 1);
        sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
        crypto_hash_update(&desc, &sg, sg.length);
        crypto_hash_final(&desc, digest);
}

void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
        struct hash_desc desc;
        struct scatterlist sg;
        struct bio_vec bvec;
        struct bvec_iter iter;

        desc.tfm = tfm;
        desc.flags = 0;

        sg_init_table(&sg, 1);
        crypto_hash_init(&desc);

        bio_for_each_segment(bvec, bio, iter) {
                sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                crypto_hash_update(&desc, &sg, sg.length);
        }
        crypto_hash_final(&desc, digest);
}

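/*
 * Checksum-based resync, roughly: w_e_send_csum() below hashes the local
 * data of a peer request with the connection's csums_tfm (via drbd_csum_ee()
 * above) and sends only the digest in a P_CSUM_RS_REQUEST.  The peer compares
 * the digest against its own copy of that block and replies with the actual
 * data only when the digests differ, so blocks that are already identical
 * never cross the wire.  (Sketch of the intent; the reply handling itself
 * lives outside this excerpt.)
 */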
/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_device *device = w->device;
        int digest_size;
        void *digest;
        int err = 0;

        if (unlikely(cancel))
                goto out;

        if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
                goto out;

        digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
        digest = kmalloc(digest_size, GFP_NOIO);
        if (digest) {
                sector_t sector = peer_req->i.sector;
                unsigned int size = peer_req->i.size;
                drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
                /* Free peer_req and pages before send.
                 * In case we block on congestion, we could otherwise run into
                 * some distributed deadlock, if the other side blocks on
                 * congestion as well, because our receiver blocks in
                 * drbd_alloc_pages due to pp_in_use > max_buffers. */
                drbd_free_peer_req(device, peer_req);
                peer_req = NULL;
                inc_rs_pending(device);
                err = drbd_send_drequest_csum(first_peer_device(device), sector, size,
                                              digest, digest_size,
                                              P_CSUM_RS_REQUEST);
                kfree(digest);
        } else {
                drbd_err(device, "kmalloc() of digest failed.\n");
                err = -ENOMEM;
        }

out:
        if (peer_req)
                drbd_free_peer_req(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
        return err;
}

#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
        struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;

        if (!get_ldev(device))
                return -EIO;

        if (drbd_rs_should_slow_down(device, sector))
                goto defer;

        /* GFP_TRY, because if there is no memory available right now, this may
         * be rescheduled for later. It is "only" background resync, after all. */
        peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
                                       size, GFP_TRY);
        if (!peer_req)
                goto defer;

        peer_req->w.cb = w_e_send_csum;
        spin_lock_irq(&device->resource->req_lock);
        list_add(&peer_req->w.list, &device->read_ee);
        spin_unlock_irq(&device->resource->req_lock);

        atomic_add(size >> 9, &device->rs_sect_ev);
        if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;

        /* If it failed because of ENOMEM, retry should help.  If it failed
         * because bio_add_page failed (probably broken lower level driver),
         * retry may or may not help.
         * If it does not, you may need to force disconnect. */
        spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
        spin_unlock_irq(&device->resource->req_lock);

        drbd_free_peer_req(device, peer_req);
defer:
        put_ldev(device);
        return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        switch (device->state.conn) {
        case C_VERIFY_S:
                w_make_ov_request(w, cancel);
                break;
        case C_SYNC_TARGET:
                w_make_resync_request(w, cancel);
                break;
        }

        return 0;
}

void resync_timer_fn(unsigned long data)
{
        struct drbd_device *device = (struct drbd_device *) data;

        if (list_empty(&device->resync_work.list))
                drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
        int ov;

        ov = fb->values[fb->head_index];
        fb->values[fb->head_index++] = value;

        if (fb->head_index >= fb->size)
                fb->head_index = 0;

        return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
        int i;

        for (i = 0; i < fb->size; i++)
                fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
        struct fifo_buffer *fb;

        fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
        if (!fb)
                return NULL;

        fb->head_index = 0;
        fb->size = fifo_size;
        fb->total = 0;

        return fb;
}

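/*
 * Resync rate controller, in outline: once per SLEEP_TIME it looks at how
 * many resync sectors were confirmed since the last turn (rs_sect_in),
 * decides how many sectors it would like to have in flight (either the
 * configured c_fill_target, or an amount derived from c_delay_target and
 * the observed rate), and spreads the difference over the plan FIFO above
 * so the request rate converges gradually instead of oscillating; c_max_rate
 * caps the result.  This is a sketch of the intent only; the authoritative
 * details are the calculations in drbd_rs_controller() below.
 */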
static int drbd_rs_controller(struct drbd_device *device)
{
        struct disk_conf *dc;
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        unsigned int want;     /* The number of sectors we want in the proxy */
        int req_sect; /* Number of sectors to request in this turn */
        int correction; /* Number of sectors more we need in the proxy*/
        int cps; /* correction per invocation of drbd_rs_controller() */
        int steps; /* Number of time steps to plan ahead */
        int curr_corr;
        int max_sect;
        struct fifo_buffer *plan;

        sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
        device->rs_in_flight -= sect_in;

        dc = rcu_dereference(device->ldev->disk_conf);
        plan = rcu_dereference(device->rs_plan_s);

        steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

        if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
                want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
        } else { /* normal path */
                want = dc->c_fill_target ? dc->c_fill_target :
                        sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
        }

        correction = want - device->rs_in_flight - plan->total;

        /* Plan ahead */
        cps = correction / steps;
        fifo_add_val(plan, cps);
        plan->total += cps * steps;

        /* What we do in this step */
        curr_corr = fifo_push(plan, 0);
        plan->total -= curr_corr;

        req_sect = sect_in + curr_corr;
        if (req_sect < 0)
                req_sect = 0;

        max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
        if (req_sect > max_sect)
                req_sect = max_sect;

        /*
        drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
                 sect_in, device->rs_in_flight, want, correction,
                 steps, cps, device->rs_planed, curr_corr, req_sect);
        */

        return req_sect;
}

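/*
 * Convert the controller output (sectors) into a number of bitmap-block
 * sized requests for this SLEEP_TIME cycle.  As a rough worked example,
 * assuming the usual 4 KiB BM_BLOCK_SIZE (BM_BLOCK_SHIFT == 12, so the
 * shift below is 3): a controller result of 2048 sectors (1 MiB) becomes
 * 2048 >> 3 == 256 requests, and c_sync_rate is reported back in KiB/s
 * accordingly.
 */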
static int drbd_rs_number_requests(struct drbd_device *device)
{
        int number;

        rcu_read_lock();
        if (rcu_dereference(device->rs_plan_s)->size) {
                number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
                device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
        } else {
                device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
                number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
        }
        rcu_read_unlock();

        /* ignore the amount of pending requests, the resync controller should
         * throttle down to incoming reply rate soon enough anyways. */
        return number;
}

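/*
 * One resync work cycle, in short: ask drbd_rs_number_requests() for a
 * budget, walk the out-of-sync bitmap with drbd_bm_find_next(), merge
 * adjacent dirty bits into one request up to max_bio_size, and then either
 * ask for a checksum comparison (read_for_csum(), protocol >= 89 with
 * csums_tfm configured) or request the data directly with
 * P_RS_DATA_REQUEST.  When the budget is used up, or the send buffer is
 * half full, re-arm the resync timer and try again after SLEEP_TIME.
 */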
int w_make_resync_request(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        unsigned long bit;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        int max_bio_size;
        int number, rollback_i, size;
        int align, queued, sndbuf;
        int i = 0;

        if (unlikely(cancel))
                return 0;

        if (device->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(device);
                return 0;
        }

        if (!get_ldev(device)) {
                /* Since we only need to access device->rsync a
                   get_ldev_if_state(device,D_FAILED) would be sufficient, but
                   to continue resync with a broken disk makes no sense at
                   all */
                drbd_err(device, "Disk broke down during resync!\n");
                return 0;
        }

        max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
        number = drbd_rs_number_requests(device);
        if (number == 0)
                goto requeue;

        for (i = 0; i < number; i++) {
                /* Stop generating RS requests, when half of the send buffer is filled */
                mutex_lock(&first_peer_device(device)->connection->data.mutex);
                if (first_peer_device(device)->connection->data.socket) {
                        queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
                        sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
                } else {
                        queued = 1;
                        sndbuf = 0;
                }
                mutex_unlock(&first_peer_device(device)->connection->data.mutex);
                if (queued > sndbuf / 2)
                        goto requeue;

next_sector:
                size = BM_BLOCK_SIZE;
                bit = drbd_bm_find_next(device, device->bm_resync_fo);

                if (bit == DRBD_END_OF_BITMAP) {
                        device->bm_resync_fo = drbd_bm_bits(device);
                        put_ldev(device);
                        return 0;
                }

                sector = BM_BIT_TO_SECT(bit);

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->bm_resync_fo = bit;
                        goto requeue;
                }
                device->bm_resync_fo = bit + 1;

                if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
                        drbd_rs_complete_io(device, sector);
                        goto next_sector;
                }

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
                /* try to find some adjacent bits.
                 * we stop if we have already the maximum req size.
                 *
                 * Additionally always align bigger requests, in order to
                 * be prepared for all stripe sizes of software RAIDs.
                 */
                align = 1;
                rollback_i = i;
                for (;;) {
                        if (size + BM_BLOCK_SIZE > max_bio_size)
                                break;

                        /* Be always aligned */
                        if (sector & ((1<<(align+3))-1))
                                break;

                        /* do not cross extent boundaries */
                        if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
                                break;
                        /* now, is it actually dirty, after all?
                         * caution, drbd_bm_test_bit is tri-state for some
                         * obscure reason; ( b == 0 ) would get the out-of-band
                         * only accidentally right because of the "oddly sized"
                         * adjustment below */
                        if (drbd_bm_test_bit(device, bit+1) != 1)
                                break;
                        bit++;
                        size += BM_BLOCK_SIZE;
                        if ((BM_BLOCK_SIZE << align) <= size)
                                align++;
                        i++;
                }
                /* if we merged some,
                 * reset the offset to start the next drbd_bm_find_next from */
                if (size > BM_BLOCK_SIZE)
                        device->bm_resync_fo = bit + 1;
#endif

                /* adjust very last sectors, in case we are oddly sized */
                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;
                if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
                    first_peer_device(device)->connection->csums_tfm) {
                        switch (read_for_csum(first_peer_device(device), sector, size)) {
                        case -EIO: /* Disk failure */
                                put_ldev(device);
                                return -EIO;
                        case -EAGAIN: /* allocation failed, or ldev busy */
                                drbd_rs_complete_io(device, sector);
                                device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                                i = rollback_i;
                                goto requeue;
                        case 0:
                                /* everything ok */
                                break;
                        default:
                                BUG();
                        }
                } else {
                        int err;

                        inc_rs_pending(device);
                        err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
                                                 sector, size, ID_SYNCER);
                        if (err) {
                                drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
                                dec_rs_pending(device);
                                put_ldev(device);
                                return err;
                        }
                }
        }

        if (device->bm_resync_fo >= drbd_bm_bits(device)) {
                /* last syncer _request_ was sent,
                 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
                 * next sync group will resume), as soon as we receive the last
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
                put_ldev(device);
                return 0;
        }

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        put_ldev(device);
        return 0;
}

static int w_make_ov_request(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        int number, i, size;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(device->this_bdev);
        bool stop_sector_reached = false;

        if (unlikely(cancel))
                return 1;

        number = drbd_rs_number_requests(device);

        sector = device->ov_position;
        for (i = 0; i < number; i++) {
                if (sector >= capacity)
                        return 1;

                /* We check for "finished" only in the reply path:
                 * w_e_end_ov_reply().
                 * We need to send at least one request out. */
                stop_sector_reached = i > 0
                        && verify_can_do_stop_sector(device)
                        && sector >= device->ov_stop_sector;
                if (stop_sector_reached)
                        break;

                size = BM_BLOCK_SIZE;

                if (drbd_rs_should_slow_down(device, sector) ||
                    drbd_try_rs_begin_io(device, sector)) {
                        device->ov_position = sector;
                        goto requeue;
                }

                if (sector + (size>>9) > capacity)
                        size = (capacity-sector)<<9;

                inc_rs_pending(device);
                if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
                        dec_rs_pending(device);
                        return 0;
                }
                sector += BM_SECT_PER_BIT;
        }
        device->ov_position = sector;

 requeue:
        device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        if (i == 0 || !stop_sector_reached)
                mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
        return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        kfree(w);
        ov_out_of_sync_print(device);
        drbd_resync_finished(device);

        return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
        struct drbd_device *device = w->device;
        kfree(w);

        drbd_resync_finished(device);

        return 0;
}

static void ping_peer(struct drbd_device *device)
{
        struct drbd_connection *connection = first_peer_device(device)->connection;

        clear_bit(GOT_PING_ACK, &connection->flags);
        request_ping(connection);
        wait_event(connection->ping_wait,
                   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

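/*
 * Called when a resync or online verify run ends.  Broadly: flush the
 * resync LRU (re-queueing itself from worker context if that is not yet
 * possible), log throughput statistics, move the connection state back to
 * C_CONNECTED with disk/pdsk set according to rs_failed, bring the UUID
 * sets in sync after a successful resync, and finally trigger the matching
 * user-space helper ("out-of-sync" / "after-resync-target") if one applies.
 */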
int drbd_resync_finished(struct drbd_device *device)
{
        unsigned long db, dt, dbdt;
        unsigned long n_oos;
        union drbd_state os, ns;
        struct drbd_work *w;
        char *khelper_cmd = NULL;
        int verify_done = 0;

        /* Remove all elements from the resync LRU. Since future actions
         * might set bits in the (main) bitmap, then the entries in the
         * resync LRU would be wrong. */
        if (drbd_rs_del_all(device)) {
                /* In case this is not possible now, most probably because
                 * there are P_RS_DATA_REPLY Packets lingering on the worker's
                 * queue (or even the read operations for those packets
                 * is not finished by now). Retry in 100ms. */

                schedule_timeout_interruptible(HZ / 10);
                w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
                if (w) {
                        w->cb = w_resync_finished;
                        w->device = device;
                        drbd_queue_work(&first_peer_device(device)->connection->sender_work, w);
                        return 1;
                }
                drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
        }

        dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
        if (dt <= 0)
                dt = 1;

        db = device->rs_total;
        /* adjust for verify start and stop sectors, respective reached position */
        if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
                db -= device->ov_left;

        dbdt = Bit2KB(db/dt);
        device->rs_paused /= HZ;

        if (!get_ldev(device))
                goto out;

        ping_peer(device);

        spin_lock_irq(&device->resource->req_lock);
        os = drbd_read_state(device);

        verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

        /* This protects us against multiple calls (that can happen in the presence
           of application IO), and against connectivity loss just before we arrive here. */
        if (os.conn <= C_CONNECTED)
                goto out_unlock;

        ns = os;
        ns.conn = C_CONNECTED;

        drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
             verify_done ? "Online verify" : "Resync",
             dt + device->rs_paused, device->rs_paused, dbdt);

        n_oos = drbd_bm_total_weight(device);

        if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
                if (n_oos) {
                        drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
                              n_oos, Bit2KB(1));
                        khelper_cmd = "out-of-sync";
                }
        } else {
                D_ASSERT(device, (n_oos - device->rs_failed) == 0);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
                        khelper_cmd = "after-resync-target";

                if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
                        const unsigned long s = device->rs_same_csum;
                        const unsigned long t = device->rs_total;
                        const int ratio =
                                (t == 0)     ? 0 :
                                (t < 100000) ? ((s*100)/t) : (s/(t/100));
                        drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
                             "transferred %luK total %luK\n",
                             ratio,
                             Bit2KB(device->rs_same_csum),
                             Bit2KB(device->rs_total - device->rs_same_csum),
                             Bit2KB(device->rs_total));
                }
        }

        if (device->rs_failed) {
                drbd_info(device, " %lu failed blocks\n", device->rs_failed);

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        ns.disk = D_INCONSISTENT;
                        ns.pdsk = D_UP_TO_DATE;
                } else {
                        ns.disk = D_UP_TO_DATE;
                        ns.pdsk = D_INCONSISTENT;
                }
        } else {
                ns.disk = D_UP_TO_DATE;
                ns.pdsk = D_UP_TO_DATE;

                if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
                        if (device->p_uuid) {
                                int i;
                                for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
                                        _drbd_uuid_set(device, i, device->p_uuid[i]);
                                drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
                                _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
                        } else {
                                drbd_err(device, "device->p_uuid is NULL! BUG\n");
                        }
                }

                if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
                        /* for verify runs, we don't update uuids here,
                         * so there would be nothing to report. */
                        drbd_uuid_set_bm(device, 0UL);
                        drbd_print_uuids(device, "updated UUIDs");
                        if (device->p_uuid) {
                                /* Now the two UUID sets are equal, update what we
                                 * know of the peer. */
                                int i;
                                for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
                                        device->p_uuid[i] = device->ldev->md.uuid[i];
                        }
                }
        }

        _drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
        spin_unlock_irq(&device->resource->req_lock);
        put_ldev(device);
out:
        device->rs_total = 0;
        device->rs_failed = 0;
        device->rs_paused = 0;

        /* reset start sector, if we reached end of device */
        if (verify_done && device->ov_left == 0)
                device->ov_start_sector = 0;

        drbd_md_sync(device);

        if (khelper_cmd)
                drbd_khelper(device, khelper_cmd);

        return 1;
}

/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
        if (drbd_peer_req_has_active_page(peer_req)) {
                /* This might happen if sendpage() has not finished */
                int i = (peer_req->i.size + PAGE_SIZE - 1) >> PAGE_SHIFT;
                atomic_add(i, &device->pp_in_use_by_net);
                atomic_sub(i, &device->pp_in_use);
                spin_lock_irq(&device->resource->req_lock);
                list_add_tail(&peer_req->w.list, &device->net_ee);
                spin_unlock_irq(&device->resource->req_lock);
                wake_up(&drbd_pp_wait);
        } else
                drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @device:     DRBD device.
 * @w:          work object.
 * @cancel:     The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
        struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
        struct drbd_device *device = w->device;
        int err;

        if (unlikely(cancel)) {
                drbd_free_peer_req(device, peer_req);
                dec_unacked(device);
                return 0;
        }

        if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
                err = drbd_send_block(first_peer_device(device), P_DATA_REPLY, peer_req);
        } else {
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_err(device, "Sending NegDReply. sector=%llus.\n",
                                 (unsigned long long)peer_req->i.sector);

                err = drbd_send_ack(first_peer_device(device), P_NEG_DREPLY, peer_req);
        }

        dec_unacked(device);

        move_to_net_ee_or_free(device, peer_req);

        if (unlikely(err))
                drbd_err(device, "drbd_send_block() failed\n");
        return err;
}

1019/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001020 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001022 * @w:		work object, embedded in a struct drbd_peer_request.
 1023 * @cancel:	The connection will be closed anyway; just clean up.
1024 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001025int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001027 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001028 struct drbd_device *device = w->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001029 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001030
1031 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001032 drbd_free_peer_req(device, peer_req);
1033 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001034 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001035 }
1036
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001037 if (get_ldev_if_state(device, D_FAILED)) {
1038 drbd_rs_complete_io(device, peer_req->i.sector);
1039 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040 }
1041
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001042 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001043 err = drbd_send_ack(first_peer_device(device), P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001044 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001045 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1046 inc_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001047 err = drbd_send_block(first_peer_device(device), P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048 } else {
1049 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001050 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001051 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001052 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001053 }
1054 } else {
1055 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001056 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001057 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001058
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001059 err = drbd_send_ack(first_peer_device(device), P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001060
1061 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001062 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001063 }
1064
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001065 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001067 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001069 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001070 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001071 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072}
1073
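/* Worker callback for a checksum based resync request (using csums_tfm):
 * recompute the digest of the local block and compare it with the digest
 * received from the peer.  If they match, mark the block in sync and send
 * P_RS_IS_IN_SYNC; otherwise send the full block as P_RS_DATA_REPLY.
 * A local read error results in a P_NEG_RS_DREPLY. */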
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001074int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001075{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001076 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001077 struct drbd_device *device = w->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001078 struct digest_info *di;
1079 int digest_size;
1080 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001081 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082
1083 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001084 drbd_free_peer_req(device, peer_req);
1085 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001086 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087 }
1088
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001089 if (get_ldev(device)) {
1090 drbd_rs_complete_io(device, peer_req->i.sector);
1091 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001092 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001094 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001095
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001096 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097 /* quick hack to try to avoid a race against reconfiguration.
1098 * a real fix would be much more involved,
1099 * introducing more locking mechanisms */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001100 if (first_peer_device(device)->connection->csums_tfm) {
1101 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001102 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001103 digest = kmalloc(digest_size, GFP_NOIO);
1104 }
1105 if (digest) {
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001106 drbd_csum_ee(first_peer_device(device)->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001107 eq = !memcmp(digest, di->digest, digest_size);
1108 kfree(digest);
1109 }
1110
1111 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001112 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001113 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001114 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001115 err = drbd_send_ack(first_peer_device(device), P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001116 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001117 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001118 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1119 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001120 kfree(di);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001121 err = drbd_send_block(first_peer_device(device), P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001122 }
1123 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001124 err = drbd_send_ack(first_peer_device(device), P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001126 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 }
1128
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001129 dec_unacked(device);
1130 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001132 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001133 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001134 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001135}
1136
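/* Worker callback for an online verify request we are asked to answer:
 * compute the digest over the local block (all zeroes if the read failed)
 * and send it back as P_OV_REPLY.  The peer request and its pages are
 * freed before sending to avoid a distributed deadlock on drbd_alloc_pages
 * when both sides are congested. */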
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001137int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001139 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001140 struct drbd_device *device = w->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001141 sector_t sector = peer_req->i.sector;
1142 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143 int digest_size;
1144 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001145 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146
1147 if (unlikely(cancel))
1148 goto out;
1149
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001150 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001151 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001152 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001153 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001154 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001155 }
1156
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001157 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001158 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001159 else
1160 memset(digest, 0, digest_size);
1161
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001162	/* Free peer_req and pages before send.
1163 * In case we block on congestion, we could otherwise run into
1164 * some distributed deadlock, if the other side blocks on
1165 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001166 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001167 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001168 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001169 inc_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001170 err = drbd_send_drequest_csum(first_peer_device(device), sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001171 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001172 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001173 kfree(digest);
1174
Philipp Reisnerb411b362009-09-25 16:07:19 -07001175out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001176 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001177 drbd_free_peer_req(device, peer_req);
1178 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001179 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180}
1181
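/* Record a block found out of sync during online verify: extend the
 * current contiguous range (ov_last_oos_start/ov_last_oos_size, in
 * 512-byte sectors) or start a new one, and set the bits in the bitmap. */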
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001182void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001184 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1185 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001187 device->ov_last_oos_start = sector;
1188 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001189 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001190 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191}
1192
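/* Worker callback for a P_OV_REPLY from the peer: recompute the local
 * digest, compare it with the received one, record mismatches as out of
 * sync, answer with P_OV_RESULT, and finish the verify run once ov_left
 * reaches zero or the configured stop sector is passed. */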
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001193int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001195 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001196 struct drbd_device *device = w->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001199 sector_t sector = peer_req->i.sector;
1200 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001201 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001202 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001203 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001204
1205 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001206 drbd_free_peer_req(device, peer_req);
1207 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001208 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 }
1210
1211 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1212 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001213 if (get_ldev(device)) {
1214 drbd_rs_complete_io(device, peer_req->i.sector);
1215 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001216 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001218 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001220 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001221 digest_size = crypto_hash_digestsize(first_peer_device(device)->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 digest = kmalloc(digest_size, GFP_NOIO);
1223 if (digest) {
Andreas Gruenbacher79a3c8d2011-08-09 02:49:01 +02001224 drbd_csum_ee(first_peer_device(device)->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001226 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001227 eq = !memcmp(digest, di->digest, digest_size);
1228 kfree(digest);
1229 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230 }
1231
Lars Ellenberg9676c762011-02-22 14:02:31 +01001232 /* Free peer_req and pages before send.
1233 * In case we block on congestion, we could otherwise run into
1234 * some distributed deadlock, if the other side blocks on
1235 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001236 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001237 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001239 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001241 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001243 err = drbd_send_ack_ex(first_peer_device(device), P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001244 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001246 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001247
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001248 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001249
1250 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001251 if ((device->ov_left & 0x200) == 0x200)
1252 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001253
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001254 stop_sector_reached = verify_can_do_stop_sector(device) &&
1255 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001256
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001257 if (device->ov_left == 0 || stop_sector_reached) {
1258 ov_out_of_sync_print(device);
1259 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260 }
1261
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001262 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263}
1264
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001265int w_prev_work_done(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266{
1267 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001268
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 complete(&b->done);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001270 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271}
1272
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001273/* FIXME
1274 * We need to track the number of pending barrier acks,
1275 * and to be able to wait for them.
1276 * See also comment in drbd_adm_attach before drbd_suspend_io.
1277 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001278static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001280 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001281 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001283 sock = &connection->data;
1284 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001285 if (!p)
1286 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001287 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001288 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001289 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001290
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001291 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001292}
1293
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001294int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001296 struct drbd_device *device = w->device;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001297 struct drbd_socket *sock;
1298
Philipp Reisnerb411b362009-09-25 16:07:19 -07001299 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001300 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001301 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001302 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001303 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001304 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001305}
1306
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001307static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001308{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001309 if (!connection->send.seen_any_write_yet) {
1310 connection->send.seen_any_write_yet = true;
1311 connection->send.current_epoch_nr = epoch;
1312 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001313 }
1314}
1315
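/* Close the current epoch with a P_BARRIER before a request from a newer
 * epoch goes out, but only if the current epoch actually saw writes.
 * Before the very first write there is nothing to close (see
 * re_init_if_first_write() above). */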
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001316static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001317{
 1318	/* nothing to do before the first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001319 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001320 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001321 if (connection->send.current_epoch_nr != epoch) {
1322 if (connection->send.current_epoch_writes)
1323 drbd_send_barrier(connection);
1324 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001325 }
1326}
1327
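/* Worker callback used while in AHEAD mode: instead of mirroring the write
 * data, only a P_OUT_OF_SYNC notification is sent, so the block can be
 * resynchronized after we leave AHEAD mode again. */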
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001328int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001329{
1330 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001331 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001332 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001333 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001334
1335 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001336 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001337 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001338 }
1339
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001340 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001341 * If it was sent, it was the closing barrier for the last
1342 * replicated epoch, before we went into AHEAD mode.
1343 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001344 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001345
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001346 err = drbd_send_out_of_sync(first_peer_device(device), req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001347 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001348
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001349 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001350}
1351
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352/**
1353 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 * @w:		work object, embedded in a struct drbd_request.
 1356 * @cancel:	The connection will be closed anyway; the request is canceled instead of sent.
1357 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001358int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359{
1360 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001361 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001362 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001363 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001364
1365 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001366 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001367 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001368 }
1369
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001370 re_init_if_first_write(connection, req->epoch);
1371 maybe_send_barrier(connection, req->epoch);
1372 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001373
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001374 err = drbd_send_dblock(first_peer_device(device), req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001375 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001376
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001377 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378}
1379
1380/**
1381 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383 * @w:		work object, embedded in a struct drbd_request.
 1384 * @cancel:	The connection will be closed anyway; the request is canceled instead of sent.
1385 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001386int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387{
1388 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001389 struct drbd_device *device = w->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001390 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001391 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001392
1393 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001394 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001395 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396 }
1397
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001398 /* Even read requests may close a write epoch,
 1399	 * if one was already open. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001400 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001401
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001402 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001403 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001404
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001405 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001406
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001407 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408}
1409
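/* Re-submit the master bio to the local backing device (e.g. when frozen
 * I/O is resumed); writes that are in the activity log get their AL extent
 * re-activated first. */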
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001410int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001411{
1412 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001413 struct drbd_device *device = w->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001414
Philipp Reisner07782862010-08-31 12:00:50 +02001415 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001416 drbd_al_begin_io(device, &req->i, false);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001417
1418 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001419 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001420 generic_make_request(req->private_bio);
1421
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001422 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001423}
1424
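/* Walk the resync-after dependency chain (disk_conf->resync_after) and
 * return 1 if this device may resync now, 0 if a device it depends on is
 * itself resyncing (or paused) or has its resync suspended. */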
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001425static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001427 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001428 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001429
1430 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001431 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001432 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001433 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001434 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001435 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001436 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001438 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001439 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001440 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001441 if ((odev->state.conn >= C_SYNC_SOURCE &&
1442 odev->state.conn <= C_PAUSED_SYNC_T) ||
1443 odev->state.aftr_isp || odev->state.peer_isp ||
1444 odev->state.user_isp)
1445 return 0;
1446 }
1447}
1448
1449/**
1450 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001451 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 *
1453 * Called from process context only (admin command and after_state_ch).
1454 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001455static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001457 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458 int i, rv = 0;
1459
Philipp Reisner695d08f2011-04-11 22:53:32 -07001460 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001461 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1463 continue;
1464 if (!_drbd_may_sync_now(odev))
1465 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1466 != SS_NOTHING_TO_DO);
1467 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001468 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469
1470 return rv;
1471}
1472
1473/**
1474 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001475 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 *
1477 * Called from process context only (admin command and worker).
1478 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001479static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001481 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 int i, rv = 0;
1483
Philipp Reisner695d08f2011-04-11 22:53:32 -07001484 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001485 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1487 continue;
1488 if (odev->state.aftr_isp) {
1489 if (_drbd_may_sync_now(odev))
1490 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1491 CS_HARD, NULL)
1492 != SS_NOTHING_TO_DO) ;
1493 }
1494 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001495 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001496 return rv;
1497}
1498
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001499void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500{
1501 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001502 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503 write_unlock_irq(&global_state_lock);
1504}
1505
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001506void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507{
1508 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001509 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510 write_unlock_irq(&global_state_lock);
1511}
1512
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001513/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001514enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001516 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001517 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518
1519 if (o_minor == -1)
1520 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001521 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001522 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523
1524 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001525 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001527 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001528 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001529
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001530 /* You are free to depend on diskless, non-existing,
1531 * or not yet/no longer existing minors.
1532 * We only reject dependency loops.
1533 * We cannot follow the dependency chain beyond a detached or
1534 * missing minor.
1535 */
1536 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1537 return NO_ERROR;
1538
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001539 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001540 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001541 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001543 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001544 return NO_ERROR;
1545
1546 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001547 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 }
1549}
1550
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001551/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001552void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553{
1554 int changes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001556 do {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001557 changes = _drbd_pause_after(device);
1558 changes |= _drbd_resume_next(device);
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001559 } while (changes);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560}
1561
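/* Reset the dynamic resync rate controller: clear the rs_sect_in/rs_sect_ev
 * counters and empty the fifo plan, so a new resync starts from a clean
 * state.  Safe from atomic context, see the comment below. */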
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001562void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001563{
Philipp Reisner813472c2011-05-03 16:47:02 +02001564 struct fifo_buffer *plan;
1565
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001566 atomic_set(&device->rs_sect_in, 0);
1567 atomic_set(&device->rs_sect_ev, 0);
1568 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001569
1570 /* Updating the RCU protected object in place is necessary since
1571 this function gets called from atomic context.
 1572	   It is valid since all other updates also lead to a completely
 1573	   empty fifo */
1574 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001575 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001576 plan->total = 0;
1577 fifo_set(plan, 0);
1578 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001579}
1580
Philipp Reisner1f04af32011-02-07 11:33:59 +01001581void start_resync_timer_fn(unsigned long data)
1582{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001583 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001584
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001585 drbd_queue_work(&first_peer_device(device)->connection->sender_work, &device->start_resync_work);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001586}
1587
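/* Worker callback armed by start_resync_timer_fn(): postpone starting the
 * resync while acks or resync replies are still pending, otherwise start
 * it as C_SYNC_SOURCE. */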
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001588int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001589{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001590 struct drbd_device *device = w->device;
Philipp Reisner00d56942011-02-09 18:09:48 +01001591
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001592 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001593 drbd_warn(device, "w_start_resync later...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001594 device->start_resync_timer.expires = jiffies + HZ/10;
1595 add_timer(&device->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001596 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001597 }
1598
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001599 drbd_start_resync(device, C_SYNC_SOURCE);
1600 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001601 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001602}
1603
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604/**
1605 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001606 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001607 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1608 *
1609 * This function might bring you directly into one of the
1610 * C_PAUSED_SYNC_* states.
1611 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001612void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613{
1614 union drbd_state ns;
1615 int r;
1616
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001617 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001618 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619 return;
1620 }
1621
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001622 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001623 if (side == C_SYNC_TARGET) {
1624 /* Since application IO was locked out during C_WF_BITMAP_T and
1625 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
 1626			   we give the before-resync-target handler a chance to abort, since we are about to make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001627 r = drbd_khelper(device, "before-resync-target");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001628 r = (r >> 8) & 0xff;
1629 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001630 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001631 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001632 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001633 return;
1634 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001635 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001636 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001637 r = (r >> 8) & 0xff;
1638 if (r > 0) {
1639 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001640 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001641 "ignoring. Old userland tools?", r);
1642 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001643 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001644 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001645 conn_request_state(first_peer_device(device)->connection,
1646 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001647 return;
1648 }
1649 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001650 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001651 }
1652
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001653 if (current == first_peer_device(device)->connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001654 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001655		   since that can take a long time */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001656 if (!mutex_trylock(device->state_mutex)) {
1657 set_bit(B_RS_H_DONE, &device->flags);
1658 device->start_resync_timer.expires = jiffies + HZ/5;
1659 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001660 return;
1661 }
1662 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001663 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001664 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001665 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001667 write_lock_irq(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001668 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001669 if (device->state.conn < C_CONNECTED
1670 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001671 write_unlock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001672 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673 return;
1674 }
1675
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001676 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001678 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679
1680 ns.conn = side;
1681
1682 if (side == C_SYNC_TARGET)
1683 ns.disk = D_INCONSISTENT;
1684 else /* side == C_SYNC_SOURCE */
1685 ns.pdsk = D_INCONSISTENT;
1686
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001687 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1688 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689
1690 if (ns.conn < C_CONNECTED)
1691 r = SS_UNKNOWN_ERROR;
1692
1693 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001694 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001695 unsigned long now = jiffies;
1696 int i;
1697
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001698 device->rs_failed = 0;
1699 device->rs_paused = 0;
1700 device->rs_same_csum = 0;
1701 device->rs_last_events = 0;
1702 device->rs_last_sect_ev = 0;
1703 device->rs_total = tw;
1704 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001705 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001706 device->rs_mark_left[i] = tw;
1707 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001708 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001709 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001710 }
1711 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001712
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713 if (r == SS_SUCCESS) {
Philipp Reisner328e0f12012-10-19 14:37:47 +02001714 /* reset rs_last_bcast when a resync or verify is started,
1715 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001716 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f12012-10-19 14:37:47 +02001717
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001718 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001720 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1721 (unsigned long) device->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001722 if (side == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001723 device->bm_resync_fo = 0;
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001724
1725 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1726 * with w_send_oos, or the sync target will get confused as to
 1727		 * how many bits to resync. We cannot do that always, because for an
1728 * empty resync and protocol < 95, we need to do it here, as we call
1729 * drbd_resync_finished from here in that case.
1730 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1731 * and from after_state_ch otherwise. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001732 if (side == C_SYNC_SOURCE &&
1733 first_peer_device(device)->connection->agreed_pro_version < 96)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001734 drbd_gen_and_send_sync_uuid(first_peer_device(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001735
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001736 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1737 device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001738 /* This still has a race (about when exactly the peers
1739 * detect connection loss) that can lead to a full sync
1740 * on next handshake. In 8.3.9 we fixed this with explicit
1741 * resync-finished notifications, but the fix
1742 * introduces a protocol change. Sleeping for some
1743 * time longer than the ping interval + timeout on the
1744 * SyncSource, to give the SyncTarget the chance to
1745 * detect connection loss, then waiting for a ping
1746 * response (implicit in drbd_resync_finished) reduces
1747 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001748 if (side == C_SYNC_SOURCE) {
1749 struct net_conf *nc;
1750 int timeo;
1751
1752 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001753 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001754 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1755 rcu_read_unlock();
1756 schedule_timeout_interruptible(timeo);
1757 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001758 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001759 }
1760
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001761 drbd_rs_controller_reset(device);
1762 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001763 * we may have been paused in between, or become paused until
1764 * the timer triggers.
1765 * No matter, that is handled in resync_timer_fn() */
1766 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001767 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001769 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001770 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001771 put_ldev(device);
1772 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773}
1774
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001775/* If the resource already closed the current epoch, but we did not
1776 * (because we have not yet seen new requests), we should send the
1777 * corresponding barrier now. Must be checked within the same spinlock
1778 * that is used to check for new requests. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001779static bool need_to_send_barrier(struct drbd_connection *connection)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001780{
1781 if (!connection->send.seen_any_write_yet)
1782 return false;
1783
1784 /* Skip barriers that do not contain any writes.
1785 * This may happen during AHEAD mode. */
1786 if (!connection->send.current_epoch_writes)
1787 return false;
1788
1789 /* ->req_lock is held when requests are queued on
1790 * connection->sender_work, and put into ->transfer_log.
1791 * It is also held when ->current_tle_nr is increased.
1792 * So either there are already new requests queued,
 1793	 * and corresponding barriers will be sent there.
1794 * Or nothing new is queued yet, so the difference will be 1.
1795 */
1796 if (atomic_read(&connection->current_tle_nr) !=
1797 connection->send.current_epoch_nr + 1)
1798 return false;
1799
1800 return true;
1801}
1802
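/* Move either all queued work items (batch) or only the first one (item)
 * from the given work queue onto the caller's work_list; return whether
 * work_list is non-empty afterwards. */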
Rashika Kheriaa186e472013-12-19 15:06:10 +05301803static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001804{
1805 spin_lock_irq(&queue->q_lock);
1806 list_splice_init(&queue->q, work_list);
1807 spin_unlock_irq(&queue->q_lock);
1808 return !list_empty(work_list);
1809}
1810
Rashika Kheriaa186e472013-12-19 15:06:10 +05301811static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001812{
1813 spin_lock_irq(&queue->q_lock);
1814 if (!list_empty(&queue->q))
1815 list_move(queue->q.next, work_list);
1816 spin_unlock_irq(&queue->q_lock);
1817 return !list_empty(work_list);
1818}
1819
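/* Wait for the next work item on connection->sender_work.  While idle,
 * uncork the data socket so queued packets go out, close the current write
 * epoch with a barrier if necessary, and sleep until new work arrives or a
 * signal is pending; afterwards restore the tcp_cork setting from the
 * current configuration. */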
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001820static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001821{
1822 DEFINE_WAIT(wait);
1823 struct net_conf *nc;
1824 int uncork, cork;
1825
1826 dequeue_work_item(&connection->sender_work, work_list);
1827 if (!list_empty(work_list))
1828 return;
1829
1830 /* Still nothing to do?
1831 * Maybe we still need to close the current epoch,
1832 * even if no new requests are queued yet.
1833 *
1834 * Also, poke TCP, just in case.
1835 * Then wait for new work (or signal). */
1836 rcu_read_lock();
1837 nc = rcu_dereference(connection->net_conf);
1838 uncork = nc ? nc->tcp_cork : 0;
1839 rcu_read_unlock();
1840 if (uncork) {
1841 mutex_lock(&connection->data.mutex);
1842 if (connection->data.socket)
1843 drbd_tcp_uncork(connection->data.socket);
1844 mutex_unlock(&connection->data.mutex);
1845 }
1846
1847 for (;;) {
1848 int send_barrier;
1849 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001850 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001851 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02001852 /* dequeue single item only,
1853 * we still use drbd_queue_work_front() in some places */
1854 if (!list_empty(&connection->sender_work.q))
1855 list_move(connection->sender_work.q.next, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001856 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1857 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001858 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001859 break;
1860 }
1861 send_barrier = need_to_send_barrier(connection);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001862 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001863 if (send_barrier) {
1864 drbd_send_barrier(connection);
1865 connection->send.current_epoch_nr++;
1866 }
1867 schedule();
 1868		/* may be woken up for things other than new work, too,
 1869		 * e.g. if the current epoch got closed.
 1870		 * In that case we send the barrier above. */
1871 }
1872 finish_wait(&connection->sender_work.q_wait, &wait);
1873
1874 /* someone may have changed the config while we have been waiting above. */
1875 rcu_read_lock();
1876 nc = rcu_dereference(connection->net_conf);
1877 cork = nc ? nc->tcp_cork : 0;
1878 rcu_read_unlock();
1879 mutex_lock(&connection->data.mutex);
1880 if (connection->data.socket) {
1881 if (cork)
1882 drbd_tcp_cork(connection->data.socket);
1883 else if (!uncork)
1884 drbd_tcp_uncork(connection->data.socket);
1885 }
1886 mutex_unlock(&connection->data.mutex);
1887}
1888
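/* Main loop of the per-connection worker thread: fetch work items from
 * connection->sender_work and run their callbacks, passing cancel != 0 once
 * the connection state has dropped below C_WF_REPORT_PARAMS.  On thread
 * exit, drain the remaining work with cancel set and clean up all attached
 * devices. */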
Philipp Reisnerb411b362009-09-25 16:07:19 -07001889int drbd_worker(struct drbd_thread *thi)
1890{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001891 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001892 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001893 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001894 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001895 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001896
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001897 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001898 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001899
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001900 /* as long as we use drbd_queue_work_front(),
1901 * we may only dequeue single work items here, not batches. */
1902 if (list_empty(&work_list))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001903 wait_for_work(connection, &work_list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001905 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001907 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001908 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001910 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001911 break;
1912 }
1913
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001914 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001916
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001917 while (!list_empty(&work_list)) {
1918 w = list_first_entry(&work_list, struct drbd_work, list);
1919 list_del_init(&w->list);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001920 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001921 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001922 if (connection->cstate >= C_WF_REPORT_PARAMS)
1923 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924 }
1925 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001926
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001927 do {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001928 while (!list_empty(&work_list)) {
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001929 w = list_first_entry(&work_list, struct drbd_work, list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001930 list_del_init(&w->list);
Philipp Reisner00d56942011-02-09 18:09:48 +01001931 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001932 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001933 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001934 } while (!list_empty(&work_list));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001935
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001936 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001937 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1938 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001939 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001940 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001941 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001942 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001943 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001944 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01001945 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001946 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001947
1948 return 0;
1949}