/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 *   drbd_md_io_complete (defined here)
 *   drbd_request_endio (defined here)
 *   drbd_peer_request_endio (defined here)
 *   bm_async_io_complete (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */


59/* About the global_state_lock
60 Each state transition on an device holds a read lock. In case we have
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +020061 to evaluate the resync after dependencies, we grab a write lock, because
Philipp Reisnerb411b362009-09-25 16:07:19 -070062 we need stable states on all devices for that. */
rwlock_t global_state_lock;

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_io_complete(struct bio *bio, int error)
{
	struct drbd_md_io *md_io;
	struct drbd_device *device;

	md_io = (struct drbd_md_io *)bio->bi_private;
	device = container_of(md_io, struct drbd_device, md_io);

	md_io->error = error;

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	md_io->done = 1;
	wake_up(&device->misc_wait);
	bio_put(bio);
	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
		put_ldev(device);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage.  */
static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	wake_asender(peer_device->connection);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio, int error)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);
	int is_write = bio_data_dir(bio) == WRITE;

	if (error && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
			is_write ? "write" : "read", error,
			(unsigned long long)peer_req->i.sector);
	if (!error && !uptodate) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_warn(device, "%s: setting error to -EIO s=%llus\n",
				is_write ? "write" : "read",
				(unsigned long long)peer_req->i.sector);
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}

	if (error)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio, int error)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;
	int uptodate = bio_flagged(bio, BIO_UPTODATE);

	if (!error && !uptodate) {
		drbd_warn(device, "p %s: setting error to -EIO\n",
			bio_data_dir(bio) == WRITE ? "write" : "read");
		/* strange behavior of some lower level drivers...
		 * fail the request by clearing the uptodate flag,
		 * but do not return any error?! */
		error = -EIO;
	}


	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions.  In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!error)
			panic("possible random memory corruption caused by delayed completion of aborted local request\n");
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(error)) {
		what = (bio_data_dir(bio) == WRITE)
			? WRITE_COMPLETED_WITH_ERROR
			: (bio_rw(bio) == READ)
			  ? READ_COMPLETED_WITH_ERROR
			  : READ_AHEAD_COMPLETED_WITH_ERROR;
	} else
		what = COMPLETED_OK;

	bio_put(req->private_bio);
	req->private_bio = ERR_PTR(error);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

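/* Compute a digest over a peer request's page chain: every page but the
 * last is hashed in full, the last one possibly only partially. */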
void drbd_csum_ee(struct crypto_hash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		sg_set_page(&sg, page, PAGE_SIZE, 0);
		crypto_hash_update(&desc, &sg, sg.length);
		page = tmp;
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
	crypto_hash_update(&desc, &sg, sg.length);
	crypto_hash_final(&desc, digest);
}

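/* Compute a digest over all segments of a bio. */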
void drbd_csum_bio(struct crypto_hash *tfm, struct bio *bio, void *digest)
{
	struct hash_desc desc;
	struct scatterlist sg;
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc.tfm = tfm;
	desc.flags = 0;

	sg_init_table(&sg, 1);
	crypto_hash_init(&desc);

	bio_for_each_segment(bvec, bio, iter) {
		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		crypto_hash_update(&desc, &sg, sg.length);
	}
	crypto_hash_final(&desc, digest);
}

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	if (drbd_rs_should_slow_down(device, sector))
		goto defer;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help.  If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

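/* Resync/online-verify tick: dispatches to make_ov_request() or
 * make_resync_request() depending on the current connection state;
 * (re)scheduled via resync_timer_fn() below. */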
int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(unsigned long data)
{
	struct drbd_device *device = (struct drbd_device *) data;

	if (list_empty(&device->resync_work.list))
		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
				&device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

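/* Dynamic resync speed controller: from the number of sectors that came
 * back since the last invocation, decide how many sectors to request next
 * so that the amount of resync data in flight approaches c_fill_target
 * (or the amount implied by c_delay_target), capped by c_max_rate.
 * Caller must hold the RCU read lock: disk_conf and rs_plan_s are
 * RCU protected. */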
static int drbd_rs_controller(struct drbd_device *device)
{
	struct disk_conf *dc;
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	unsigned int want;     /* The number of sectors we want in the proxy */
	int req_sect; /* Number of sectors to request in this turn */
	int correction; /* Number of sectors more we need in the proxy*/
	int cps; /* correction per invocation of drbd_rs_controller() */
	int steps; /* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
	device->rs_in_flight -= sect_in;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}

static int drbd_rs_number_requests(struct drbd_device *device)
{
	int number;

	rcu_read_lock();
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* ignore the amount of pending requests, the resync controller should
	 * throttle down to incoming reply rate soon enough anyways. */
	return number;
}

static int make_resync_request(struct drbd_device *device, int cancel)
{
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	int max_bio_size;
	int number, rollback_i, size;
	int align, queued, sndbuf;
	int i = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number == 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests, when half of the send buffer is filled */
		mutex_lock(&first_peer_device(device)->connection->data.mutex);
		if (first_peer_device(device)->connection->data.socket) {
			queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
			sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
		} else {
			queued = 1;
			sndbuf = 0;
		}
		mutex_unlock(&first_peer_device(device)->connection->data.mutex);
		if (queued > sndbuf / 2)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_rs_should_slow_down(device, sector) ||
		    drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		for (;;) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;
		if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
		    first_peer_device(device)->connection->csums_tfm) {
			switch (read_for_csum(first_peer_device(device), sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_rs_should_slow_down(device, sector) ||
		    drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200810static void ping_peer(struct drbd_device *device)
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200811{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200812 struct drbd_connection *connection = first_peer_device(device)->connection;
Philipp Reisner2a67d8b2011-02-09 14:10:32 +0100813
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200814 clear_bit(GOT_PING_ACK, &connection->flags);
815 request_ping(connection);
816 wait_event(connection->ping_wait,
817 test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200818}
819
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200820int drbd_resync_finished(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700821{
822 unsigned long db, dt, dbdt;
823 unsigned long n_oos;
824 union drbd_state os, ns;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200825 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700826 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100827 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828
829 /* Remove all elements from the resync LRU. Since future actions
830 * might set bits in the (main) bitmap, then the entries in the
831 * resync LRU would be wrong. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200832 if (drbd_rs_del_all(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700833 /* In case this is not possible now, most probably because
834 * there are P_RS_DATA_REPLY Packets lingering on the worker's
835 * queue (or even the read operations for those packets
836 * is not finished by now). Retry in 100ms. */
837
Philipp Reisner20ee6392011-01-18 15:28:59 +0100838 schedule_timeout_interruptible(HZ / 10);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200839 dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
840 if (dw) {
841 dw->w.cb = w_resync_finished;
842 dw->device = device;
843 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
844 &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700845 return 1;
846 }
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200847 drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700848 }
849
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200850 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700851 if (dt <= 0)
852 dt = 1;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200853
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200854 db = device->rs_total;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200855 /* adjust for verify start and stop sectors, respective reached position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200856 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
857 db -= device->ov_left;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200858
Philipp Reisnerb411b362009-09-25 16:07:19 -0700859 dbdt = Bit2KB(db/dt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200860 device->rs_paused /= HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700861
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200862 if (!get_ldev(device))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700863 goto out;
864
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200865 ping_peer(device);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200866
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200867 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200868 os = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700869
Lars Ellenberg26525612010-11-05 09:56:33 +0100870 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
871
Philipp Reisnerb411b362009-09-25 16:07:19 -0700872 /* This protects us against multiple calls (that can happen in the presence
873 of application IO), and against connectivity loss just before we arrive here. */
874 if (os.conn <= C_CONNECTED)
875 goto out_unlock;
876
877 ns = os;
878 ns.conn = C_CONNECTED;
879
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200880 drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200881 verify_done ? "Online verify" : "Resync",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200882 dt + device->rs_paused, device->rs_paused, dbdt);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700883
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200884 n_oos = drbd_bm_total_weight(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700885
886 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
887 if (n_oos) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200888 drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700889 n_oos, Bit2KB(1));
890 khelper_cmd = "out-of-sync";
891 }
892 } else {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200893 D_ASSERT(device, (n_oos - device->rs_failed) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894
895 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
896 khelper_cmd = "after-resync-target";
897
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200898 if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200899 const unsigned long s = device->rs_same_csum;
900 const unsigned long t = device->rs_total;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700901 const int ratio =
902 (t == 0) ? 0 :
903 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200904 drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700905 "transferred %luK total %luK\n",
906 ratio,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200907 Bit2KB(device->rs_same_csum),
908 Bit2KB(device->rs_total - device->rs_same_csum),
909 Bit2KB(device->rs_total));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700910 }
911 }
912
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200913 if (device->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200914 drbd_info(device, " %lu failed blocks\n", device->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700915
916 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
917 ns.disk = D_INCONSISTENT;
918 ns.pdsk = D_UP_TO_DATE;
919 } else {
920 ns.disk = D_UP_TO_DATE;
921 ns.pdsk = D_INCONSISTENT;
922 }
923 } else {
924 ns.disk = D_UP_TO_DATE;
925 ns.pdsk = D_UP_TO_DATE;
926
927 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200928 if (device->p_uuid) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700929 int i;
930 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200931 _drbd_uuid_set(device, i, device->p_uuid[i]);
932 drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
933 _drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200935 drbd_err(device, "device->p_uuid is NULL! BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936 }
937 }
938
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100939 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
940 /* for verify runs, we don't update uuids here,
941 * so there would be nothing to report. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200942 drbd_uuid_set_bm(device, 0UL);
943 drbd_print_uuids(device, "updated UUIDs");
944 if (device->p_uuid) {
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100945 /* Now the two UUID sets are equal, update what we
946 * know of the peer. */
947 int i;
948 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200949 device->p_uuid[i] = device->ldev->md.uuid[i];
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100950 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700951 }
952 }
953
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200954 _drbd_set_state(device, ns, CS_VERBOSE, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955out_unlock:
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200956 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200957 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700958out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200959 device->rs_total = 0;
960 device->rs_failed = 0;
961 device->rs_paused = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +0200962
963 /* reset start sector, if we reached end of device */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200964 if (verify_done && device->ov_left == 0)
965 device->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700966
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200967 drbd_md_sync(device);
Lars Ellenberg13d42682010-10-13 17:37:54 +0200968
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969 if (khelper_cmd)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200970 drbd_khelper(device, khelper_cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700971
972 return 1;
973}
974
975/* helper */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200976static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200978 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700979 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100980 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200981 atomic_add(i, &device->pp_in_use_by_net);
982 atomic_sub(i, &device->pp_in_use);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200983 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200984 list_add_tail(&peer_req->w.list, &device->net_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200985 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +0200986 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200988 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700989}
990
991/**
992 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200993 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700994 * @w: work object.
995 * @cancel: The connection will be closed anyways
996 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100997int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700998{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200999 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001000 struct drbd_peer_device *peer_device = peer_req->peer_device;
1001 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001002 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001003
1004 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001005 drbd_free_peer_req(device, peer_req);
1006 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001007 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008 }
1009
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001010 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001011 err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001012 } else {
1013 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001014 drbd_err(device, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001015 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001016
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001017 err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001018 }
1019
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001020 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001022 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001024 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001025 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001026 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001027}
1028
1029/**
Andreas Gruenbachera209b4a2011-08-17 12:43:25 +02001030 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
Philipp Reisnerb411b362009-09-25 16:07:19 -07001031 * @w: work object.
 1032 * @cancel: The connection will be closed anyway
1033 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001034int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001035{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001036 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001037 struct drbd_peer_device *peer_device = peer_req->peer_device;
1038 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001039 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040
1041 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001042 drbd_free_peer_req(device, peer_req);
1043 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001044 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001045 }
1046
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001047 if (get_ldev_if_state(device, D_FAILED)) {
1048 drbd_rs_complete_io(device, peer_req->i.sector);
1049 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 }
1051
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001052 if (device->state.conn == C_AHEAD) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001053 err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001054 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001055 if (likely(device->state.pdsk >= D_INCONSISTENT)) {
1056 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001057 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001058 } else {
1059 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001060 drbd_err(device, "Not sending RSDataReply, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001062 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001063 }
1064 } else {
1065 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001066 drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001067 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001068
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001069 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001070
1071 /* update resync data with failure */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001072 drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001073 }
1074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001075 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001077 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001078
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001079 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001080 drbd_err(device, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001081 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082}
1083
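/* Checksum based resync (csums-alg): the sync target sent a digest of its
 * version of this block along with the request.  We hash our own copy and
 * compare; if the digests match, we only acknowledge with P_RS_IS_IN_SYNC
 * instead of shipping the whole block, otherwise we fall back to a regular
 * P_RS_DATA_REPLY. */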
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001084int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001086 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001087 struct drbd_peer_device *peer_device = peer_req->peer_device;
1088 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089 struct digest_info *di;
1090 int digest_size;
1091 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001092 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093
1094 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001095 drbd_free_peer_req(device, peer_req);
1096 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001097 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098 }
1099
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001100 if (get_ldev(device)) {
1101 drbd_rs_complete_io(device, peer_req->i.sector);
1102 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001103 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001104
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001105 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001107 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001108 /* quick hack to try to avoid a race against reconfiguration.
1109 * a real fix would be much more involved,
1110 * introducing more locking mechanisms */
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001111 if (peer_device->connection->csums_tfm) {
1112 digest_size = crypto_hash_digestsize(peer_device->connection->csums_tfm);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001113 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001114 digest = kmalloc(digest_size, GFP_NOIO);
1115 }
1116 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001117 drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001118 eq = !memcmp(digest, di->digest, digest_size);
1119 kfree(digest);
1120 }
1121
1122 if (eq) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001123 drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001124 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001125 device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001126 err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001128 inc_rs_pending(device);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001129 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1130 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001131 kfree(di);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001132 err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001133 }
1134 } else {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001135 err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001137 drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138 }
1139
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001140 dec_unacked(device);
1141 move_to_net_ee_or_free(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001143 if (unlikely(err))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001144 drbd_err(device, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001145 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146}
1147
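/* Online verify: w_e_end_ov_req() runs on the node that received a
 * P_OV_REQUEST.  It hashes the local block and sends back only the digest
 * (P_OV_REPLY).  w_e_end_ov_reply() below runs on the other node once that
 * reply arrives: it compares the peer's digest against its own data and
 * reports the block via P_OV_RESULT, marking mismatches out of sync. */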
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001148int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001150 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001151 struct drbd_peer_device *peer_device = peer_req->peer_device;
1152 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001153 sector_t sector = peer_req->i.sector;
1154 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001155 int digest_size;
1156 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001157 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001158
1159 if (unlikely(cancel))
1160 goto out;
1161
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001162 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001163 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001164 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001165 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001166 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001167 }
1168
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001169 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001170 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001171 else
1172 memset(digest, 0, digest_size);
1173
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001174 /* Free e and pages before send.
1175 * In case we block on congestion, we could otherwise run into
1176 * some distributed deadlock, if the other side blocks on
1177 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001178 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001179 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001180 peer_req = NULL;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001181 inc_rs_pending(device);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001182 err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001183 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001184 dec_rs_pending(device);
Philipp Reisner8f214202011-03-01 15:52:35 +01001185 kfree(digest);
1186
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001188 if (peer_req)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001189 drbd_free_peer_req(device, peer_req);
1190 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001191 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001192}
1193
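/* Coalesce adjacent out-of-sync blocks found during online verify into one
 * contiguous range, so that ov_out_of_sync_print() can report a single line
 * per region instead of one per block.  The bits are set in the bitmap
 * right away via drbd_set_out_of_sync(). */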
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001194void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001196 if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
1197 device->ov_last_oos_size += size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001199 device->ov_last_oos_start = sector;
1200 device->ov_last_oos_size = size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001202 drbd_set_out_of_sync(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001203}
1204
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001205int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001207 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001208 struct drbd_peer_device *peer_device = peer_req->peer_device;
1209 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001211 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001212 sector_t sector = peer_req->i.sector;
1213 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001214 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001215 int err, eq = 0;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001216 bool stop_sector_reached = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217
1218 if (unlikely(cancel)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001219 drbd_free_peer_req(device, peer_req);
1220 dec_unacked(device);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001221 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 }
1223
1224 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1225 * the resync lru has been cleaned up already */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001226 if (get_ldev(device)) {
1227 drbd_rs_complete_io(device, peer_req->i.sector);
1228 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001229 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001231 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001233 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001234 digest_size = crypto_hash_digestsize(peer_device->connection->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235 digest = kmalloc(digest_size, GFP_NOIO);
1236 if (digest) {
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001237 drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001238
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001239 D_ASSERT(device, digest_size == di->digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240 eq = !memcmp(digest, di->digest, digest_size);
1241 kfree(digest);
1242 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 }
1244
Lars Ellenberg9676c762011-02-22 14:02:31 +01001245 /* Free peer_req and pages before send.
1246 * In case we block on congestion, we could otherwise run into
1247 * some distributed deadlock, if the other side blocks on
1248 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001249 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001250 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251 if (!eq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001252 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001254 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255
Andreas Gruenbacher67801392011-09-13 10:39:41 +02001256 err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001257 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001259 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001261 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001262
1263 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001264 if ((device->ov_left & 0x200) == 0x200)
1265 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001266
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001267 stop_sector_reached = verify_can_do_stop_sector(device) &&
1268 (sector + (size>>9)) >= device->ov_stop_sector;
Lars Ellenberg58ffa582012-07-26 14:09:49 +02001269
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001270 if (device->ov_left == 0 || stop_sector_reached) {
1271 ov_out_of_sync_print(device);
1272 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001273 }
1274
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001275 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001276}
1277
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001278/* FIXME
1279 * We need to track the number of pending barrier acks,
1280 * and to be able to wait for them.
1281 * See also comment in drbd_adm_attach before drbd_suspend_io.
1282 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001283static int drbd_send_barrier(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001285 struct p_barrier *p;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001286 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001288 sock = &connection->data;
1289 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001290 if (!p)
1291 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001292 p->barrier = connection->send.current_epoch_nr;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001293 p->pad = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001294 connection->send.current_epoch_writes = 0;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001295
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001296 return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001297}
1298
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001299int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001301 struct drbd_device *device =
1302 container_of(w, struct drbd_device, unplug_work);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001303 struct drbd_socket *sock;
1304
Philipp Reisnerb411b362009-09-25 16:07:19 -07001305 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001306 return 0;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001307 sock = &first_peer_device(device)->connection->data;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001308 if (!drbd_prepare_command(first_peer_device(device), sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001309 return -EIO;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001310 return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001311}
1312
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001313static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001314{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001315 if (!connection->send.seen_any_write_yet) {
1316 connection->send.seen_any_write_yet = true;
1317 connection->send.current_epoch_nr = epoch;
1318 connection->send.current_epoch_writes = 0;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001319 }
1320}
1321
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001322static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001323{
1324 /* re-init if first write on this connection */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001325 if (!connection->send.seen_any_write_yet)
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001326 return;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001327 if (connection->send.current_epoch_nr != epoch) {
1328 if (connection->send.current_epoch_writes)
1329 drbd_send_barrier(connection);
1330 connection->send.current_epoch_nr = epoch;
Lars Ellenberg4eb9b3c2012-08-20 11:05:23 +02001331 }
1332}
1333
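/* The send callbacks below use the two helpers above to keep the epoch
 * bookkeeping on the wire consistent.  A write (w_send_dblock) does
 *
 *	re_init_if_first_write(connection, req->epoch);
 *	maybe_send_barrier(connection, req->epoch);
 *	connection->send.current_epoch_writes++;
 *
 * before handing the P_DATA packet to the network, while read requests and
 * out-of-sync notifications only call maybe_send_barrier(), since they
 * never open a new write epoch. */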
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001334int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001335{
1336 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001337 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001338 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001339 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001340
1341 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001342 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001343 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001344 }
1345
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001346 /* this time, no connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001347 * If it was sent, it was the closing barrier for the last
1348 * replicated epoch, before we went into AHEAD mode.
1349 * No more barriers will be sent, until we leave AHEAD mode again. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001350 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001351
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001352 err = drbd_send_out_of_sync(first_peer_device(device), req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001353 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001354
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001355 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001356}
1357
Philipp Reisnerb411b362009-09-25 16:07:19 -07001358/**
1359 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360 * @w: work object.
 1361 * @cancel: The connection will be closed anyway
1362 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001363int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001364{
1365 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001366 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001367 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001368 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001369
1370 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001371 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001372 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001373 }
1374
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001375 re_init_if_first_write(connection, req->epoch);
1376 maybe_send_barrier(connection, req->epoch);
1377 connection->send.current_epoch_writes++;
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001378
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001379 err = drbd_send_dblock(first_peer_device(device), req);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001380 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001382 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383}
1384
1385/**
1386 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387 * @w: work object.
 1388 * @cancel: The connection will be closed anyway
1389 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001390int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391{
1392 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001393 struct drbd_device *device = req->device;
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001394 struct drbd_connection *connection = first_peer_device(device)->connection;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001395 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396
1397 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001398 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001399 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001400 }
1401
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001402 /* Even read requests may close a write epoch,
1403 * if there was any yet. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001404 maybe_send_barrier(connection, req->epoch);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001405
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001406 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001407 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001409 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001410
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001411 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001412}
1413
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001414int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001415{
1416 struct drbd_request *req = container_of(w, struct drbd_request, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001417 struct drbd_device *device = req->device;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001418
Philipp Reisner07782862010-08-31 12:00:50 +02001419 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001420 drbd_al_begin_io(device, &req->i, false);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001421
1422 drbd_req_make_private_bio(req, req->master_bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001423 req->private_bio->bi_bdev = device->ldev->backing_bdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001424 generic_make_request(req->private_bio);
1425
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001426 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001427}
1428
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001429static int _drbd_may_sync_now(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001430{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001431 struct drbd_device *odev = device;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001432 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001433
1434 while (1) {
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001435 if (!odev->ldev || odev->state.disk == D_DISKLESS)
Philipp Reisner438c8372011-03-28 14:48:01 +02001436 return 1;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001437 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001438 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001439 rcu_read_unlock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001440 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001441 return 1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001442 odev = minor_to_device(resync_after);
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001443 if (!odev)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001444 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445 if ((odev->state.conn >= C_SYNC_SOURCE &&
1446 odev->state.conn <= C_PAUSED_SYNC_T) ||
1447 odev->state.aftr_isp || odev->state.peer_isp ||
1448 odev->state.user_isp)
1449 return 0;
1450 }
1451}
1452
1453/**
1454 * _drbd_pause_after() - Pause resync on all devices that may not resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001455 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456 *
1457 * Called from process context only (admin command and after_state_ch).
1458 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001459static int _drbd_pause_after(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001461 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 int i, rv = 0;
1463
Philipp Reisner695d08f2011-04-11 22:53:32 -07001464 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001465 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1467 continue;
1468 if (!_drbd_may_sync_now(odev))
1469 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1470 != SS_NOTHING_TO_DO);
1471 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001472 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473
1474 return rv;
1475}
1476
1477/**
1478 * _drbd_resume_next() - Resume resync on all devices that may resync now
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001479 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480 *
1481 * Called from process context only (admin command and worker).
1482 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001483static int _drbd_resume_next(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001485 struct drbd_device *odev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 int i, rv = 0;
1487
Philipp Reisner695d08f2011-04-11 22:53:32 -07001488 rcu_read_lock();
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001489 idr_for_each_entry(&drbd_devices, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1491 continue;
1492 if (odev->state.aftr_isp) {
1493 if (_drbd_may_sync_now(odev))
1494 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1495 CS_HARD, NULL)
1496 != SS_NOTHING_TO_DO) ;
1497 }
1498 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001499 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500 return rv;
1501}
1502
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001503void resume_next_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001504{
1505 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001506 _drbd_resume_next(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507 write_unlock_irq(&global_state_lock);
1508}
1509
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001510void suspend_other_sg(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511{
1512 write_lock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001513 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514 write_unlock_irq(&global_state_lock);
1515}
1516
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001517/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001518enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001519{
Andreas Gruenbacher54761692011-05-30 16:15:21 +02001520 struct drbd_device *odev;
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001521 int resync_after;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522
1523 if (o_minor == -1)
1524 return NO_ERROR;
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001525 if (o_minor < -1 || o_minor > MINORMASK)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001526 return ERR_RESYNC_AFTER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527
1528 /* check for loops */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001529 odev = minor_to_device(o_minor);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 while (1) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001531 if (odev == device)
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001532 return ERR_RESYNC_AFTER_CYCLE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533
Lars Ellenberga3f8f7d2013-03-27 14:08:43 +01001534 /* You are free to depend on diskless, non-existing,
1535 * or not yet/no longer existing minors.
1536 * We only reject dependency loops.
1537 * We cannot follow the dependency chain beyond a detached or
1538 * missing minor.
1539 */
1540 if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
1541 return NO_ERROR;
1542
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001543 rcu_read_lock();
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001544 resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001545 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546 /* dependency chain ends here, no cycles. */
Andreas Gruenbacher95f8efd2011-05-12 11:15:34 +02001547 if (resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 return NO_ERROR;
1549
1550 /* follow the dependency chain */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001551 odev = minor_to_device(resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552 }
1553}
1554
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001555/* caller must hold global_state_lock */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001556void drbd_resync_after_changed(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557{
1558 int changes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001560 do {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001561 changes = _drbd_pause_after(device);
1562 changes |= _drbd_resume_next(device);
Philipp Reisnerdc97b702011-05-03 14:27:15 +02001563 } while (changes);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564}
1565
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001566void drbd_rs_controller_reset(struct drbd_device *device)
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001567{
Philipp Reisner813472c2011-05-03 16:47:02 +02001568 struct fifo_buffer *plan;
1569
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001570 atomic_set(&device->rs_sect_in, 0);
1571 atomic_set(&device->rs_sect_ev, 0);
1572 device->rs_in_flight = 0;
Philipp Reisner813472c2011-05-03 16:47:02 +02001573
1574 /* Updating the RCU protected object in place is necessary since
1575 this function gets called from atomic context.
 1576	   It is valid since all other updates also lead to a completely
1577 empty fifo */
1578 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001579 plan = rcu_dereference(device->rs_plan_s);
Philipp Reisner813472c2011-05-03 16:47:02 +02001580 plan->total = 0;
1581 fifo_set(plan, 0);
1582 rcu_read_unlock();
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001583}
1584
Philipp Reisner1f04af32011-02-07 11:33:59 +01001585void start_resync_timer_fn(unsigned long data)
1586{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001587 struct drbd_device *device = (struct drbd_device *) data;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001588
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001589 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
1590 &device->start_resync_work);
Philipp Reisner1f04af32011-02-07 11:33:59 +01001591}
1592
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001593int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001594{
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02001595 struct drbd_device *device =
1596 container_of(w, struct drbd_device, start_resync_work);
Philipp Reisner00d56942011-02-09 18:09:48 +01001597
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001598 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001599 drbd_warn(device, "w_start_resync later...\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001600 device->start_resync_timer.expires = jiffies + HZ/10;
1601 add_timer(&device->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001602 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001603 }
1604
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001605 drbd_start_resync(device, C_SYNC_SOURCE);
1606 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001607 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001608}
1609
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610/**
1611 * drbd_start_resync() - Start the resync process
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001612 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1614 *
1615 * This function might bring you directly into one of the
1616 * C_PAUSED_SYNC_* states.
1617 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001618void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619{
1620 union drbd_state ns;
1621 int r;
1622
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001623 if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001624 drbd_err(device, "Resync already running!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001625 return;
1626 }
1627
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001628 if (!test_bit(B_RS_H_DONE, &device->flags)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001629 if (side == C_SYNC_TARGET) {
1630 /* Since application IO was locked out during C_WF_BITMAP_T and
1631 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
 1632			   we check whether we may make the data inconsistent. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001633 r = drbd_khelper(device, "before-resync-target");
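			/* drbd_khelper() returns a wait()-style exit status;
			 * the shift extracts the handler's exit code. */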
Philipp Reisnere64a3292011-02-05 17:34:11 +01001634 r = (r >> 8) & 0xff;
1635 if (r > 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001636 drbd_info(device, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001637 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001638 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001639 return;
1640 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001641 } else /* C_SYNC_SOURCE */ {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001642 r = drbd_khelper(device, "before-resync-source");
Philipp Reisnere64a3292011-02-05 17:34:11 +01001643 r = (r >> 8) & 0xff;
1644 if (r > 0) {
1645 if (r == 3) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001646 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001647					 "ignoring. Old userland tools?\n", r);
1648 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001649 drbd_info(device, "before-resync-source handler returned %d, "
Philipp Reisnere64a3292011-02-05 17:34:11 +01001650 "dropping connection.\n", r);
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001651 conn_request_state(first_peer_device(device)->connection,
1652 NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001653 return;
1654 }
1655 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001656 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657 }
1658
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001659 if (current == first_peer_device(device)->connection->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001660 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001661		   as that can take a long time */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001662 if (!mutex_trylock(device->state_mutex)) {
1663 set_bit(B_RS_H_DONE, &device->flags);
1664 device->start_resync_timer.expires = jiffies + HZ/5;
1665 add_timer(&device->start_resync_timer);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001666 return;
1667 }
1668 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001669 mutex_lock(device->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001670 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001671 clear_bit(B_RS_H_DONE, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001673 write_lock_irq(&global_state_lock);
Philipp Reisnera7004712013-03-27 14:08:35 +01001674 /* Did some connection breakage or IO error race with us? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001675 if (device->state.conn < C_CONNECTED
1676 || !get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisner0cfac5d2011-11-10 12:12:52 +01001677 write_unlock_irq(&global_state_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001678 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679 return;
1680 }
1681
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001682 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001684 ns.aftr_isp = !_drbd_may_sync_now(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685
1686 ns.conn = side;
1687
1688 if (side == C_SYNC_TARGET)
1689 ns.disk = D_INCONSISTENT;
1690 else /* side == C_SYNC_SOURCE */
1691 ns.pdsk = D_INCONSISTENT;
1692
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001693 r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
1694 ns = drbd_read_state(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
1696 if (ns.conn < C_CONNECTED)
1697 r = SS_UNKNOWN_ERROR;
1698
1699 if (r == SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 unsigned long tw = drbd_bm_total_weight(device);
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001701 unsigned long now = jiffies;
1702 int i;
1703
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 device->rs_failed = 0;
1705 device->rs_paused = 0;
1706 device->rs_same_csum = 0;
1707 device->rs_last_events = 0;
1708 device->rs_last_sect_ev = 0;
1709 device->rs_total = tw;
1710 device->rs_start = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001711 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001712 device->rs_mark_left[i] = tw;
1713 device->rs_mark_time[i] = now;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001714 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001715 _drbd_pause_after(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001716 }
1717 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001718
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719 if (r == SS_SUCCESS) {
Philipp Reisner328e0f122012-10-19 14:37:47 +02001720 /* reset rs_last_bcast when a resync or verify is started,
1721 * to deal with potential jiffies wrap. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 device->rs_last_bcast = jiffies - HZ;
Philipp Reisner328e0f122012-10-19 14:37:47 +02001723
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001724 drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001725 drbd_conn_str(ns.conn),
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001726 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1727 (unsigned long) device->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001728 if (side == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729 device->bm_resync_fo = 0;
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001730
1731 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1732 * with w_send_oos, or the sync target will get confused as to
 1733		 * with w_send_oos, or the sync target will get confused as to
		 * how many bits to resync. We cannot do that always, because for an
1734 * empty resync and protocol < 95, we need to do it here, as we call
1735 * drbd_resync_finished from here in that case.
1736 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1737 * and from after_state_ch otherwise. */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001738 if (side == C_SYNC_SOURCE &&
1739 first_peer_device(device)->connection->agreed_pro_version < 96)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001740 drbd_gen_and_send_sync_uuid(first_peer_device(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001742 if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
1743 device->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001744 /* This still has a race (about when exactly the peers
1745 * detect connection loss) that can lead to a full sync
1746 * on next handshake. In 8.3.9 we fixed this with explicit
1747 * resync-finished notifications, but the fix
1748 * introduces a protocol change. Sleeping for some
1749 * time longer than the ping interval + timeout on the
1750 * SyncSource, to give the SyncTarget the chance to
1751 * detect connection loss, then waiting for a ping
1752 * response (implicit in drbd_resync_finished) reduces
1753 * the race considerably, but does not solve it. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001754 if (side == C_SYNC_SOURCE) {
1755 struct net_conf *nc;
1756 int timeo;
1757
1758 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001759 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001760 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1761 rcu_read_unlock();
1762 schedule_timeout_interruptible(timeo);
1763 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001764 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001765 }
1766
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001767 drbd_rs_controller_reset(device);
1768 /* ns.conn may already be != device->state.conn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769 * we may have been paused in between, or become paused until
1770 * the timer triggers.
1771 * No matter, that is handled in resync_timer_fn() */
1772 if (ns.conn == C_SYNC_TARGET)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001773 mod_timer(&device->resync_timer, jiffies);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001775 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001777 put_ldev(device);
1778 mutex_unlock(device->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779}
1780
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001781/* If the resource already closed the current epoch, but we did not
1782 * (because we have not yet seen new requests), we should send the
1783 * corresponding barrier now. Must be checked within the same spinlock
1784 * that is used to check for new requests. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001785static bool need_to_send_barrier(struct drbd_connection *connection)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001786{
1787 if (!connection->send.seen_any_write_yet)
1788 return false;
1789
1790 /* Skip barriers that do not contain any writes.
1791 * This may happen during AHEAD mode. */
1792 if (!connection->send.current_epoch_writes)
1793 return false;
1794
1795 /* ->req_lock is held when requests are queued on
1796 * connection->sender_work, and put into ->transfer_log.
1797 * It is also held when ->current_tle_nr is increased.
1798 * So either there are already new requests queued,
 1799	 * and corresponding barriers will be sent there.
1800 * Or nothing new is queued yet, so the difference will be 1.
1801 */
1802 if (atomic_read(&connection->current_tle_nr) !=
1803 connection->send.current_epoch_nr + 1)
1804 return false;
1805
1806 return true;
1807}
1808
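/* Two ways to take work off the sender queue: dequeue_work_batch() splices
 * the whole queue at once (used when the worker drains remaining work on
 * its way out), while dequeue_work_item() moves only the first entry,
 * because drbd_queue_work_front() users require that the main loop handles
 * one work item at a time. */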
Rashika Kheriaa186e472013-12-19 15:06:10 +05301809static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001810{
1811 spin_lock_irq(&queue->q_lock);
1812 list_splice_init(&queue->q, work_list);
1813 spin_unlock_irq(&queue->q_lock);
1814 return !list_empty(work_list);
1815}
1816
Rashika Kheriaa186e472013-12-19 15:06:10 +05301817static bool dequeue_work_item(struct drbd_work_queue *queue, struct list_head *work_list)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001818{
1819 spin_lock_irq(&queue->q_lock);
1820 if (!list_empty(&queue->q))
1821 list_move(queue->q.next, work_list);
1822 spin_unlock_irq(&queue->q_lock);
1823 return !list_empty(work_list);
1824}
1825
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001826static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001827{
1828 DEFINE_WAIT(wait);
1829 struct net_conf *nc;
1830 int uncork, cork;
1831
1832 dequeue_work_item(&connection->sender_work, work_list);
1833 if (!list_empty(work_list))
1834 return;
1835
1836 /* Still nothing to do?
1837 * Maybe we still need to close the current epoch,
1838 * even if no new requests are queued yet.
1839 *
1840 * Also, poke TCP, just in case.
1841 * Then wait for new work (or signal). */
1842 rcu_read_lock();
1843 nc = rcu_dereference(connection->net_conf);
1844 uncork = nc ? nc->tcp_cork : 0;
1845 rcu_read_unlock();
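	/* With tcp_cork in use, uncork before going idle so anything still
	 * buffered in the socket is pushed out now; we cork again further
	 * down (if still configured) once there is new work to batch. */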
1846 if (uncork) {
1847 mutex_lock(&connection->data.mutex);
1848 if (connection->data.socket)
1849 drbd_tcp_uncork(connection->data.socket);
1850 mutex_unlock(&connection->data.mutex);
1851 }
1852
1853 for (;;) {
1854 int send_barrier;
1855 prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001856 spin_lock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001857 spin_lock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
Lars Ellenbergbc317a92012-08-22 11:47:14 +02001858 /* dequeue single item only,
1859 * we still use drbd_queue_work_front() in some places */
1860 if (!list_empty(&connection->sender_work.q))
1861 list_move(connection->sender_work.q.next, work_list);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001862 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1863 if (!list_empty(work_list) || signal_pending(current)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001864 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001865 break;
1866 }
1867 send_barrier = need_to_send_barrier(connection);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001868 spin_unlock_irq(&connection->resource->req_lock);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01001869 if (send_barrier) {
1870 drbd_send_barrier(connection);
1871 connection->send.current_epoch_nr++;
1872 }
1873 schedule();
 1874		/* may be woken up for other things than new work, too,
1875 * e.g. if the current epoch got closed.
1876 * In which case we send the barrier above. */
1877 }
1878 finish_wait(&connection->sender_work.q_wait, &wait);
1879
1880 /* someone may have changed the config while we have been waiting above. */
1881 rcu_read_lock();
1882 nc = rcu_dereference(connection->net_conf);
1883 cork = nc ? nc->tcp_cork : 0;
1884 rcu_read_unlock();
1885 mutex_lock(&connection->data.mutex);
1886 if (connection->data.socket) {
1887 if (cork)
1888 drbd_tcp_cork(connection->data.socket);
1889 else if (!uncork)
1890 drbd_tcp_uncork(connection->data.socket);
1891 }
1892 mutex_unlock(&connection->data.mutex);
1893}
1894
Philipp Reisnerb411b362009-09-25 16:07:19 -07001895int drbd_worker(struct drbd_thread *thi)
1896{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001897 struct drbd_connection *connection = thi->connection;
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001898 struct drbd_work *w = NULL;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001899 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001900 LIST_HEAD(work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001901 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001903 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001904 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001905
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001906 /* as long as we use drbd_queue_work_front(),
1907 * we may only dequeue single work items here, not batches. */
1908 if (list_empty(&work_list))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001909 wait_for_work(connection, &work_list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001910
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001911 if (signal_pending(current)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001912 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001913 if (get_t_state(thi) == RUNNING) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001914 drbd_warn(connection, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001916 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917 break;
1918 }
1919
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001920 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001921 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001922
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001923 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001924 w = list_first_entry(&work_list, struct drbd_work, list);
1925 list_del_init(&w->list);
1926 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001927 continue;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001928 if (connection->cstate >= C_WF_REPORT_PARAMS)
1929 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001930 }
1931 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001932
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001933 do {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934 while (!list_empty(&work_list)) {
Andreas Gruenbacher6db7e502011-08-26 23:50:08 +02001935 w = list_first_entry(&work_list, struct drbd_work, list);
1936 list_del_init(&w->list);
1937 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001938 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001939 dequeue_work_batch(&connection->sender_work, &work_list);
Lars Ellenberg8c0785a2011-10-19 11:50:57 +02001940 } while (!list_empty(&work_list));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001941
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001942 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001943 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1944 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001945 D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001946 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001947 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001948 drbd_device_cleanup(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001949 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001950 rcu_read_lock();
Philipp Reisner0e29d162011-02-18 14:23:11 +01001951 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001952 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953
1954 return 0;
1955}