blob: 410900eb2fffc8930b557eb1154fce3592377c5a [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_worker.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070027#include <linux/drbd.h>
28#include <linux/sched.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070029#include <linux/wait.h>
30#include <linux/mm.h>
31#include <linux/memcontrol.h>
32#include <linux/mm_inline.h>
33#include <linux/slab.h>
34#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070035#include <linux/string.h>
36#include <linux/scatterlist.h>
37
38#include "drbd_int.h"
39#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070040
Philipp Reisner00d56942011-02-09 18:09:48 +010041static int w_make_ov_request(struct drbd_work *w, int cancel);
Philipp Reisnerb411b362009-09-25 16:07:19 -070042
43
Andreas Gruenbacherc5a91612011-01-25 17:33:38 +010044/* endio handlers:
45 * drbd_md_io_complete (defined here)
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +010046 * drbd_request_endio (defined here)
47 * drbd_peer_request_endio (defined here)
Andreas Gruenbacherc5a91612011-01-25 17:33:38 +010048 * bm_async_io_complete (defined in drbd_bitmap.c)
49 *
Philipp Reisnerb411b362009-09-25 16:07:19 -070050 * For all these callbacks, note the following:
51 * The callbacks will be called in irq context by the IDE drivers,
52 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
53 * Try to get the locking right :)
54 *
55 */
56
57
/* About the global_state_lock
   Each state transition on a device holds a read lock. In case we have
   to evaluate the sync-after dependencies, we grab a write lock, because
   we need stable states on all devices for that. */
62rwlock_t global_state_lock;
63
64/* used for synchronous meta data and bitmap IO
65 * submitted by drbd_md_sync_page_io()
66 */
67void drbd_md_io_complete(struct bio *bio, int error)
68{
69 struct drbd_md_io *md_io;
70
71 md_io = (struct drbd_md_io *)bio->bi_private;
72 md_io->error = error;
73
Philipp Reisnerb411b362009-09-25 16:07:19 -070074 complete(&md_io->event);
75}
76
77/* reads on behalf of the partner,
78 * "submitted" by the receiver
79 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +010080void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -070081{
82 unsigned long flags = 0;
Philipp Reisnera21e9292011-02-08 15:08:49 +010083 struct drbd_conf *mdev = peer_req->w.mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -070084
Philipp Reisner87eeee42011-01-19 14:16:30 +010085 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +010086 mdev->read_cnt += peer_req->i.size >> 9;
87 list_del(&peer_req->w.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -070088 if (list_empty(&mdev->read_ee))
89 wake_up(&mdev->ee_wait);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +010090 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +010091 __drbd_chk_io_error(mdev, false);
Philipp Reisner87eeee42011-01-19 14:16:30 +010092 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -070093
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +010094 drbd_queue_work(&mdev->tconn->data.work, &peer_req->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -070095 put_ldev(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -070096}
97
98/* writes on behalf of the partner, or resync writes,
Lars Ellenberg45bb9122010-05-14 17:10:48 +020099 * "submitted" by the receiver, final stage. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100100static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700101{
102 unsigned long flags = 0;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100103 struct drbd_conf *mdev = peer_req->w.mdev;
Lars Ellenberg181286a2011-03-31 15:18:56 +0200104 struct drbd_interval i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700105 int do_wake;
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100106 u64 block_id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700107 int do_al_complete_io;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700108
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100109 /* after we moved peer_req to done_ee,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700110 * we may no longer access it,
111 * it may be freed/reused already!
112 * (as soon as we release the req_lock) */
Lars Ellenberg181286a2011-03-31 15:18:56 +0200113 i = peer_req->i;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100114 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
115 block_id = peer_req->block_id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700116
Philipp Reisner87eeee42011-01-19 14:16:30 +0100117 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100118 mdev->writ_cnt += peer_req->i.size >> 9;
119 list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */
120 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700121
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100122 /*
Andreas Gruenbacher5e472262011-01-27 14:42:51 +0100123 * Do not remove from the write_requests tree here: we did not send the
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +0100124 * Ack yet and did not wake possibly waiting conflicting requests.
125 * Removed from the tree from "drbd_process_done_ee" within the
126 * appropriate w.cb (e_end_block/e_end_resync_block) or from
127 * _drbd_clear_done_ee.
128 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700129
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100130 do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700131
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100132 if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100133 __drbd_chk_io_error(mdev, false);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100134 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700135
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +0100136 if (block_id == ID_SYNCER)
Lars Ellenberg181286a2011-03-31 15:18:56 +0200137 drbd_rs_complete_io(mdev, i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700138
139 if (do_wake)
140 wake_up(&mdev->ee_wait);
141
142 if (do_al_complete_io)
Lars Ellenberg181286a2011-03-31 15:18:56 +0200143 drbd_al_complete_io(mdev, &i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700144
Philipp Reisner0625ac12011-02-07 14:49:19 +0100145 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700146 put_ldev(mdev);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200147}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700148
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200149/* writes on behalf of the partner, or resync writes,
150 * "submitted" by the receiver.
151 */
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +0100152void drbd_peer_request_endio(struct bio *bio, int error)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200153{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100154 struct drbd_peer_request *peer_req = bio->bi_private;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100155 struct drbd_conf *mdev = peer_req->w.mdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200156 int uptodate = bio_flagged(bio, BIO_UPTODATE);
157 int is_write = bio_data_dir(bio) == WRITE;
158
Lars Ellenberg07194272010-12-20 15:38:07 +0100159 if (error && __ratelimit(&drbd_ratelimit_state))
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 dev_warn(DEV, "%s: error=%d s=%llus\n",
161 is_write ? "write" : "read", error,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100162 (unsigned long long)peer_req->i.sector);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200163 if (!error && !uptodate) {
Lars Ellenberg07194272010-12-20 15:38:07 +0100164 if (__ratelimit(&drbd_ratelimit_state))
165 dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
166 is_write ? "write" : "read",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100167 (unsigned long long)peer_req->i.sector);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200168 /* strange behavior of some lower level drivers...
169 * fail the request by clearing the uptodate flag,
170 * but do not return any error?! */
171 error = -EIO;
172 }
173
174 if (error)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100175 set_bit(__EE_WAS_ERROR, &peer_req->flags);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200176
177 bio_put(bio); /* no need for the bio anymore */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100178 if (atomic_dec_and_test(&peer_req->pending_bios)) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200179 if (is_write)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100180 drbd_endio_write_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200181 else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100182 drbd_endio_read_sec_final(peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200183 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700184}
185
186/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
187 */
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +0100188void drbd_request_endio(struct bio *bio, int error)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700189{
Lars Ellenberga1154132010-11-13 20:42:29 +0100190 unsigned long flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700191 struct drbd_request *req = bio->bi_private;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100192 struct drbd_conf *mdev = req->w.mdev;
Lars Ellenberga1154132010-11-13 20:42:29 +0100193 struct bio_and_error m;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700194 enum drbd_req_event what;
195 int uptodate = bio_flagged(bio, BIO_UPTODATE);
196
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197 if (!error && !uptodate) {
198 dev_warn(DEV, "p %s: setting error to -EIO\n",
199 bio_data_dir(bio) == WRITE ? "write" : "read");
200 /* strange behavior of some lower level drivers...
201 * fail the request by clearing the uptodate flag,
202 * but do not return any error?! */
203 error = -EIO;
204 }
205
Philipp Reisnerb411b362009-09-25 16:07:19 -0700206 /* to avoid recursion in __req_mod */
207 if (unlikely(error)) {
208 what = (bio_data_dir(bio) == WRITE)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100209 ? WRITE_COMPLETED_WITH_ERROR
Lars Ellenberg5c3c7e62010-04-10 02:10:09 +0200210 : (bio_rw(bio) == READ)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100211 ? READ_COMPLETED_WITH_ERROR
212 : READ_AHEAD_COMPLETED_WITH_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700213 } else
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100214 what = COMPLETED_OK;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215
216 bio_put(req->private_bio);
217 req->private_bio = ERR_PTR(error);
218
Lars Ellenberga1154132010-11-13 20:42:29 +0100219 /* not req_mod(), we need irqsave here! */
Philipp Reisner87eeee42011-01-19 14:16:30 +0100220 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
Lars Ellenberga1154132010-11-13 20:42:29 +0100221 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100222 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
Lars Ellenberga1154132010-11-13 20:42:29 +0100223
224 if (m.bio)
225 complete_master_bio(mdev, &m);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226}
227
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100228int w_read_retry_remote(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229{
230 struct drbd_request *req = container_of(w, struct drbd_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +0100231 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700232
233 /* We should not detach for read io-error,
234 * but try to WRITE the P_DATA_REPLY to the failed location,
235 * to give the disk the chance to relocate that block */
236
Philipp Reisner87eeee42011-01-19 14:16:30 +0100237 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenbergd255e5f2010-05-27 09:45:45 +0200238 if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +0100239 _req_mod(req, READ_RETRY_REMOTE_CANCELED);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100240 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100241 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700242 }
Philipp Reisner87eeee42011-01-19 14:16:30 +0100243 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700244
Philipp Reisner00d56942011-02-09 18:09:48 +0100245 return w_send_read_req(w, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700246}
247
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100248void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100249 struct drbd_peer_request *peer_req, void *digest)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250{
251 struct hash_desc desc;
252 struct scatterlist sg;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100253 struct page *page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200254 struct page *tmp;
255 unsigned len;
256
257 desc.tfm = tfm;
258 desc.flags = 0;
259
260 sg_init_table(&sg, 1);
261 crypto_hash_init(&desc);
262
263 while ((tmp = page_chain_next(page))) {
264 /* all but the last page will be fully used */
265 sg_set_page(&sg, page, PAGE_SIZE, 0);
266 crypto_hash_update(&desc, &sg, sg.length);
267 page = tmp;
268 }
269 /* and now the last, possibly only partially used page */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100270 len = peer_req->i.size & (PAGE_SIZE - 1);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200271 sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
272 crypto_hash_update(&desc, &sg, sg.length);
273 crypto_hash_final(&desc, digest);
274}
275
276void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700277{
278 struct hash_desc desc;
279 struct scatterlist sg;
280 struct bio_vec *bvec;
281 int i;
282
283 desc.tfm = tfm;
284 desc.flags = 0;
285
286 sg_init_table(&sg, 1);
287 crypto_hash_init(&desc);
288
289 __bio_for_each_segment(bvec, bio, i, 0) {
290 sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
291 crypto_hash_update(&desc, &sg, sg.length);
292 }
293 crypto_hash_final(&desc, digest);
294}
295
Lars Ellenberg9676c762011-02-22 14:02:31 +0100296/* MAYBE merge common code with w_e_end_ov_req */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100297static int w_e_send_csum(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700298{
Philipp Reisner00d56942011-02-09 18:09:48 +0100299 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
300 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 int digest_size;
302 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100303 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700304
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100305 if (unlikely(cancel))
306 goto out;
307
Lars Ellenberg9676c762011-02-22 14:02:31 +0100308 if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100309 goto out;
310
Lars Ellenbergf3990022011-03-23 14:31:09 +0100311 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100312 digest = kmalloc(digest_size, GFP_NOIO);
313 if (digest) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100314 sector_t sector = peer_req->i.sector;
315 unsigned int size = peer_req->i.size;
Lars Ellenbergf3990022011-03-23 14:31:09 +0100316 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
Lars Ellenberg9676c762011-02-22 14:02:31 +0100317 /* Free peer_req and pages before send.
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100318 * In case we block on congestion, we could otherwise run into
319 * some distributed deadlock, if the other side blocks on
320 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200321 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200322 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100323 peer_req = NULL;
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100324 inc_rs_pending(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100325 err = drbd_send_drequest_csum(mdev, sector, size,
Andreas Gruenbacherdb1b0b72011-03-16 01:37:21 +0100326 digest, digest_size,
327 P_CSUM_RS_REQUEST);
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100328 kfree(digest);
329 } else {
330 dev_err(DEV, "kmalloc() of digest failed.\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100331 err = -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700332 }
333
Lars Ellenberg53ea4332011-03-08 17:11:40 +0100334out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100335 if (peer_req)
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200336 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700337
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100338 if (unlikely(err))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700339 dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100340 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700341}
342
343#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
344
345static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
346{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100347 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700348
349 if (!get_ldev(mdev))
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200350 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700351
Philipp Reisnere3555d82010-11-07 15:56:29 +0100352 if (drbd_rs_should_slow_down(mdev, sector))
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200353 goto defer;
354
Philipp Reisnerb411b362009-09-25 16:07:19 -0700355 /* GFP_TRY, because if there is no memory available right now, this may
356 * be rescheduled for later. It is "only" background resync, after all. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200357 peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector,
358 size, GFP_TRY);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100359 if (!peer_req)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200360 goto defer;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700361
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100362 peer_req->w.cb = w_e_send_csum;
Philipp Reisner87eeee42011-01-19 14:16:30 +0100363 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100364 list_add(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100365 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700366
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200367 atomic_add(size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +0100368 if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200369 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700370
Lars Ellenberg10f6d9922011-01-24 14:47:09 +0100371 /* If it failed because of ENOMEM, retry should help. If it failed
372 * because bio_add_page failed (probably broken lower level driver),
373 * retry may or may not help.
374 * If it does not, you may need to force disconnect. */
Philipp Reisner87eeee42011-01-19 14:16:30 +0100375 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100376 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100377 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +0200378
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200379 drbd_free_peer_req(mdev, peer_req);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200380defer:
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200381 put_ldev(mdev);
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200382 return -EAGAIN;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383}
384
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100385int w_resync_timer(struct drbd_work *w, int cancel)
Philipp Reisner794abb72010-12-27 11:51:23 +0100386{
Philipp Reisner00d56942011-02-09 18:09:48 +0100387 struct drbd_conf *mdev = w->mdev;
Philipp Reisner794abb72010-12-27 11:51:23 +0100388 switch (mdev->state.conn) {
389 case C_VERIFY_S:
Philipp Reisner00d56942011-02-09 18:09:48 +0100390 w_make_ov_request(w, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100391 break;
392 case C_SYNC_TARGET:
Philipp Reisner00d56942011-02-09 18:09:48 +0100393 w_make_resync_request(w, cancel);
Philipp Reisner794abb72010-12-27 11:51:23 +0100394 break;
395 }
396
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100397 return 0;
Philipp Reisner794abb72010-12-27 11:51:23 +0100398}
399
Philipp Reisnerb411b362009-09-25 16:07:19 -0700400void resync_timer_fn(unsigned long data)
401{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700402 struct drbd_conf *mdev = (struct drbd_conf *) data;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700403
Philipp Reisner794abb72010-12-27 11:51:23 +0100404 if (list_empty(&mdev->resync_work.list))
Philipp Reisnere42325a2011-01-19 13:55:45 +0100405 drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700406}
407
Philipp Reisner778f2712010-07-06 11:14:00 +0200408static void fifo_set(struct fifo_buffer *fb, int value)
409{
410 int i;
411
412 for (i = 0; i < fb->size; i++)
Philipp Reisnerf10f2622010-10-05 16:50:17 +0200413 fb->values[i] = value;
Philipp Reisner778f2712010-07-06 11:14:00 +0200414}
415
416static int fifo_push(struct fifo_buffer *fb, int value)
417{
418 int ov;
419
420 ov = fb->values[fb->head_index];
421 fb->values[fb->head_index++] = value;
422
423 if (fb->head_index >= fb->size)
424 fb->head_index = 0;
425
426 return ov;
427}
428
429static void fifo_add_val(struct fifo_buffer *fb, int value)
430{
431 int i;
432
433 for (i = 0; i < fb->size; i++)
434 fb->values[i] += value;
435}
436
Philipp Reisner9d77a5f2010-11-07 18:02:56 +0100437static int drbd_rs_controller(struct drbd_conf *mdev)
Philipp Reisner778f2712010-07-06 11:14:00 +0200438{
439 unsigned int sect_in; /* Number of sectors that came in since the last turn */
440 unsigned int want; /* The number of sectors we want in the proxy */
441 int req_sect; /* Number of sectors to request in this turn */
442 int correction; /* Number of sectors more we need in the proxy*/
443 int cps; /* correction per invocation of drbd_rs_controller() */
444 int steps; /* Number of time steps to plan ahead */
445 int curr_corr;
446 int max_sect;
447
448 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
449 mdev->rs_in_flight -= sect_in;
450
451 spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
452
Lars Ellenbergf3990022011-03-23 14:31:09 +0100453 steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
Philipp Reisner778f2712010-07-06 11:14:00 +0200454
455 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
Lars Ellenbergf3990022011-03-23 14:31:09 +0100456 want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
Philipp Reisner778f2712010-07-06 11:14:00 +0200457 } else { /* normal path */
Lars Ellenbergf3990022011-03-23 14:31:09 +0100458 want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target :
459 sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10);
Philipp Reisner778f2712010-07-06 11:14:00 +0200460 }
461
462 correction = want - mdev->rs_in_flight - mdev->rs_planed;
463
464 /* Plan ahead */
465 cps = correction / steps;
466 fifo_add_val(&mdev->rs_plan_s, cps);
467 mdev->rs_planed += cps * steps;
468
469 /* What we do in this step */
470 curr_corr = fifo_push(&mdev->rs_plan_s, 0);
471 spin_unlock(&mdev->peer_seq_lock);
472 mdev->rs_planed -= curr_corr;
473
474 req_sect = sect_in + curr_corr;
475 if (req_sect < 0)
476 req_sect = 0;
477
Lars Ellenbergf3990022011-03-23 14:31:09 +0100478 max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ;
Philipp Reisner778f2712010-07-06 11:14:00 +0200479 if (req_sect > max_sect)
480 req_sect = max_sect;
481
482 /*
483 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
484 sect_in, mdev->rs_in_flight, want, correction,
485 steps, cps, mdev->rs_planed, curr_corr, req_sect);
486 */
487
488 return req_sect;
489}
490
Philipp Reisner9d77a5f2010-11-07 18:02:56 +0100491static int drbd_rs_number_requests(struct drbd_conf *mdev)
Lars Ellenberge65f4402010-11-05 10:04:07 +0100492{
493 int number;
Lars Ellenbergf3990022011-03-23 14:31:09 +0100494 if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */
Lars Ellenberge65f4402010-11-05 10:04:07 +0100495 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
496 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
497 } else {
Lars Ellenbergf3990022011-03-23 14:31:09 +0100498 mdev->c_sync_rate = mdev->ldev->dc.resync_rate;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100499 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
500 }
501
Lars Ellenberge65f4402010-11-05 10:04:07 +0100502 /* ignore the amount of pending requests, the resync controller should
503 * throttle down to incoming reply rate soon enough anyways. */
504 return number;
505}
506
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100507int w_make_resync_request(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700508{
Philipp Reisner00d56942011-02-09 18:09:48 +0100509 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700510 unsigned long bit;
511 sector_t sector;
512 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100513 int max_bio_size;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100514 int number, rollback_i, size;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700515 int align, queued, sndbuf;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200516 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700517
518 if (unlikely(cancel))
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100519 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700520
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200521 if (mdev->rs_total == 0) {
522 /* empty resync? */
523 drbd_resync_finished(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100524 return 0;
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200525 }
526
Philipp Reisnerb411b362009-09-25 16:07:19 -0700527 if (!get_ldev(mdev)) {
528 /* Since we only need to access mdev->rsync a
529 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
530 to continue resync with a broken disk makes no sense at
531 all */
532 dev_err(DEV, "Disk broke down during resync!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100533 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700534 }
535
Philipp Reisner0cfdd242011-05-25 11:14:35 +0200536 max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
Lars Ellenberge65f4402010-11-05 10:04:07 +0100537 number = drbd_rs_number_requests(mdev);
538 if (number == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +0200539 goto requeue;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700540
Philipp Reisnerb411b362009-09-25 16:07:19 -0700541 for (i = 0; i < number; i++) {
542 /* Stop generating RS requests, when half of the send buffer is filled */
Philipp Reisnere42325a2011-01-19 13:55:45 +0100543 mutex_lock(&mdev->tconn->data.mutex);
544 if (mdev->tconn->data.socket) {
545 queued = mdev->tconn->data.socket->sk->sk_wmem_queued;
546 sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700547 } else {
548 queued = 1;
549 sndbuf = 0;
550 }
Philipp Reisnere42325a2011-01-19 13:55:45 +0100551 mutex_unlock(&mdev->tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700552 if (queued > sndbuf / 2)
553 goto requeue;
554
555next_sector:
556 size = BM_BLOCK_SIZE;
557 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
558
Lars Ellenberg4b0715f2010-12-14 15:13:04 +0100559 if (bit == DRBD_END_OF_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700560 mdev->bm_resync_fo = drbd_bm_bits(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700561 put_ldev(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100562 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700563 }
564
565 sector = BM_BIT_TO_SECT(bit);
566
Philipp Reisnere3555d82010-11-07 15:56:29 +0100567 if (drbd_rs_should_slow_down(mdev, sector) ||
568 drbd_try_rs_begin_io(mdev, sector)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700569 mdev->bm_resync_fo = bit;
570 goto requeue;
571 }
572 mdev->bm_resync_fo = bit + 1;
573
574 if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
575 drbd_rs_complete_io(mdev, sector);
576 goto next_sector;
577 }
578
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100579#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
Philipp Reisnerb411b362009-09-25 16:07:19 -0700580 /* try to find some adjacent bits.
581 * we stop if we have already the maximum req size.
582 *
583 * Additionally always align bigger requests, in order to
584 * be prepared for all stripe sizes of software RAIDs.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700585 */
586 align = 1;
Philipp Reisnerd2074502010-07-22 15:27:27 +0200587 rollback_i = i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700588 for (;;) {
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100589 if (size + BM_BLOCK_SIZE > max_bio_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700590 break;
591
592 /* Be always aligned */
593 if (sector & ((1<<(align+3))-1))
594 break;
595
596 /* do not cross extent boundaries */
597 if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
598 break;
599 /* now, is it actually dirty, after all?
600 * caution, drbd_bm_test_bit is tri-state for some
601 * obscure reason; ( b == 0 ) would get the out-of-band
602 * only accidentally right because of the "oddly sized"
603 * adjustment below */
604 if (drbd_bm_test_bit(mdev, bit+1) != 1)
605 break;
606 bit++;
607 size += BM_BLOCK_SIZE;
608 if ((BM_BLOCK_SIZE << align) <= size)
609 align++;
610 i++;
611 }
612 /* if we merged some,
613 * reset the offset to start the next drbd_bm_find_next from */
614 if (size > BM_BLOCK_SIZE)
615 mdev->bm_resync_fo = bit + 1;
616#endif
617
618 /* adjust very last sectors, in case we are oddly sized */
619 if (sector + (size>>9) > capacity)
620 size = (capacity-sector)<<9;
Lars Ellenbergf3990022011-03-23 14:31:09 +0100621 if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700622 switch (read_for_csum(mdev, sector, size)) {
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200623 case -EIO: /* Disk failure */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700624 put_ldev(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100625 return -EIO;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200626 case -EAGAIN: /* allocation failed, or ldev busy */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700627 drbd_rs_complete_io(mdev, sector);
628 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerd2074502010-07-22 15:27:27 +0200629 i = rollback_i;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700630 goto requeue;
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200631 case 0:
632 /* everything ok */
633 break;
634 default:
635 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700636 }
637 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100638 int err;
639
Philipp Reisnerb411b362009-09-25 16:07:19 -0700640 inc_rs_pending(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100641 err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
642 sector, size, ID_SYNCER);
643 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700644 dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
645 dec_rs_pending(mdev);
646 put_ldev(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100647 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700648 }
649 }
650 }
651
652 if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
653 /* last syncer _request_ was sent,
654 * but the P_RS_DATA_REPLY not yet received. sync will end (and
655 * next sync group will resume), as soon as we receive the last
656 * resync data block, and the last bit is cleared.
657 * until then resync "work" is "inactive" ...
658 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700659 put_ldev(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100660 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700661 }
662
663 requeue:
Philipp Reisner778f2712010-07-06 11:14:00 +0200664 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700665 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
666 put_ldev(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100667 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700668}
669
Philipp Reisner00d56942011-02-09 18:09:48 +0100670static int w_make_ov_request(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700671{
Philipp Reisner00d56942011-02-09 18:09:48 +0100672 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700673 int number, i, size;
674 sector_t sector;
675 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
676
677 if (unlikely(cancel))
678 return 1;
679
Lars Ellenberg2649f082010-11-05 10:05:47 +0100680 number = drbd_rs_number_requests(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700681
682 sector = mdev->ov_position;
683 for (i = 0; i < number; i++) {
684 if (sector >= capacity) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700685 return 1;
686 }
687
688 size = BM_BLOCK_SIZE;
689
Philipp Reisnere3555d82010-11-07 15:56:29 +0100690 if (drbd_rs_should_slow_down(mdev, sector) ||
691 drbd_try_rs_begin_io(mdev, sector)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700692 mdev->ov_position = sector;
693 goto requeue;
694 }
695
696 if (sector + (size>>9) > capacity)
697 size = (capacity-sector)<<9;
698
699 inc_rs_pending(mdev);
Andreas Gruenbacher5b9f4992011-03-16 01:31:39 +0100700 if (drbd_send_ov_request(mdev, sector, size)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700701 dec_rs_pending(mdev);
702 return 0;
703 }
704 sector += BM_SECT_PER_BIT;
705 }
706 mdev->ov_position = sector;
707
708 requeue:
Lars Ellenberg2649f082010-11-05 10:05:47 +0100709 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
711 return 1;
712}
713
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100714int w_ov_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715{
Philipp Reisner00d56942011-02-09 18:09:48 +0100716 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700717 kfree(w);
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +0100718 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700719 drbd_resync_finished(mdev);
720
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100721 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700722}
723
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100724static int w_resync_finished(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700725{
Philipp Reisner00d56942011-02-09 18:09:48 +0100726 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 kfree(w);
728
729 drbd_resync_finished(mdev);
730
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100731 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700732}
733
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200734static void ping_peer(struct drbd_conf *mdev)
735{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +0100736 struct drbd_tconn *tconn = mdev->tconn;
737
738 clear_bit(GOT_PING_ACK, &tconn->flags);
739 request_ping(tconn);
740 wait_event(tconn->ping_wait,
741 test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200742}
743
Philipp Reisnerb411b362009-09-25 16:07:19 -0700744int drbd_resync_finished(struct drbd_conf *mdev)
745{
746 unsigned long db, dt, dbdt;
747 unsigned long n_oos;
748 union drbd_state os, ns;
749 struct drbd_work *w;
750 char *khelper_cmd = NULL;
Lars Ellenberg26525612010-11-05 09:56:33 +0100751 int verify_done = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700752
753 /* Remove all elements from the resync LRU. Since future actions
754 * might set bits in the (main) bitmap, then the entries in the
755 * resync LRU would be wrong. */
756 if (drbd_rs_del_all(mdev)) {
757 /* In case this is not possible now, most probably because
758 * there are P_RS_DATA_REPLY Packets lingering on the worker's
759 * queue (or even the read operations for those packets
760 * is not finished by now). Retry in 100ms. */
761
Philipp Reisner20ee6392011-01-18 15:28:59 +0100762 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700763 w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
764 if (w) {
765 w->cb = w_resync_finished;
Philipp Reisnere42325a2011-01-19 13:55:45 +0100766 drbd_queue_work(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700767 return 1;
768 }
769 dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
770 }
771
772 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
773 if (dt <= 0)
774 dt = 1;
775 db = mdev->rs_total;
776 dbdt = Bit2KB(db/dt);
777 mdev->rs_paused /= HZ;
778
779 if (!get_ldev(mdev))
780 goto out;
781
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +0200782 ping_peer(mdev);
783
Philipp Reisner87eeee42011-01-19 14:16:30 +0100784 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner78bae592011-03-28 15:40:12 +0200785 os = drbd_read_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700786
Lars Ellenberg26525612010-11-05 09:56:33 +0100787 verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
788
Philipp Reisnerb411b362009-09-25 16:07:19 -0700789 /* This protects us against multiple calls (that can happen in the presence
790 of application IO), and against connectivity loss just before we arrive here. */
791 if (os.conn <= C_CONNECTED)
792 goto out_unlock;
793
794 ns = os;
795 ns.conn = C_CONNECTED;
796
797 dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
Lars Ellenberg26525612010-11-05 09:56:33 +0100798 verify_done ? "Online verify " : "Resync",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 dt + mdev->rs_paused, mdev->rs_paused, dbdt);
800
801 n_oos = drbd_bm_total_weight(mdev);
802
803 if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
804 if (n_oos) {
805 dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
806 n_oos, Bit2KB(1));
807 khelper_cmd = "out-of-sync";
808 }
809 } else {
810 D_ASSERT((n_oos - mdev->rs_failed) == 0);
811
812 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
813 khelper_cmd = "after-resync-target";
814
Lars Ellenbergf3990022011-03-23 14:31:09 +0100815 if (mdev->tconn->csums_tfm && mdev->rs_total) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700816 const unsigned long s = mdev->rs_same_csum;
817 const unsigned long t = mdev->rs_total;
818 const int ratio =
819 (t == 0) ? 0 :
820 (t < 100000) ? ((s*100)/t) : (s/(t/100));
Bart Van Assche24c48302011-05-21 18:32:29 +0200821 dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700822 "transferred %luK total %luK\n",
823 ratio,
824 Bit2KB(mdev->rs_same_csum),
825 Bit2KB(mdev->rs_total - mdev->rs_same_csum),
826 Bit2KB(mdev->rs_total));
827 }
828 }
829
830 if (mdev->rs_failed) {
831 dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
832
833 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
834 ns.disk = D_INCONSISTENT;
835 ns.pdsk = D_UP_TO_DATE;
836 } else {
837 ns.disk = D_UP_TO_DATE;
838 ns.pdsk = D_INCONSISTENT;
839 }
840 } else {
841 ns.disk = D_UP_TO_DATE;
842 ns.pdsk = D_UP_TO_DATE;
843
844 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
845 if (mdev->p_uuid) {
846 int i;
847 for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
848 _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
849 drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
850 _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
851 } else {
852 dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
853 }
854 }
855
Lars Ellenberg62b0da32011-01-20 13:25:21 +0100856 if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
857 /* for verify runs, we don't update uuids here,
858 * so there would be nothing to report. */
859 drbd_uuid_set_bm(mdev, 0UL);
860 drbd_print_uuids(mdev, "updated UUIDs");
861 if (mdev->p_uuid) {
862 /* Now the two UUID sets are equal, update what we
863 * know of the peer. */
864 int i;
865 for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
866 mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
867 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700868 }
869 }
870
871 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
872out_unlock:
Philipp Reisner87eeee42011-01-19 14:16:30 +0100873 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700874 put_ldev(mdev);
875out:
876 mdev->rs_total = 0;
877 mdev->rs_failed = 0;
878 mdev->rs_paused = 0;
Lars Ellenberg26525612010-11-05 09:56:33 +0100879 if (verify_done)
880 mdev->ov_start_sector = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700881
Lars Ellenberg13d42682010-10-13 17:37:54 +0200882 drbd_md_sync(mdev);
883
Philipp Reisnerb411b362009-09-25 16:07:19 -0700884 if (khelper_cmd)
885 drbd_khelper(mdev, khelper_cmd);
886
887 return 1;
888}
889
890/* helper */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100891static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892{
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200893 if (drbd_peer_req_has_active_page(peer_req)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894 /* This might happen if sendpage() has not finished */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100895 int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200896 atomic_add(i, &mdev->pp_in_use_by_net);
897 atomic_sub(i, &mdev->pp_in_use);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100898 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100899 list_add_tail(&peer_req->w.list, &mdev->net_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100900 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg435f0742010-09-06 12:30:25 +0200901 wake_up(&drbd_pp_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700902 } else
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200903 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700904}
905
906/**
907 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
908 * @mdev: DRBD device.
909 * @w: work object.
910 * @cancel: The connection will be closed anyways
911 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100912int w_e_end_data_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100914 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +0100915 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100916 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917
918 if (unlikely(cancel)) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200919 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 dec_unacked(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100921 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922 }
923
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100924 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100925 err = drbd_send_block(mdev, P_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700926 } else {
927 if (__ratelimit(&drbd_ratelimit_state))
928 dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100929 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100931 err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932 }
933
934 dec_unacked(mdev);
935
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100936 move_to_net_ee_or_free(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700937
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100938 if (unlikely(err))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700939 dev_err(DEV, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100940 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700941}
942
943/**
944 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS
945 * @mdev: DRBD device.
946 * @w: work object.
947 * @cancel: The connection will be closed anyways
948 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100949int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700950{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100951 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +0100952 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100953 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700954
955 if (unlikely(cancel)) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200956 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 dec_unacked(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100958 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700959 }
960
961 if (get_ldev_if_state(mdev, D_FAILED)) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100962 drbd_rs_complete_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700963 put_ldev(mdev);
964 }
965
Philipp Reisnerd612d302010-12-27 10:53:28 +0100966 if (mdev->state.conn == C_AHEAD) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100967 err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100968 } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969 if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
970 inc_rs_pending(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100971 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700972 } else {
973 if (__ratelimit(&drbd_ratelimit_state))
974 dev_err(DEV, "Not sending RSDataReply, "
975 "partner DISKLESS!\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100976 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977 }
978 } else {
979 if (__ratelimit(&drbd_ratelimit_state))
980 dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100981 (unsigned long long)peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700982
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100983 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984
985 /* update resync data with failure */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100986 drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 }
988
989 dec_unacked(mdev);
990
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100991 move_to_net_ee_or_free(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100993 if (unlikely(err))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700994 dev_err(DEV, "drbd_send_block() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100995 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996}
997
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100998int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700999{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001000 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001001 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001002 struct digest_info *di;
1003 int digest_size;
1004 void *digest = NULL;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001005 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001006
1007 if (unlikely(cancel)) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001008 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009 dec_unacked(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001010 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001011 }
1012
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001013 if (get_ldev(mdev)) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001014 drbd_rs_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001015 put_ldev(mdev);
1016 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001017
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001018 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001019
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001020 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021 /* quick hack to try to avoid a race against reconfiguration.
1022 * a real fix would be much more involved,
1023 * introducing more locking mechanisms */
Lars Ellenbergf3990022011-03-23 14:31:09 +01001024 if (mdev->tconn->csums_tfm) {
1025 digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026 D_ASSERT(digest_size == di->digest_size);
1027 digest = kmalloc(digest_size, GFP_NOIO);
1028 }
1029 if (digest) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01001030 drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001031 eq = !memcmp(digest, di->digest, digest_size);
1032 kfree(digest);
1033 }
1034
1035 if (eq) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001036 drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size);
Lars Ellenberg676396d2010-03-03 02:08:22 +01001037 /* rs_same_csums unit is BM_BLOCK_SIZE */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001038 mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001039 err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040 } else {
1041 inc_rs_pending(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001042 peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1043 peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
Philipp Reisner204bba92010-08-23 16:17:13 +02001044 kfree(di);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001045 err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001046 }
1047 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001048 err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001049 if (__ratelimit(&drbd_ratelimit_state))
1050 dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
1051 }
1052
1053 dec_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001054 move_to_net_ee_or_free(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001055
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001056 if (unlikely(err))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001057 dev_err(DEV, "drbd_send_block/ack() failed\n");
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001058 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001059}
1060
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001061int w_e_end_ov_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001062{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001063 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001064 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001065 sector_t sector = peer_req->i.sector;
1066 unsigned int size = peer_req->i.size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001067 int digest_size;
1068 void *digest;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001069 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001070
1071 if (unlikely(cancel))
1072 goto out;
1073
Lars Ellenbergf3990022011-03-23 14:31:09 +01001074 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001075 digest = kmalloc(digest_size, GFP_NOIO);
Philipp Reisner8f214202011-03-01 15:52:35 +01001076 if (!digest) {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001077 err = 1; /* terminate the connection in case the allocation failed */
Philipp Reisner8f214202011-03-01 15:52:35 +01001078 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001079 }
1080
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001081 if (likely(!(peer_req->flags & EE_WAS_ERROR)))
Lars Ellenbergf3990022011-03-23 14:31:09 +01001082 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
Philipp Reisner8f214202011-03-01 15:52:35 +01001083 else
1084 memset(digest, 0, digest_size);
1085
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001086 /* Free e and pages before send.
1087 * In case we block on congestion, we could otherwise run into
1088 * some distributed deadlock, if the other side blocks on
1089 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001090 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001091 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001092 peer_req = NULL;
Philipp Reisner8f214202011-03-01 15:52:35 +01001093 inc_rs_pending(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001094 err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY);
1095 if (err)
Philipp Reisner8f214202011-03-01 15:52:35 +01001096 dec_rs_pending(mdev);
1097 kfree(digest);
1098
Philipp Reisnerb411b362009-09-25 16:07:19 -07001099out:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001100 if (peer_req)
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001101 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001102 dec_unacked(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001103 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001104}
1105
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001106void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001107{
1108 if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
1109 mdev->ov_last_oos_size += size>>9;
1110 } else {
1111 mdev->ov_last_oos_start = sector;
1112 mdev->ov_last_oos_size = size>>9;
1113 }
1114 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001115}
1116
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001117int w_e_end_ov_reply(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001118{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001119 struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001120 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001121 struct digest_info *di;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001122 void *digest;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001123 sector_t sector = peer_req->i.sector;
1124 unsigned int size = peer_req->i.size;
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001125 int digest_size;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001126 int err, eq = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127
1128 if (unlikely(cancel)) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001129 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130 dec_unacked(mdev);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001131 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001132 }
1133
1134 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1135 * the resync lru has been cleaned up already */
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001136 if (get_ldev(mdev)) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001137 drbd_rs_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02001138 put_ldev(mdev);
1139 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001140
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001141 di = peer_req->digest;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001143 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01001144 digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145 digest = kmalloc(digest_size, GFP_NOIO);
1146 if (digest) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01001147 drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001148
1149 D_ASSERT(digest_size == di->digest_size);
1150 eq = !memcmp(digest, di->digest, digest_size);
1151 kfree(digest);
1152 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153 }
1154
Lars Ellenberg9676c762011-02-22 14:02:31 +01001155 /* Free peer_req and pages before send.
1156 * In case we block on congestion, we could otherwise run into
1157 * some distributed deadlock, if the other side blocks on
1158 * congestion as well, because our receiver blocks in
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001159 * drbd_alloc_pages due to pp_in_use > max_buffers. */
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001160 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001161 if (!eq)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001162 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001163 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001164 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001166 err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
Andreas Gruenbacherfa79abd2011-03-16 01:31:39 +01001167 eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001168
Lars Ellenberg53ea4332011-03-08 17:11:40 +01001169 dec_unacked(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170
Lars Ellenbergea5442a2010-11-05 09:48:01 +01001171 --mdev->ov_left;
1172
1173 /* let's advance progress step marks only for every other megabyte */
1174 if ((mdev->ov_left & 0x200) == 0x200)
1175 drbd_advance_rs_marks(mdev, mdev->ov_left);
1176
1177 if (mdev->ov_left == 0) {
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001178 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 drbd_resync_finished(mdev);
1180 }
1181
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001182 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183}
1184
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001185int w_prev_work_done(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186{
1187 struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001188
Philipp Reisnerb411b362009-09-25 16:07:19 -07001189 complete(&b->done);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001190 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191}
1192
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001193int w_send_barrier(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001195 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001197 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001198 struct p_barrier *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199
1200 /* really avoid racing with tl_clear. w.cb may have been referenced
1201 * just before it was reassigned and re-queued, so double check that.
1202 * actually, this race was harmless, since we only try to send the
1203 * barrier packet here, and otherwise do nothing with the object.
1204 * but compare with the head of w_clear_epoch */
Philipp Reisner87eeee42011-01-19 14:16:30 +01001205 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206 if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
1207 cancel = 1;
Philipp Reisner87eeee42011-01-19 14:16:30 +01001208 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 if (cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 return 0;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001211
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001212 sock = &mdev->tconn->data;
1213 p = drbd_prepare_command(mdev, sock);
1214 if (!p)
1215 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 p->barrier = b->br_number;
1217 /* inc_ap_pending was done where this was queued.
1218 * dec_ap_pending will be done in got_BarrierAck
1219 * or (on connection loss) in w_clear_epoch. */
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001220 return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221}
1222
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001223int w_send_write_hint(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224{
Philipp Reisner00d56942011-02-09 18:09:48 +01001225 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001226 struct drbd_socket *sock;
1227
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 if (cancel)
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001229 return 0;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001230 sock = &mdev->tconn->data;
1231 if (!drbd_prepare_command(mdev, sock))
1232 return -EIO;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001233 return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001234}
1235
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001236int w_send_out_of_sync(struct drbd_work *w, int cancel)
Philipp Reisner73a01a12010-10-27 14:33:00 +02001237{
1238 struct drbd_request *req = container_of(w, struct drbd_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001239 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001240 int err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001241
1242 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001243 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001244 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001245 }
1246
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01001247 err = drbd_send_out_of_sync(mdev, req);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001248 req_mod(req, OOS_HANDED_TO_NETWORK);
Philipp Reisner73a01a12010-10-27 14:33:00 +02001249
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001250 return err;
Philipp Reisner73a01a12010-10-27 14:33:00 +02001251}
1252
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253/**
1254 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
1255 * @mdev: DRBD device.
1256 * @w: work object.
1257 * @cancel: The connection will be closed anyways
1258 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001259int w_send_dblock(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260{
1261 struct drbd_request *req = container_of(w, struct drbd_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001262 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001263 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264
1265 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001266 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001267 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 }
1269
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001270 err = drbd_send_dblock(mdev, req);
1271 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001272
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001273 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274}
1275
1276/**
1277 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
1278 * @mdev: DRBD device.
1279 * @w: work object.
1280 * @cancel: The connection will be closed anyways
1281 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001282int w_send_read_req(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001283{
1284 struct drbd_request *req = container_of(w, struct drbd_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001285 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001286 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287
1288 if (unlikely(cancel)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001289 req_mod(req, SEND_CANCELED);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001290 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291 }
1292
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001293 err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
Andreas Gruenbacher6c1005e2011-03-16 01:34:24 +01001294 (unsigned long)req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001296 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001297
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001298 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001299}
1300
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001301int w_restart_disk_io(struct drbd_work *w, int cancel)
Philipp Reisner265be2d2010-05-31 10:14:17 +02001302{
1303 struct drbd_request *req = container_of(w, struct drbd_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001304 struct drbd_conf *mdev = w->mdev;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001305
Philipp Reisner07782862010-08-31 12:00:50 +02001306 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
Lars Ellenberg181286a2011-03-31 15:18:56 +02001307 drbd_al_begin_io(mdev, &req->i);
Philipp Reisner265be2d2010-05-31 10:14:17 +02001308 /* Calling drbd_al_begin_io() out of the worker might deadlocks
1309 theoretically. Practically it can not deadlock, since this is
1310 only used when unfreezing IOs. All the extents of the requests
1311 that made it into the TL are already active */
1312
1313 drbd_req_make_private_bio(req, req->master_bio);
1314 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1315 generic_make_request(req->private_bio);
1316
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001317 return 0;
Philipp Reisner265be2d2010-05-31 10:14:17 +02001318}
1319
Philipp Reisnerb411b362009-09-25 16:07:19 -07001320static int _drbd_may_sync_now(struct drbd_conf *mdev)
1321{
1322 struct drbd_conf *odev = mdev;
1323
1324 while (1) {
Philipp Reisner438c8372011-03-28 14:48:01 +02001325 if (!odev->ldev)
1326 return 1;
Lars Ellenbergf3990022011-03-23 14:31:09 +01001327 if (odev->ldev->dc.resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001328 return 1;
Lars Ellenbergf3990022011-03-23 14:31:09 +01001329 odev = minor_to_mdev(odev->ldev->dc.resync_after);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001330 if (!expect(odev))
1331 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001332 if ((odev->state.conn >= C_SYNC_SOURCE &&
1333 odev->state.conn <= C_PAUSED_SYNC_T) ||
1334 odev->state.aftr_isp || odev->state.peer_isp ||
1335 odev->state.user_isp)
1336 return 0;
1337 }
1338}
1339
1340/**
1341 * _drbd_pause_after() - Pause resync on all devices that may not resync now
1342 * @mdev: DRBD device.
1343 *
1344 * Called from process context only (admin command and after_state_ch).
1345 */
1346static int _drbd_pause_after(struct drbd_conf *mdev)
1347{
1348 struct drbd_conf *odev;
1349 int i, rv = 0;
1350
Philipp Reisner695d08f2011-04-11 22:53:32 -07001351 rcu_read_lock();
Philipp Reisner81a5d602011-02-22 19:53:16 -05001352 idr_for_each_entry(&minors, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1354 continue;
1355 if (!_drbd_may_sync_now(odev))
1356 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
1357 != SS_NOTHING_TO_DO);
1358 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001359 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360
1361 return rv;
1362}
1363
1364/**
1365 * _drbd_resume_next() - Resume resync on all devices that may resync now
1366 * @mdev: DRBD device.
1367 *
1368 * Called from process context only (admin command and worker).
1369 */
1370static int _drbd_resume_next(struct drbd_conf *mdev)
1371{
1372 struct drbd_conf *odev;
1373 int i, rv = 0;
1374
Philipp Reisner695d08f2011-04-11 22:53:32 -07001375 rcu_read_lock();
Philipp Reisner81a5d602011-02-22 19:53:16 -05001376 idr_for_each_entry(&minors, odev, i) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001377 if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
1378 continue;
1379 if (odev->state.aftr_isp) {
1380 if (_drbd_may_sync_now(odev))
1381 rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
1382 CS_HARD, NULL)
1383 != SS_NOTHING_TO_DO) ;
1384 }
1385 }
Philipp Reisner695d08f2011-04-11 22:53:32 -07001386 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387 return rv;
1388}
1389
1390void resume_next_sg(struct drbd_conf *mdev)
1391{
1392 write_lock_irq(&global_state_lock);
1393 _drbd_resume_next(mdev);
1394 write_unlock_irq(&global_state_lock);
1395}
1396
1397void suspend_other_sg(struct drbd_conf *mdev)
1398{
1399 write_lock_irq(&global_state_lock);
1400 _drbd_pause_after(mdev);
1401 write_unlock_irq(&global_state_lock);
1402}
1403
1404static int sync_after_error(struct drbd_conf *mdev, int o_minor)
1405{
1406 struct drbd_conf *odev;
1407
1408 if (o_minor == -1)
1409 return NO_ERROR;
1410 if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
1411 return ERR_SYNC_AFTER;
1412
1413 /* check for loops */
1414 odev = minor_to_mdev(o_minor);
1415 while (1) {
1416 if (odev == mdev)
1417 return ERR_SYNC_AFTER_CYCLE;
1418
1419 /* dependency chain ends here, no cycles. */
Lars Ellenbergf3990022011-03-23 14:31:09 +01001420 if (odev->ldev->dc.resync_after == -1)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421 return NO_ERROR;
1422
1423 /* follow the dependency chain */
Lars Ellenbergf3990022011-03-23 14:31:09 +01001424 odev = minor_to_mdev(odev->ldev->dc.resync_after);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001425 }
1426}
1427
1428int drbd_alter_sa(struct drbd_conf *mdev, int na)
1429{
1430 int changes;
1431 int retcode;
1432
1433 write_lock_irq(&global_state_lock);
1434 retcode = sync_after_error(mdev, na);
1435 if (retcode == NO_ERROR) {
Lars Ellenbergf3990022011-03-23 14:31:09 +01001436 mdev->ldev->dc.resync_after = na;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 do {
1438 changes = _drbd_pause_after(mdev);
1439 changes |= _drbd_resume_next(mdev);
1440 } while (changes);
1441 }
1442 write_unlock_irq(&global_state_lock);
1443 return retcode;
1444}
1445
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001446void drbd_rs_controller_reset(struct drbd_conf *mdev)
1447{
1448 atomic_set(&mdev->rs_sect_in, 0);
1449 atomic_set(&mdev->rs_sect_ev, 0);
1450 mdev->rs_in_flight = 0;
1451 mdev->rs_planed = 0;
1452 spin_lock(&mdev->peer_seq_lock);
1453 fifo_set(&mdev->rs_plan_s, 0);
1454 spin_unlock(&mdev->peer_seq_lock);
1455}
1456
Philipp Reisner1f04af32011-02-07 11:33:59 +01001457void start_resync_timer_fn(unsigned long data)
1458{
1459 struct drbd_conf *mdev = (struct drbd_conf *) data;
1460
1461 drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work);
1462}
1463
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001464int w_start_resync(struct drbd_work *w, int cancel)
Philipp Reisner1f04af32011-02-07 11:33:59 +01001465{
Philipp Reisner00d56942011-02-09 18:09:48 +01001466 struct drbd_conf *mdev = w->mdev;
1467
Philipp Reisner1f04af32011-02-07 11:33:59 +01001468 if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
1469 dev_warn(DEV, "w_start_resync later...\n");
1470 mdev->start_resync_timer.expires = jiffies + HZ/10;
1471 add_timer(&mdev->start_resync_timer);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001472 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001473 }
1474
1475 drbd_start_resync(mdev, C_SYNC_SOURCE);
1476 clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001477 return 0;
Philipp Reisner1f04af32011-02-07 11:33:59 +01001478}
1479
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480/**
1481 * drbd_start_resync() - Start the resync process
1482 * @mdev: DRBD device.
1483 * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
1484 *
1485 * This function might bring you directly into one of the
1486 * C_PAUSED_SYNC_* states.
1487 */
1488void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1489{
1490 union drbd_state ns;
1491 int r;
1492
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02001493 if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494 dev_err(DEV, "Resync already running!\n");
1495 return;
1496 }
1497
Philipp Reisner59817f42010-10-29 12:44:20 +02001498 if (mdev->state.conn < C_AHEAD) {
1499 /* In case a previous resync run was aborted by an IO error/detach on the peer. */
1500 drbd_rs_cancel_all(mdev);
1501 /* This should be done when we abort the resync. We definitely do not
1502 want to have this for connections going back and forth between
1503 Ahead/Behind and SyncSource/SyncTarget */
1504 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001505
Philipp Reisnere64a3292011-02-05 17:34:11 +01001506 if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
1507 if (side == C_SYNC_TARGET) {
1508 /* Since application IO was locked out during C_WF_BITMAP_T and
1509 C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
1510 we check that we might make the data inconsistent. */
1511 r = drbd_khelper(mdev, "before-resync-target");
1512 r = (r >> 8) & 0xff;
1513 if (r > 0) {
1514 dev_info(DEV, "before-resync-target handler returned %d, "
Philipp Reisner09b9e792010-12-03 16:04:24 +01001515 "dropping connection.\n", r);
Philipp Reisner38fa9982011-03-15 18:24:49 +01001516 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisner09b9e792010-12-03 16:04:24 +01001517 return;
1518 }
Philipp Reisnere64a3292011-02-05 17:34:11 +01001519 } else /* C_SYNC_SOURCE */ {
1520 r = drbd_khelper(mdev, "before-resync-source");
1521 r = (r >> 8) & 0xff;
1522 if (r > 0) {
1523 if (r == 3) {
1524 dev_info(DEV, "before-resync-source handler returned %d, "
1525 "ignoring. Old userland tools?", r);
1526 } else {
1527 dev_info(DEV, "before-resync-source handler returned %d, "
1528 "dropping connection.\n", r);
Philipp Reisner38fa9982011-03-15 18:24:49 +01001529 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001530 return;
1531 }
1532 }
Philipp Reisner09b9e792010-12-03 16:04:24 +01001533 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534 }
1535
Philipp Reisnere64a3292011-02-05 17:34:11 +01001536 if (current == mdev->tconn->worker.task) {
Philipp Reisnerdad20552011-02-11 19:43:55 +01001537 /* The worker should not sleep waiting for state_mutex,
Philipp Reisnere64a3292011-02-05 17:34:11 +01001538 that can take long */
Philipp Reisner8410da82011-02-11 20:11:10 +01001539 if (!mutex_trylock(mdev->state_mutex)) {
Philipp Reisnere64a3292011-02-05 17:34:11 +01001540 set_bit(B_RS_H_DONE, &mdev->flags);
1541 mdev->start_resync_timer.expires = jiffies + HZ/5;
1542 add_timer(&mdev->start_resync_timer);
1543 return;
1544 }
1545 } else {
Philipp Reisner8410da82011-02-11 20:11:10 +01001546 mutex_lock(mdev->state_mutex);
Philipp Reisnere64a3292011-02-05 17:34:11 +01001547 }
1548 clear_bit(B_RS_H_DONE, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549
1550 if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
Philipp Reisner8410da82011-02-11 20:11:10 +01001551 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552 return;
1553 }
1554
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 write_lock_irq(&global_state_lock);
Philipp Reisner78bae592011-03-28 15:40:12 +02001556 ns = drbd_read_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557
1558 ns.aftr_isp = !_drbd_may_sync_now(mdev);
1559
1560 ns.conn = side;
1561
1562 if (side == C_SYNC_TARGET)
1563 ns.disk = D_INCONSISTENT;
1564 else /* side == C_SYNC_SOURCE */
1565 ns.pdsk = D_INCONSISTENT;
1566
1567 r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02001568 ns = drbd_read_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569
1570 if (ns.conn < C_CONNECTED)
1571 r = SS_UNKNOWN_ERROR;
1572
1573 if (r == SS_SUCCESS) {
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001574 unsigned long tw = drbd_bm_total_weight(mdev);
1575 unsigned long now = jiffies;
1576 int i;
1577
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578 mdev->rs_failed = 0;
1579 mdev->rs_paused = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580 mdev->rs_same_csum = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001581 mdev->rs_last_events = 0;
1582 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02001583 mdev->rs_total = tw;
1584 mdev->rs_start = now;
1585 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1586 mdev->rs_mark_left[i] = tw;
1587 mdev->rs_mark_time[i] = now;
1588 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589 _drbd_pause_after(mdev);
1590 }
1591 write_unlock_irq(&global_state_lock);
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001592
Philipp Reisnerb411b362009-09-25 16:07:19 -07001593 if (r == SS_SUCCESS) {
1594 dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
1595 drbd_conn_str(ns.conn),
1596 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1597 (unsigned long) mdev->rs_total);
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001598 if (side == C_SYNC_TARGET)
1599 mdev->bm_resync_fo = 0;
1600
1601 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1602 * with w_send_oos, or the sync target will get confused as to
1603 * how much bits to resync. We cannot do that always, because for an
1604 * empty resync and protocol < 95, we need to do it here, as we call
1605 * drbd_resync_finished from here in that case.
1606 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1607 * and from after_state_ch otherwise. */
Philipp Reisner31890f42011-01-19 14:12:51 +01001608 if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96)
Lars Ellenberg6c922ed2011-01-12 11:51:13 +01001609 drbd_gen_and_send_sync_uuid(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610
Philipp Reisner31890f42011-01-19 14:12:51 +01001611 if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) {
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001612 /* This still has a race (about when exactly the peers
1613 * detect connection loss) that can lead to a full sync
1614 * on next handshake. In 8.3.9 we fixed this with explicit
1615 * resync-finished notifications, but the fix
1616 * introduces a protocol change. Sleeping for some
1617 * time longer than the ping interval + timeout on the
1618 * SyncSource, to give the SyncTarget the chance to
1619 * detect connection loss, then waiting for a ping
1620 * response (implicit in drbd_resync_finished) reduces
1621 * the race considerably, but does not solve it. */
1622 if (side == C_SYNC_SOURCE)
1623 schedule_timeout_interruptible(
Philipp Reisner89e58e72011-01-19 13:12:45 +01001624 mdev->tconn->net_conf->ping_int * HZ +
1625 mdev->tconn->net_conf->ping_timeo*HZ/9);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626 drbd_resync_finished(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627 }
1628
Lars Ellenberg9bd28d32010-11-05 09:55:18 +01001629 drbd_rs_controller_reset(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 /* ns.conn may already be != mdev->state.conn,
1631 * we may have been paused in between, or become paused until
1632 * the timer triggers.
1633 * No matter, that is handled in resync_timer_fn() */
1634 if (ns.conn == C_SYNC_TARGET)
1635 mod_timer(&mdev->resync_timer, jiffies);
1636
1637 drbd_md_sync(mdev);
1638 }
Lars Ellenberg5a22db82010-12-17 21:14:23 +01001639 put_ldev(mdev);
Philipp Reisner8410da82011-02-11 20:11:10 +01001640 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001641}
1642
1643int drbd_worker(struct drbd_thread *thi)
1644{
Philipp Reisner392c8802011-02-09 10:33:31 +01001645 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646 struct drbd_work *w = NULL;
Philipp Reisner0e29d162011-02-18 14:23:11 +01001647 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001648 LIST_HEAD(work_list);
Lars Ellenbergf3990022011-03-23 14:31:09 +01001649 int vnr, intr = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001651 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01001652 drbd_thread_current_set_cpu(thi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001653
Philipp Reisner19393e12011-02-09 10:09:07 +01001654 if (down_trylock(&tconn->data.work.s)) {
1655 mutex_lock(&tconn->data.mutex);
1656 if (tconn->data.socket && !tconn->net_conf->no_cork)
1657 drbd_tcp_uncork(tconn->data.socket);
1658 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659
Philipp Reisner19393e12011-02-09 10:09:07 +01001660 intr = down_interruptible(&tconn->data.work.s);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661
Philipp Reisner19393e12011-02-09 10:09:07 +01001662 mutex_lock(&tconn->data.mutex);
1663 if (tconn->data.socket && !tconn->net_conf->no_cork)
1664 drbd_tcp_cork(tconn->data.socket);
1665 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666 }
1667
1668 if (intr) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669 flush_signals(current);
Philipp Reisner19393e12011-02-09 10:09:07 +01001670 if (get_t_state(thi) == RUNNING) {
1671 conn_warn(tconn, "Worker got an unexpected signal\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672 continue;
Philipp Reisner19393e12011-02-09 10:09:07 +01001673 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674 break;
1675 }
1676
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01001677 if (get_t_state(thi) != RUNNING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678 break;
1679 /* With this break, we have done a down() but not consumed
1680 the entry from the list. The cleanup code takes care of
1681 this... */
1682
1683 w = NULL;
Philipp Reisner19393e12011-02-09 10:09:07 +01001684 spin_lock_irq(&tconn->data.work.q_lock);
1685 if (list_empty(&tconn->data.work.q)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686 /* something terribly wrong in our logic.
1687 * we were able to down() the semaphore,
1688 * but the list is empty... doh.
1689 *
1690 * what is the best thing to do now?
1691 * try again from scratch, restarting the receiver,
1692 * asender, whatnot? could break even more ugly,
1693 * e.g. when we are primary, but no good local data.
1694 *
1695 * I'll try to get away just starting over this loop.
1696 */
Philipp Reisner19393e12011-02-09 10:09:07 +01001697 conn_warn(tconn, "Work list unexpectedly empty\n");
1698 spin_unlock_irq(&tconn->data.work.q_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699 continue;
1700 }
Philipp Reisner19393e12011-02-09 10:09:07 +01001701 w = list_entry(tconn->data.work.q.next, struct drbd_work, list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001702 list_del_init(&w->list);
Philipp Reisner19393e12011-02-09 10:09:07 +01001703 spin_unlock_irq(&tconn->data.work.q_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001704
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001705 if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706 /* dev_warn(DEV, "worker: a callback failed! \n"); */
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01001707 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1708 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709 }
1710 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001711
Philipp Reisner19393e12011-02-09 10:09:07 +01001712 spin_lock_irq(&tconn->data.work.q_lock);
1713 while (!list_empty(&tconn->data.work.q)) {
1714 list_splice_init(&tconn->data.work.q, &work_list);
1715 spin_unlock_irq(&tconn->data.work.q_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001716
1717 while (!list_empty(&work_list)) {
1718 w = list_entry(work_list.next, struct drbd_work, list);
1719 list_del_init(&w->list);
Philipp Reisner00d56942011-02-09 18:09:48 +01001720 w->cb(w, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001721 }
1722
Philipp Reisner19393e12011-02-09 10:09:07 +01001723 spin_lock_irq(&tconn->data.work.q_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001724 }
Philipp Reisner19393e12011-02-09 10:09:07 +01001725 sema_init(&tconn->data.work.s, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726 /* DANGEROUS race: if someone did queue his work within the spinlock,
1727 * but up() ed outside the spinlock, we could get an up() on the
1728 * semaphore without corresponding list entry.
1729 * So don't do that.
1730 */
Philipp Reisner19393e12011-02-09 10:09:07 +01001731 spin_unlock_irq(&tconn->data.work.q_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001732
Philipp Reisner0e29d162011-02-18 14:23:11 +01001733 drbd_thread_stop(&tconn->receiver);
Lars Ellenbergf3990022011-03-23 14:31:09 +01001734 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner0e29d162011-02-18 14:23:11 +01001735 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
1736 /* _drbd_set_state only uses stop_nowait.
1737 * wait here for the exiting receiver. */
1738 drbd_mdev_cleanup(mdev);
1739 }
1740 clear_bit(OBJECT_DYING, &tconn->flags);
1741 clear_bit(CONFIG_PENDING, &tconn->flags);
1742 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743
1744 return 0;
1745}