/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */
25
26#include <linux/slab.h>
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010027#include <linux/crc32c.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070028#include <linux/drbd.h>
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010029#include <linux/drbd_limits.h>
30#include <linux/dynamic_debug.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070032#include "drbd_wrappers.h"
33
Lars Ellenberg85f103d2011-03-31 12:06:48 +020034
enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disc in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 bytes used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining bytes in the 4k block for
	 * context information.  A "flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyway, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows covering device sizes of up to 2**54 bytes (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};
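
/* A compile-time check of the layout arithmetic above would look like this
 * (a sketch, not in the original source; BUILD_BUG_ON() is the usual kernel
 * idiom for such checks):
 *
 *	BUILD_BUG_ON(sizeof(struct al_transaction_on_disk) != 4096);
 *
 * 36 header bytes + 64 * (2 + 4) update bytes + 919 * 4 context bytes
 * = 36 + 384 + 3676 = 4096, i.e. exactly one 4k meta data block.
 */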

struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct completion event;
	int err;
};

static int al_write_transaction(struct drbd_conf *mdev);

void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
	int r;

	wait_event(mdev->misc_wait,
		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
		   mdev->state.disk <= D_FAILED);

	return r ? NULL : page_address(mdev->md_io_page);
}

void drbd_md_put_buffer(struct drbd_conf *mdev)
{
	if (atomic_dec_and_test(&mdev->md_io_in_use))
		wake_up(&mdev->misc_wait);
}
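
/* Typical usage of the pair above, as seen e.g. in _al_write_transaction()
 * below (a sketch; the buffer is the single md_io_page, serialized via the
 * md_io_in_use atomic):
 *
 *	buffer = drbd_md_get_buffer(mdev);  // may sleep; NULL if disk failed
 *	if (!buffer)
 *		return -ENODEV;
 *	// ... fill buffer, submit synchronous meta data I/O ...
 *	drbd_md_put_buffer(mdev);           // wakes the next waiter
 */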

static bool md_io_allowed(struct drbd_conf *mdev)
{
	enum drbd_disk_state ds = mdev->state.disk;
	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}

void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
{
	wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	mdev->md_io.done = 0;
	mdev->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &mdev->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = mdev->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
	if (err) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

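/* Grab a reference on the activity log element for @enr.  Returns NULL
 * either if the corresponding resync extent currently forbids application
 * writes (BME_NO_WRITES; the extent is then flagged BME_PRIORITY so the
 * resync code steps aside) or if lc_get() could not provide a slot.
 * Callers retry by sleeping on mdev->al_wait.
 */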
static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	struct lc_element *tmp;
	int wake;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
			spin_unlock_irq(&mdev->al_lock);
			if (wake)
				wake_up(&mdev->al_wait);
			return NULL;
		}
	}
	al_ext = lc_get(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);
	return al_ext;
}

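/* Worked example for the first/last computation below, assuming the 4 MiB
 * activity log extents documented above (AL_EXTENT_SHIFT == 22, so one
 * extent covers 2^(22-9) = 8192 sectors of 512 bytes):
 *
 *	a 4 KiB bio at sector 8190 -> first = 8190 >> 13 = 0
 *	                              last  = (8190 + 8 - 1) >> 13 = 1
 *
 * i.e. this single request must activate extents 0 and 1 in one go.
 */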
void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool locked = false;

	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	for (enr = first; enr <= last; enr++)
		wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(mdev->al_wait,
			mdev->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(mdev->act_log)));

	if (locked) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */

		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (mdev->act_log->pending_changes) {
			al_write_transaction(mdev);
			mdev->al_writ_cnt++;

			spin_lock_irq(&mdev->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(mdev->act_log);
			spin_unlock_irq(&mdev->al_lock);
		}
		lc_unlock(mdev->act_log);
		wake_up(&mdev->al_wait);
	}
}

void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(mdev->act_log, enr);
		if (!extent) {
			dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(mdev->act_log, extent);
	}
	spin_unlock_irqrestore(&mdev->al_lock, flags);
	wake_up(&mdev->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

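/* Worked shift arithmetic for the two helpers below, assuming 4 KiB pages
 * (PAGE_SHIFT == 12, so PAGE_SHIFT + 3 == 15 bits per bitmap page), 4 KiB
 * bitmap granularity (BM_BLOCK_SHIFT == 12), 4 MiB AL extents
 * (AL_EXTENT_SHIFT == 22) and 16 MiB resync extents (BM_EXT_SHIFT == 24):
 *
 *	al_extent_to_bm_page: shift = 15 - (22 - 12) = 5
 *		-> 32 AL extents share one bitmap page
 *	rs_extent_to_bm_page: shift = 15 - (24 - 12) = 3
 *		->  8 resync extents share one bitmap page
 */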
static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}

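/* Each transaction is written to one MD_BLOCK_SIZE (4k) block of the on-disk
 * activity log area, used as a ring buffer of MD_AL_SECTORS*512/MD_BLOCK_SIZE
 * slots (see the al_tr_pos update below); e.g. if MD_AL_SECTORS were 64, that
 * would be 8 slots -- the actual constant lives in the DRBD headers.
 */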
static int
_al_write_transaction(struct drbd_conf *mdev)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(mdev->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (mdev->state.disk < D_INCONSISTENT) {
		dev_err(DEV,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(mdev->state.disk));
		put_ldev(mdev);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
		put_ldev(mdev);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&mdev->al_lock);
	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(mdev,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&mdev->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	sector =  mdev->ldev->md.md_offset
		+ mdev->ldev->md.al_offset
		+ mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(mdev))
		err = -EIO;
		/* drbd_chk_io_error done already */
	else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		err = -EIO;
		drbd_chk_io_error(mdev, 1, true);
	} else {
		/* advance ringbuffer position and transaction counter */
		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
		mdev->al_tr_number++;
	}

	drbd_md_put_buffer(mdev);
	put_ldev(mdev);

	return err;
}


static int w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_conf *mdev = w->mdev;
	int err;

	err = _al_write_transaction(mdev);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly. Others came through generic_make_request(),
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_conf *mdev)
{
	struct update_al_work al_work;

	if (current == mdev->tconn->worker.task)
		return _al_write_transaction(mdev);

	init_completion(&al_work.event);
	al_work.w.cb = w_al_write_transaction;
	al_work.w.mdev = mdev;
	drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w);
	wait_for_completion(&al_work.event);

	return al_work.err;
}

static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(mdev->act_log, al_ext);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @mdev:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_conf *mdev)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(mdev->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
	}

	wake_up(&mdev->al_wait);
}

static int w_update_odbm(struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
	struct drbd_conf *mdev = w->mdev;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	if (!get_ldev(mdev)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 0;
	}

	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
	put_ldev(mdev);

	kfree(udw);

	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
		switch (mdev->state.conn) {
		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(mdev);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_event(mdev, &sib);

	return 0;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
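/* With 16 MiB resync extents and 512 byte sectors, one resync extent covers
 * 2^(24-9) = 32768 sectors, so BM_SECT_TO_EXT() below is effectively
 * sector >> 15 (assuming BM_EXT_SHIFT == 24, matching the comment above).
 */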
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(atomic_read(&mdev->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(mdev->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count);
				dump_stack();

				lc_put(mdev->resync, &ext->lce);
				conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
				return;
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(mdev, enr);
			if (ext->flags != 0) {
				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				dev_warn(DEV, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(mdev->resync);
		}
		lc_put(mdev->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				udw->w.mdev = mdev;
				drbd_queue_work_front(&mdev->tconn->data.work, &udw->w);
			} else {
				dev_warn(DEV, "Could not kmalloc a udw\n");
			}
		}
	} else {
		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    mdev->resync_locked,
		    mdev->resync->nr_elements,
		    mdev->resync->flags);
	}
}

void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
		    mdev->state.conn != C_PAUSED_SYNC_T &&
		    mdev->state.conn != C_PAUSED_SYNC_S) {
			mdev->rs_mark_time[next] = now;
			mdev->rs_mark_left[next] = still_to_go;
			mdev->rs_last_mark = next;
		}
	}
}
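
/* The rs_mark_{time,left} arrays above form a small ring of DRBD_SYNC_MARKS
 * samples, advanced at most once per DRBD_SYNC_MARK_STEP jiffies; the resync
 * progress/speed display is derived from the difference between the oldest
 * and the newest sample (see the proc/status reporting code).
 */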

/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear the bits of the
 * affected one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
		       const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector. we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
	if (count && get_ldev(mdev)) {
		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
		spin_lock_irqsave(&mdev->al_lock, flags);
		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
		spin_unlock_irqrestore(&mdev->al_lock, flags);

		/* just wake_up unconditionally now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
		put_ldev(mdev);
	}
	if (wake_up)
		wake_up(&mdev->al_wait);
}
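
/* Worked example for the rounding above, assuming 4 KiB bitmap blocks
 * (BM_SECT_PER_BIT == 8, BM_SECT_TO_BIT(x) == x >> 3):
 *
 *	sector = 9, size = 4096 (8 sectors) -> esector = 16
 *	sbnr = (9 + 7) >> 3  = 2	(round start up)
 *	ebnr = (16 - 7) >> 3 = 1	(round end down)
 *
 * sbnr > ebnr, so nothing is cleared: the request does not fully cover any
 * aligned 4 KiB block, and clearing partially covered blocks is never safe
 * here.
 */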

/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
			    const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(mdev))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&mdev->al_lock, flags);
	count = drbd_bm_set_bits(mdev, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(mdev->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&mdev->al_lock, flags);

out:
	put_ldev(mdev);

	return count;
}
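
/* Note the asymmetry to __drbd_set_in_sync() above: when marking blocks out
 * of sync there is no rounding at all -- any bit the request overlaps is
 * set -- because over-approximating the dirty area is always safe, while
 * clearing a bit whose 4 KiB block was only partially written would lose
 * resync information.
 */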

static
struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
		spin_unlock_irq(&mdev->al_lock);
		return NULL;
	}
	e = lc_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			mdev->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = mdev->resync->flags;
	spin_unlock_irq(&mdev->al_lock);
	if (wakeup)
		wake_up(&mdev->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = lc_is_used(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
			 200 times -> 20 seconds. */

retry:
	sig = wait_event_interruptible(mdev->al_wait,
			(bm_ext = _bme_get(mdev, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(mdev->al_wait,
					       !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
					       test_bit(BME_PRIORITY, &bm_ext->flags));

		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
			spin_lock_irq(&mdev->al_lock);
			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				mdev->resync_locked--;
				wake_up(&mdev->al_wait);
			}
			spin_unlock_irq(&mdev->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			if (sa && --sa == 0)
				dev_warn(DEV, "drbd_rs_begin_io() stepped aside for 20sec. "
					 "Resync stalled?\n");
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer indefinitely if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (mdev->resync_locked > mdev->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);
	e = lc_find(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		mdev->resync_locked--;
		wake_up(&mdev->al_wait);
	}

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector. we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&mdev->al_lock);
	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
	if (count) {
		mdev->rs_failed += count;

		if (get_ldev(mdev)) {
			drbd_try_clear_on_disk_bm(mdev, sector, count, false);
			put_ldev(mdev);
		}

		/* just wake_up unconditionally now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&mdev->al_lock);
	if (wake_up)
		wake_up(&mdev->al_wait);
}