/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disk in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 bytes used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining bytes in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows covering device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};
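
/* Size sanity sketch (illustrative; assumes AL_UPDATES_PER_TRANSACTION == 64
 * and AL_CONTEXT_PER_TRANSACTION == 919, which is what the byte accounting in
 * the comments above implies):
 *	header:		3*4 + 4*2 + 4*4		=   36 bytes
 *	update slots:	64 * 2			=  128 bytes
 *	update extents:	64 * 4			=  256 bytes
 *	context:	919 * 4			= 3676 bytes
 *	total:		36 + 128 + 256 + 3676	= 4096 bytes
 * i.e. exactly one 4k on-disk block.  A compile-time check along these lines
 * (not present in this file) could make that explicit:
 *
 *	BUILD_BUG_ON(sizeof(struct al_transaction_on_disk) != 4096);
 */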

struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct completion event;
	int err;
};

static int al_write_transaction(struct drbd_conf *mdev);

void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
	int r;

	wait_event(mdev->misc_wait,
		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
		   mdev->state.disk <= D_FAILED);

	return r ? NULL : page_address(mdev->md_io_page);
}

void drbd_md_put_buffer(struct drbd_conf *mdev)
{
	if (atomic_dec_and_test(&mdev->md_io_in_use))
		wake_up(&mdev->misc_wait);
}
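
/* Typical usage of the buffer pair above (a sketch; see _al_write_transaction()
 * below for the real thing):
 *
 *	buffer = drbd_md_get_buffer(mdev);	// may sleep; NULL if the disk failed
 *	if (!buffer)
 *		return -ENODEV;
 *	... fill the 4k buffer, issue synchronous meta-data IO ...
 *	drbd_md_put_buffer(mdev);		// wakes up the next waiter
 */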

void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(mdev->misc_wait,
			*done || test_bit(FORCE_DETACH, &mdev->flags), dt);
	if (dt == 0) {
		dev_err(DEV, "meta-data IO operation timed out\n");
		drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH);
	}
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	mdev->md_io.done = 0;
	mdev->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &mdev->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = mdev->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
	if (err) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	struct lc_element *tmp;
	int wake;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
			spin_unlock_irq(&mdev->al_lock);
			if (wake)
				wake_up(&mdev->al_wait);
			return NULL;
		}
	}
	al_ext = lc_get(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);
	return al_ext;
}

void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool locked = false;

	D_ASSERT(first <= last);
	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	for (enr = first; enr <= last; enr++)
		wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(mdev->al_wait,
			mdev->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(mdev->act_log)));

	if (locked) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */

		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (mdev->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates) {
				al_write_transaction(mdev);
				mdev->al_writ_cnt++;
			}

			spin_lock_irq(&mdev->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(mdev->act_log);
			spin_unlock_irq(&mdev->al_lock);
		}
		lc_unlock(mdev->act_log);
		wake_up(&mdev->al_wait);
	}
}
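
/* Worked example for the first/last computation above (illustrative numbers):
 * with AL_EXTENT_SHIFT == 22, one AL extent covers 4 MiB, i.e. 2^13 512-byte
 * sectors, so first = sector >> 13.  An 8192-byte request (16 sectors)
 * starting 8 sectors before a 4 MiB boundary yields last == first + 1, and
 * both extents get activated in one go.
 */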

void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(first <= last);
	spin_lock_irqsave(&mdev->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(mdev->act_log, enr);
		if (!extent) {
			dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(mdev->act_log, extent);
	}
	spin_unlock_irqrestore(&mdev->al_lock, flags);
	wake_up(&mdev->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}
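
/* Shift arithmetic spelled out (a sketch with common values: PAGE_SHIFT == 12,
 * BM_BLOCK_SHIFT == 12, AL_EXTENT_SHIFT == 22, BM_EXT_SHIFT == 24):
 * a 4k bitmap page holds 2^(12+3) = 32768 bits; one AL extent (4 MiB) maps to
 * 2^(22-12) = 1024 bits, one resync extent (16 MiB) to 2^(24-12) = 4096 bits.
 * So al_enr >> (15-10) == al_enr >> 5 (32 AL extents per bitmap page), and
 * rs_enr >> (15-12) == rs_enr >> 3 (8 resync extents per bitmap page).
 */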

static int
_al_write_transaction(struct drbd_conf *mdev)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(mdev->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (mdev->state.disk < D_INCONSISTENT) {
		dev_err(DEV,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(mdev->state.disk));
		put_ldev(mdev);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
		put_ldev(mdev);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&mdev->al_lock);
	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(mdev,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&mdev->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	sector = mdev->ldev->md.md_offset
	       + mdev->ldev->md.al_offset
	       + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(mdev))
		err = -EIO;
		/* drbd_chk_io_error done already */
	else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		err = -EIO;
		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
	} else {
		/* advance ringbuffer position and transaction counter */
		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
		mdev->al_tr_number++;
	}

	drbd_md_put_buffer(mdev);
	put_ldev(mdev);

	return err;
}
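
/* Ring geometry sketch (illustrative; assumes the defaults MD_AL_SECTORS == 64
 * and MD_BLOCK_SIZE == 4096, i.e. 64*512/4096 == 8 transaction blocks):
 * al_tr_pos cycles 0,1,...,7,0,... while al_tr_number increases monotonically,
 * so after a crash the block with the highest tr_number (and a valid crc32c)
 * identifies the most recent transaction in the on-disk ring buffer.
 */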


static int w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_conf *mdev = w->mdev;
	int err;

	err = _al_write_transaction(mdev);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly.  Others come through generic_make_request() and
   need to delegate it to the worker. */
static int al_write_transaction(struct drbd_conf *mdev)
{
	struct update_al_work al_work;

	if (current == mdev->tconn->worker.task)
		return _al_write_transaction(mdev);

	init_completion(&al_work.event);
	al_work.w.cb = w_al_write_transaction;
	al_work.w.mdev = mdev;
	drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
	wait_for_completion(&al_work.event);

	return al_work.err;
}

static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(mdev->act_log, al_ext);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @mdev:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_conf *mdev)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(mdev->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
	}

	wake_up(&mdev->al_wait);
}

static int w_update_odbm(struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
	struct drbd_conf *mdev = w->mdev;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	if (!get_ldev(mdev)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "Cannot update on-disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 0;
	}

	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
	put_ldev(mdev);

	kfree(udw);

	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
		switch (mdev->state.conn) {
		case C_SYNC_SOURCE: case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(mdev);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_event(mdev, &sib);

	return 0;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(atomic_read(&mdev->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(mdev->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(mdev->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(mdev, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(mdev, enr);
			if (ext->flags != 0) {
				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				dev_warn(DEV, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(mdev->resync);
		}
		lc_put(mdev->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				udw->w.mdev = mdev;
				drbd_queue_work_front(&mdev->tconn->sender_work, &udw->w);
			} else {
				dev_warn(DEV, "Could not kmalloc a udw\n");
			}
		}
	} else {
		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    mdev->resync_locked,
		    mdev->resync->nr_elements,
		    mdev->resync->flags);
	}
}

void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
		    mdev->state.conn != C_PAUSED_SYNC_T &&
		    mdev->state.conn != C_PAUSED_SYNC_S) {
			mdev->rs_mark_time[next] = now;
			mdev->rs_mark_left[next] = still_to_go;
			mdev->rs_last_mark = next;
		}
	}
}

/* clear the bit corresponding to the piece of storage in question:
 * size bytes of data starting from sector.  Only clear the bits of the
 * one or more affected _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
		       const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}

	if (!get_ldev(mdev))
		return; /* no disk, no metadata, no bitmap to clear bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		goto out;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		goto out;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
	if (count) {
		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
		spin_lock_irqsave(&mdev->al_lock, flags);
		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
		spin_unlock_irqrestore(&mdev->al_lock, flags);

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
out:
	put_ldev(mdev);
	if (wake_up)
		wake_up(&mdev->al_wait);
}
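
/* Rounding example for the sbnr/ebnr computation above (illustrative, with
 * BM_SECT_PER_BIT == 8, i.e. one bitmap bit per 4K block): a 4096-byte write
 * at sector 9 spans sectors 9..16; sbnr = (9+7)>>3 = 2, ebnr = (16-7)>>3 = 1,
 * so sbnr > ebnr and nothing is cleared -- no fully covered, aligned 4K block
 * lies inside the request.  Only whole blocks may be marked in sync.
 */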

/*
 * this is intended to set one request's worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
			    const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(mdev))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&mdev->al_lock, flags);
	count = drbd_bm_set_bits(mdev, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(mdev->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&mdev->al_lock, flags);

out:
	put_ldev(mdev);

	return count;
}

static
struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
		spin_unlock_irq(&mdev->al_lock);
		return NULL;
	}
	e = lc_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			mdev->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = mdev->resync->flags;
	spin_unlock_irq(&mdev->al_lock);
	if (wakeup)
		wake_up(&mdev->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = lc_is_used(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait.  Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
			 200 times -> 20 seconds. */

retry:
	sig = wait_event_interruptible(mdev->al_wait,
			(bm_ext = _bme_get(mdev, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(mdev->al_wait,
					       !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
					       test_bit(BME_PRIORITY, &bm_ext->flags));

		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
			spin_lock_irq(&mdev->al_lock);
			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				mdev->resync_locked--;
				wake_up(&mdev->al_wait);
			}
			spin_unlock_irq(&mdev->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			if (sa && --sa == 0)
				dev_warn(DEV, "drbd_rs_begin_io() stepped aside for 20sec. "
					 "Resync stalled?\n");
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}
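
/* The step-aside arithmetic above: each retry sleeps for HZ/10 jiffies,
 * i.e. ~100ms, and sa starts at 200, so application IO may push the resync
 * aside for at most 200 * 100ms = 20 seconds before the warning fires and
 * the extent is grabbed anyway.
 */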

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED.  Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer indefinitely if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (mdev->resync_locked > mdev->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);
	e = lc_find(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		mdev->resync_locked--;
		wake_up(&mdev->al_wait);
	}

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector. we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&mdev->al_lock);
	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
	if (count) {
		mdev->rs_failed += count;

		if (get_ldev(mdev)) {
			drbd_try_clear_on_disk_bm(mdev, sector, count, false);
			put_ldev(mdev);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&mdev->al_lock);
	if (wake_up)
		wake_up(&mdev->al_wait);
}