blob: 278c31f246398b6d4de715dd9884d0601abc26f8 [file] [log] [blame]
/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */
25
26#include <linux/slab.h>
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010027#include <linux/crc32c.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070028#include <linux/drbd.h>
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010029#include <linux/drbd_limits.h>
30#include <linux/dynamic_debug.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070032
Lars Ellenberg85f103d2011-03-31 12:06:48 +020033
/*
 * Transaction types; see the transaction_type field of
 * struct al_transaction_on_disk.
 */
enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010038/* all fields on disc in big endian */
39struct __packed al_transaction_on_disk {
40 /* don't we all like magic */
41 __be32 magic;
42
43 /* to identify the most recent transaction block
44 * in the on disk ring buffer */
45 __be32 tr_number;
46
47 /* checksum on the full 4k block, with this field set to 0. */
48 __be32 crc32c;
49
50 /* type of transaction, special transaction types like:
Lars Ellenberg85f103d2011-03-31 12:06:48 +020051 * purge-all, set-all-idle, set-all-active, ... to-be-defined
52 * see also enum al_transaction_types */
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010053 __be16 transaction_type;
54
55 /* we currently allow only a few thousand extents,
56 * so 16bit will be enough for the slot number. */
57
58 /* how many updates in this transaction */
59 __be16 n_updates;
60
61 /* maximum slot number, "al-extents" in drbd.conf speak.
62 * Having this in each transaction should make reconfiguration
63 * of that parameter easier. */
64 __be16 context_size;
65
66 /* slot number the context starts with */
67 __be16 context_start_slot_nr;
68
69 /* Some reserved bytes. Expected usage is a 64bit counter of
70 * sectors-written since device creation, and other data generation tag
71 * supporting usage */
72 __be32 __reserved[4];
73
74 /* --- 36 byte used --- */
75
76 /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
77 * in one transaction, then use the remaining byte in the 4k block for
78 * context information. "Flexible" number of updates per transaction
79 * does not help, as we have to account for the case when all update
80 * slots are used anyways, so it would only complicate code without
81 * additional benefit.
82 */
83 __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION];
84
85 /* but the extent number is 32bit, which at an extent size of 4 MiB
86 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
87 __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION];
88
89 /* --- 420 bytes used (36 + 64*6) --- */
90
91 /* 4096 - 420 = 3676 = 919 * 4 */
92 __be32 context[AL_CONTEXT_PER_TRANSACTION];
Philipp Reisnerb411b362009-09-25 16:07:19 -070093};
94
Philipp Reisnerb411b362009-09-25 16:07:19 -070095struct update_al_work {
96 struct drbd_work w;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +020097 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -070098 struct completion event;
Lars Ellenberg7ad651b2011-02-21 13:21:03 +010099 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700100};
101
Philipp Reisnerb411b362009-09-25 16:07:19 -0700102
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200103void *drbd_md_get_buffer(struct drbd_device *device)
Philipp Reisnercdfda632011-07-05 15:38:59 +0200104{
105 int r;
106
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200107 wait_event(device->misc_wait,
108 (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
109 device->state.disk <= D_FAILED);
Philipp Reisnercdfda632011-07-05 15:38:59 +0200110
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200111 return r ? NULL : page_address(device->md_io_page);
Philipp Reisnercdfda632011-07-05 15:38:59 +0200112}
113
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200114void drbd_md_put_buffer(struct drbd_device *device)
Philipp Reisnercdfda632011-07-05 15:38:59 +0200115{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200116 if (atomic_dec_and_test(&device->md_io_in_use))
117 wake_up(&device->misc_wait);
Philipp Reisnercdfda632011-07-05 15:38:59 +0200118}
119
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200120void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev,
Philipp Reisner32db80f2012-02-22 11:51:57 +0100121 unsigned int *done)
Philipp Reisnercdfda632011-07-05 15:38:59 +0200122{
Philipp Reisner32db80f2012-02-22 11:51:57 +0100123 long dt;
124
125 rcu_read_lock();
126 dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
127 rcu_read_unlock();
128 dt = dt * HZ / 10;
129 if (dt == 0)
130 dt = MAX_SCHEDULE_TIMEOUT;
131
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200132 dt = wait_event_timeout(device->misc_wait,
133 *done || test_bit(FORCE_DETACH, &device->flags), dt);
Lars Ellenberge34b6772012-09-27 15:07:11 +0200134 if (dt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200135 drbd_err(device, "meta-data IO operation timed out\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200136 drbd_chk_io_error(device, 1, DRBD_FORCE_DETACH);
Lars Ellenberge34b6772012-09-27 15:07:11 +0200137 }
Philipp Reisnercdfda632011-07-05 15:38:59 +0200138}
139
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200140static int _drbd_md_sync_page_io(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700141 struct drbd_backing_dev *bdev,
142 struct page *page, sector_t sector,
143 int rw, int size)
144{
145 struct bio *bio;
Andreas Gruenbacherac29f402010-12-13 02:20:47 +0100146 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700147
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200148 device->md_io.done = 0;
149 device->md_io.error = -ENODEV;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700150
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200151 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
Lars Ellenberg86e1e982011-06-28 13:22:48 +0200152 rw |= REQ_FUA | REQ_FLUSH;
Jens Axboe721a9602011-03-09 11:56:30 +0100153 rw |= REQ_SYNC;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700154
Lars Ellenbergda4a75d2011-02-23 17:02:01 +0100155 bio = bio_alloc_drbd(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700156 bio->bi_bdev = bdev->md_bdev;
Kent Overstreet4f024f32013-10-11 15:44:27 -0700157 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherac29f402010-12-13 02:20:47 +0100158 err = -EIO;
159 if (bio_add_page(bio, page, size, 0) != size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700160 goto out;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200161 bio->bi_private = &device->md_io;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162 bio->bi_end_io = drbd_md_io_complete;
163 bio->bi_rw = rw;
164
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200165 if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
Lars Ellenbergc04ccaa2013-03-19 18:16:47 +0100166 /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
167 ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200168 else if (!get_ldev_if_state(device, D_ATTACHING)) {
Lars Ellenbergc04ccaa2013-03-19 18:16:47 +0100169 /* Corresponding put_ldev in drbd_md_io_complete() */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200170 drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
Philipp Reisnercdfda632011-07-05 15:38:59 +0200171 err = -ENODEV;
172 goto out;
173 }
174
175 bio_get(bio); /* one bio_put() is in the completion handler */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200176 atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
177 if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700178 bio_endio(bio, -EIO);
179 else
180 submit_bio(rw, bio);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200181 wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
Andreas Gruenbacherac29f402010-12-13 02:20:47 +0100182 if (bio_flagged(bio, BIO_UPTODATE))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200183 err = device->md_io.error;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700184
Philipp Reisnerb411b362009-09-25 16:07:19 -0700185 out:
186 bio_put(bio);
Andreas Gruenbacherac29f402010-12-13 02:20:47 +0100187 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700188}
189
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200190int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700191 sector_t sector, int rw)
192{
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +0100193 int err;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200194 struct page *iop = device->md_io_page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700195
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200196 D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197
198 BUG_ON(!bdev->md_bdev);
199
Lars Ellenberge4d7d6f2014-04-28 18:43:28 +0200200 dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100201 current->comm, current->pid, __func__,
Lars Ellenbergc04ccaa2013-03-19 18:16:47 +0100202 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
203 (void*)_RET_IP_ );
Philipp Reisnerb411b362009-09-25 16:07:19 -0700204
205 if (sector < drbd_md_first_sector(bdev) ||
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100206 sector + 7 > drbd_md_last_sector(bdev))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200207 drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700208 current->comm, current->pid, __func__,
209 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
210
Lars Ellenbergae8bf312013-03-19 18:16:43 +0100211 /* we do all our meta data IO in aligned 4k blocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200212 err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +0100213 if (err) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200214 drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
Andreas Gruenbacher935be262011-08-19 13:47:31 +0200215 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700216 }
Andreas Gruenbacher3fbf4d22010-12-13 02:25:41 +0100217 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700218}
219
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200220static struct bm_extent *find_active_resync_extent(struct drbd_device *device, unsigned int enr)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222 struct lc_element *tmp;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200223 tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224 if (unlikely(tmp != NULL)) {
225 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100226 if (test_bit(BME_NO_WRITES, &bm_ext->flags))
227 return bm_ext;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228 }
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100229 return NULL;
230}
231
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200232static struct lc_element *_al_get(struct drbd_device *device, unsigned int enr, bool nonblock)
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100233{
234 struct lc_element *al_ext;
235 struct bm_extent *bm_ext;
236 int wake;
237
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200238 spin_lock_irq(&device->al_lock);
239 bm_ext = find_active_resync_extent(device, enr);
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100240 if (bm_ext) {
241 wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200242 spin_unlock_irq(&device->al_lock);
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100243 if (wake)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200244 wake_up(&device->al_wait);
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100245 return NULL;
246 }
247 if (nonblock)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200248 al_ext = lc_try_get(device->act_log, enr);
Lars Ellenberg6c3c43552013-03-19 18:16:53 +0100249 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200250 al_ext = lc_get(device->act_log, enr);
251 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700252 return al_ext;
253}
254
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200255bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100256{
257 /* for bios crossing activity log extent boundaries,
258 * we may need to activate two extents in one go */
259 unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
260 unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100261
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200262 D_ASSERT(device, (unsigned)(last - first) <= 1);
263 D_ASSERT(device, atomic_read(&device->local_cnt) > 0);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100264
265 /* FIXME figure out a fast path for bios crossing AL extent boundaries */
266 if (first != last)
267 return false;
268
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200269 return _al_get(device, first, true);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100270}
271
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200272bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700273{
Lars Ellenberg77265472011-03-31 16:00:51 +0200274 /* for bios crossing activity log extent boundaries,
275 * we may need to activate two extents in one go */
Lars Ellenberge15766e2011-04-01 10:38:30 +0200276 unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
Lars Ellenberg81a35372012-07-30 09:00:54 +0200277 unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
Lars Ellenberge15766e2011-04-01 10:38:30 +0200278 unsigned enr;
Lars Ellenbergebfd5d82013-03-19 18:16:49 +0100279 bool need_transaction = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200281 D_ASSERT(device, first <= last);
282 D_ASSERT(device, atomic_read(&device->local_cnt) > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700283
Lars Ellenbergebfd5d82013-03-19 18:16:49 +0100284 for (enr = first; enr <= last; enr++) {
285 struct lc_element *al_ext;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200286 wait_event(device->al_wait,
287 (al_ext = _al_get(device, enr, false)) != NULL);
Lars Ellenbergebfd5d82013-03-19 18:16:49 +0100288 if (al_ext->lc_number != enr)
289 need_transaction = true;
290 }
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100291 return need_transaction;
292}
Lars Ellenbergebfd5d82013-03-19 18:16:49 +0100293
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200294static int al_write_transaction(struct drbd_device *device, bool delegate);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100295
296/* When called through generic_make_request(), we must delegate
297 * activity log I/O to the worker thread: a further request
298 * submitted via generic_make_request() within the same task
299 * would be queued on current->bio_list, and would only start
300 * after this function returns (see generic_make_request()).
301 *
302 * However, if we *are* the worker, we must not delegate to ourselves.
303 */
304
305/*
306 * @delegate: delegate activity log I/O to the worker thread
307 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200308void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100309{
310 bool locked = false;
311
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200312 BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700313
Lars Ellenberg7dc1d672011-05-03 16:49:20 +0200314 /* Serialize multiple transactions.
315 * This uses test_and_set_bit, memory barrier is implicit.
316 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200317 wait_event(device->al_wait,
318 device->act_log->pending_changes == 0 ||
319 (locked = lc_try_lock_for_transaction(device->act_log)));
Lars Ellenberg7dc1d672011-05-03 16:49:20 +0200320
321 if (locked) {
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100322 /* Double check: it may have been committed by someone else,
323 * while we have been waiting for the lock. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200324 if (device->act_log->pending_changes) {
Philipp Reisner9a51ab12012-02-20 21:53:28 +0100325 bool write_al_updates;
326
327 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200328 write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
Philipp Reisner9a51ab12012-02-20 21:53:28 +0100329 rcu_read_unlock();
330
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100331 if (write_al_updates)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200332 al_write_transaction(device, delegate);
333 spin_lock_irq(&device->al_lock);
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100334 /* FIXME
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200335 if (err)
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100336 we need an "lc_cancel" here;
337 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200338 lc_committed(device->act_log);
339 spin_unlock_irq(&device->al_lock);
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100340 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200341 lc_unlock(device->act_log);
342 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700343 }
344}
345
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100346/*
347 * @delegate: delegate activity log I/O to the worker thread
348 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200349void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100350{
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +0200351 BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100352
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200353 if (drbd_al_begin_io_prepare(device, i))
354 drbd_al_begin_io_commit(device, delegate);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100355}
356
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200357int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100358{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200359 struct lru_cache *al = device->act_log;
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100360 /* for bios crossing activity log extent boundaries,
361 * we may need to activate two extents in one go */
362 unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
363 unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
364 unsigned nr_al_extents;
365 unsigned available_update_slots;
366 unsigned enr;
367
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200368 D_ASSERT(device, first <= last);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100369
370 nr_al_extents = 1 + last - first; /* worst case: all touched extends are cold. */
371 available_update_slots = min(al->nr_elements - al->used,
372 al->max_pending_changes - al->pending_changes);
373
374 /* We want all necessary updates for a given request within the same transaction
375 * We could first check how many updates are *actually* needed,
376 * and use that instead of the worst-case nr_al_extents */
377 if (available_update_slots < nr_al_extents)
378 return -EWOULDBLOCK;
379
380 /* Is resync active in this area? */
381 for (enr = first; enr <= last; enr++) {
382 struct lc_element *tmp;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200383 tmp = lc_find(device->resync, enr/AL_EXT_PER_BM_SECT);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100384 if (unlikely(tmp != NULL)) {
385 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
386 if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
Lars Ellenberg0b6ef412013-03-27 14:08:49 +0100387 if (!test_and_set_bit(BME_PRIORITY, &bm_ext->flags))
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100388 return -EBUSY;
389 return -EWOULDBLOCK;
390 }
391 }
392 }
393
394 /* Checkout the refcounts.
395 * Given that we checked for available elements and update slots above,
396 * this has to be successful. */
397 for (enr = first; enr <= last; enr++) {
398 struct lc_element *al_ext;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200399 al_ext = lc_get_cumulative(device->act_log, enr);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100400 if (!al_ext)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200401 drbd_info(device, "LOGIC BUG for enr=%u\n", enr);
Lars Ellenberg08a1dda2013-03-19 18:16:56 +0100402 }
403 return 0;
404}
405
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200406void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700407{
Lars Ellenberge15766e2011-04-01 10:38:30 +0200408 /* for bios crossing activity log extent boundaries,
409 * we may need to activate two extents in one go */
410 unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
Lars Ellenberg81a35372012-07-30 09:00:54 +0200411 unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
Lars Ellenberge15766e2011-04-01 10:38:30 +0200412 unsigned enr;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413 struct lc_element *extent;
414 unsigned long flags;
415
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200416 D_ASSERT(device, first <= last);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200417 spin_lock_irqsave(&device->al_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418
Lars Ellenberge15766e2011-04-01 10:38:30 +0200419 for (enr = first; enr <= last; enr++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200420 extent = lc_find(device->act_log, enr);
Lars Ellenberge15766e2011-04-01 10:38:30 +0200421 if (!extent) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200422 drbd_err(device, "al_complete_io() called on inactive extent %u\n", enr);
Lars Ellenberge15766e2011-04-01 10:38:30 +0200423 continue;
424 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200425 lc_put(device->act_log, extent);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700426 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200427 spin_unlock_irqrestore(&device->al_lock, flags);
428 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700429}
430
Lars Ellenberg19f843a2010-12-15 08:59:11 +0100431#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
432/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
433 * are still coupled, or assume too much about their relation.
434 * Code below will not work if this is violated.
435 * Will be cleaned up with some followup patch.
436 */
437# error FIXME
438#endif
439
440static unsigned int al_extent_to_bm_page(unsigned int al_enr)
441{
442 return al_enr >>
443 /* bit to page */
444 ((PAGE_SHIFT + 3) -
445 /* al extent number to bit */
446 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
447}
448
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200449static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
Lars Ellenbergae8bf312013-03-19 18:16:43 +0100450{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200451 const unsigned int stripes = device->ldev->md.al_stripes;
452 const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
Lars Ellenbergae8bf312013-03-19 18:16:43 +0100453
454 /* transaction number, modulo on-disk ring buffer wrap around */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200455 unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
Lars Ellenbergae8bf312013-03-19 18:16:43 +0100456
457 /* ... to aligned 4k on disk block */
458 t = ((t % stripes) * stripe_size_4kB) + t/stripes;
459
460 /* ... to 512 byte sector in activity log */
461 t *= 8;
462
463 /* ... plus offset to the on disk position */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200464 return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
Lars Ellenbergae8bf312013-03-19 18:16:43 +0100465}
466
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +0100467static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200468_al_write_transaction(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700469{
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100470 struct al_transaction_on_disk *buffer;
471 struct lc_element *e;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472 sector_t sector;
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100473 int i, mx;
474 unsigned extent_nr;
475 unsigned crc = 0;
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200476 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700477
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200478 if (!get_ldev(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200479 drbd_err(device, "disk is %s, cannot start al transaction\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200480 drbd_disk_str(device->state.disk));
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200481 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700482 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700483
Lars Ellenberg6719fb02010-10-18 23:04:07 +0200484 /* The bitmap write may have failed, causing a state change. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200485 if (device->state.disk < D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200486 drbd_err(device,
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100487 "disk is %s, cannot write al transaction\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200488 drbd_disk_str(device->state.disk));
489 put_ldev(device);
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200490 return -EIO;
Lars Ellenberg6719fb02010-10-18 23:04:07 +0200491 }
492
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200493 buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
Philipp Reisnercdfda632011-07-05 15:38:59 +0200494 if (!buffer) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200495 drbd_err(device, "disk failed while waiting for md_io buffer\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200496 put_ldev(device);
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200497 return -ENODEV;
Philipp Reisnercdfda632011-07-05 15:38:59 +0200498 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700499
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100500 memset(buffer, 0, sizeof(*buffer));
501 buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200502 buffer->tr_number = cpu_to_be32(device->al_tr_number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700503
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100504 i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700505
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100506 /* Even though no one can start to change this list
507 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
508 * lc_try_lock_for_transaction() --, someone may still
509 * be in the process of changing it. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200510 spin_lock_irq(&device->al_lock);
511 list_for_each_entry(e, &device->act_log->to_be_changed, list) {
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100512 if (i == AL_UPDATES_PER_TRANSACTION) {
513 i++;
514 break;
515 }
516 buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
517 buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
518 if (e->lc_number != LC_FREE)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200519 drbd_bm_mark_for_writeout(device,
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100520 al_extent_to_bm_page(e->lc_number));
521 i++;
522 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200523 spin_unlock_irq(&device->al_lock);
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100524 BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700525
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100526 buffer->n_updates = cpu_to_be16(i);
527 for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
528 buffer->update_slot_nr[i] = cpu_to_be16(-1);
529 buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
530 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700531
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200532 buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
533 buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100534
535 mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200536 device->act_log->nr_elements - device->al_tr_cycle);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700537 for (i = 0; i < mx; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200538 unsigned idx = device->al_tr_cycle + i;
539 extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100540 buffer->context[i] = cpu_to_be32(extent_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700541 }
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100542 for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
543 buffer->context[i] = cpu_to_be32(LC_FREE);
544
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200545 device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
546 if (device->al_tr_cycle >= device->act_log->nr_elements)
547 device->al_tr_cycle = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700548
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200549 sector = al_tr_number_to_on_disk_sector(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700550
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100551 crc = crc32c(0, buffer, 4096);
552 buffer->crc32c = cpu_to_be32(crc);
553
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200554 if (drbd_bm_write_hinted(device))
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200555 err = -EIO;
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100556 else {
557 bool write_al_updates;
558 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200559 write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100560 rcu_read_unlock();
561 if (write_al_updates) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200562 if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100563 err = -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200564 drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100565 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200566 device->al_tr_number++;
567 device->al_writ_cnt++;
Lars Ellenbergb5bc8e02013-03-19 18:16:52 +0100568 }
569 }
Lars Ellenberg7ad651b2011-02-21 13:21:03 +0100570 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700571
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200572 drbd_md_put_buffer(device);
573 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700574
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200575 return err;
576}
577
578
579static int w_al_write_transaction(struct drbd_work *w, int unused)
580{
581 struct update_al_work *aw = container_of(w, struct update_al_work, w);
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200582 struct drbd_device *device = aw->device;
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200583 int err;
584
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200585 err = _al_write_transaction(device);
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200586 aw->err = err;
587 complete(&aw->event);
588
589 return err != -EIO ? err : 0;
590}
591
592/* Calls from worker context (see w_restart_disk_io()) need to write the
593 transaction directly. Others came through generic_make_request(),
594 those need to delegate it to the worker. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200595static int al_write_transaction(struct drbd_device *device, bool delegate)
Philipp Reisner1b7ab152011-07-15 17:19:02 +0200596{
Lars Ellenberg56392d22013-03-19 18:16:48 +0100597 if (delegate) {
598 struct update_al_work al_work;
599 init_completion(&al_work.event);
600 al_work.w.cb = w_al_write_transaction;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +0200601 al_work.device = device;
602 drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
603 &al_work.w);
Lars Ellenberg56392d22013-03-19 18:16:48 +0100604 wait_for_completion(&al_work.event);
605 return al_work.err;
606 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200607 return _al_write_transaction(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700608}
609
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200610static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700611{
612 int rv;
613
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200614 spin_lock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700615 rv = (al_ext->refcnt == 0);
616 if (likely(rv))
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200617 lc_del(device->act_log, al_ext);
618 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700619
620 return rv;
621}
622
623/**
624 * drbd_al_shrink() - Removes all active extents form the activity log
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200625 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700626 *
627 * Removes all active extents form the activity log, waiting until
628 * the reference count of each entry dropped to 0 first, of course.
629 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200630 * You need to lock device->act_log with lc_try_lock() / lc_unlock()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700631 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200632void drbd_al_shrink(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700633{
634 struct lc_element *al_ext;
635 int i;
636
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200637 D_ASSERT(device, test_bit(__LC_LOCKED, &device->act_log->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700638
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200639 for (i = 0; i < device->act_log->nr_elements; i++) {
640 al_ext = lc_element_by_index(device->act_log, i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700641 if (al_ext->lc_number == LC_FREE)
642 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200643 wait_event(device->al_wait, _try_lc_del(device, al_ext));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700644 }
645
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200646 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700647}
648
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200649int drbd_initialize_al(struct drbd_device *device, void *buffer)
Philipp Reisnerd752b262013-06-25 16:50:08 +0200650{
651 struct al_transaction_on_disk *al = buffer;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200652 struct drbd_md *md = &device->ldev->md;
Philipp Reisnerd752b262013-06-25 16:50:08 +0200653 sector_t al_base = md->md_offset + md->al_offset;
654 int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
655 int i;
656
657 memset(al, 0, 4096);
658 al->magic = cpu_to_be32(DRBD_AL_MAGIC);
659 al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
660 al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
661
662 for (i = 0; i < al_size_4k; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200663 int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
Philipp Reisnerd752b262013-06-25 16:50:08 +0200664 if (err)
665 return err;
666 }
667 return 0;
668}
669
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100670static const char *drbd_change_sync_fname[] = {
671 [RECORD_RS_FAILED] = "drbd_rs_failed_io",
672 [SET_IN_SYNC] = "drbd_set_in_sync",
673 [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
674};
675
Philipp Reisnerb411b362009-09-25 16:07:19 -0700676/* ATTENTION. The AL's extents are 4MB each, while the extents in the
677 * resync LRU-cache are 16MB each.
678 * The caller of this function has to hold an get_ldev() reference.
679 *
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100680 * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
681 * potentially pulling in (and recounting the corresponding bits)
682 * this resync extent into the resync extent lru cache.
683 *
684 * Returns whether all bits have been cleared for this resync extent,
685 * precisely: (rs_left <= rs_failed)
686 *
Philipp Reisnerb411b362009-09-25 16:07:19 -0700687 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
688 */
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100689static bool update_rs_extent(struct drbd_device *device,
690 unsigned int enr, int count,
691 enum update_sync_bits_mode mode)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700692{
693 struct lc_element *e;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700694
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200695 D_ASSERT(device, atomic_read(&device->local_cnt));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700696
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100697 /* When setting out-of-sync bits,
698 * we don't need it cached (lc_find).
699 * But if it is present in the cache,
700 * we should update the cached bit count.
701 * Otherwise, that extent should be in the resync extent lru cache
702 * already -- or we want to pull it in if necessary -- (lc_get),
703 * then update and check rs_left and rs_failed. */
704 if (mode == SET_OUT_OF_SYNC)
705 e = lc_find(device->resync, enr);
706 else
707 e = lc_get(device->resync, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708 if (e) {
709 struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
710 if (ext->lce.lc_number == enr) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100711 if (mode == SET_IN_SYNC)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700712 ext->rs_left -= count;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100713 else if (mode == SET_OUT_OF_SYNC)
714 ext->rs_left += count;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 else
716 ext->rs_failed += count;
717 if (ext->rs_left < ext->rs_failed) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100718 drbd_warn(device, "BAD! enr=%u rs_left=%d "
Philipp Reisner975b2972011-11-17 10:11:47 +0100719 "rs_failed=%d count=%d cstate=%s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 ext->lce.lc_number, ext->rs_left,
Philipp Reisner975b2972011-11-17 10:11:47 +0100721 ext->rs_failed, count,
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200722 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -0700723
Philipp Reisner975b2972011-11-17 10:11:47 +0100724 /* We don't expect to be able to clear more bits
725 * than have been set when we originally counted
726 * the set bits to cache that value in ext->rs_left.
727 * Whatever the reason (disconnect during resync,
728 * delayed local completion of an application write),
729 * try to fix it up by recounting here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200730 ext->rs_left = drbd_bm_e_weight(device, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700731 }
732 } else {
733 /* Normally this element should be in the cache,
734 * since drbd_rs_begin_io() pulled it already in.
735 *
736 * But maybe an application write finished, and we set
737 * something outside the resync lru_cache in sync.
738 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200739 int rs_left = drbd_bm_e_weight(device, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700740 if (ext->flags != 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200741 drbd_warn(device, "changing resync lce: %d[%u;%02lx]"
Philipp Reisnerb411b362009-09-25 16:07:19 -0700742 " -> %d[%u;00]\n",
743 ext->lce.lc_number, ext->rs_left,
744 ext->flags, enr, rs_left);
745 ext->flags = 0;
746 }
747 if (ext->rs_failed) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200748 drbd_warn(device, "Kicking resync_lru element enr=%u "
Philipp Reisnerb411b362009-09-25 16:07:19 -0700749 "out with rs_failed=%d\n",
750 ext->lce.lc_number, ext->rs_failed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700751 }
752 ext->rs_left = rs_left;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100753 ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
Lars Ellenberg46a15bc2011-02-21 13:21:01 +0100754 /* we don't keep a persistent log of the resync lru,
755 * we can commit any change right away. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200756 lc_committed(device->resync);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700757 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100758 if (mode != SET_OUT_OF_SYNC)
759 lc_put(device->resync, &ext->lce);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700760 /* no race, we are within the al_lock! */
761
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100762 if (ext->rs_left <= ext->rs_failed) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700763 ext->rs_failed = 0;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100764 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700765 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100766 } else if (mode != SET_OUT_OF_SYNC) {
767 /* be quiet if lc_find() did not find it. */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200768 drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200769 device->resync_locked,
770 device->resync->nr_elements,
771 device->resync->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700772 }
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100773 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700774}
775
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200776void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
Lars Ellenbergc6ea14d2010-11-05 09:23:37 +0100777{
778 unsigned long now = jiffies;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200779 unsigned long last = device->rs_mark_time[device->rs_last_mark];
780 int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
Lars Ellenbergc6ea14d2010-11-05 09:23:37 +0100781 if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200782 if (device->rs_mark_left[device->rs_last_mark] != still_to_go &&
783 device->state.conn != C_PAUSED_SYNC_T &&
784 device->state.conn != C_PAUSED_SYNC_S) {
785 device->rs_mark_time[next] = now;
786 device->rs_mark_left[next] = still_to_go;
787 device->rs_last_mark = next;
Lars Ellenbergc6ea14d2010-11-05 09:23:37 +0100788 }
789 }
790}
791
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100792/* It is called lazy update, so don't do write-out too often. */
793static bool lazy_bitmap_update_due(struct drbd_device *device)
794{
795 return time_after(jiffies, device->rs_last_bcast + 2*HZ);
796}
797
798static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
799{
800 struct drbd_connection *connection;
801 if (rs_done)
802 set_bit(RS_DONE, &device->flags);
803 /* and also set RS_PROGRESS below */
804 else if (!lazy_bitmap_update_due(device))
805 return;
806
807 /* compare with test_and_clear_bit() calls in and above
808 * try_update_all_on_disk_bitmaps() from the drbd_worker(). */
809 if (test_and_set_bit(RS_PROGRESS, &device->flags))
810 return;
811 connection = first_peer_device(device)->connection;
812 if (!test_and_set_bit(CONN_RS_PROGRESS, &connection->flags))
813 wake_up(&connection->sender_work.q_wait);
814}
815
816static int update_sync_bits(struct drbd_device *device,
817 unsigned long sbnr, unsigned long ebnr,
818 enum update_sync_bits_mode mode)
819{
820 /*
821 * We keep a count of set bits per resync-extent in the ->rs_left
822 * caching member, so we need to loop and work within the resync extent
823 * alignment. Typically this loop will execute exactly once.
824 */
825 unsigned long flags;
826 unsigned long count = 0;
827 unsigned int cleared = 0;
828 while (sbnr <= ebnr) {
829 /* set temporary boundary bit number to last bit number within
830 * the resync extent of the current start bit number,
831 * but cap at provided end bit number */
832 unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
833 unsigned long c;
834
835 if (mode == RECORD_RS_FAILED)
836 /* Only called from drbd_rs_failed_io(), bits
837 * supposedly still set. Recount, maybe some
838 * of the bits have been successfully cleared
839 * by application IO meanwhile.
840 */
841 c = drbd_bm_count_bits(device, sbnr, tbnr);
842 else if (mode == SET_IN_SYNC)
843 c = drbd_bm_clear_bits(device, sbnr, tbnr);
844 else /* if (mode == SET_OUT_OF_SYNC) */
845 c = drbd_bm_set_bits(device, sbnr, tbnr);
846
847 if (c) {
848 spin_lock_irqsave(&device->al_lock, flags);
849 cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
850 spin_unlock_irqrestore(&device->al_lock, flags);
851 count += c;
852 }
853 sbnr = tbnr + 1;
854 }
855 if (count) {
856 if (mode == SET_IN_SYNC) {
857 unsigned long still_to_go = drbd_bm_total_weight(device);
858 bool rs_is_done = (still_to_go <= device->rs_failed);
859 drbd_advance_rs_marks(device, still_to_go);
860 if (cleared || rs_is_done)
861 maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
862 } else if (mode == RECORD_RS_FAILED)
863 device->rs_failed += count;
864 wake_up(&device->al_wait);
865 }
866 return count;
867}
868
Philipp Reisnerb411b362009-09-25 16:07:19 -0700869/* clear the bit corresponding to the piece of storage in question:
870 * size byte of data starting from sector. Only clear a bits of the affected
871 * one ore more _aligned_ BM_BLOCK_SIZE blocks.
872 *
873 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
874 *
875 */
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100876int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
877 enum update_sync_bits_mode mode,
878 const char *file, const unsigned int line)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700879{
880 /* Is called from worker and receiver context _only_ */
881 unsigned long sbnr, ebnr, lbnr;
882 unsigned long count = 0;
883 sector_t esector, nr_sectors;
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100884
885 /* This would be an empty REQ_FLUSH, be silent. */
886 if ((mode == SET_OUT_OF_SYNC) && size == 0)
887 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700888
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200889 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100890 drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
891 drbd_change_sync_fname[mode],
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892 (unsigned long long)sector, size);
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100893 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894 }
Philipp Reisner518a4d52012-10-19 14:21:22 +0200895
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200896 if (!get_ldev(device))
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100897 return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
Philipp Reisner518a4d52012-10-19 14:21:22 +0200898
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200899 nr_sectors = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700900 esector = sector + (size >> 9) - 1;
901
Andreas Gruenbacher841ce242010-12-15 19:31:20 +0100902 if (!expect(sector < nr_sectors))
Philipp Reisner518a4d52012-10-19 14:21:22 +0200903 goto out;
Andreas Gruenbacher841ce242010-12-15 19:31:20 +0100904 if (!expect(esector < nr_sectors))
905 esector = nr_sectors - 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700906
907 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
908
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100909 if (mode == SET_IN_SYNC) {
910 /* Round up start sector, round down end sector. We make sure
911 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
912 if (unlikely(esector < BM_SECT_PER_BIT-1))
913 goto out;
914 if (unlikely(esector == (nr_sectors-1)))
915 ebnr = lbnr;
916 else
917 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
918 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
919 } else {
920 /* We set it out of sync, or record resync failure.
921 * Should not round anything here. */
922 sbnr = BM_SECT_TO_BIT(sector);
923 ebnr = BM_SECT_TO_BIT(esector);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924 }
925
Lars Ellenberg5ab7d2c2014-01-27 15:58:22 +0100926 count = update_sync_bits(device, sbnr, ebnr, mode);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200928 put_ldev(device);
Philipp Reisner73a01a12010-10-27 14:33:00 +0200929 return count;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930}
931
932static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200933struct bm_extent *_bme_get(struct drbd_device *device, unsigned int enr)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700934{
935 struct lc_element *e;
936 struct bm_extent *bm_ext;
937 int wakeup = 0;
938 unsigned long rs_flags;
939
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200940 spin_lock_irq(&device->al_lock);
941 if (device->resync_locked > device->resync->nr_elements/2) {
942 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943 return NULL;
944 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200945 e = lc_get(device->resync, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700946 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
947 if (bm_ext) {
948 if (bm_ext->lce.lc_number != enr) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200949 bm_ext->rs_left = drbd_bm_e_weight(device, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700950 bm_ext->rs_failed = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200951 lc_committed(device->resync);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952 wakeup = 1;
953 }
954 if (bm_ext->lce.refcnt == 1)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200955 device->resync_locked++;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700956 set_bit(BME_NO_WRITES, &bm_ext->flags);
957 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200958 rs_flags = device->resync->flags;
959 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700960 if (wakeup)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200961 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700962
963 if (!bm_ext) {
964 if (rs_flags & LC_STARVING)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200965 drbd_warn(device, "Have to wait for element"
Philipp Reisnerb411b362009-09-25 16:07:19 -0700966 " (resync LRU too small?)\n");
Lars Ellenberg46a15bc2011-02-21 13:21:01 +0100967 BUG_ON(rs_flags & LC_LOCKED);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700968 }
969
970 return bm_ext;
971}
972
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200973static int _is_in_al(struct drbd_device *device, unsigned int enr)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700974{
Lars Ellenberg46a15bc2011-02-21 13:21:01 +0100975 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200977 spin_lock_irq(&device->al_lock);
978 rv = lc_is_used(device->act_log, enr);
979 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981 return rv;
982}
983
984/**
985 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200986 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 * @sector: The sector number.
988 *
Lars Ellenberg80a40e42010-08-11 23:28:00 +0200989 * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700990 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200991int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992{
993 unsigned int enr = BM_SECT_TO_EXT(sector);
994 struct bm_extent *bm_ext;
995 int i, sig;
Lars Ellenberge8299872014-04-28 18:43:19 +0200996 bool sa;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700997
Philipp Reisnerf91ab622010-11-09 13:59:41 +0100998retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200999 sig = wait_event_interruptible(device->al_wait,
1000 (bm_ext = _bme_get(device, enr)));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001001 if (sig)
Lars Ellenberg80a40e42010-08-11 23:28:00 +02001002 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001003
1004 if (test_bit(BME_LOCKED, &bm_ext->flags))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02001005 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001006
Lars Ellenberge8299872014-04-28 18:43:19 +02001007 /* step aside only while we are above c-min-rate; unless disabled. */
1008 sa = drbd_rs_c_min_rate_throttle(device);
1009
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001011 sig = wait_event_interruptible(device->al_wait,
1012 !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
Lars Ellenberge8299872014-04-28 18:43:19 +02001013 (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
Philipp Reisnerf91ab622010-11-09 13:59:41 +01001014
Lars Ellenberge8299872014-04-28 18:43:19 +02001015 if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001016 spin_lock_irq(&device->al_lock);
1017 if (lc_put(device->resync, &bm_ext->lce) == 0) {
Philipp Reisnerf91ab622010-11-09 13:59:41 +01001018 bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001019 device->resync_locked--;
1020 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001021 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001022 spin_unlock_irq(&device->al_lock);
Philipp Reisnerf91ab622010-11-09 13:59:41 +01001023 if (sig)
1024 return -EINTR;
1025 if (schedule_timeout_interruptible(HZ/10))
1026 return -EINTR;
Philipp Reisnerf91ab622010-11-09 13:59:41 +01001027 goto retry;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028 }
1029 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001030 set_bit(BME_LOCKED, &bm_ext->flags);
Lars Ellenberg80a40e42010-08-11 23:28:00 +02001031 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001032}
1033
1034/**
1035 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001036 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001037 * @sector: The sector number.
1038 *
1039 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
1040 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
1041 * if there is still application IO going on in this area.
1042 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001043int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044{
1045 unsigned int enr = BM_SECT_TO_EXT(sector);
1046 const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
1047 struct lc_element *e;
1048 struct bm_extent *bm_ext;
1049 int i;
1050
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001051 spin_lock_irq(&device->al_lock);
1052 if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001053 /* in case you have very heavy scattered io, it may
1054 * stall the syncer undefined if we give up the ref count
1055 * when we try again and requeue.
1056 *
1057 * if we don't give up the refcount, but the next time
1058 * we are scheduled this extent has been "synced" by new
1059 * application writes, we'd miss the lc_put on the
1060 * extent we keep the refcount on.
1061 * so we remembered which extent we had to try again, and
1062 * if the next requested one is something else, we do
1063 * the lc_put here...
1064 * we also have to wake_up
1065 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001066 e = lc_find(device->resync, device->resync_wenr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001067 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1068 if (bm_ext) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001069 D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
1070 D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001071 clear_bit(BME_NO_WRITES, &bm_ext->flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001072 device->resync_wenr = LC_FREE;
1073 if (lc_put(device->resync, &bm_ext->lce) == 0)
1074 device->resync_locked--;
1075 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001077 drbd_alert(device, "LOGIC BUG\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001078 }
1079 }
1080 /* TRY. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001081 e = lc_try_get(device->resync, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001082 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1083 if (bm_ext) {
1084 if (test_bit(BME_LOCKED, &bm_ext->flags))
1085 goto proceed;
1086 if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001087 device->resync_locked++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001088 } else {
1089 /* we did set the BME_NO_WRITES,
1090 * but then could not set BME_LOCKED,
1091 * so we tried again.
1092 * drop the extra reference. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093 bm_ext->lce.refcnt--;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001094 D_ASSERT(device, bm_ext->lce.refcnt > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001095 }
1096 goto check_al;
1097 } else {
1098 /* do we rather want to try later? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001099 if (device->resync_locked > device->resync->nr_elements-3)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100 goto try_again;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001101 /* Do or do not. There is no try. -- Yoda */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001102 e = lc_get(device->resync, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001103 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1104 if (!bm_ext) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001105 const unsigned long rs_flags = device->resync->flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106 if (rs_flags & LC_STARVING)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001107 drbd_warn(device, "Have to wait for element"
Philipp Reisnerb411b362009-09-25 16:07:19 -07001108 " (resync LRU too small?)\n");
Lars Ellenberg46a15bc2011-02-21 13:21:01 +01001109 BUG_ON(rs_flags & LC_LOCKED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001110 goto try_again;
1111 }
1112 if (bm_ext->lce.lc_number != enr) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001113 bm_ext->rs_left = drbd_bm_e_weight(device, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001114 bm_ext->rs_failed = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001115 lc_committed(device->resync);
1116 wake_up(&device->al_wait);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001117 D_ASSERT(device, test_bit(BME_LOCKED, &bm_ext->flags) == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001118 }
1119 set_bit(BME_NO_WRITES, &bm_ext->flags);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001120 D_ASSERT(device, bm_ext->lce.refcnt == 1);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001121 device->resync_locked++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001122 goto check_al;
1123 }
1124check_al:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001125 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001126 if (lc_is_used(device->act_log, al_enr+i))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 goto try_again;
1128 }
1129 set_bit(BME_LOCKED, &bm_ext->flags);
1130proceed:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001131 device->resync_wenr = LC_FREE;
1132 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001133 return 0;
1134
1135try_again:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136 if (bm_ext)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001137 device->resync_wenr = enr;
1138 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001139 return -EAGAIN;
1140}
1141
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001142void drbd_rs_complete_io(struct drbd_device *device, sector_t sector)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143{
1144 unsigned int enr = BM_SECT_TO_EXT(sector);
1145 struct lc_element *e;
1146 struct bm_extent *bm_ext;
1147 unsigned long flags;
1148
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001149 spin_lock_irqsave(&device->al_lock, flags);
1150 e = lc_find(device->resync, enr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001151 bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
1152 if (!bm_ext) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001153 spin_unlock_irqrestore(&device->al_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001155 drbd_err(device, "drbd_rs_complete_io() called, but extent not found\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156 return;
1157 }
1158
1159 if (bm_ext->lce.refcnt == 0) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001160 spin_unlock_irqrestore(&device->al_lock, flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001161 drbd_err(device, "drbd_rs_complete_io(,%llu [=%u]) called, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001162 "but refcnt is 0!?\n",
1163 (unsigned long long)sector, enr);
1164 return;
1165 }
1166
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001167 if (lc_put(device->resync, &bm_ext->lce) == 0) {
Philipp Reisnere3555d82010-11-07 15:56:29 +01001168 bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001169 device->resync_locked--;
1170 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001171 }
1172
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001173 spin_unlock_irqrestore(&device->al_lock, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174}
1175
1176/**
1177 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001178 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001180void drbd_rs_cancel_all(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001182 spin_lock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001184 if (get_ldev_if_state(device, D_FAILED)) { /* Makes sure ->resync is there. */
1185 lc_reset(device->resync);
1186 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001188 device->resync_locked = 0;
1189 device->resync_wenr = LC_FREE;
1190 spin_unlock_irq(&device->al_lock);
1191 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001192}
1193
1194/**
1195 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001196 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 *
1198 * Returns 0 upon success, -EAGAIN if at least one reference count was
1199 * not zero.
1200 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001201int drbd_rs_del_all(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001202{
1203 struct lc_element *e;
1204 struct bm_extent *bm_ext;
1205 int i;
1206
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001207 spin_lock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001209 if (get_ldev_if_state(device, D_FAILED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 /* ok, ->resync is there. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001211 for (i = 0; i < device->resync->nr_elements; i++) {
1212 e = lc_element_by_index(device->resync, i);
Philipp Reisnerb2b163d2010-04-02 08:40:33 +02001213 bm_ext = lc_entry(e, struct bm_extent, lce);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214 if (bm_ext->lce.lc_number == LC_FREE)
1215 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001216 if (bm_ext->lce.lc_number == device->resync_wenr) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001217 drbd_info(device, "dropping %u in drbd_rs_del_all, apparently"
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 " got 'synced' by application io\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001219 device->resync_wenr);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001220 D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
1221 D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 clear_bit(BME_NO_WRITES, &bm_ext->flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001223 device->resync_wenr = LC_FREE;
1224 lc_put(device->resync, &bm_ext->lce);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225 }
1226 if (bm_ext->lce.refcnt != 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001227 drbd_info(device, "Retrying drbd_rs_del_all() later. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 "refcnt=%d\n", bm_ext->lce.refcnt);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001229 put_ldev(device);
1230 spin_unlock_irq(&device->al_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231 return -EAGAIN;
1232 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001233 D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
1234 D_ASSERT(device, !test_bit(BME_NO_WRITES, &bm_ext->flags));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001235 lc_del(device->resync, &bm_ext->lce);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001237 D_ASSERT(device, device->resync->used == 0);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001238 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001239 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001240 spin_unlock_irq(&device->al_lock);
1241 wake_up(&device->al_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242
1243 return 0;
1244}