drbd: Use interval tree for overlapping epoch entry detection
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fa722a9..751a4d4 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1080,6 +1080,9 @@
struct hlist_head *ee_hash; /* is proteced by req_lock! */
unsigned int ee_hash_s;
+ /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */
+ struct rb_root epoch_entries;
+
/* this one is protected by ee_lock, single thread */
struct drbd_epoch_entry *last_write_w_barrier;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0033137..18f27af 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3475,6 +3475,7 @@
goto out_no_tl;
mdev->read_requests = RB_ROOT;
mdev->write_requests = RB_ROOT;
+ mdev->epoch_entries = RB_ROOT;
mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
if (!mdev->app_reads_hash)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 42c0ffa..a0fbbfc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -334,6 +334,7 @@
goto fail;
INIT_HLIST_NODE(&e->collision);
+ drbd_clear_interval(&e->i);
e->epoch = NULL;
e->mdev = mdev;
e->pages = page;
@@ -361,6 +362,7 @@
drbd_pp_free(mdev, e->pages, is_net);
D_ASSERT(atomic_read(&e->pending_bios) == 0);
D_ASSERT(hlist_unhashed(&e->collision));
+ D_ASSERT(drbd_interval_empty(&e->i));
mempool_free(e, drbd_ee_mempool);
}
@@ -1418,6 +1420,7 @@
int ok;
D_ASSERT(hlist_unhashed(&e->collision));
+ D_ASSERT(drbd_interval_empty(&e->i));
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
drbd_set_in_sync(mdev, sector, e->i.size);
@@ -1574,9 +1577,13 @@
spin_lock_irq(&mdev->req_lock);
D_ASSERT(!hlist_unhashed(&e->collision));
hlist_del_init(&e->collision);
+ D_ASSERT(!drbd_interval_empty(&e->i));
+ drbd_remove_interval(&mdev->epoch_entries, &e->i);
+ drbd_clear_interval(&e->i);
spin_unlock_irq(&mdev->req_lock);
} else {
D_ASSERT(hlist_unhashed(&e->collision));
+ D_ASSERT(drbd_interval_empty(&e->i));
}
drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1595,6 +1602,9 @@
spin_lock_irq(&mdev->req_lock);
D_ASSERT(!hlist_unhashed(&e->collision));
hlist_del_init(&e->collision);
+ D_ASSERT(!drbd_interval_empty(&e->i));
+ drbd_remove_interval(&mdev->epoch_entries, &e->i);
+ drbd_clear_interval(&e->i);
spin_unlock_irq(&mdev->req_lock);
dec_unacked(mdev);
@@ -1767,6 +1777,7 @@
spin_lock_irq(&mdev->req_lock);
hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
+ drbd_insert_interval(&mdev->epoch_entries, &e->i);
first = 1;
for (;;) {
@@ -1817,6 +1828,8 @@
if (signal_pending(current)) {
hlist_del_init(&e->collision);
+ drbd_remove_interval(&mdev->epoch_entries, &e->i);
+ drbd_clear_interval(&e->i);
spin_unlock_irq(&mdev->req_lock);
@@ -1875,6 +1888,8 @@
spin_lock_irq(&mdev->req_lock);
list_del(&e->w.list);
hlist_del_init(&e->collision);
+ drbd_remove_interval(&mdev->epoch_entries, &e->i);
+ drbd_clear_interval(&e->i);
spin_unlock_irq(&mdev->req_lock);
if (e->flags & EE_CALL_AL_COMPLETE_IO)
drbd_al_complete_io(mdev, e->i.sector);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5bf93a7..b81ce82 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -135,9 +135,6 @@
struct drbd_request *req)
{
const unsigned long s = req->rq_state;
- struct drbd_epoch_entry *e;
- struct hlist_node *n;
- struct hlist_head *slot;
/* Before we can signal completion to the upper layers,
* we may need to close the current epoch.
@@ -185,16 +182,10 @@
*
* anyways, if we found one,
* we just have to do a wake_up. */
-#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size)
- slot = ee_hash_slot(mdev, req->i.sector);
- hlist_for_each_entry(e, n, slot, collision) {
- if (OVERLAPS) {
- wake_up(&mdev->misc_wait);
- break;
- }
- }
+ i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+ if (i)
+ wake_up(&mdev->misc_wait);
}
-#undef OVERLAPS
}
void complete_master_bio(struct drbd_conf *mdev,
@@ -332,9 +323,6 @@
const sector_t sector = req->i.sector;
const int size = req->i.size;
struct drbd_interval *i;
- struct drbd_epoch_entry *e;
- struct hlist_node *n;
- struct hlist_head *slot;
D_ASSERT(hlist_unhashed(&req->collision));
D_ASSERT(drbd_interval_empty(&req->i));
@@ -364,21 +352,21 @@
if (mdev->ee_hash_s) {
/* now, check for overlapping requests with remote origin */
BUG_ON(mdev->ee_hash == NULL);
-#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size)
- slot = ee_hash_slot(mdev, sector);
- hlist_for_each_entry(e, n, slot, collision) {
- if (OVERLAPS) {
- dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
- " [DISCARD L] new: %llus +%u; "
- "pending: %llus +%u\n",
- current->comm, current->pid,
- (unsigned long long)sector, size,
- (unsigned long long)e->i.sector, e->i.size);
- goto out_conflict;
- }
+
+ i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+ if (i) {
+ struct drbd_epoch_entry *e =
+ container_of(i, struct drbd_epoch_entry, i);
+
+ dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+ " [DISCARD L] new: %llus +%u; "
+ "pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)e->i.sector, e->i.size);
+ goto out_conflict;
}
}
-#undef OVERLAPS
out_no_conflict:
/* this is like it should be, and what we expected.