drbd: Use interval tree for overlapping epoch entry detection

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fa722a9..751a4d4 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1080,6 +1080,9 @@
 	struct hlist_head *ee_hash; /* is proteced by req_lock! */
 	unsigned int ee_hash_s;
 
+	/* Interval tree of pending remote write requests (struct drbd_epoch_entry) */
+	struct rb_root epoch_entries;
+
 	/* this one is protected by ee_lock, single thread */
 	struct drbd_epoch_entry *last_write_w_barrier;
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0033137..18f27af 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3475,6 +3475,7 @@
 		goto out_no_tl;
 	mdev->read_requests = RB_ROOT;
 	mdev->write_requests = RB_ROOT;
+	mdev->epoch_entries = RB_ROOT;
 
 	mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
 	if (!mdev->app_reads_hash)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 42c0ffa..a0fbbfc 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -334,6 +334,7 @@
 		goto fail;
 
 	INIT_HLIST_NODE(&e->collision);
+	drbd_clear_interval(&e->i);
 	e->epoch = NULL;
 	e->mdev = mdev;
 	e->pages = page;
@@ -361,6 +362,7 @@
 	drbd_pp_free(mdev, e->pages, is_net);
 	D_ASSERT(atomic_read(&e->pending_bios) == 0);
 	D_ASSERT(hlist_unhashed(&e->collision));
+	D_ASSERT(drbd_interval_empty(&e->i));
 	mempool_free(e, drbd_ee_mempool);
 }
 
@@ -1418,6 +1420,7 @@
 	int ok;
 
 	D_ASSERT(hlist_unhashed(&e->collision));
+	D_ASSERT(drbd_interval_empty(&e->i));
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		drbd_set_in_sync(mdev, sector, e->i.size);
@@ -1574,9 +1577,13 @@
 		spin_lock_irq(&mdev->req_lock);
 		D_ASSERT(!hlist_unhashed(&e->collision));
 		hlist_del_init(&e->collision);
+		D_ASSERT(!drbd_interval_empty(&e->i));
+		drbd_remove_interval(&mdev->epoch_entries, &e->i);
+		drbd_clear_interval(&e->i);
 		spin_unlock_irq(&mdev->req_lock);
 	} else {
 		D_ASSERT(hlist_unhashed(&e->collision));
+		D_ASSERT(drbd_interval_empty(&e->i));
 	}
 
 	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1595,6 +1602,9 @@
 	spin_lock_irq(&mdev->req_lock);
 	D_ASSERT(!hlist_unhashed(&e->collision));
 	hlist_del_init(&e->collision);
+	D_ASSERT(!drbd_interval_empty(&e->i));
+	drbd_remove_interval(&mdev->epoch_entries, &e->i);
+	drbd_clear_interval(&e->i);
 	spin_unlock_irq(&mdev->req_lock);
 
 	dec_unacked(mdev);
@@ -1767,6 +1777,7 @@
 		spin_lock_irq(&mdev->req_lock);
 
 		hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
+		drbd_insert_interval(&mdev->epoch_entries, &e->i);
 
 		first = 1;
 		for (;;) {
@@ -1817,6 +1828,8 @@
 
 			if (signal_pending(current)) {
 				hlist_del_init(&e->collision);
+				drbd_remove_interval(&mdev->epoch_entries, &e->i);
+				drbd_clear_interval(&e->i);
 
 				spin_unlock_irq(&mdev->req_lock);
 
@@ -1875,6 +1888,8 @@
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	hlist_del_init(&e->collision);
+	drbd_remove_interval(&mdev->epoch_entries, &e->i);
+	drbd_clear_interval(&e->i);
 	spin_unlock_irq(&mdev->req_lock);
 	if (e->flags & EE_CALL_AL_COMPLETE_IO)
 		drbd_al_complete_io(mdev, e->i.sector);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5bf93a7..b81ce82 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -135,9 +135,6 @@
 	struct drbd_request *req)
 {
 	const unsigned long s = req->rq_state;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
 
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current epoch.
@@ -185,16 +182,10 @@
 		 *
 		 * anyways, if we found one,
 		 * we just have to do a wake_up.  */
-#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size)
-		slot = ee_hash_slot(mdev, req->i.sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				wake_up(&mdev->misc_wait);
-				break;
-			}
-		}
+		i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+		if (i)
+			wake_up(&mdev->misc_wait);
 	}
-#undef OVERLAPS
 }
 
 void complete_master_bio(struct drbd_conf *mdev,
@@ -332,9 +323,6 @@
 	const sector_t sector = req->i.sector;
 	const int size = req->i.size;
 	struct drbd_interval *i;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
 
 	D_ASSERT(hlist_unhashed(&req->collision));
 	D_ASSERT(drbd_interval_empty(&req->i));
@@ -364,21 +352,21 @@
 	if (mdev->ee_hash_s) {
 		/* now, check for overlapping requests with remote origin */
 		BUG_ON(mdev->ee_hash == NULL);
-#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size)
-		slot = ee_hash_slot(mdev, sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
-				      " [DISCARD L] new: %llus +%u; "
-				      "pending: %llus +%u\n",
-				      current->comm, current->pid,
-				      (unsigned long long)sector, size,
-				      (unsigned long long)e->i.sector, e->i.size);
-				goto out_conflict;
-			}
+
+		i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+		if (i) {
+			struct drbd_epoch_entry *e =
+				container_of(i, struct drbd_epoch_entry, i);
+
+			dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+			      " [DISCARD L] new: %llus +%u; "
+			      "pending: %llus +%u\n",
+			      current->comm, current->pid,
+			      (unsigned long long)sector, size,
+			      (unsigned long long)e->i.sector, e->i.size);
+			goto out_conflict;
 		}
 	}
-#undef OVERLAPS
 
 out_no_conflict:
 	/* this is like it should be, and what we expected.