bcache: Rework allocator reserves

We need a reserve of buckets to allocate new btree nodes from - and now that
we've got multiple btrees, it really needs to be per btree.

This reworks the reserves so we've got a separate freelist for each reserve
instead of watermarks on a single freelist, which makes things a bit cleaner,
and it adds code so that btree_split() can make sure the reserve it needs is
available before it starts.
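
To illustrate the new scheme, here's a minimal userspace sketch of the
per-reserve freelists and the push/pop ordering. This is a model, not the
kernel code: the fifo type, FIFO_SIZE and main() are stand-ins for bcache's
fifo macros (the real fifo_pop() takes an lvalue, not a pointer), and the
position of RESERVE_BTREE in the enum is an assumption - only RESERVE_PRIO,
RESERVE_MOVINGGC, RESERVE_NONE and RESERVE_NR appear in this diff:

#include <stdbool.h>
#include <stdio.h>

#define FIFO_SIZE 8     /* toy size; the real lists are sized per device */

enum alloc_reserve {
        RESERVE_BTREE,  /* position assumed; not shown in this diff */
        RESERVE_PRIO,
        RESERVE_MOVINGGC,
        RESERVE_NONE,
        RESERVE_NR,
};

/* Toy ring buffer standing in for bcache's fifo macros */
struct fifo {
        long            data[FIFO_SIZE];
        unsigned        front, back;    /* back - front == entries used */
};

static bool fifo_full(struct fifo *f)  { return f->back - f->front == FIFO_SIZE; }
static bool fifo_empty(struct fifo *f) { return f->back == f->front; }

static bool fifo_push(struct fifo *f, long v)
{
        if (fifo_full(f))
                return false;
        f->data[f->back++ % FIFO_SIZE] = v;
        return true;
}

static bool fifo_pop(struct fifo *f, long *v)
{
        if (fifo_empty(f))
                return false;
        *v = f->data[f->front++ % FIFO_SIZE];
        return true;
}

struct cache {
        struct fifo     free[RESERVE_NR];       /* one freelist per reserve */
};

/* Same shape as bch_allocator_push(): refill prios/gens first, then top
 * up the remaining freelists in order; fails only when all are full. */
static bool allocator_push(struct cache *ca, long bucket)
{
        unsigned i;

        if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
                return true;

        for (i = 0; i < RESERVE_NR; i++)
                if (fifo_push(&ca->free[i], bucket))
                        return true;

        return false;
}

/* Same shape as the new bch_bucket_alloc() fastpath: take from the
 * general RESERVE_NONE list first, and only dip into the caller's
 * dedicated reserve once the general list is empty. */
static bool bucket_alloc(struct cache *ca, enum alloc_reserve reserve, long *r)
{
        return fifo_pop(&ca->free[RESERVE_NONE], r) ||
               fifo_pop(&ca->free[reserve], r);
}

int main(void)
{
        struct cache ca = {0};
        long b;

        for (b = 0; allocator_push(&ca, b); b++)
                ;

        /* Drains RESERVE_NONE, then RESERVE_MOVINGGC; RESERVE_PRIO and
         * RESERVE_BTREE stay untouched for their own users. */
        while (bucket_alloc(&ca, RESERVE_MOVINGGC, &b))
                printf("allocated bucket %ld\n", b);

        return 0;
}

Since every allocation drains RESERVE_NONE before touching a dedicated
reserve, a reserve can only be emptied by callers that name it - which is
what makes it possible to check up front that a reserve holds enough
buckets, instead of comparing one freelist's depth against watermarks.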

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4c9852d..bcfd96e 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -132,10 +132,16 @@
 {
 	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
 
-	if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] &&
-	    CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO)
-		return false;
+	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
+		unsigned i;
 
+		for (i = 0; i < RESERVE_NONE; i++)
+			if (!fifo_full(&ca->free[i]))
+				goto add;
+
+		return false;
+	}
+add:
 	b->prio = 0;
 
 	if (can_inc_bucket_gen(b) &&
@@ -304,6 +310,21 @@
 	__set_current_state(TASK_RUNNING);				\
 } while (0)
 
+static bool bch_allocator_push(struct cache *ca, long bucket)
+{
+	unsigned i;
+
+	/* Prios/gens are actually the most important reserve */
+	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
+		return true;
+
+	for (i = 0; i < RESERVE_NR; i++)
+		if (fifo_push(&ca->free[i], bucket))
+			return true;
+
+	return false;
+}
+
 static int bch_allocator_thread(void *arg)
 {
 	struct cache *ca = arg;
@@ -336,9 +357,7 @@
 				mutex_lock(&ca->set->bucket_lock);
 			}
 
-			allocator_wait(ca, !fifo_full(&ca->free));
-
-			fifo_push(&ca->free, bucket);
+			allocator_wait(ca, bch_allocator_push(ca, bucket));
 			wake_up(&ca->set->bucket_wait);
 		}
 
@@ -365,34 +384,29 @@
 	}
 }
 
-long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
 	DEFINE_WAIT(w);
 	struct bucket *b;
 	long r;
 
 	/* fastpath */
-	if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-		fifo_pop(&ca->free, r);
+	if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
+	    fifo_pop(&ca->free[reserve], r))
 		goto out;
-	}
 
 	if (!wait)
 		return -1;
 
-	while (1) {
-		if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-			fifo_pop(&ca->free, r);
-			break;
-		}
-
+	do {
 		prepare_to_wait(&ca->set->bucket_wait, &w,
 				TASK_UNINTERRUPTIBLE);
 
 		mutex_unlock(&ca->set->bucket_lock);
 		schedule();
 		mutex_lock(&ca->set->bucket_lock);
-	}
+	} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
+		 !fifo_pop(&ca->free[reserve], r));
 
 	finish_wait(&ca->set->bucket_wait, &w);
 out:
@@ -401,12 +415,14 @@
 	if (expensive_debug_checks(ca->set)) {
 		size_t iter;
 		long i;
+		unsigned j;
 
 		for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
 			BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
 
-		fifo_for_each(i, &ca->free, iter)
-			BUG_ON(i == r);
+		for (j = 0; j < RESERVE_NR; j++)
+			fifo_for_each(i, &ca->free[j], iter)
+				BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
 		fifo_for_each(i, &ca->unused, iter)
@@ -419,7 +435,7 @@
 
 	SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
 
-	if (watermark <= WATERMARK_METADATA) {
+	if (reserve <= RESERVE_PRIO) {
 		SET_GC_MARK(b, GC_MARK_METADATA);
 		SET_GC_MOVE(b, 0);
 		b->prio = BTREE_PRIO;
@@ -445,7 +461,7 @@
 	}
 }
 
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 			   struct bkey *k, int n, bool wait)
 {
 	int i;
@@ -459,7 +475,7 @@
 
 	for (i = 0; i < n; i++) {
 		struct cache *ca = c->cache_by_alloc[i];
-		long b = bch_bucket_alloc(ca, watermark, wait);
+		long b = bch_bucket_alloc(ca, reserve, wait);
 
 		if (b == -1)
 			goto err;
@@ -478,12 +494,12 @@
 	return -1;
 }
 
-int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 			 struct bkey *k, int n, bool wait)
 {
 	int ret;
 	mutex_lock(&c->bucket_lock);
-	ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
+	ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
 	mutex_unlock(&c->bucket_lock);
 	return ret;
 }
@@ -573,8 +589,8 @@
 
 	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
 		unsigned watermark = write_prio
-			? WATERMARK_MOVINGGC
-			: WATERMARK_NONE;
+			? RESERVE_MOVINGGC
+			: RESERVE_NONE;
 
 		spin_unlock(&c->data_bucket_lock);
 
@@ -689,7 +705,7 @@
 	 * Then 8 for btree allocations
 	 * Then half for the moving garbage collector
 	 */
-
+#if 0
 	ca->watermark[WATERMARK_PRIO] = 0;
 
 	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
@@ -699,6 +715,6 @@
 
 	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
 		ca->watermark[WATERMARK_MOVINGGC];
-
+#endif
 	return 0;
 }