bcache: Convert gc to a kthread

We would have needed a dedicated rescuer workqueue for gc anyway, and gc
was already conceptually a dedicated thread, just one that wasn't running
all the time. Switch it to an actual kthread to make the code a bit more
straightforward.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
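---

For reviewers: the conversion below is the stock create/wake/stop kthread
idiom. Here is a minimal, self-contained sketch of that idiom, assuming a
loadable module for illustration; every name in it (example_thread_fn,
example_task, ...) is made up and not part of this patch:

#include <linux/err.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>

static struct task_struct *example_task;

static int example_thread_fn(void *arg)
{
	while (1) {
		/* ... do one round of work ... */

		/* Sleep until wake_up_process() or kthread_stop(). */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}

		try_to_freeze();
		schedule();
	}

	return 0;
}

static int __init example_init(void)
{
	/* kthread_create() leaves the thread asleep until first woken. */
	example_task = kthread_create(example_thread_fn, NULL, "example");
	if (IS_ERR(example_task))
		return PTR_ERR(example_task);

	/* Producers kick the thread with wake_up_process(example_task). */
	return 0;
}

static void __exit example_exit(void)
{
	/* kthread_stop() wakes the thread and waits for it to return. */
	kthread_stop(example_task);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

The patch applies exactly this lifecycle: wake_up_gc() replaces
bch_queue_gc() at the wakeup sites, bch_gc_thread_start() replaces the
closure/workqueue setup, and cache_set_free() stops the thread.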
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 1b64e66..b9bd586 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -210,7 +210,7 @@
 			 * multiple times when it can't do anything
 			 */
 			ca->invalidate_needs_gc = 1;
-			bch_queue_gc(ca->set);
+			wake_up_gc(ca->set);
 			return;
 		}
 
@@ -235,7 +235,7 @@
 
 		if (++checked >= ca->sb.nbuckets) {
 			ca->invalidate_needs_gc = 1;
-			bch_queue_gc(ca->set);
+			wake_up_gc(ca->set);
 			return;
 		}
 	}
@@ -260,7 +260,7 @@
 
 		if (++checked >= ca->sb.nbuckets / 2) {
 			ca->invalidate_needs_gc = 1;
-			bch_queue_gc(ca->set);
+			wake_up_gc(ca->set);
 			return;
 		}
 	}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d352074..09410eb 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -773,7 +773,7 @@
 	struct gc_stat		gc_stats;
 	size_t			nbuckets;
 
-	struct closure_with_waitlist gc;
+	struct task_struct	*gc_thread;
 	/* Where in the btree gc currently is */
 	struct bkey		gc_done;
 
@@ -786,11 +786,10 @@
 	/* Counts how many sectors bio_insert has added to the cache */
 	atomic_t		sectors_to_gc;
 
-	struct closure		moving_gc;
-	struct closure_waitlist	moving_gc_wait;
+	wait_queue_head_t	moving_gc_wait;
 	struct keybuf		moving_gc_keys;
 	/* Number of moving GC bios in flight */
-	atomic_t		in_flight;
+	struct semaphore	moving_in_flight;
 
 	struct btree		*root;
 
@@ -1176,7 +1175,7 @@
 void bch_prio_write(struct cache *);
 void bch_write_bdev_super(struct cached_dev *, struct closure *);
 
-extern struct workqueue_struct *bcache_wq, *bch_gc_wq;
+extern struct workqueue_struct *bcache_wq;
 extern const char * const bch_cache_modes[];
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 935d90d..17bfd87 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -28,7 +28,9 @@
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/freezer.h>
 #include <linux/hash.h>
+#include <linux/kthread.h>
 #include <linux/prefetch.h>
 #include <linux/random.h>
 #include <linux/rcupdate.h>
@@ -105,7 +107,6 @@
 #define PTR_HASH(c, k)							\
 	(((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
 
-struct workqueue_struct *bch_gc_wq;
 static struct workqueue_struct *btree_io_wq;
 
 void bch_btree_op_init_stack(struct btree_op *op)
@@ -732,12 +733,9 @@
 {
 	unsigned i;
 
-	/* XXX: doesn't check for errors */
-
-	closure_init_unlocked(&c->gc);
-
 	for (i = 0; i < mca_reserve(c); i++)
-		mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL);
+		if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
+			return -ENOMEM;
 
 	list_splice_init(&c->btree_cache,
 			 &c->btree_cache_freeable);
@@ -1456,9 +1454,8 @@
 	return available;
 }
 
-static void bch_btree_gc(struct closure *cl)
+static void bch_btree_gc(struct cache_set *c)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, gc.cl);
 	int ret;
 	unsigned long available;
 	struct gc_stat stats;
@@ -1483,7 +1480,7 @@
 
 	if (ret) {
 		pr_warn("gc failed!");
-		continue_at(cl, bch_btree_gc, bch_gc_wq);
+		return;
 	}
 
 	/* Possibly wait for new UUIDs or whatever to hit disk */
@@ -1505,12 +1502,35 @@
 
 	trace_bcache_gc_end(c);
 
-	continue_at(cl, bch_moving_gc, bch_gc_wq);
+	bch_moving_gc(c);
 }
 
-void bch_queue_gc(struct cache_set *c)
+static int bch_gc_thread(void *arg)
 {
-	closure_trylock_call(&c->gc.cl, bch_btree_gc, bch_gc_wq, &c->cl);
+	struct cache_set *c = arg;
+
+	while (1) {
+		bch_btree_gc(c);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+
+		try_to_freeze();
+		schedule();
+	}
+
+	return 0;
+}
+
+int bch_gc_thread_start(struct cache_set *c)
+{
+	c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc");
+	if (IS_ERR(c->gc_thread))
+		return PTR_ERR(c->gc_thread);
+
+	set_task_state(c->gc_thread, TASK_INTERRUPTIBLE);
+	return 0;
 }
 
 /* Initial partial gc */
@@ -2480,14 +2500,12 @@
 {
 	if (btree_io_wq)
 		destroy_workqueue(btree_io_wq);
-	if (bch_gc_wq)
-		destroy_workqueue(bch_gc_wq);
 }
 
 int __init bch_btree_init(void)
 {
-	if (!(bch_gc_wq = create_singlethread_workqueue("bch_btree_gc")) ||
-	    !(btree_io_wq = create_singlethread_workqueue("bch_btree_io")))
+	btree_io_wq = create_singlethread_workqueue("bch_btree_io");
+	if (!btree_io_wq)
 		return -ENOMEM;
 
 	return 0;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index d691d95..fa9641a 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -388,12 +388,18 @@
 
 int bch_btree_search_recurse(struct btree *, struct btree_op *);
 
-void bch_queue_gc(struct cache_set *);
+int bch_gc_thread_start(struct cache_set *);
 size_t bch_btree_gc_finish(struct cache_set *);
-void bch_moving_gc(struct closure *);
+void bch_moving_gc(struct cache_set *);
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
+static inline void wake_up_gc(struct cache_set *c)
+{
+	if (c->gc_thread)
+		wake_up_process(c->gc_thread);
+}
+
 void bch_keybuf_init(struct keybuf *);
 void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
 		       keybuf_pred_fn *);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 2c42377..6ba0504 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -57,8 +57,7 @@
 
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
-	atomic_dec_bug(&io->s.op.c->in_flight);
-	closure_wake_up(&io->s.op.c->moving_gc_wait);
+	up(&io->s.op.c->moving_in_flight);
 
 	closure_return_with_destructor(cl, moving_io_destructor);
 }
@@ -113,7 +112,7 @@
 		bch_data_insert(&s->op.cl);
 	}
 
-	continue_at(cl, write_moving_finish, bch_gc_wq);
+	continue_at(cl, write_moving_finish, system_wq);
 }
 
 static void read_moving_submit(struct closure *cl)
@@ -124,15 +123,17 @@
 
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
-	continue_at(cl, write_moving, bch_gc_wq);
+	continue_at(cl, write_moving, system_wq);
 }
 
-static void read_moving(struct closure *cl)
+static void read_moving(struct cache_set *c)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, moving_gc);
 	struct keybuf_key *w;
 	struct moving_io *io;
 	struct bio *bio;
+	struct closure cl;
+
+	closure_init_stack(&cl);
 
 	/* XXX: if we error, background writeback could stall indefinitely */
 
@@ -164,13 +165,8 @@
 
 		trace_bcache_gc_copy(&w->key);
 
-		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
-
-		if (atomic_inc_return(&c->in_flight) >= 64) {
-			closure_wait_event(&c->moving_gc_wait, cl,
-					   atomic_read(&c->in_flight) < 64);
-			continue_at(cl, read_moving, bch_gc_wq);
-		}
+		down(&c->moving_in_flight);
+		closure_call(&io->s.cl, read_moving_submit, NULL, &cl);
 	}
 
 	if (0) {
@@ -180,7 +176,7 @@
 		bch_keybuf_del(&c->moving_gc_keys, w);
 	}
 
-	closure_return(cl);
+	closure_sync(&cl);
 }
 
 static bool bucket_cmp(struct bucket *l, struct bucket *r)
@@ -193,15 +189,14 @@
 	return GC_SECTORS_USED(heap_peek(&ca->heap));
 }
 
-void bch_moving_gc(struct closure *cl)
+void bch_moving_gc(struct cache_set *c)
 {
-	struct cache_set *c = container_of(cl, struct cache_set, gc.cl);
 	struct cache *ca;
 	struct bucket *b;
 	unsigned i;
 
 	if (!c->copy_gc_enabled)
-		closure_return(cl);
+		return;
 
 	mutex_lock(&c->bucket_lock);
 
@@ -242,13 +237,11 @@
 
 	c->moving_gc_keys.last_scanned = ZERO_KEY;
 
-	closure_init(&c->moving_gc, cl);
-	read_moving(&c->moving_gc);
-
-	closure_return(cl);
+	read_moving(c);
 }
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
 	bch_keybuf_init(&c->moving_gc_keys);
+	sema_init(&c->moving_in_flight, 64);
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 26d18f4..f779eb4 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -520,7 +520,7 @@
 
 	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
 		set_gc_sectors(op->c);
-		bch_queue_gc(op->c);
+		wake_up_gc(op->c);
 	}
 
 	/*
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 84398a8..f89e229 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1342,6 +1342,9 @@
 	kobject_put(&c->internal);
 	kobject_del(&c->kobj);
 
+	if (c->gc_thread)
+		kthread_stop(c->gc_thread);
+
 	if (!IS_ERR_OR_NULL(c->root))
 		list_add(&c->root->list, &c->btree_cache);
 
@@ -1579,8 +1582,6 @@
 		bch_journal_replay(c, &journal, &op);
 	} else {
 		pr_notice("invalidating existing data");
-		/* Don't want invalidate_buckets() to queue a gc yet */
-		closure_lock(&c->gc, NULL);
 
 		for_each_cache(ca, c, i) {
 			unsigned j;
@@ -1606,12 +1607,12 @@
 
 		err = "cannot allocate new UUID bucket";
 		if (__uuid_write(c))
-			goto err_unlock_gc;
+			goto err;
 
 		err = "cannot allocate new btree root";
 		c->root = bch_btree_node_alloc(c, 0);
 		if (IS_ERR_OR_NULL(c->root))
-			goto err_unlock_gc;
+			goto err;
 
 		bkey_copy_key(&c->root->key, &MAX_KEY);
 		bch_btree_node_write(c->root, &op.cl);
@@ -1628,12 +1629,12 @@
 
 		bch_journal_next(&c->journal);
 		bch_journal_meta(c, &op.cl);
-
-		/* Unlock */
-		closure_set_stopped(&c->gc.cl);
-		closure_put(&c->gc.cl);
 	}
 
+	err = "error starting gc thread";
+	if (bch_gc_thread_start(c))
+		goto err;
+
 	closure_sync(&op.cl);
 	c->sb.last_mount = get_seconds();
 	bcache_write_super(c);
@@ -1644,9 +1645,6 @@
 	flash_devs_run(c);
 
 	return;
-err_unlock_gc:
-	closure_set_stopped(&c->gc.cl);
-	closure_put(&c->gc.cl);
 err:
 	closure_sync(&op.cl);
 	/* XXX: test this, it's broken */
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b3a66f1..ab286b9 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -566,7 +566,7 @@
 	}
 
 	if (attr == &sysfs_trigger_gc)
-		bch_queue_gc(c);
+		wake_up_gc(c);
 
 	if (attr == &sysfs_prune_cache) {
 		struct shrink_control sc;