bcache: Refactor btree io The most significant change is that btree reads are now done synchronously, instead of asynchronously and doing the post read stuff from a workqueue. This was originally done because we can't block on IO under generic_make_request(). But - we already have a mechanism to punt cache lookups to workqueue if needed, so if we just use that we don't have to deal with the complexity of doing things asynchronously. The main benefit is this makes the locking situation saner; we can hold our write lock on the btree node until we're finished reading it, and we don't need that btree_node_read_done() flag anymore. Also, for writes, btree_write() was broken out into btree_node_write() and btree_leaf_dirty() - the old code with the boolean argument was dumb and confusing. The prio_blocked mechanism was improved a bit too, now the only counter is in struct btree_write, we don't mess with transfering a count from struct btree anymore. This required changing garbage collection to block prios at the start and unblock when it finishes, which is cleaner than what it was doing anyways (the old code had mostly the same effect, but was doing it in a convoluted way) And the btree iter btree_node_read_done() uses was converted to a real mempool. Signed-off-by: Kent Overstreet <koverstreet@google.com>

commit: 5794351146199b9ac67a5ab1beab82be8bfd7b5d [log] [tgz]
author: Kent Overstreet <koverstreet@google.com> Thu Apr 25 13:58:35 2013 -0700
committer: Kent Overstreet <koverstreet@google.com> Wed Jun 26 17:09:14 2013 -0700
tree: efefb88301131757fd32b700ce897943597578da
parent: 119ba0f82839cd80eaef3e6991988f1403965d5b [diff] [blame]
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index aaeda23..e53f899 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c

@@ -1255,9 +1255,10 @@
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 	free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
-	kfree(c->fill_iter);
 	if (c->bio_split)
 		bioset_free(c->bio_split);
+	if (c->fill_iter)
+		mempool_destroy(c->fill_iter);
 	if (c->bio_meta)
 		mempool_destroy(c->bio_meta);
 	if (c->search)
@@ -1295,7 +1296,7 @@
 	/* Should skip this if we're unregistering because of an error */
 	list_for_each_entry(b, &c->btree_cache, list)
 		if (btree_node_dirty(b))
-			bch_btree_write(b, true, NULL);
+			bch_btree_node_write(b, NULL);
 
 	closure_return(cl);
 }
@@ -1374,7 +1375,6 @@
 				       BTREE_MAX_PAGES);
 
 	mutex_init(&c->bucket_lock);
-	mutex_init(&c->fill_lock);
 	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->sort_time_lock);
 	closure_init_unlocked(&c->sb_write);
@@ -1400,8 +1400,8 @@
 	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
+	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
 	    !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    bch_journal_alloc(c) ||
@@ -1409,8 +1409,6 @@
 	    bch_open_buckets_alloc(c))
 		goto err;
 
-	c->fill_iter->size = sb->bucket_size / sb->block_size;
-
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
 	c->error_limit	= 8 << IO_ERROR_SHIFT;
@@ -1551,7 +1549,7 @@
 			goto err_unlock_gc;
 
 		bkey_copy_key(&c->root->key, &MAX_KEY);
-		bch_btree_write(c->root, true, &op);
+		bch_btree_node_write(c->root, &op.cl);
 
 		bch_btree_set_root(c->root);
 		rw_unlock(true, c->root);
commit	5794351146199b9ac67a5ab1beab82be8bfd7b5d	[log] [tgz]
author	Kent Overstreet <koverstreet@google.com>	Thu Apr 25 13:58:35 2013 -0700
committer	Kent Overstreet <koverstreet@google.com>	Wed Jun 26 17:09:14 2013 -0700
tree	efefb88301131757fd32b700ce897943597578da
parent	119ba0f82839cd80eaef3e6991988f1403965d5b [diff] [blame]