Btrfs: Add async worker threads for pre and post IO checksumming

Btrfs has been using workqueues to spread the checksumming load across
other CPUs in the system.  But, workqueues only schedule work on the
same CPU that queued the work, giving them a limited benefit for systems with
higher CPU counts.

This code adds a generic facility to schedule work with pools of kthreads,
and changes the bio submission code to queue bios up.  The queueing is
important to make sure large numbers of procs on the system don't
turn streaming workloads into random workloads by sending IO down
concurrently.

The end result of all of this is much higher performance (and CPU usage) when
doing checksumming on large machines.  Two worker pools are created,
one for writes and one for endio processing.  The two could deadlock if
we tried to service both from a single pool.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 4df6b16..48a44f7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,6 +20,7 @@
 #define __BTRFS_VOLUMES_
 
 #include <linux/bio.h>
+#include "async-thread.h"
 
 struct buffer_head;
 struct btrfs_device {
@@ -27,6 +28,9 @@
 	struct list_head dev_alloc_list;
 	struct btrfs_root *dev_root;
 	struct buffer_head *pending_io;
+	struct bio *pending_bios;
+	struct bio *pending_bio_tail;
+	int running_pending;
 	u64 generation;
 
 	int barriers;
@@ -36,8 +40,6 @@
 
 	struct block_device *bdev;
 
-	u64 total_ios;
-
 	char *name;
 
 	/* the internal btrfs device id */
@@ -63,6 +65,8 @@
 
 	/* physical drive uuid (or lvm uuid) */
 	u8 uuid[BTRFS_UUID_SIZE];
+
+	struct btrfs_work work;
 };
 
 struct btrfs_fs_devices {
@@ -117,7 +121,7 @@
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
-		  int mirror_num);
+		  int mirror_num, int async_submit);
 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       int flags, void *holder);