Btrfs: use percpu counter for dirty metadata count

->dirty_metadata_bytes is accessed very frequently, so use percpu
counter instead of the u64 variant to reduce the contention of
the lock.

This patch also fixed the problem that we access it without
lock protection in __btrfs_btree_balance_dirty(), which may
cause we skip the dirty pages flush.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d8e539f..4c47628 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -191,6 +191,8 @@
 /* ioprio of readahead is set to idle */
 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
 
+#define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -1448,10 +1450,9 @@
 
 	u64 total_pinned;
 
-	/* protected by the delalloc lock, used to keep from writing
-	 * metadata until there is a nice batch
-	 */
-	u64 dirty_metadata_bytes;
+	/* used to keep from writing metadata until there is a nice batch */
+	struct percpu_counter dirty_metadata_bytes;
+	s32 dirty_metadata_batch;
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;