btrfs: try harder to allocate raid56 stripe cache

The stripe hash table is large, starting with allocation order 4 and can go as
high as order 7 in case lock debugging is turned on and structure padding
happens.

Observed mount failure:

mount: page allocation failure: order:7, mode:0x200050
Pid: 8234, comm: mount Tainted: G        W    3.8.0-default+ #267
Call Trace:
 [<ffffffff81114353>] warn_alloc_failed+0xf3/0x140
 [<ffffffff811171d2>] ? __alloc_pages_direct_compact+0x92/0x250
 [<ffffffff81117ac3>] __alloc_pages_nodemask+0x733/0x9d0
 [<ffffffff81152878>] ? cache_alloc_refill+0x3f8/0x840
 [<ffffffff811528bc>] cache_alloc_refill+0x43c/0x840
 [<ffffffff811302eb>] ? is_kernel_percpu_address+0x4b/0x90
 [<ffffffffa00a00ac>] ? btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs]
 [<ffffffff811531d7>] kmem_cache_alloc_trace+0x247/0x270
 [<ffffffffa00a00ac>] btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs]
 [<ffffffffa003133f>] open_ctree+0xb2f/0x1f90 [btrfs]
 [<ffffffff81397289>] ? string+0x49/0xe0
 [<ffffffff813987b3>] ? vsnprintf+0x443/0x5d0
 [<ffffffffa0007cb6>] btrfs_mount+0x526/0x600 [btrfs]
 [<ffffffff8115127c>] ? cache_alloc_debugcheck_after+0x4c/0x200
 [<ffffffff81162b90>] mount_fs+0x20/0xe0
 [<ffffffff8117db26>] vfs_kern_mount+0x76/0x120
 [<ffffffff811801b6>] do_mount+0x386/0x980
 [<ffffffff8112a5cb>] ? strndup_user+0x5b/0x80
 [<ffffffff81180840>] sys_mount+0x90/0xe0
 [<ffffffff81962e99>] system_call_fastpath+0x16/0x1b

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5031e6d..02369a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2197,7 +2197,7 @@
 
 	ret = btrfs_alloc_stripe_hash_table(fs_info);
 	if (ret) {
-		err = -ENOMEM;
+		err = ret;
 		goto fail_alloc;
 	}
 
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index e34e568..0722205 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -188,13 +188,25 @@
 	struct btrfs_stripe_hash *h;
 	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
 	int i;
+	int table_size;
 
 	if (info->stripe_hash_table)
 		return 0;
 
-	table = kzalloc(sizeof(*table) + sizeof(*h) * num_entries, GFP_NOFS);
-	if (!table)
-		return -ENOMEM;
+	/*
+	 * The table is large, starting with order 4 and can go as high as
+	 * order 7 in case lock debugging is turned on.
+	 *
+	 * Try harder to allocate and fallback to vmalloc to lower the chance
+	 * of a failing mount.
+	 */
+	table_size = sizeof(*table) + sizeof(*h) * num_entries;
+	table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	if (!table) {
+		table = vzalloc(table_size);
+		if (!table)
+			return -ENOMEM;
+	}
 
 	spin_lock_init(&table->cache_lock);
 	INIT_LIST_HEAD(&table->stripe_cache);
@@ -209,8 +221,12 @@
 	}
 
 	x = cmpxchg(&info->stripe_hash_table, NULL, table);
-	if (x)
-		kfree(x);
+	if (x) {
+		if (is_vmalloc_addr(x))
+			vfree(x);
+		else
+			kfree(x);
+	}
 	return 0;
 }
 
@@ -420,7 +436,10 @@
 	if (!info->stripe_hash_table)
 		return;
 	btrfs_clear_rbio_cache(info);
-	kfree(info->stripe_hash_table);
+	if (is_vmalloc_addr(info->stripe_hash_table))
+		vfree(info->stripe_hash_table);
+	else
+		kfree(info->stripe_hash_table);
 	info->stripe_hash_table = NULL;
 }