Blame - fs/btrfs/raid56.c - kernel/msm-4.19

blob: d8ea0eb76325e9b25d42dfa4a99c63918981aa2f [file] [log] [blame]

David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 Fusion-io All rights reserved.
				3	* Copyright (C) 2012 Intel Corp. All rights reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public
				7	* License v2 as published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				12	* General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public
				15	* License along with this program; if not, write to the
				16	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				17	* Boston, MA 021110-1307, USA.
				18	*/
				19	#include <linux/sched.h>
				20	#include <linux/wait.h>
				21	#include <linux/bio.h>
				22	#include <linux/slab.h>
				23	#include <linux/buffer_head.h>
				24	#include <linux/blkdev.h>
				25	#include <linux/random.h>
				26	#include <linux/iocontext.h>
				27	#include <linux/capability.h>
				28	#include <linux/ratelimit.h>
				29	#include <linux/kthread.h>
				30	#include <linux/raid/pq.h>
				31	#include <linux/hash.h>
				32	#include <linux/list_sort.h>
				33	#include <linux/raid/xor.h>
Geert Uytterhoeven	d7011f5	2013-03-03 04:44:41 -0700	[diff] [blame]	34	#include <linux/vmalloc.h>
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	35	#include <asm/div64.h>
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	36	#include "ctree.h"
				37	#include "extent_map.h"
				38	#include "disk-io.h"
				39	#include "transaction.h"
				40	#include "print-tree.h"
				41	#include "volumes.h"
				42	#include "raid56.h"
				43	#include "async-thread.h"
				44	#include "check-integrity.h"
				45	#include "rcu-string.h"
				46
				47	/* set when additional merges to this rbio are not allowed */
				48	#define RBIO_RMW_LOCKED_BIT 1
				49
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	50	/*
				51	* set when this rbio is sitting in the hash, but it is just a cache
				52	* of past RMW
				53	*/
				54	#define RBIO_CACHE_BIT 2
				55
				56	/*
				57	* set when it is safe to trust the stripe_pages for caching
				58	*/
				59	#define RBIO_CACHE_READY_BIT 3
				60
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	61	#define RBIO_CACHE_SIZE 1024
				62
/*
 * The kind of work an rbio is carrying out.  rbio_can_merge() refuses to
 * merge rbios whose operations differ, and unlock_stripe() uses the value
 * to pick the async helper that starts the next rbio.
 */
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};
				69
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	70	struct btrfs_raid_bio {
				71	struct btrfs_fs_info *fs_info;
				72	struct btrfs_bio *bbio;
				73
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	74	/* while we're doing rmw on a stripe
				75	* we put it into a hash table so we can
				76	* lock the stripe and merge more rbios
				77	* into it.
				78	*/
				79	struct list_head hash_list;
				80
				81	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	82	* LRU list for the stripe cache
				83	*/
				84	struct list_head stripe_cache;
				85
				86	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	87	* for scheduling work in the helper threads
				88	*/
				89	struct btrfs_work work;
				90
				91	/*
				92	* bio list and bio_list_lock are used
				93	* to add more bios into the stripe
				94	* in hopes of avoiding the full rmw
				95	*/
				96	struct bio_list bio_list;
				97	spinlock_t bio_list_lock;
				98
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	99	/* also protected by the bio_list_lock, the
				100	* plug list is used by the plugging code
				101	* to collect partial bios while plugged. The
				102	* stripe locking code also uses it to hand off
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	103	* the stripe lock to the next pending IO
				104	*/
				105	struct list_head plug_list;
				106
				107	/*
				108	* flags that tell us if it is safe to
				109	* merge with this bio
				110	*/
				111	unsigned long flags;
				112
				113	/* size of each individual stripe on disk */
				114	int stripe_len;
				115
				116	/* number of data stripes (no p/q) */
				117	int nr_data;
				118
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	119	int real_stripes;
				120
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	121	int stripe_npages;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	122	/*
				123	* set if we're doing a parity rebuild
				124	* for a read from higher up, which is handled
				125	* differently from a parity rebuild as part of
				126	* rmw
				127	*/
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	128	enum btrfs_rbio_ops operation;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	129
				130	/* first bad stripe */
				131	int faila;
				132
				133	/* second bad stripe (for raid6 use) */
				134	int failb;
				135
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	136	int scrubp;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	137	/*
				138	* number of pages needed to represent the full
				139	* stripe
				140	*/
				141	int nr_pages;
				142
				143	/*
				144	* size of all the bios in the bio_list. This
				145	* helps us decide if the rbio maps to a full
				146	* stripe or not
				147	*/
				148	int bio_list_bytes;
				149
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	150	int generic_bio_cnt;
				151
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	152	refcount_t refs;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	153
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	154	atomic_t stripes_pending;
				155
				156	atomic_t error;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	157	/*
				158	* these are two arrays of pointers. We allocate the
				159	* rbio big enough to hold them both and setup their
				160	* locations when the rbio is allocated
				161	*/
				162
				163	/* pointers to pages that we allocated for
				164	* reading/writing stripes directly from the disk (including P/Q)
				165	*/
				166	struct page **stripe_pages;
				167
				168	/*
				169	* pointers to the pages in the bio_list. Stored
				170	* here for faster lookup
				171	*/
				172	struct page **bio_pages;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	173
				174	/*
				175	* bitmap to record which horizontal stripe has data
				176	*/
				177	unsigned long *dbitmap;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	178	};
				179
				180	static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
				181	static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
				182	static void rmw_work(struct btrfs_work *work);
				183	static void read_rebuild_work(struct btrfs_work *work);
				184	static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
				185	static void async_read_rebuild(struct btrfs_raid_bio *rbio);
				186	static int fail_bio_stripe(struct btrfs_raid_bio rbio, struct bio bio);
				187	static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
				188	static void __free_raid_bio(struct btrfs_raid_bio *rbio);
				189	static void index_rbio_pages(struct btrfs_raid_bio *rbio);
				190	static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
				191
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	192	static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
				193	int need_check);
				194	static void async_scrub_parity(struct btrfs_raid_bio *rbio);
				195
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	196	/*
				197	* the stripe hash table is used for locking, and to collect
				198	* bios in hopes of making a full stripe
				199	*/
				200	int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
				201	{
				202	struct btrfs_stripe_hash_table *table;
				203	struct btrfs_stripe_hash_table *x;
				204	struct btrfs_stripe_hash *cur;
				205	struct btrfs_stripe_hash *h;
				206	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
				207	int i;
David Sterba	83c8266	2013-03-01 15:03:00 +0000	[diff] [blame]	208	int table_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	209
				210	if (info->stripe_hash_table)
				211	return 0;
				212
David Sterba	83c8266	2013-03-01 15:03:00 +0000	[diff] [blame]	213	/*
				214	* The table is large, starting with order 4 and can go as high as
				215	* order 7 in case lock debugging is turned on.
				216	*
				217	* Try harder to allocate and fallback to vmalloc to lower the chance
				218	* of a failing mount.
				219	*/
				220	table_size = sizeof(table) + sizeof(h) * num_entries;
				221	table = kzalloc(table_size, GFP_KERNEL \| __GFP_NOWARN \| __GFP_REPEAT);
				222	if (!table) {
				223	table = vzalloc(table_size);
				224	if (!table)
				225	return -ENOMEM;
				226	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	227
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	228	spin_lock_init(&table->cache_lock);
				229	INIT_LIST_HEAD(&table->stripe_cache);
				230
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	231	h = table->table;
				232
				233	for (i = 0; i < num_entries; i++) {
				234	cur = h + i;
				235	INIT_LIST_HEAD(&cur->hash_list);
				236	spin_lock_init(&cur->lock);
				237	init_waitqueue_head(&cur->wait);
				238	}
				239
				240	x = cmpxchg(&info->stripe_hash_table, NULL, table);
Wang Shilong	f749303	2014-11-22 21:13:10 +0800	[diff] [blame]	241	if (x)
				242	kvfree(x);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	243	return 0;
				244	}
				245
				246	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	247	* caching an rbio means to copy anything from the
				248	* bio_pages array into the stripe_pages array. We
				249	* use the page uptodate bit in the stripe cache array
				250	* to indicate if it has valid data
				251	*
				252	* once the caching is done, we set the cache ready
				253	* bit.
				254	*/
				255	static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
				256	{
				257	int i;
				258	char *s;
				259	char *d;
				260	int ret;
				261
				262	ret = alloc_rbio_pages(rbio);
				263	if (ret)
				264	return;
				265
				266	for (i = 0; i < rbio->nr_pages; i++) {
				267	if (!rbio->bio_pages[i])
				268	continue;
				269
				270	s = kmap(rbio->bio_pages[i]);
				271	d = kmap(rbio->stripe_pages[i]);
				272
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	273	memcpy(d, s, PAGE_SIZE);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	274
				275	kunmap(rbio->bio_pages[i]);
				276	kunmap(rbio->stripe_pages[i]);
				277	SetPageUptodate(rbio->stripe_pages[i]);
				278	}
				279	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				280	}
				281
				282	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	283	* we hash on the first logical address of the stripe
				284	*/
				285	static int rbio_bucket(struct btrfs_raid_bio *rbio)
				286	{
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	287	u64 num = rbio->bbio->raid_map[0];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	288
				289	/*
				290	* we shift down quite a bit. We're using byte
				291	* addressing, and most of the lower bits are zeros.
				292	* This tends to upset hash_64, and it consistently
				293	* returns just one or two different values.
				294	*
				295	* shifting off the lower bits fixes things.
				296	*/
				297	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
				298	}
				299
				300	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	301	* stealing an rbio means taking all the uptodate pages from the stripe
				302	* array in the source rbio and putting them into the destination rbio
				303	*/
				304	static void steal_rbio(struct btrfs_raid_bio src, struct btrfs_raid_bio dest)
				305	{
				306	int i;
				307	struct page *s;
				308	struct page *d;
				309
				310	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
				311	return;
				312
				313	for (i = 0; i < dest->nr_pages; i++) {
				314	s = src->stripe_pages[i];
				315	if (!s \|\| !PageUptodate(s)) {
				316	continue;
				317	}
				318
				319	d = dest->stripe_pages[i];
				320	if (d)
				321	__free_page(d);
				322
				323	dest->stripe_pages[i] = s;
				324	src->stripe_pages[i] = NULL;
				325	}
				326	}
				327
				328	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	329	* merging means we take the bio_list from the victim and
				330	* splice it into the destination. The victim should
				331	* be discarded afterwards.
				332	*
				333	* must be called with dest->rbio_list_lock held
				334	*/
				335	static void merge_rbio(struct btrfs_raid_bio *dest,
				336	struct btrfs_raid_bio *victim)
				337	{
				338	bio_list_merge(&dest->bio_list, &victim->bio_list);
				339	dest->bio_list_bytes += victim->bio_list_bytes;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	340	dest->generic_bio_cnt += victim->generic_bio_cnt;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	341	bio_list_init(&victim->bio_list);
				342	}
				343
				344	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	345	* used to prune items that are in the cache. The caller
				346	* must hold the hash table lock.
				347	*/
				348	static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
				349	{
				350	int bucket = rbio_bucket(rbio);
				351	struct btrfs_stripe_hash_table *table;
				352	struct btrfs_stripe_hash *h;
				353	int freeit = 0;
				354
				355	/*
				356	* check the bit again under the hash table lock.
				357	*/
				358	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
				359	return;
				360
				361	table = rbio->fs_info->stripe_hash_table;
				362	h = table->table + bucket;
				363
				364	/* hold the lock for the bucket because we may be
				365	* removing it from the hash table
				366	*/
				367	spin_lock(&h->lock);
				368
				369	/*
				370	* hold the lock for the bio list because we need
				371	* to make sure the bio list is empty
				372	*/
				373	spin_lock(&rbio->bio_list_lock);
				374
				375	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
				376	list_del_init(&rbio->stripe_cache);
				377	table->cache_size -= 1;
				378	freeit = 1;
				379
				380	/* if the bio list isn't empty, this rbio is
				381	* still involved in an IO. We take it out
				382	* of the cache list, and drop the ref that
				383	* was held for the list.
				384	*
				385	* If the bio_list was empty, we also remove
				386	* the rbio from the hash_table, and drop
				387	* the corresponding ref
				388	*/
				389	if (bio_list_empty(&rbio->bio_list)) {
				390	if (!list_empty(&rbio->hash_list)) {
				391	list_del_init(&rbio->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	392	refcount_dec(&rbio->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	393	BUG_ON(!list_empty(&rbio->plug_list));
				394	}
				395	}
				396	}
				397
				398	spin_unlock(&rbio->bio_list_lock);
				399	spin_unlock(&h->lock);
				400
				401	if (freeit)
				402	__free_raid_bio(rbio);
				403	}
				404
				405	/*
				406	* prune a given rbio from the cache
				407	*/
				408	static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
				409	{
				410	struct btrfs_stripe_hash_table *table;
				411	unsigned long flags;
				412
				413	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
				414	return;
				415
				416	table = rbio->fs_info->stripe_hash_table;
				417
				418	spin_lock_irqsave(&table->cache_lock, flags);
				419	__remove_rbio_from_cache(rbio);
				420	spin_unlock_irqrestore(&table->cache_lock, flags);
				421	}
				422
				423	/*
				424	* remove everything in the cache
				425	*/
Eric Sandeen	48a3b63	2013-04-25 20:41:01 +0000	[diff] [blame]	426	static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	427	{
				428	struct btrfs_stripe_hash_table *table;
				429	unsigned long flags;
				430	struct btrfs_raid_bio *rbio;
				431
				432	table = info->stripe_hash_table;
				433
				434	spin_lock_irqsave(&table->cache_lock, flags);
				435	while (!list_empty(&table->stripe_cache)) {
				436	rbio = list_entry(table->stripe_cache.next,
				437	struct btrfs_raid_bio,
				438	stripe_cache);
				439	__remove_rbio_from_cache(rbio);
				440	}
				441	spin_unlock_irqrestore(&table->cache_lock, flags);
				442	}
				443
				444	/*
				445	* remove all cached entries and free the hash table
				446	* used by unmount
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	447	*/
				448	void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
				449	{
				450	if (!info->stripe_hash_table)
				451	return;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	452	btrfs_clear_rbio_cache(info);
Wang Shilong	f749303	2014-11-22 21:13:10 +0800	[diff] [blame]	453	kvfree(info->stripe_hash_table);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	454	info->stripe_hash_table = NULL;
				455	}
				456
				457	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	458	* insert an rbio into the stripe cache. It
				459	* must have already been prepared by calling
				460	* cache_rbio_pages
				461	*
				462	* If this rbio was already cached, it gets
				463	* moved to the front of the lru.
				464	*
				465	* If the size of the rbio cache is too big, we
				466	* prune an item.
				467	*/
				468	static void cache_rbio(struct btrfs_raid_bio *rbio)
				469	{
				470	struct btrfs_stripe_hash_table *table;
				471	unsigned long flags;
				472
				473	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
				474	return;
				475
				476	table = rbio->fs_info->stripe_hash_table;
				477
				478	spin_lock_irqsave(&table->cache_lock, flags);
				479	spin_lock(&rbio->bio_list_lock);
				480
				481	/* bump our ref if we were not in the list before */
				482	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	483	refcount_inc(&rbio->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	484
				485	if (!list_empty(&rbio->stripe_cache)){
				486	list_move(&rbio->stripe_cache, &table->stripe_cache);
				487	} else {
				488	list_add(&rbio->stripe_cache, &table->stripe_cache);
				489	table->cache_size += 1;
				490	}
				491
				492	spin_unlock(&rbio->bio_list_lock);
				493
				494	if (table->cache_size > RBIO_CACHE_SIZE) {
				495	struct btrfs_raid_bio *found;
				496
				497	found = list_entry(table->stripe_cache.prev,
				498	struct btrfs_raid_bio,
				499	stripe_cache);
				500
				501	if (found != rbio)
				502	__remove_rbio_from_cache(found);
				503	}
				504
				505	spin_unlock_irqrestore(&table->cache_lock, flags);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	506	}
				507
				508	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	509	* helper function to run the xor_blocks api. It is only
				510	* able to do MAX_XOR_BLOCKS at a time, so we need to
				511	* loop through.
				512	*/
				513	static void run_xor(void **pages, int src_cnt, ssize_t len)
				514	{
				515	int src_off = 0;
				516	int xor_src_cnt = 0;
				517	void *dest = pages[src_cnt];
				518
				519	while(src_cnt > 0) {
				520	xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
				521	xor_blocks(xor_src_cnt, len, dest, pages + src_off);
				522
				523	src_cnt -= xor_src_cnt;
				524	src_off += xor_src_cnt;
				525	}
				526	}
				527
				528	/*
				529	* returns true if the bio list inside this rbio
				530	* covers an entire stripe (no rmw required).
				531	* Must be called with the bio list lock held, or
				532	* at a time when you know it is impossible to add
				533	* new bios into the list
				534	*/
				535	static int __rbio_is_full(struct btrfs_raid_bio *rbio)
				536	{
				537	unsigned long size = rbio->bio_list_bytes;
				538	int ret = 1;
				539
				540	if (size != rbio->nr_data * rbio->stripe_len)
				541	ret = 0;
				542
				543	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
				544	return ret;
				545	}
				546
				547	static int rbio_is_full(struct btrfs_raid_bio *rbio)
				548	{
				549	unsigned long flags;
				550	int ret;
				551
				552	spin_lock_irqsave(&rbio->bio_list_lock, flags);
				553	ret = __rbio_is_full(rbio);
				554	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
				555	return ret;
				556	}
				557
				558	/*
				559	* returns 1 if it is safe to merge two rbios together.
				560	* The merging is safe if the two rbios correspond to
				561	* the same stripe and if they are both going in the same
				562	* direction (read vs write), and if neither one is
				563	* locked for final IO
				564	*
				565	* The caller is responsible for locking such that
				566	* rmw_locked is safe to test
				567	*/
				568	static int rbio_can_merge(struct btrfs_raid_bio *last,
				569	struct btrfs_raid_bio *cur)
				570	{
				571	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) \|\|
				572	test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
				573	return 0;
				574
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	575	/*
				576	* we can't merge with cached rbios, since the
				577	* idea is that when we merge the destination
				578	* rbio is going to run our IO for us. We can
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	579	* steal from cached rbios though, other functions
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	580	* handle that.
				581	*/
				582	if (test_bit(RBIO_CACHE_BIT, &last->flags) \|\|
				583	test_bit(RBIO_CACHE_BIT, &cur->flags))
				584	return 0;
				585
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	586	if (last->bbio->raid_map[0] !=
				587	cur->bbio->raid_map[0])
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	588	return 0;
				589
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	590	/* we can't merge with different operations */
				591	if (last->operation != cur->operation)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	592	return 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	593	/*
				594	* We've need read the full stripe from the drive.
				595	* check and repair the parity and write the new results.
				596	*
				597	* We're not allowed to add any new bios to the
				598	* bio list here, anyone else that wants to
				599	* change this stripe needs to do their own rmw.
				600	*/
				601	if (last->operation == BTRFS_RBIO_PARITY_SCRUB \|\|
				602	cur->operation == BTRFS_RBIO_PARITY_SCRUB)
				603	return 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	604
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	605	if (last->operation == BTRFS_RBIO_REBUILD_MISSING \|\|
				606	cur->operation == BTRFS_RBIO_REBUILD_MISSING)
				607	return 0;
				608
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	609	return 1;
				610	}
				611
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	612	static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				613	int index)
				614	{
				615	return stripe * rbio->stripe_npages + index;
				616	}
				617
				618	/*
				619	* these are just the pages from the rbio array, not from anything
				620	* the FS sent down to us
				621	*/
				622	static struct page rbio_stripe_page(struct btrfs_raid_bio rbio, int stripe,
				623	int index)
				624	{
				625	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
				626	}
				627
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	628	/*
				629	* helper to index into the pstripe
				630	*/
				631	static struct page rbio_pstripe_page(struct btrfs_raid_bio rbio, int index)
				632	{
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	633	return rbio_stripe_page(rbio, rbio->nr_data, index);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	634	}
				635
				636	/*
				637	* helper to index into the qstripe, returns null
				638	* if there is no qstripe
				639	*/
				640	static struct page rbio_qstripe_page(struct btrfs_raid_bio rbio, int index)
				641	{
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	642	if (rbio->nr_data + 1 == rbio->real_stripes)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	643	return NULL;
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	644	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	645	}
				646
				647	/*
				648	* The first stripe in the table for a logical address
				649	* has the lock. rbios are added in one of three ways:
				650	*
				651	* 1) Nobody has the stripe locked yet. The rbio is given
				652	* the lock and 0 is returned. The caller must start the IO
				653	* themselves.
				654	*
				655	* 2) Someone has the stripe locked, but we're able to merge
				656	* with the lock owner. The rbio is freed and the IO will
				657	* start automatically along with the existing rbio. 1 is returned.
				658	*
				659	* 3) Someone has the stripe locked, but we're not able to merge.
				660	* The rbio is added to the lock owner's plug list, or merged into
				661	* an rbio already on the plug list. When the lock owner unlocks,
				662	* the next rbio on the list is run and the IO is started automatically.
				663	* 1 is returned
				664	*
				665	* If we return 0, the caller still owns the rbio and must continue with
				666	* IO submission. If we return 1, the caller must assume the rbio has
				667	* already been freed.
				668	*/
				669	static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
				670	{
				671	int bucket = rbio_bucket(rbio);
				672	struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
				673	struct btrfs_raid_bio *cur;
				674	struct btrfs_raid_bio *pending;
				675	unsigned long flags;
				676	DEFINE_WAIT(wait);
				677	struct btrfs_raid_bio *freeit = NULL;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	678	struct btrfs_raid_bio *cache_drop = NULL;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	679	int ret = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	680
				681	spin_lock_irqsave(&h->lock, flags);
				682	list_for_each_entry(cur, &h->hash_list, hash_list) {
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	683	if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	684	spin_lock(&cur->bio_list_lock);
				685
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	686	/* can we steal this cached rbio's pages? */
				687	if (bio_list_empty(&cur->bio_list) &&
				688	list_empty(&cur->plug_list) &&
				689	test_bit(RBIO_CACHE_BIT, &cur->flags) &&
				690	!test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
				691	list_del_init(&cur->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	692	refcount_dec(&cur->refs);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	693
				694	steal_rbio(cur, rbio);
				695	cache_drop = cur;
				696	spin_unlock(&cur->bio_list_lock);
				697
				698	goto lockit;
				699	}
				700
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	701	/* can we merge into the lock owner? */
				702	if (rbio_can_merge(cur, rbio)) {
				703	merge_rbio(cur, rbio);
				704	spin_unlock(&cur->bio_list_lock);
				705	freeit = rbio;
				706	ret = 1;
				707	goto out;
				708	}
				709
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	710
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	711	/*
				712	* we couldn't merge with the running
				713	* rbio, see if we can merge with the
				714	* pending ones. We don't have to
				715	* check for rmw_locked because there
				716	* is no way they are inside finish_rmw
				717	* right now
				718	*/
				719	list_for_each_entry(pending, &cur->plug_list,
				720	plug_list) {
				721	if (rbio_can_merge(pending, rbio)) {
				722	merge_rbio(pending, rbio);
				723	spin_unlock(&cur->bio_list_lock);
				724	freeit = rbio;
				725	ret = 1;
				726	goto out;
				727	}
				728	}
				729
				730	/* no merging, put us on the tail of the plug list,
				731	* our rbio will be started with the currently
				732	* running rbio unlocks
				733	*/
				734	list_add_tail(&rbio->plug_list, &cur->plug_list);
				735	spin_unlock(&cur->bio_list_lock);
				736	ret = 1;
				737	goto out;
				738	}
				739	}
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	740	lockit:
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	741	refcount_inc(&rbio->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	742	list_add(&rbio->hash_list, &h->hash_list);
				743	out:
				744	spin_unlock_irqrestore(&h->lock, flags);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	745	if (cache_drop)
				746	remove_rbio_from_cache(cache_drop);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	747	if (freeit)
				748	__free_raid_bio(freeit);
				749	return ret;
				750	}
				751
				752	/*
				753	* called as rmw or parity rebuild is completed. If the plug list has more
				754	* rbios waiting for this stripe, the next one on the list will be started
				755	*/
				756	static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
				757	{
				758	int bucket;
				759	struct btrfs_stripe_hash *h;
				760	unsigned long flags;
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	761	int keep_cache = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	762
				763	bucket = rbio_bucket(rbio);
				764	h = rbio->fs_info->stripe_hash_table->table + bucket;
				765
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	766	if (list_empty(&rbio->plug_list))
				767	cache_rbio(rbio);
				768
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	769	spin_lock_irqsave(&h->lock, flags);
				770	spin_lock(&rbio->bio_list_lock);
				771
				772	if (!list_empty(&rbio->hash_list)) {
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	773	/*
				774	* if we're still cached and there is no other IO
				775	* to perform, just leave this rbio here for others
				776	* to steal from later
				777	*/
				778	if (list_empty(&rbio->plug_list) &&
				779	test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
				780	keep_cache = 1;
				781	clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				782	BUG_ON(!bio_list_empty(&rbio->bio_list));
				783	goto done;
				784	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	785
				786	list_del_init(&rbio->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	787	refcount_dec(&rbio->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	788
				789	/*
				790	* we use the plug list to hold all the rbios
				791	* waiting for the chance to lock this stripe.
				792	* hand the lock over to one of them.
				793	*/
				794	if (!list_empty(&rbio->plug_list)) {
				795	struct btrfs_raid_bio *next;
				796	struct list_head *head = rbio->plug_list.next;
				797
				798	next = list_entry(head, struct btrfs_raid_bio,
				799	plug_list);
				800
				801	list_del_init(&rbio->plug_list);
				802
				803	list_add(&next->hash_list, &h->hash_list);
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	804	refcount_inc(&next->refs);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	805	spin_unlock(&rbio->bio_list_lock);
				806	spin_unlock_irqrestore(&h->lock, flags);
				807
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	808	if (next->operation == BTRFS_RBIO_READ_REBUILD)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	809	async_read_rebuild(next);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	810	else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				811	steal_rbio(rbio, next);
				812	async_read_rebuild(next);
				813	} else if (next->operation == BTRFS_RBIO_WRITE) {
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	814	steal_rbio(rbio, next);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	815	async_rmw_stripe(next);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	816	} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				817	steal_rbio(rbio, next);
				818	async_scrub_parity(next);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	819	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	820
				821	goto done_nolock;
David Sterba	33a9eca	2015-10-10 18:35:10 +0200	[diff] [blame]	822	/*
				823	* The barrier for this waitqueue_active is not needed,
				824	* we're protected by h->lock and can't miss a wakeup.
				825	*/
				826	} else if (waitqueue_active(&h->wait)) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	827	spin_unlock(&rbio->bio_list_lock);
				828	spin_unlock_irqrestore(&h->lock, flags);
				829	wake_up(&h->wait);
				830	goto done_nolock;
				831	}
				832	}
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	833	done:
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	834	spin_unlock(&rbio->bio_list_lock);
				835	spin_unlock_irqrestore(&h->lock, flags);
				836
				837	done_nolock:
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	838	if (!keep_cache)
				839	remove_rbio_from_cache(rbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	840	}
				841
				842	static void __free_raid_bio(struct btrfs_raid_bio *rbio)
				843	{
				844	int i;
				845
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	846	if (!refcount_dec_and_test(&rbio->refs))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	847	return;
				848
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	849	WARN_ON(!list_empty(&rbio->stripe_cache));
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	850	WARN_ON(!list_empty(&rbio->hash_list));
				851	WARN_ON(!bio_list_empty(&rbio->bio_list));
				852
				853	for (i = 0; i < rbio->nr_pages; i++) {
				854	if (rbio->stripe_pages[i]) {
				855	__free_page(rbio->stripe_pages[i]);
				856	rbio->stripe_pages[i] = NULL;
				857	}
				858	}
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	859
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	860	btrfs_put_bbio(rbio->bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	861	kfree(rbio);
				862	}
				863
				864	static void free_raid_bio(struct btrfs_raid_bio *rbio)
				865	{
				866	unlock_stripe(rbio);
				867	__free_raid_bio(rbio);
				868	}
				869
				870	/*
				871	* this frees the rbio and runs through all the bios in the
				872	* bio_list and calls end_io on them
				873	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	874	static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	875	{
				876	struct bio *cur = bio_list_get(&rbio->bio_list);
				877	struct bio *next;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	878
				879	if (rbio->generic_bio_cnt)
				880	btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
				881
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	882	free_raid_bio(rbio);
				883
				884	while (cur) {
				885	next = cur->bi_next;
				886	cur->bi_next = NULL;
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	887	cur->bi_error = err;
				888	bio_endio(cur);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	889	cur = next;
				890	}
				891	}
				892
				893	/*
				894	* end io function used by finish_rmw. When we finally
				895	* get here, we've written a full stripe
				896	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	897	static void raid_write_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	898	{
				899	struct btrfs_raid_bio *rbio = bio->bi_private;
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	900	int err = bio->bi_error;
Zhao Lei	a6111d1	2016-01-12 17:52:13 +0800	[diff] [blame]	901	int max_errors;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	902
				903	if (err)
				904	fail_bio_stripe(rbio, bio);
				905
				906	bio_put(bio);
				907
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	908	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	909	return;
				910
				911	err = 0;
				912
				913	/* OK, we have read all the stripes we need to. */
Zhao Lei	a6111d1	2016-01-12 17:52:13 +0800	[diff] [blame]	914	max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
				915	0 : rbio->bbio->max_errors;
				916	if (atomic_read(&rbio->error) > max_errors)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	917	err = -EIO;
				918
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	919	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	920	}
				921
				922	/*
				923	* the read/modify/write code wants to use the original bio for
				924	* any pages it included, and then use the rbio for everything
				925	* else. This function decides if a given index (stripe number)
				926	* and page number in that stripe fall inside the original bio
				927	* or the rbio.
				928	*
				929	* if you set bio_list_only, you'll get a NULL back for any ranges
				930	* that are outside the bio_list
				931	*
				932	* This doesn't take any refs on anything, you get a bare page pointer
				933	* and the caller must bump refs as required.
				934	*
				935	* You must call index_rbio_pages once before you can trust
				936	* the answers from this function.
				937	*/
				938	static struct page page_in_rbio(struct btrfs_raid_bio rbio,
				939	int index, int pagenr, int bio_list_only)
				940	{
				941	int chunk_page;
				942	struct page *p = NULL;
				943
				944	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
				945
				946	spin_lock_irq(&rbio->bio_list_lock);
				947	p = rbio->bio_pages[chunk_page];
				948	spin_unlock_irq(&rbio->bio_list_lock);
				949
				950	if (p \|\| bio_list_only)
				951	return p;
				952
				953	return rbio->stripe_pages[chunk_page];
				954	}
				955
				956	/*
				957	* number of pages we need for the entire stripe across all the
				958	* drives
				959	*/
				960	static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
				961	{
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	962	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	963	}
				964
				965	/*
				966	* allocation and initial setup for the btrfs_raid_bio. Not
				967	* this does not allocate any pages for rbio->pages.
				968	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	969	static struct btrfs_raid_bio alloc_rbio(struct btrfs_fs_info fs_info,
				970	struct btrfs_bio *bbio,
				971	u64 stripe_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	972	{
				973	struct btrfs_raid_bio *rbio;
				974	int nr_data = 0;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	975	int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
				976	int num_pages = rbio_nr_pages(stripe_len, real_stripes);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	977	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	978	void *p;
				979
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	980	rbio = kzalloc(sizeof(rbio) + num_pages sizeof(struct page ) 2 +
Zhao Lei	bfca9a6	2014-12-08 19:55:57 +0800	[diff] [blame]	981	DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
				982	sizeof(long), GFP_NOFS);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	983	if (!rbio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	984	return ERR_PTR(-ENOMEM);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	985
				986	bio_list_init(&rbio->bio_list);
				987	INIT_LIST_HEAD(&rbio->plug_list);
				988	spin_lock_init(&rbio->bio_list_lock);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	989	INIT_LIST_HEAD(&rbio->stripe_cache);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	990	INIT_LIST_HEAD(&rbio->hash_list);
				991	rbio->bbio = bbio;
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	992	rbio->fs_info = fs_info;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	993	rbio->stripe_len = stripe_len;
				994	rbio->nr_pages = num_pages;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	995	rbio->real_stripes = real_stripes;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	996	rbio->stripe_npages = stripe_npages;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	997	rbio->faila = -1;
				998	rbio->failb = -1;
Elena Reshetova	dec9557	2017-03-03 10:55:26 +0200	[diff] [blame]	999	refcount_set(&rbio->refs, 1);
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1000	atomic_set(&rbio->error, 0);
				1001	atomic_set(&rbio->stripes_pending, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1002
				1003	/*
				1004	* the stripe_pages and bio_pages array point to the extra
				1005	* memory we allocated past the end of the rbio
				1006	*/
				1007	p = rbio + 1;
				1008	rbio->stripe_pages = p;
				1009	rbio->bio_pages = p + sizeof(struct page ) num_pages;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1010	rbio->dbitmap = p + sizeof(struct page ) num_pages * 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1011
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1012	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
				1013	nr_data = real_stripes - 1;
				1014	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1015	nr_data = real_stripes - 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1016	else
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1017	BUG();
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1018
				1019	rbio->nr_data = nr_data;
				1020	return rbio;
				1021	}
				1022
				1023	/* allocate pages for all the stripes in the bio, including parity */
				1024	static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
				1025	{
				1026	int i;
				1027	struct page *page;
				1028
				1029	for (i = 0; i < rbio->nr_pages; i++) {
				1030	if (rbio->stripe_pages[i])
				1031	continue;
				1032	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				1033	if (!page)
				1034	return -ENOMEM;
				1035	rbio->stripe_pages[i] = page;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1036	}
				1037	return 0;
				1038	}
				1039
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	1040	/* only allocate pages for p/q stripes */
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1041	static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
				1042	{
				1043	int i;
				1044	struct page *page;
				1045
Zhao Lei	b7178a5	2015-03-03 20:38:46 +0800	[diff] [blame]	1046	i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1047
				1048	for (; i < rbio->nr_pages; i++) {
				1049	if (rbio->stripe_pages[i])
				1050	continue;
				1051	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				1052	if (!page)
				1053	return -ENOMEM;
				1054	rbio->stripe_pages[i] = page;
				1055	}
				1056	return 0;
				1057	}
				1058
				1059	/*
				1060	* add a single page from a specific stripe into our list of bios for IO
				1061	* this will try to merge into existing bios if possible, and returns
				1062	* zero if all went well.
				1063	*/
Eric Sandeen	48a3b63	2013-04-25 20:41:01 +0000	[diff] [blame]	1064	static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
				1065	struct bio_list *bio_list,
				1066	struct page *page,
				1067	int stripe_nr,
				1068	unsigned long page_index,
				1069	unsigned long bio_max_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1070	{
				1071	struct bio *last = bio_list->tail;
				1072	u64 last_end = 0;
				1073	int ret;
				1074	struct bio *bio;
				1075	struct btrfs_bio_stripe *stripe;
				1076	u64 disk_start;
				1077
				1078	stripe = &rbio->bbio->stripes[stripe_nr];
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1079	disk_start = stripe->physical + (page_index << PAGE_SHIFT);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1080
				1081	/* if the device is missing, just fail this stripe */
				1082	if (!stripe->dev->bdev)
				1083	return fail_rbio_index(rbio, stripe_nr);
				1084
				1085	/* see if we can add this page onto our existing bio */
				1086	if (last) {
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1087	last_end = (u64)last->bi_iter.bi_sector << 9;
				1088	last_end += last->bi_iter.bi_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1089
				1090	/*
				1091	* we can't merge these if they are from different
				1092	* devices or if they are not contiguous
				1093	*/
				1094	if (last_end == disk_start && stripe->dev->bdev &&
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1095	!last->bi_error &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1096	last->bi_bdev == stripe->dev->bdev) {
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1097	ret = bio_add_page(last, page, PAGE_SIZE, 0);
				1098	if (ret == PAGE_SIZE)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1099	return 0;
				1100	}
				1101	}
				1102
				1103	/* put a new bio on the list */
Chris Mason	9be3395	2013-05-17 18:30:14 -0400	[diff] [blame]	1104	bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1105	if (!bio)
				1106	return -ENOMEM;
				1107
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1108	bio->bi_iter.bi_size = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1109	bio->bi_bdev = stripe->dev->bdev;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1110	bio->bi_iter.bi_sector = disk_start >> 9;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1111
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1112	bio_add_page(bio, page, PAGE_SIZE, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1113	bio_list_add(bio_list, bio);
				1114	return 0;
				1115	}
				1116
				1117	/*
				1118	* while we're doing the read/modify/write cycle, we could
				1119	* have errors in reading pages off the disk. This checks
				1120	* for errors and if we're not able to read the page it'll
				1121	* trigger parity reconstruction. The rmw will be finished
				1122	* after we've reconstructed the failed stripes
				1123	*/
				1124	static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
				1125	{
				1126	if (rbio->faila >= 0 \|\| rbio->failb >= 0) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1127	BUG_ON(rbio->faila == rbio->real_stripes - 1);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1128	__raid56_parity_recover(rbio);
				1129	} else {
				1130	finish_rmw(rbio);
				1131	}
				1132	}
				1133
				1134	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1135	* helper function to walk our bio list and populate the bio_pages array with
				1136	* the result. This seems expensive, but it is faster than constantly
				1137	* searching through the bio list as we setup the IO in finish_rmw or stripe
				1138	* reconstruction.
				1139	*
				1140	* This must be called before you trust the answers from page_in_rbio
				1141	*/
				1142	static void index_rbio_pages(struct btrfs_raid_bio *rbio)
				1143	{
				1144	struct bio *bio;
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1145	struct bio_vec *bvec;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1146	u64 start;
				1147	unsigned long stripe_offset;
				1148	unsigned long page_index;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1149	int i;
				1150
				1151	spin_lock_irq(&rbio->bio_list_lock);
				1152	bio_list_for_each(bio, &rbio->bio_list) {
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1153	start = (u64)bio->bi_iter.bi_sector << 9;
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1154	stripe_offset = start - rbio->bbio->raid_map[0];
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1155	page_index = stripe_offset >> PAGE_SHIFT;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1156
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1157	bio_for_each_segment_all(bvec, bio, i)
				1158	rbio->bio_pages[page_index + i] = bvec->bv_page;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1159	}
				1160	spin_unlock_irq(&rbio->bio_list_lock);
				1161	}
				1162
				1163	/*
				1164	* this is called from one of two situations. We either
				1165	* have a full stripe from the higher layers, or we've read all
				1166	* the missing bits off disk.
				1167	*
				1168	* This will calculate the parity and then send down any
				1169	* changed blocks.
				1170	*/
				1171	static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
				1172	{
				1173	struct btrfs_bio *bbio = rbio->bbio;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1174	void *pointers[rbio->real_stripes];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1175	int nr_data = rbio->nr_data;
				1176	int stripe;
				1177	int pagenr;
				1178	int p_stripe = -1;
				1179	int q_stripe = -1;
				1180	struct bio_list bio_list;
				1181	struct bio *bio;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1182	int ret;
				1183
				1184	bio_list_init(&bio_list);
				1185
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1186	if (rbio->real_stripes - rbio->nr_data == 1) {
				1187	p_stripe = rbio->real_stripes - 1;
				1188	} else if (rbio->real_stripes - rbio->nr_data == 2) {
				1189	p_stripe = rbio->real_stripes - 2;
				1190	q_stripe = rbio->real_stripes - 1;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1191	} else {
				1192	BUG();
				1193	}
				1194
				1195	/* at this point we either have a full stripe,
				1196	* or we've read the full stripe from the drive.
				1197	* recalculate the parity and write the new results.
				1198	*
				1199	* We're not allowed to add any new bios to the
				1200	* bio list here, anyone else that wants to
				1201	* change this stripe needs to do their own rmw.
				1202	*/
				1203	spin_lock_irq(&rbio->bio_list_lock);
				1204	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				1205	spin_unlock_irq(&rbio->bio_list_lock);
				1206
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1207	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1208
				1209	/*
				1210	* now that we've set rmw_locked, run through the
				1211	* bio list one last time and map the page pointers
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1212	*
				1213	* We don't cache full rbios because we're assuming
				1214	* the higher layers are unlikely to use this area of
				1215	* the disk again soon. If they do use it again,
				1216	* hopefully they will send another full bio.
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1217	*/
				1218	index_rbio_pages(rbio);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1219	if (!rbio_is_full(rbio))
				1220	cache_rbio_pages(rbio);
				1221	else
				1222	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1223
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1224	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1225	struct page *p;
				1226	/* first collect one page from each data stripe */
				1227	for (stripe = 0; stripe < nr_data; stripe++) {
				1228	p = page_in_rbio(rbio, stripe, pagenr, 0);
				1229	pointers[stripe] = kmap(p);
				1230	}
				1231
				1232	/* then add the parity stripe */
				1233	p = rbio_pstripe_page(rbio, pagenr);
				1234	SetPageUptodate(p);
				1235	pointers[stripe++] = kmap(p);
				1236
				1237	if (q_stripe != -1) {
				1238
				1239	/*
				1240	* raid6, add the qstripe and call the
				1241	* library function to fill in our p/q
				1242	*/
				1243	p = rbio_qstripe_page(rbio, pagenr);
				1244	SetPageUptodate(p);
				1245	pointers[stripe++] = kmap(p);
				1246
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1247	raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1248	pointers);
				1249	} else {
				1250	/* raid5 */
				1251	memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1252	run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1253	}
				1254
				1255
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1256	for (stripe = 0; stripe < rbio->real_stripes; stripe++)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1257	kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
				1258	}
				1259
				1260	/*
				1261	* time to start writing. Make bios for everything from the
				1262	* higher layers (the bio_list in our rbio) and our p/q. Ignore
				1263	* everything else.
				1264	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1265	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1266	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1267	struct page *page;
				1268	if (stripe < rbio->nr_data) {
				1269	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1270	if (!page)
				1271	continue;
				1272	} else {
				1273	page = rbio_stripe_page(rbio, stripe, pagenr);
				1274	}
				1275
				1276	ret = rbio_add_io_page(rbio, &bio_list,
				1277	page, stripe, pagenr, rbio->stripe_len);
				1278	if (ret)
				1279	goto cleanup;
				1280	}
				1281	}
				1282
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1283	if (likely(!bbio->num_tgtdevs))
				1284	goto write_data;
				1285
				1286	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
				1287	if (!bbio->tgtdev_map[stripe])
				1288	continue;
				1289
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1290	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1291	struct page *page;
				1292	if (stripe < rbio->nr_data) {
				1293	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1294	if (!page)
				1295	continue;
				1296	} else {
				1297	page = rbio_stripe_page(rbio, stripe, pagenr);
				1298	}
				1299
				1300	ret = rbio_add_io_page(rbio, &bio_list, page,
				1301	rbio->bbio->tgtdev_map[stripe],
				1302	pagenr, rbio->stripe_len);
				1303	if (ret)
				1304	goto cleanup;
				1305	}
				1306	}
				1307
				1308	write_data:
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1309	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
				1310	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1311
				1312	while (1) {
				1313	bio = bio_list_pop(&bio_list);
				1314	if (!bio)
				1315	break;
				1316
				1317	bio->bi_private = rbio;
				1318	bio->bi_end_io = raid_write_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	1319	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	1320
				1321	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1322	}
				1323	return;
				1324
				1325	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1326	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1327	}
				1328
				1329	/*
				1330	* helper to find the stripe number for a given bio. Used to figure out which
				1331	* stripe has failed. This expects the bio to correspond to a physical disk,
				1332	* so it looks up based on physical sector numbers.
				1333	*/
				1334	static int find_bio_stripe(struct btrfs_raid_bio *rbio,
				1335	struct bio *bio)
				1336	{
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1337	u64 physical = bio->bi_iter.bi_sector;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1338	u64 stripe_start;
				1339	int i;
				1340	struct btrfs_bio_stripe *stripe;
				1341
				1342	physical <<= 9;
				1343
				1344	for (i = 0; i < rbio->bbio->num_stripes; i++) {
				1345	stripe = &rbio->bbio->stripes[i];
				1346	stripe_start = stripe->physical;
				1347	if (physical >= stripe_start &&
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1348	physical < stripe_start + rbio->stripe_len &&
				1349	bio->bi_bdev == stripe->dev->bdev) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1350	return i;
				1351	}
				1352	}
				1353	return -1;
				1354	}
				1355
				1356	/*
				1357	* helper to find the stripe number for a given
				1358	* bio (before mapping). Used to figure out which stripe has
				1359	* failed. This looks up based on logical block numbers.
				1360	*/
				1361	static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
				1362	struct bio *bio)
				1363	{
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1364	u64 logical = bio->bi_iter.bi_sector;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1365	u64 stripe_start;
				1366	int i;
				1367
				1368	logical <<= 9;
				1369
				1370	for (i = 0; i < rbio->nr_data; i++) {
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1371	stripe_start = rbio->bbio->raid_map[i];
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1372	if (logical >= stripe_start &&
				1373	logical < stripe_start + rbio->stripe_len) {
				1374	return i;
				1375	}
				1376	}
				1377	return -1;
				1378	}
				1379
				1380	/*
				1381	* returns -EIO if we had too many failures
				1382	*/
				1383	static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
				1384	{
				1385	unsigned long flags;
				1386	int ret = 0;
				1387
				1388	spin_lock_irqsave(&rbio->bio_list_lock, flags);
				1389
				1390	/* we already know this stripe is bad, move on */
				1391	if (rbio->faila == failed \|\| rbio->failb == failed)
				1392	goto out;
				1393
				1394	if (rbio->faila == -1) {
				1395	/* first failure on this rbio */
				1396	rbio->faila = failed;
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1397	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1398	} else if (rbio->failb == -1) {
				1399	/* second failure on this rbio */
				1400	rbio->failb = failed;
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1401	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1402	} else {
				1403	ret = -EIO;
				1404	}
				1405	out:
				1406	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
				1407
				1408	return ret;
				1409	}
				1410
				1411	/*
				1412	* helper to fail a stripe based on a physical disk
				1413	* bio.
				1414	*/
				1415	static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
				1416	struct bio *bio)
				1417	{
				1418	int failed = find_bio_stripe(rbio, bio);
				1419
				1420	if (failed < 0)
				1421	return -EIO;
				1422
				1423	return fail_rbio_index(rbio, failed);
				1424	}
				1425
				1426	/*
				1427	* this sets each page in the bio uptodate. It should only be used on private
				1428	* rbio pages, nothing that comes in from the higher layers
				1429	*/
				1430	static void set_bio_pages_uptodate(struct bio *bio)
				1431	{
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1432	struct bio_vec *bvec;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1433	int i;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1434
Christoph Hellwig	80ace3e	2016-11-25 09:07:47 +0100	[diff] [blame]	1435	bio_for_each_segment_all(bvec, bio, i)
				1436	SetPageUptodate(bvec->bv_page);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1437	}
				1438
				1439	/*
				1440	* end io for the read phase of the rmw cycle. All the bios here are physical
				1441	* stripe bios we've read from the disk so we can recalculate the parity of the
				1442	* stripe.
				1443	*
				1444	* This will usually kick off finish_rmw once all the bios are read in, but it
				1445	* may trigger parity reconstruction if we had any errors along the way
				1446	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1447	static void raid_rmw_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1448	{
				1449	struct btrfs_raid_bio *rbio = bio->bi_private;
				1450
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1451	if (bio->bi_error)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1452	fail_bio_stripe(rbio, bio);
				1453	else
				1454	set_bio_pages_uptodate(bio);
				1455
				1456	bio_put(bio);
				1457
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1458	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1459	return;
				1460
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1461	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1462	goto cleanup;
				1463
				1464	/*
				1465	* this will normally call finish_rmw to start our write
				1466	* but if there are any failed stripes we'll reconstruct
				1467	* from parity first
				1468	*/
				1469	validate_rbio_for_rmw(rbio);
				1470	return;
				1471
				1472	cleanup:
				1473
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1474	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1475	}
				1476
				1477	static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
				1478	{
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1479	btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
				1480	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1481	}
				1482
				1483	static void async_read_rebuild(struct btrfs_raid_bio *rbio)
				1484	{
Liu Bo	9e0af23	2014-08-15 23:36:53 +0800	[diff] [blame]	1485	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				1486	read_rebuild_work, NULL, NULL);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1487
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1488	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1489	}
				1490
				1491	/*
				1492	* the stripe must be locked by the caller. It will
				1493	* unlock after all the writes are done
				1494	*/
				1495	static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
				1496	{
				1497	int bios_to_read = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1498	struct bio_list bio_list;
				1499	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1500	int pagenr;
				1501	int stripe;
				1502	struct bio *bio;
				1503
				1504	bio_list_init(&bio_list);
				1505
				1506	ret = alloc_rbio_pages(rbio);
				1507	if (ret)
				1508	goto cleanup;
				1509
				1510	index_rbio_pages(rbio);
				1511
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1512	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1513	/*
				1514	* build a list of bios to read all the missing parts of this
				1515	* stripe
				1516	*/
				1517	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1518	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1519	struct page *page;
				1520	/*
				1521	* we want to find all the pages missing from
				1522	* the rbio and read them from the disk. If
				1523	* page_in_rbio finds a page in the bio list
				1524	* we don't need to read it off the stripe.
				1525	*/
				1526	page = page_in_rbio(rbio, stripe, pagenr, 1);
				1527	if (page)
				1528	continue;
				1529
				1530	page = rbio_stripe_page(rbio, stripe, pagenr);
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1531	/*
				1532	* the bio cache may have handed us an uptodate
				1533	* page. If so, be happy and use it
				1534	*/
				1535	if (PageUptodate(page))
				1536	continue;
				1537
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1538	ret = rbio_add_io_page(rbio, &bio_list, page,
				1539	stripe, pagenr, rbio->stripe_len);
				1540	if (ret)
				1541	goto cleanup;
				1542	}
				1543	}
				1544
				1545	bios_to_read = bio_list_size(&bio_list);
				1546	if (!bios_to_read) {
				1547	/*
				1548	* this can happen if others have merged with
				1549	* us, it means there is nothing left to read.
				1550	* But if there are missing devices it may not be
				1551	* safe to do the full stripe write yet.
				1552	*/
				1553	goto finish;
				1554	}
				1555
				1556	/*
				1557	* the bbio may be freed once we submit the last bio. Make sure
				1558	* not to touch it after that
				1559	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	1560	atomic_set(&rbio->stripes_pending, bios_to_read);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1561	while (1) {
				1562	bio = bio_list_pop(&bio_list);
				1563	if (!bio)
				1564	break;
				1565
				1566	bio->bi_private = rbio;
				1567	bio->bi_end_io = raid_rmw_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	1568	bio_set_op_attrs(bio, REQ_OP_READ, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1569
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1570	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1571
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	1572	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1573	}
				1574	/* the actual write will happen once the reads are done */
				1575	return 0;
				1576
				1577	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1578	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1579	return -EIO;
				1580
				1581	finish:
				1582	validate_rbio_for_rmw(rbio);
				1583	return 0;
				1584	}
				1585
				1586	/*
				1587	* if the upper layers pass in a full stripe, we thank them by only allocating
				1588	* enough pages to hold the parity, and sending it all down quickly.
				1589	*/
				1590	static int full_stripe_write(struct btrfs_raid_bio *rbio)
				1591	{
				1592	int ret;
				1593
				1594	ret = alloc_rbio_parity_pages(rbio);
Miao Xie	3cd846d	2013-07-22 16:36:57 +0800	[diff] [blame]	1595	if (ret) {
				1596	__free_raid_bio(rbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1597	return ret;
Miao Xie	3cd846d	2013-07-22 16:36:57 +0800	[diff] [blame]	1598	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1599
				1600	ret = lock_stripe_add(rbio);
				1601	if (ret == 0)
				1602	finish_rmw(rbio);
				1603	return 0;
				1604	}
				1605
				1606	/*
				1607	* partial stripe writes get handed over to async helpers.
				1608	* We're really hoping to merge a few more writes into this
				1609	* rbio before calculating new parity
				1610	*/
				1611	static int partial_stripe_write(struct btrfs_raid_bio *rbio)
				1612	{
				1613	int ret;
				1614
				1615	ret = lock_stripe_add(rbio);
				1616	if (ret == 0)
				1617	async_rmw_stripe(rbio);
				1618	return 0;
				1619	}
				1620
				1621	/*
				1622	* sometimes while we were reading from the drive to
				1623	* recalculate parity, enough new bios come into create
				1624	* a full stripe. So we do a check here to see if we can
				1625	* go directly to finish_rmw
				1626	*/
				1627	static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
				1628	{
				1629	/* head off into rmw land if we don't have a full stripe */
				1630	if (!rbio_is_full(rbio))
				1631	return partial_stripe_write(rbio);
				1632	return full_stripe_write(rbio);
				1633	}
				1634
				1635	/*
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1636	* We use plugging call backs to collect full stripes.
				1637	* Any time we get a partial stripe write while plugged
				1638	* we collect it into a list. When the unplug comes down,
				1639	* we sort the list by logical block number and merge
				1640	* everything we can into the same rbios
				1641	*/
				1642	struct btrfs_plug_cb {
				1643	struct blk_plug_cb cb;
				1644	struct btrfs_fs_info *info;
				1645	struct list_head rbio_list;
				1646	struct btrfs_work work;
				1647	};
				1648
				1649	/*
				1650	* rbios on the plug list are sorted for easier merging.
				1651	*/
				1652	static int plug_cmp(void priv, struct list_head a, struct list_head *b)
				1653	{
				1654	struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
				1655	plug_list);
				1656	struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
				1657	plug_list);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1658	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
				1659	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1660
				1661	if (a_sector < b_sector)
				1662	return -1;
				1663	if (a_sector > b_sector)
				1664	return 1;
				1665	return 0;
				1666	}
				1667
				1668	static void run_plug(struct btrfs_plug_cb *plug)
				1669	{
				1670	struct btrfs_raid_bio *cur;
				1671	struct btrfs_raid_bio *last = NULL;
				1672
				1673	/*
				1674	* sort our plug list then try to merge
				1675	* everything we can in hopes of creating full
				1676	* stripes.
				1677	*/
				1678	list_sort(NULL, &plug->rbio_list, plug_cmp);
				1679	while (!list_empty(&plug->rbio_list)) {
				1680	cur = list_entry(plug->rbio_list.next,
				1681	struct btrfs_raid_bio, plug_list);
				1682	list_del_init(&cur->plug_list);
				1683
				1684	if (rbio_is_full(cur)) {
				1685	/* we have a full stripe, send it down */
				1686	full_stripe_write(cur);
				1687	continue;
				1688	}
				1689	if (last) {
				1690	if (rbio_can_merge(last, cur)) {
				1691	merge_rbio(last, cur);
				1692	__free_raid_bio(cur);
				1693	continue;
				1694
				1695	}
				1696	__raid56_parity_write(last);
				1697	}
				1698	last = cur;
				1699	}
				1700	if (last) {
				1701	__raid56_parity_write(last);
				1702	}
				1703	kfree(plug);
				1704	}
				1705
				1706	/*
				1707	* if the unplug comes from schedule, we have to push the
				1708	* work off to a helper thread
				1709	*/
				1710	static void unplug_work(struct btrfs_work *work)
				1711	{
				1712	struct btrfs_plug_cb *plug;
				1713	plug = container_of(work, struct btrfs_plug_cb, work);
				1714	run_plug(plug);
				1715	}
				1716
				1717	static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
				1718	{
				1719	struct btrfs_plug_cb *plug;
				1720	plug = container_of(cb, struct btrfs_plug_cb, cb);
				1721
				1722	if (from_schedule) {
Liu Bo	9e0af23	2014-08-15 23:36:53 +0800	[diff] [blame]	1723	btrfs_init_work(&plug->work, btrfs_rmw_helper,
				1724	unplug_work, NULL, NULL);
Qu Wenruo	d05a33a	2014-02-28 10:46:11 +0800	[diff] [blame]	1725	btrfs_queue_work(plug->info->rmw_workers,
				1726	&plug->work);
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1727	return;
				1728	}
				1729	run_plug(plug);
				1730	}
				1731
				1732	/*
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1733	* our main entry point for writes from the rest of the FS.
				1734	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	1735	int raid56_parity_write(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1736	struct btrfs_bio *bbio, u64 stripe_len)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1737	{
				1738	struct btrfs_raid_bio *rbio;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1739	struct btrfs_plug_cb *plug = NULL;
				1740	struct blk_plug_cb *cb;
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1741	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1742
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	1743	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	1744	if (IS_ERR(rbio)) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	1745	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1746	return PTR_ERR(rbio);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	1747	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1748	bio_list_add(&rbio->bio_list, bio);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1749	rbio->bio_list_bytes = bio->bi_iter.bi_size;
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1750	rbio->operation = BTRFS_RBIO_WRITE;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1751
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1752	btrfs_bio_counter_inc_noblocked(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1753	rbio->generic_bio_cnt = 1;
				1754
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1755	/*
				1756	* don't plug on full rbios, just get them out the door
				1757	* as quickly as we can
				1758	*/
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1759	if (rbio_is_full(rbio)) {
				1760	ret = full_stripe_write(rbio);
				1761	if (ret)
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1762	btrfs_bio_counter_dec(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1763	return ret;
				1764	}
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1765
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1766	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1767	if (cb) {
				1768	plug = container_of(cb, struct btrfs_plug_cb, cb);
				1769	if (!plug->info) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1770	plug->info = fs_info;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1771	INIT_LIST_HEAD(&plug->rbio_list);
				1772	}
				1773	list_add_tail(&rbio->plug_list, &plug->rbio_list);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1774	ret = 0;
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1775	} else {
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1776	ret = __raid56_parity_write(rbio);
				1777	if (ret)
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	1778	btrfs_bio_counter_dec(fs_info);
Chris Mason	6ac0f48	2013-01-31 14:42:28 -0500	[diff] [blame]	1779	}
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	1780	return ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1781	}
				1782
				1783	/*
				1784	* all parity reconstruction happens here. We've read in everything
				1785	* we can find from the drives and this does the heavy lifting of
				1786	* sorting the good from the bad.
				1787	*/
				1788	static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
				1789	{
				1790	int pagenr, stripe;
				1791	void **pointers;
				1792	int faila = -1, failb = -1;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1793	struct page *page;
				1794	int err;
				1795	int i;
				1796
David Sterba	31e818f	2015-02-20 18:00:26 +0100	[diff] [blame]	1797	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1798	if (!pointers) {
				1799	err = -ENOMEM;
				1800	goto cleanup_io;
				1801	}
				1802
				1803	faila = rbio->faila;
				1804	failb = rbio->failb;
				1805
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1806	if (rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1807	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1808	spin_lock_irq(&rbio->bio_list_lock);
				1809	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
				1810	spin_unlock_irq(&rbio->bio_list_lock);
				1811	}
				1812
				1813	index_rbio_pages(rbio);
				1814
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1815	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1816	/*
				1817	* Now we just use bitmap to mark the horizontal stripes in
				1818	* which we have data when doing parity scrub.
				1819	*/
				1820	if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
				1821	!test_bit(pagenr, rbio->dbitmap))
				1822	continue;
				1823
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1824	/* setup our array of pointers with pages
				1825	* from each stripe
				1826	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1827	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1828	/*
				1829	* if we're rebuilding a read, we have to use
				1830	* pages from the bio list
				1831	*/
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1832	if ((rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1833	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1834	(stripe == faila \|\| stripe == failb)) {
				1835	page = page_in_rbio(rbio, stripe, pagenr, 0);
				1836	} else {
				1837	page = rbio_stripe_page(rbio, stripe, pagenr);
				1838	}
				1839	pointers[stripe] = kmap(page);
				1840	}
				1841
				1842	/* all raid6 handling here */
Zhao Lei	10f1190	2015-01-20 15:11:43 +0800	[diff] [blame]	1843	if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1844	/*
				1845	* single failure, rebuild from parity raid5
				1846	* style
				1847	*/
				1848	if (failb < 0) {
				1849	if (faila == rbio->nr_data) {
				1850	/*
				1851	* Just the P stripe has failed, without
				1852	* a bad data or Q stripe.
				1853	* TODO, we should redo the xor here.
				1854	*/
				1855	err = -EIO;
				1856	goto cleanup;
				1857	}
				1858	/*
				1859	* a single failure in raid6 is rebuilt
				1860	* in the pstripe code below
				1861	*/
				1862	goto pstripe;
				1863	}
				1864
				1865	/* make sure our ps and qs are in order */
				1866	if (faila > failb) {
				1867	int tmp = failb;
				1868	failb = faila;
				1869	faila = tmp;
				1870	}
				1871
				1872	/* if the q stripe is failed, do a pstripe reconstruction
				1873	* from the xors.
				1874	* If both the q stripe and the P stripe are failed, we're
				1875	* here due to a crc mismatch and we can't give them the
				1876	* data they want
				1877	*/
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1878	if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
				1879	if (rbio->bbio->raid_map[faila] ==
				1880	RAID5_P_STRIPE) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1881	err = -EIO;
				1882	goto cleanup;
				1883	}
				1884	/*
				1885	* otherwise we have one bad data stripe and
				1886	* a good P stripe. raid5!
				1887	*/
				1888	goto pstripe;
				1889	}
				1890
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	1891	if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1892	raid6_datap_recov(rbio->real_stripes,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1893	PAGE_SIZE, faila, pointers);
				1894	} else {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1895	raid6_2data_recov(rbio->real_stripes,
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1896	PAGE_SIZE, faila, failb,
				1897	pointers);
				1898	}
				1899	} else {
				1900	void *p;
				1901
				1902	/* rebuild from P stripe here (raid5 or raid6) */
				1903	BUG_ON(failb != -1);
				1904	pstripe:
				1905	/* Copy parity block into failed block to start with */
				1906	memcpy(pointers[faila],
				1907	pointers[rbio->nr_data],
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1908	PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1909
				1910	/* rearrange the pointer array */
				1911	p = pointers[faila];
				1912	for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
				1913	pointers[stripe] = pointers[stripe + 1];
				1914	pointers[rbio->nr_data - 1] = p;
				1915
				1916	/* xor in the rest */
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1917	run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1918	}
				1919	/* if we're doing this rebuild as part of an rmw, go through
				1920	* and set all of our private rbio pages in the
				1921	* failed stripes as uptodate. This way finish_rmw will
				1922	* know they can be trusted. If this was a read reconstruction,
				1923	* other endio functions will fiddle the uptodate bits
				1924	*/
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1925	if (rbio->operation == BTRFS_RBIO_WRITE) {
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	1926	for (i = 0; i < rbio->stripe_npages; i++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1927	if (faila != -1) {
				1928	page = rbio_stripe_page(rbio, faila, i);
				1929	SetPageUptodate(page);
				1930	}
				1931	if (failb != -1) {
				1932	page = rbio_stripe_page(rbio, failb, i);
				1933	SetPageUptodate(page);
				1934	}
				1935	}
				1936	}
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	1937	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1938	/*
				1939	* if we're rebuilding a read, we have to use
				1940	* pages from the bio list
				1941	*/
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1942	if ((rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				1943	rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1944	(stripe == faila \|\| stripe == failb)) {
				1945	page = page_in_rbio(rbio, stripe, pagenr, 0);
				1946	} else {
				1947	page = rbio_stripe_page(rbio, stripe, pagenr);
				1948	}
				1949	kunmap(page);
				1950	}
				1951	}
				1952
				1953	err = 0;
				1954	cleanup:
				1955	kfree(pointers);
				1956
				1957	cleanup_io:
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	1958	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	1959	if (err == 0)
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	1960	cache_rbio_pages(rbio);
				1961	else
				1962	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				1963
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1964	rbio_orig_end_io(rbio, err);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	1965	} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
Linus Torvalds	2236597	2015-09-05 15:14:43 -0700	[diff] [blame]	1966	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1967	} else if (err == 0) {
				1968	rbio->faila = -1;
				1969	rbio->failb = -1;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	1970
				1971	if (rbio->operation == BTRFS_RBIO_WRITE)
				1972	finish_rmw(rbio);
				1973	else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
				1974	finish_parity_scrub(rbio, 0);
				1975	else
				1976	BUG();
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1977	} else {
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1978	rbio_orig_end_io(rbio, err);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1979	}
				1980	}
				1981
				1982	/*
				1983	* This is called only for stripes we've read from disk to
				1984	* reconstruct the parity.
				1985	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1986	static void raid_recover_end_io(struct bio *bio)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1987	{
				1988	struct btrfs_raid_bio *rbio = bio->bi_private;
				1989
				1990	/*
				1991	* we only read stripe pages off the disk, set them
				1992	* up to date if there were no errors
				1993	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1994	if (bio->bi_error)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	1995	fail_bio_stripe(rbio, bio);
				1996	else
				1997	set_bio_pages_uptodate(bio);
				1998	bio_put(bio);
				1999
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2000	if (!atomic_dec_and_test(&rbio->stripes_pending))
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2001	return;
				2002
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2003	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2004	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2005	else
				2006	__raid_recover_end_io(rbio);
				2007	}
				2008
				2009	/*
				2010	* reads everything we need off the disk to reconstruct
				2011	* the parity. endio handlers trigger final reconstruction
				2012	* when the IO is done.
				2013	*
				2014	* This is used both for reads from the higher layers and for
				2015	* parity construction required to finish a rmw cycle.
				2016	*/
				2017	static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
				2018	{
				2019	int bios_to_read = 0;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2020	struct bio_list bio_list;
				2021	int ret;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2022	int pagenr;
				2023	int stripe;
				2024	struct bio *bio;
				2025
				2026	bio_list_init(&bio_list);
				2027
				2028	ret = alloc_rbio_pages(rbio);
				2029	if (ret)
				2030	goto cleanup;
				2031
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2032	atomic_set(&rbio->error, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2033
				2034	/*
Chris Mason	4ae10b3	2013-01-31 14:42:09 -0500	[diff] [blame]	2035	* read everything that hasn't failed. Thanks to the
				2036	* stripe cache, it is possible that some or all of these
				2037	* pages are going to be uptodate.
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2038	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2039	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Liu Bo	5588383	2014-06-24 15:39:16 +0800	[diff] [blame]	2040	if (rbio->faila == stripe \|\| rbio->failb == stripe) {
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2041	atomic_inc(&rbio->error);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2042	continue;
Liu Bo	5588383	2014-06-24 15:39:16 +0800	[diff] [blame]	2043	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2044
Zhao Lei	915e229	2015-03-03 20:42:48 +0800	[diff] [blame]	2045	for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2046	struct page *p;
				2047
				2048	/*
				2049	* the rmw code may have already read this
				2050	* page in
				2051	*/
				2052	p = rbio_stripe_page(rbio, stripe, pagenr);
				2053	if (PageUptodate(p))
				2054	continue;
				2055
				2056	ret = rbio_add_io_page(rbio, &bio_list,
				2057	rbio_stripe_page(rbio, stripe, pagenr),
				2058	stripe, pagenr, rbio->stripe_len);
				2059	if (ret < 0)
				2060	goto cleanup;
				2061	}
				2062	}
				2063
				2064	bios_to_read = bio_list_size(&bio_list);
				2065	if (!bios_to_read) {
				2066	/*
				2067	* we might have no bios to read just because the pages
				2068	* were up to date, or we might have no bios to read because
				2069	* the devices were gone.
				2070	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2071	if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2072	__raid_recover_end_io(rbio);
				2073	goto out;
				2074	} else {
				2075	goto cleanup;
				2076	}
				2077	}
				2078
				2079	/*
				2080	* the bbio may be freed once we submit the last bio. Make sure
				2081	* not to touch it after that
				2082	*/
Miao Xie	b89e1b0	2014-10-15 11:18:44 +0800	[diff] [blame]	2083	atomic_set(&rbio->stripes_pending, bios_to_read);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2084	while (1) {
				2085	bio = bio_list_pop(&bio_list);
				2086	if (!bio)
				2087	break;
				2088
				2089	bio->bi_private = rbio;
				2090	bio->bi_end_io = raid_recover_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2091	bio_set_op_attrs(bio, REQ_OP_READ, 0);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2092
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2093	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2094
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2095	submit_bio(bio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2096	}
				2097	out:
				2098	return 0;
				2099
				2100	cleanup:
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2101	if (rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
				2102	rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2103	rbio_orig_end_io(rbio, -EIO);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2104	return -EIO;
				2105	}
				2106
				2107	/*
				2108	* the main entry point for reads from the higher layers. This
				2109	* is really only called when the normal read path had a failure,
				2110	* so we assume the bio they send down corresponds to a failed part
				2111	* of the drive.
				2112	*/
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2113	int raid56_parity_recover(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2114	struct btrfs_bio *bbio, u64 stripe_len,
				2115	int mirror_num, int generic_io)
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2116	{
				2117	struct btrfs_raid_bio *rbio;
				2118	int ret;
				2119
Liu Bo	abad60c	2017-03-29 10:54:26 -0700	[diff] [blame]	2120	if (generic_io) {
				2121	ASSERT(bbio->mirror_num == mirror_num);
				2122	btrfs_io_bio(bio)->mirror_num = mirror_num;
				2123	}
				2124
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2125	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	2126	if (IS_ERR(rbio)) {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2127	if (generic_io)
				2128	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2129	return PTR_ERR(rbio);
Miao Xie	af8e2d1	2014-10-23 14:42:50 +0800	[diff] [blame]	2130	}
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2131
Miao Xie	1b94b55	2014-11-06 16:14:21 +0800	[diff] [blame]	2132	rbio->operation = BTRFS_RBIO_READ_REBUILD;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2133	bio_list_add(&rbio->bio_list, bio);
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	2134	rbio->bio_list_bytes = bio->bi_iter.bi_size;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2135
				2136	rbio->faila = find_logical_bio_stripe(rbio, bio);
				2137	if (rbio->faila == -1) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2138	btrfs_warn(fs_info,
Liu Bo	e46a28c	2016-07-29 10:57:55 -0700	[diff] [blame]	2139	"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
				2140	__func__, (u64)bio->bi_iter.bi_sector << 9,
				2141	(u64)bio->bi_iter.bi_size, bbio->map_type);
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2142	if (generic_io)
				2143	btrfs_put_bbio(bbio);
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2144	kfree(rbio);
				2145	return -EIO;
				2146	}
				2147
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2148	if (generic_io) {
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2149	btrfs_bio_counter_inc_noblocked(fs_info);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2150	rbio->generic_bio_cnt = 1;
				2151	} else {
Zhao Lei	6e9606d	2015-01-20 15:11:34 +0800	[diff] [blame]	2152	btrfs_get_bbio(bbio);
Miao Xie	4245215	2014-11-25 16:39:28 +0800	[diff] [blame]	2153	}
				2154
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2155	/*
				2156	* reconstruct from the q stripe if they are
				2157	* asking for mirror 3
				2158	*/
				2159	if (mirror_num == 3)
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2160	rbio->failb = rbio->real_stripes - 2;
David Woodhouse	53b381b	2013-01-29 18:40:14 -0500	[diff] [blame]	2161
				2162	ret = lock_stripe_add(rbio);
				2163
				2164	/*
				2165	* __raid56_parity_recover will end the bio with
				2166	* any errors it hits. We don't want to return
				2167	* its error value up the stack because our caller
				2168	* will end up calling bio_endio with any nonzero
				2169	* return
				2170	*/
				2171	if (ret == 0)
				2172	__raid56_parity_recover(rbio);
				2173	/*
				2174	* our rbio has been added to the list of
				2175	* rbios that will be handled after the
				2176	* currently lock owner is done
				2177	*/
				2178	return 0;
				2179
				2180	}
				2181
				2182	static void rmw_work(struct btrfs_work *work)
				2183	{
				2184	struct btrfs_raid_bio *rbio;
				2185
				2186	rbio = container_of(work, struct btrfs_raid_bio, work);
				2187	raid56_rmw_stripe(rbio);
				2188	}
				2189
				2190	static void read_rebuild_work(struct btrfs_work *work)
				2191	{
				2192	struct btrfs_raid_bio *rbio;
				2193
				2194	rbio = container_of(work, struct btrfs_raid_bio, work);
				2195	__raid56_parity_recover(rbio);
				2196	}
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2197
				2198	/*
				2199	* The following code is used to scrub/replace the parity stripe
				2200	*
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2201	* Caller must have already increased bio_counter for getting @bbio.
				2202	*
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2203	* Note: We need make sure all the pages that add into the scrub/replace
				2204	* raid bio are correct and not be changed during the scrub/replace. That
				2205	* is those pages just hold metadata or file data with checksum.
				2206	*/
				2207
				2208	struct btrfs_raid_bio *
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2209	raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info fs_info, struct bio bio,
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2210	struct btrfs_bio *bbio, u64 stripe_len,
				2211	struct btrfs_device *scrub_dev,
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2212	unsigned long *dbitmap, int stripe_nsectors)
				2213	{
				2214	struct btrfs_raid_bio *rbio;
				2215	int i;
				2216
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2217	rbio = alloc_rbio(fs_info, bbio, stripe_len);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2218	if (IS_ERR(rbio))
				2219	return NULL;
				2220	bio_list_add(&rbio->bio_list, bio);
				2221	/*
				2222	* This is a special bio which is used to hold the completion handler
				2223	* and make the scrub rbio is similar to the other types
				2224	*/
				2225	ASSERT(!bio->bi_iter.bi_size);
				2226	rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
				2227
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2228	for (i = 0; i < rbio->real_stripes; i++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2229	if (bbio->stripes[i].dev == scrub_dev) {
				2230	rbio->scrubp = i;
				2231	break;
				2232	}
				2233	}
				2234
				2235	/* Now we just support the sectorsize equals to page size */
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2236	ASSERT(fs_info->sectorsize == PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2237	ASSERT(rbio->stripe_npages == stripe_nsectors);
				2238	bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
				2239
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2240	/*
				2241	* We have already increased bio_counter when getting bbio, record it
				2242	* so we can free it at rbio_orig_end_io().
				2243	*/
				2244	rbio->generic_bio_cnt = 1;
				2245
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2246	return rbio;
				2247	}
				2248
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2249	/* Used for both parity scrub and missing. */
				2250	void raid56_add_scrub_pages(struct btrfs_raid_bio rbio, struct page page,
				2251	u64 logical)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2252	{
				2253	int stripe_offset;
				2254	int index;
				2255
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2256	ASSERT(logical >= rbio->bbio->raid_map[0]);
				2257	ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2258	rbio->stripe_len * rbio->nr_data);
Zhao Lei	8e5cfb5	2015-01-20 15:11:33 +0800	[diff] [blame]	2259	stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2260	index = stripe_offset >> PAGE_SHIFT;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2261	rbio->bio_pages[index] = page;
				2262	}
				2263
				2264	/*
				2265	* We just scrub the parity that we have correct data on the same horizontal,
				2266	* so we needn't allocate all pages for all the stripes.
				2267	*/
				2268	static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
				2269	{
				2270	int i;
				2271	int bit;
				2272	int index;
				2273	struct page *page;
				2274
				2275	for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2276	for (i = 0; i < rbio->real_stripes; i++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2277	index = i * rbio->stripe_npages + bit;
				2278	if (rbio->stripe_pages[index])
				2279	continue;
				2280
				2281	page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2282	if (!page)
				2283	return -ENOMEM;
				2284	rbio->stripe_pages[index] = page;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2285	}
				2286	}
				2287	return 0;
				2288	}
				2289
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2290	static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
				2291	int need_check)
				2292	{
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2293	struct btrfs_bio *bbio = rbio->bbio;
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2294	void *pointers[rbio->real_stripes];
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2295	DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2296	int nr_data = rbio->nr_data;
				2297	int stripe;
				2298	int pagenr;
				2299	int p_stripe = -1;
				2300	int q_stripe = -1;
				2301	struct page *p_page = NULL;
				2302	struct page *q_page = NULL;
				2303	struct bio_list bio_list;
				2304	struct bio *bio;
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2305	int is_replace = 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2306	int ret;
				2307
				2308	bio_list_init(&bio_list);
				2309
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2310	if (rbio->real_stripes - rbio->nr_data == 1) {
				2311	p_stripe = rbio->real_stripes - 1;
				2312	} else if (rbio->real_stripes - rbio->nr_data == 2) {
				2313	p_stripe = rbio->real_stripes - 2;
				2314	q_stripe = rbio->real_stripes - 1;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2315	} else {
				2316	BUG();
				2317	}
				2318
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2319	if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
				2320	is_replace = 1;
				2321	bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
				2322	}
				2323
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2324	/*
				2325	* Because the higher layers(scrubber) are unlikely to
				2326	* use this area of the disk again soon, so don't cache
				2327	* it.
				2328	*/
				2329	clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
				2330
				2331	if (!need_check)
				2332	goto writeback;
				2333
				2334	p_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2335	if (!p_page)
				2336	goto cleanup;
				2337	SetPageUptodate(p_page);
				2338
				2339	if (q_stripe != -1) {
				2340	q_page = alloc_page(GFP_NOFS \| __GFP_HIGHMEM);
				2341	if (!q_page) {
				2342	__free_page(p_page);
				2343	goto cleanup;
				2344	}
				2345	SetPageUptodate(q_page);
				2346	}
				2347
				2348	atomic_set(&rbio->error, 0);
				2349
				2350	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2351	struct page *p;
				2352	void *parity;
				2353	/* first collect one page from each data stripe */
				2354	for (stripe = 0; stripe < nr_data; stripe++) {
				2355	p = page_in_rbio(rbio, stripe, pagenr, 0);
				2356	pointers[stripe] = kmap(p);
				2357	}
				2358
				2359	/* then add the parity stripe */
				2360	pointers[stripe++] = kmap(p_page);
				2361
				2362	if (q_stripe != -1) {
				2363
				2364	/*
				2365	* raid6, add the qstripe and call the
				2366	* library function to fill in our p/q
				2367	*/
				2368	pointers[stripe++] = kmap(q_page);
				2369
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2370	raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2371	pointers);
				2372	} else {
				2373	/* raid5 */
				2374	memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2375	run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2376	}
				2377
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	2378	/* Check scrubbing parity and repair it */
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2379	p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2380	parity = kmap(p);
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	2381	if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
				2382	memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2383	else
				2384	/* Parity is right, needn't writeback */
				2385	bitmap_clear(rbio->dbitmap, pagenr, 1);
				2386	kunmap(p);
				2387
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2388	for (stripe = 0; stripe < rbio->real_stripes; stripe++)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2389	kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
				2390	}
				2391
				2392	__free_page(p_page);
				2393	if (q_page)
				2394	__free_page(q_page);
				2395
				2396	writeback:
				2397	/*
				2398	* time to start writing. Make bios for everything from the
				2399	* higher layers (the bio_list in our rbio) and our p/q. Ignore
				2400	* everything else.
				2401	*/
				2402	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2403	struct page *page;
				2404
				2405	page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2406	ret = rbio_add_io_page(rbio, &bio_list,
				2407	page, rbio->scrubp, pagenr, rbio->stripe_len);
				2408	if (ret)
				2409	goto cleanup;
				2410	}
				2411
Miao Xie	7603597	2014-11-14 17:45:42 +0800	[diff] [blame]	2412	if (!is_replace)
				2413	goto submit_write;
				2414
				2415	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
				2416	struct page *page;
				2417
				2418	page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
				2419	ret = rbio_add_io_page(rbio, &bio_list, page,
				2420	bbio->tgtdev_map[rbio->scrubp],
				2421	pagenr, rbio->stripe_len);
				2422	if (ret)
				2423	goto cleanup;
				2424	}
				2425
				2426	submit_write:
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2427	nr_data = bio_list_size(&bio_list);
				2428	if (!nr_data) {
				2429	/* Every parity is right */
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2430	rbio_orig_end_io(rbio, 0);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2431	return;
				2432	}
				2433
				2434	atomic_set(&rbio->stripes_pending, nr_data);
				2435
				2436	while (1) {
				2437	bio = bio_list_pop(&bio_list);
				2438	if (!bio)
				2439	break;
				2440
				2441	bio->bi_private = rbio;
Zhao Lei	a6111d1	2016-01-12 17:52:13 +0800	[diff] [blame]	2442	bio->bi_end_io = raid_write_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2443	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2444
				2445	submit_bio(bio);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2446	}
				2447	return;
				2448
				2449	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2450	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2451	}
				2452
				2453	static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
				2454	{
				2455	if (stripe >= 0 && stripe < rbio->nr_data)
				2456	return 1;
				2457	return 0;
				2458	}
				2459
				2460	/*
				2461	* While we're doing the parity check and repair, we could have errors
				2462	* in reading pages off the disk. This checks for errors and if we're
				2463	* not able to read the page it'll trigger parity reconstruction. The
				2464	* parity scrub will be finished after we've reconstructed the failed
				2465	* stripes
				2466	*/
				2467	static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
				2468	{
				2469	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
				2470	goto cleanup;
				2471
				2472	if (rbio->faila >= 0 \|\| rbio->failb >= 0) {
				2473	int dfail = 0, failp = -1;
				2474
				2475	if (is_data_stripe(rbio, rbio->faila))
				2476	dfail++;
				2477	else if (is_parity_stripe(rbio->faila))
				2478	failp = rbio->faila;
				2479
				2480	if (is_data_stripe(rbio, rbio->failb))
				2481	dfail++;
				2482	else if (is_parity_stripe(rbio->failb))
				2483	failp = rbio->failb;
				2484
				2485	/*
				2486	* Because we can not use a scrubbing parity to repair
				2487	* the data, so the capability of the repair is declined.
				2488	* (In the case of RAID5, we can not repair anything)
				2489	*/
				2490	if (dfail > rbio->bbio->max_errors - 1)
				2491	goto cleanup;
				2492
				2493	/*
				2494	* If all data is good, only parity is correctly, just
				2495	* repair the parity.
				2496	*/
				2497	if (dfail == 0) {
				2498	finish_parity_scrub(rbio, 0);
				2499	return;
				2500	}
				2501
				2502	/*
				2503	* Here means we got one corrupted data stripe and one
				2504	* corrupted parity on RAID6, if the corrupted parity
Nicholas D Steeves	0132761	2016-05-19 21:18:45 -0400	[diff] [blame]	2505	* is scrubbing parity, luckily, use the other one to repair
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2506	* the data, or we can not repair the data stripe.
				2507	*/
				2508	if (failp != rbio->scrubp)
				2509	goto cleanup;
				2510
				2511	__raid_recover_end_io(rbio);
				2512	} else {
				2513	finish_parity_scrub(rbio, 1);
				2514	}
				2515	return;
				2516
				2517	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2518	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2519	}
				2520
				2521	/*
				2522	* end io for the read phase of the rmw cycle. All the bios here are physical
				2523	* stripe bios we've read from the disk so we can recalculate the parity of the
				2524	* stripe.
				2525	*
				2526	* This will usually kick off finish_rmw once all the bios are read in, but it
				2527	* may trigger parity reconstruction if we had any errors along the way
				2528	*/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2529	static void raid56_parity_scrub_end_io(struct bio *bio)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2530	{
				2531	struct btrfs_raid_bio *rbio = bio->bi_private;
				2532
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2533	if (bio->bi_error)
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2534	fail_bio_stripe(rbio, bio);
				2535	else
				2536	set_bio_pages_uptodate(bio);
				2537
				2538	bio_put(bio);
				2539
				2540	if (!atomic_dec_and_test(&rbio->stripes_pending))
				2541	return;
				2542
				2543	/*
				2544	* this will normally call finish_rmw to start our write
				2545	* but if there are any failed stripes we'll reconstruct
				2546	* from parity first
				2547	*/
				2548	validate_rbio_for_parity_scrub(rbio);
				2549	}
				2550
				2551	static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
				2552	{
				2553	int bios_to_read = 0;
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2554	struct bio_list bio_list;
				2555	int ret;
				2556	int pagenr;
				2557	int stripe;
				2558	struct bio *bio;
				2559
				2560	ret = alloc_rbio_essential_pages(rbio);
				2561	if (ret)
				2562	goto cleanup;
				2563
				2564	bio_list_init(&bio_list);
				2565
				2566	atomic_set(&rbio->error, 0);
				2567	/*
				2568	* build a list of bios to read all the missing parts of this
				2569	* stripe
				2570	*/
Miao Xie	2c8cdd6	2014-11-14 16:06:25 +0800	[diff] [blame]	2571	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2572	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
				2573	struct page *page;
				2574	/*
				2575	* we want to find all the pages missing from
				2576	* the rbio and read them from the disk. If
				2577	* page_in_rbio finds a page in the bio list
				2578	* we don't need to read it off the stripe.
				2579	*/
				2580	page = page_in_rbio(rbio, stripe, pagenr, 1);
				2581	if (page)
				2582	continue;
				2583
				2584	page = rbio_stripe_page(rbio, stripe, pagenr);
				2585	/*
				2586	* the bio cache may have handed us an uptodate
				2587	* page. If so, be happy and use it
				2588	*/
				2589	if (PageUptodate(page))
				2590	continue;
				2591
				2592	ret = rbio_add_io_page(rbio, &bio_list, page,
				2593	stripe, pagenr, rbio->stripe_len);
				2594	if (ret)
				2595	goto cleanup;
				2596	}
				2597	}
				2598
				2599	bios_to_read = bio_list_size(&bio_list);
				2600	if (!bios_to_read) {
				2601	/*
				2602	* this can happen if others have merged with
				2603	* us, it means there is nothing left to read.
				2604	* But if there are missing devices it may not be
				2605	* safe to do the full stripe write yet.
				2606	*/
				2607	goto finish;
				2608	}
				2609
				2610	/*
				2611	* the bbio may be freed once we submit the last bio. Make sure
				2612	* not to touch it after that
				2613	*/
				2614	atomic_set(&rbio->stripes_pending, bios_to_read);
				2615	while (1) {
				2616	bio = bio_list_pop(&bio_list);
				2617	if (!bio)
				2618	break;
				2619
				2620	bio->bi_private = rbio;
				2621	bio->bi_end_io = raid56_parity_scrub_end_io;
Mike Christie	37226b2	2016-06-05 14:31:52 -0500	[diff] [blame]	2622	bio_set_op_attrs(bio, REQ_OP_READ, 0);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2623
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2624	btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2625
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	2626	submit_bio(bio);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2627	}
				2628	/* the actual write will happen once the reads are done */
				2629	return;
				2630
				2631	cleanup:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	2632	rbio_orig_end_io(rbio, -EIO);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2633	return;
				2634
				2635	finish:
				2636	validate_rbio_for_parity_scrub(rbio);
				2637	}
				2638
				2639	static void scrub_parity_work(struct btrfs_work *work)
				2640	{
				2641	struct btrfs_raid_bio *rbio;
				2642
				2643	rbio = container_of(work, struct btrfs_raid_bio, work);
				2644	raid56_parity_scrub_stripe(rbio);
				2645	}
				2646
				2647	static void async_scrub_parity(struct btrfs_raid_bio *rbio)
				2648	{
				2649	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				2650	scrub_parity_work, NULL, NULL);
				2651
Jeff Mahoney	0b246af	2016-06-22 18:54:23 -0400	[diff] [blame]	2652	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
Miao Xie	5a6ac9e	2014-11-06 17:20:58 +0800	[diff] [blame]	2653	}
				2654
				2655	void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
				2656	{
				2657	if (!lock_stripe_add(rbio))
				2658	async_scrub_parity(rbio);
				2659	}
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2660
				2661	/* The following code is used for dev replace of a missing RAID 5/6 device. */
				2662
				2663	struct btrfs_raid_bio *
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2664	raid56_alloc_missing_rbio(struct btrfs_fs_info fs_info, struct bio bio,
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2665	struct btrfs_bio *bbio, u64 length)
				2666	{
				2667	struct btrfs_raid_bio *rbio;
				2668
Jeff Mahoney	2ff7e61	2016-06-22 18:54:24 -0400	[diff] [blame]	2669	rbio = alloc_rbio(fs_info, bbio, length);
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2670	if (IS_ERR(rbio))
				2671	return NULL;
				2672
				2673	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
				2674	bio_list_add(&rbio->bio_list, bio);
				2675	/*
				2676	* This is a special bio which is used to hold the completion handler
				2677	* and make the scrub rbio is similar to the other types
				2678	*/
				2679	ASSERT(!bio->bi_iter.bi_size);
				2680
				2681	rbio->faila = find_logical_bio_stripe(rbio, bio);
				2682	if (rbio->faila == -1) {
				2683	BUG();
				2684	kfree(rbio);
				2685	return NULL;
				2686	}
				2687
Qu Wenruo	ae6529c	2017-03-29 09:33:21 +0800	[diff] [blame]	2688	/*
				2689	* When we get bbio, we have already increased bio_counter, record it
				2690	* so we can free it at rbio_orig_end_io()
				2691	*/
				2692	rbio->generic_bio_cnt = 1;
				2693
Omar Sandoval	b4ee178	2015-06-19 11:52:50 -0700	[diff] [blame]	2694	return rbio;
				2695	}
				2696
				2697	static void missing_raid56_work(struct btrfs_work *work)
				2698	{
				2699	struct btrfs_raid_bio *rbio;
				2700
				2701	rbio = container_of(work, struct btrfs_raid_bio, work);
				2702	__raid56_parity_recover(rbio);
				2703	}
				2704
				2705	static void async_missing_raid56(struct btrfs_raid_bio *rbio)
				2706	{
				2707	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
				2708	missing_raid56_work, NULL, NULL);
				2709
				2710	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
				2711	}
				2712
				2713	void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
				2714	{
				2715	if (!lock_stripe_add(rbio))
				2716	async_missing_raid56(rbio);
				2717	}