Blame - block/bio.c - kernel/msm-5.4

blob: 036435995c556063fdc990ef5fa59dc38245b1f9 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Jens Axboe	0fe2347	2006-09-04 15:41:16 +0200	[diff] [blame]	2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
Kent Overstreet	a27bb33	2013-05-07 16:19:08 -0700	[diff] [blame]	22	#include <linux/uio.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	23	#include <linux/iocontext.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
Paul Gortmaker	630d9c4	2011-11-16 23:57:37 -0500	[diff] [blame]	27	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	30	#include <linux/cgroup.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31
Li Zefan	5578213	2009-06-09 13:43:05 +0800	[diff] [blame]	32	#include <trace/events/block.h>
Ingo Molnar	0bfc245	2008-11-26 11:59:56 +0100	[diff] [blame]	33
/*
 * Number of bio_vecs treated as living inside a bio that comes from a
 * bio_set. Requests for up to this many vecs use the bio's own inline
 * array, so the bio and its vecs cost one mempool allocation, not two.
 */
#define BIO_INLINE_VECS		4
				39
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	40	/*
				41	* if you change this list, also change bvec_alloc or things will
				42	* break badly! cannot be bigger than what you can fit into an
				43	* unsigned short
				44	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	46	static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	47	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				48	};
				49	#undef BV
				50
				51	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	* fs_bio_set is the bio_set containing bio and iovec memory pools used by
				53	* IO code that does not need private memory pools.
				54	*/
Martin K. Petersen	51d654e	2008-06-17 18:59:56 +0200	[diff] [blame]	55	struct bio_set *fs_bio_set;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	56	EXPORT_SYMBOL(fs_bio_set);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	57
/*
 * Slab pool management: one entry per kmem cache handed out by
 * bio_find_or_create_slab() and released through bio_put_slab().
 */
struct bio_slab {
	struct kmem_cache *slab;	/* backing cache; NULL marks a free slot */
	unsigned int slab_ref;		/* users currently holding @slab */
	unsigned int slab_size;		/* object size @slab was created with */
	char name[8];			/* cache name, formatted as "bio-<slot>" */
};

/* Held by bio_find_or_create_slab() and bio_put_slab() around the table. */
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr;	/* slots handed out so far */
static unsigned int bio_slab_max;	/* slots the bio_slabs array can hold */
				70
				71	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				72	{
				73	unsigned int sz = sizeof(struct bio) + extra_size;
				74	struct kmem_cache *slab = NULL;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	75	struct bio_slab bslab, new_bio_slabs;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	76	unsigned int new_bio_slab_max;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	77	unsigned int i, entry = -1;
				78
				79	mutex_lock(&bio_slab_lock);
				80
				81	i = 0;
				82	while (i < bio_slab_nr) {
Thiago Farina	f06f135	2010-01-19 14:07:09 +0100	[diff] [blame]	83	bslab = &bio_slabs[i];
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	84
				85	if (!bslab->slab && entry == -1)
				86	entry = i;
				87	else if (bslab->slab_size == sz) {
				88	slab = bslab->slab;
				89	bslab->slab_ref++;
				90	break;
				91	}
				92	i++;
				93	}
				94
				95	if (slab)
				96	goto out_unlock;
				97
				98	if (bio_slab_nr == bio_slab_max && entry == -1) {
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	99	new_bio_slab_max = bio_slab_max << 1;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	100	new_bio_slabs = krealloc(bio_slabs,
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	101	new_bio_slab_max * sizeof(struct bio_slab),
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	102	GFP_KERNEL);
				103	if (!new_bio_slabs)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	104	goto out_unlock;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	105	bio_slab_max = new_bio_slab_max;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	106	bio_slabs = new_bio_slabs;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	107	}
				108	if (entry == -1)
				109	entry = bio_slab_nr++;
				110
				111	bslab = &bio_slabs[entry];
				112
				113	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
Mikulas Patocka	6a24148	2014-03-28 15:51:55 -0400	[diff] [blame]	114	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				115	SLAB_HWCACHE_ALIGN, NULL);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	116	if (!slab)
				117	goto out_unlock;
				118
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	119	bslab->slab = slab;
				120	bslab->slab_ref = 1;
				121	bslab->slab_size = sz;
				122	out_unlock:
				123	mutex_unlock(&bio_slab_lock);
				124	return slab;
				125	}
				126
				127	static void bio_put_slab(struct bio_set *bs)
				128	{
				129	struct bio_slab *bslab = NULL;
				130	unsigned int i;
				131
				132	mutex_lock(&bio_slab_lock);
				133
				134	for (i = 0; i < bio_slab_nr; i++) {
				135	if (bs->bio_slab == bio_slabs[i].slab) {
				136	bslab = &bio_slabs[i];
				137	break;
				138	}
				139	}
				140
				141	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				142	goto out;
				143
				144	WARN_ON(!bslab->slab_ref);
				145
				146	if (--bslab->slab_ref)
				147	goto out;
				148
				149	kmem_cache_destroy(bslab->slab);
				150	bslab->slab = NULL;
				151
				152	out:
				153	mutex_unlock(&bio_slab_lock);
				154	}
				155
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	156	unsigned int bvec_nr_vecs(unsigned short idx)
				157	{
				158	return bvec_slabs[idx].nr_vecs;
				159	}
				160
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	161	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	162	{
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	163	if (!idx)
				164	return;
				165	idx--;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	166
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	167	BIO_BUG_ON(idx >= BVEC_POOL_NR);
				168
				169	if (idx == BVEC_POOL_MAX) {
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	170	mempool_free(bv, pool);
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	171	} else {
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	172	struct biovec_slab *bvs = bvec_slabs + idx;
				173
				174	kmem_cache_free(bvs->slab, bv);
				175	}
				176	}
				177
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	178	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				179	mempool_t *pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	180	{
				181	struct bio_vec *bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	182
				183	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	184	* see comment near bvec_array define!
				185	*/
				186	switch (nr) {
				187	case 1:
				188	*idx = 0;
				189	break;
				190	case 2 ... 4:
				191	*idx = 1;
				192	break;
				193	case 5 ... 16:
				194	*idx = 2;
				195	break;
				196	case 17 ... 64:
				197	*idx = 3;
				198	break;
				199	case 65 ... 128:
				200	*idx = 4;
				201	break;
				202	case 129 ... BIO_MAX_PAGES:
				203	*idx = 5;
				204	break;
				205	default:
				206	return NULL;
				207	}
				208
				209	/*
				210	* idx now points to the pool we want to allocate from. only the
				211	* 1-vec entry pool is mempool backed.
				212	*/
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	213	if (*idx == BVEC_POOL_MAX) {
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	214	fallback:
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	215	bvl = mempool_alloc(pool, gfp_mask);
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	216	} else {
				217	struct biovec_slab bvs = bvec_slabs + idx;
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	218	gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM \| __GFP_IO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	219
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	220	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	221	* Make this allocation restricted and don't dump info on
				222	* allocation failures, since we'll fallback to the mempool
				223	* in case of failure.
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	224	*/
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	225	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				226
				227	/*
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	228	* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	229	* is set, retry with the 1-entry mempool
				230	*/
				231	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	232	if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	233	*idx = BVEC_POOL_MAX;
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	234	goto fallback;
				235	}
				236	}
				237
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	238	(*idx)++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	239	return bvl;
				240	}
				241
/*
 * Release what hangs off @bio without freeing the bio itself: its task
 * association and, when present, its integrity payload.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	249
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	250	static void bio_free(struct bio *bio)
				251	{
				252	struct bio_set *bs = bio->bi_pool;
				253	void *p;
				254
				255	__bio_free(bio);
				256
				257	if (bs) {
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	258	bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	259
				260	/*
				261	* If we have front padding, adjust the bio pointer before freeing
				262	*/
				263	p = bio;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	264	p -= bs->front_pad;
				265
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	266	mempool_free(p, bs->bio_pool);
				267	} else {
				268	/* Bio was allocated by bio_kmalloc() */
				269	kfree(bio);
				270	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	271	}
				272
Ming Lei	3a83f46	2016-11-22 08:57:21 -0700	[diff] [blame]	273	void bio_init(struct bio bio, struct bio_vec table,
				274	unsigned short max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	{
Jens Axboe	2b94de5	2007-07-18 13:14:03 +0200	[diff] [blame]	276	memset(bio, 0, sizeof(*bio));
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	277	atomic_set(&bio->__bi_remaining, 1);
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	278	atomic_set(&bio->__bi_cnt, 1);
Ming Lei	3a83f46	2016-11-22 08:57:21 -0700	[diff] [blame]	279
				280	bio->bi_io_vec = table;
				281	bio->bi_max_vecs = max_vecs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	282	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	283	EXPORT_SYMBOL(bio_init);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	284
				285	/**
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	286	* bio_reset - reinitialize a bio
				287	* @bio: bio to reset
				288	*
				289	* Description:
				290	* After calling bio_reset(), @bio will be in the same state as a freshly
				291	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				292	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				293	* comment in struct bio.
				294	*/
				295	void bio_reset(struct bio *bio)
				296	{
				297	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				298
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	299	__bio_free(bio);
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	300
				301	memset(bio, 0, BIO_RESET_BYTES);
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	302	bio->bi_flags = flags;
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	303	atomic_set(&bio->__bi_remaining, 1);
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	304	}
				305	EXPORT_SYMBOL(bio_reset);
				306
Christoph Hellwig	38f8baa	2016-03-11 17:34:51 +0100	[diff] [blame]	307	static struct bio __bio_chain_endio(struct bio bio)
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	308	{
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	309	struct bio *parent = bio->bi_private;
				310
Christoph Hellwig	af3e3a5	2016-03-11 17:34:50 +0100	[diff] [blame]	311	if (!parent->bi_error)
				312	parent->bi_error = bio->bi_error;
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	313	bio_put(bio);
Christoph Hellwig	38f8baa	2016-03-11 17:34:51 +0100	[diff] [blame]	314	return parent;
				315	}
				316
				317	static void bio_chain_endio(struct bio *bio)
				318	{
				319	bio_endio(__bio_chain_endio(bio));
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	320	}
				321
				322	/**
				323	* bio_chain - chain bio completions
Randy Dunlap	1051a90	2014-04-20 16:03:31 -0700	[diff] [blame]	324	* @bio: the target bio
				325	* @parent: the @bio's parent bio
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	326	*
				327	* The caller won't have a bi_end_io called when @bio completes - instead,
				328	* @parent's bi_end_io won't be called until both @parent and @bio have
				329	* completed; the chained bio will also be freed when it completes.
				330	*
				331	* The caller must not set bi_private or bi_end_io in @bio.
				332	*/
				333	void bio_chain(struct bio bio, struct bio parent)
				334	{
				335	BUG_ON(bio->bi_private \|\| bio->bi_end_io);
				336
				337	bio->bi_private = parent;
				338	bio->bi_end_io = bio_chain_endio;
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	339	bio_inc_remaining(parent);
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	340	}
				341	EXPORT_SYMBOL(bio_chain);
				342
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	343	static void bio_alloc_rescue(struct work_struct *work)
				344	{
				345	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				346	struct bio *bio;
				347
				348	while (1) {
				349	spin_lock(&bs->rescue_lock);
				350	bio = bio_list_pop(&bs->rescue_list);
				351	spin_unlock(&bs->rescue_lock);
				352
				353	if (!bio)
				354	break;
				355
				356	generic_make_request(bio);
				357	}
				358	}
				359
				360	static void punt_bios_to_rescuer(struct bio_set *bs)
				361	{
				362	struct bio_list punt, nopunt;
				363	struct bio *bio;
				364
				365	/*
				366	* In order to guarantee forward progress we must punt only bios that
				367	* were allocated from this bio_set; otherwise, if there was a bio on
				368	* there for a stacking driver higher up in the stack, processing it
				369	* could require allocating bios from this bio_set, and doing that from
				370	* our own rescuer would be bad.
				371	*
				372	* Since bio lists are singly linked, pop them all instead of trying to
				373	* remove from the middle of the list:
				374	*/
				375
				376	bio_list_init(&punt);
				377	bio_list_init(&nopunt);
				378
NeilBrown	f5fe1b5	2017-03-10 17:00:47 +1100	[diff] [blame]	379	while ((bio = bio_list_pop(&current->bio_list[0])))
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	380	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
NeilBrown	f5fe1b5	2017-03-10 17:00:47 +1100	[diff] [blame]	381	current->bio_list[0] = nopunt;
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	382
NeilBrown	f5fe1b5	2017-03-10 17:00:47 +1100	[diff] [blame]	383	bio_list_init(&nopunt);
				384	while ((bio = bio_list_pop(&current->bio_list[1])))
				385	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				386	current->bio_list[1] = nopunt;
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	387
				388	spin_lock(&bs->rescue_lock);
				389	bio_list_merge(&bs->rescue_list, &punt);
				390	spin_unlock(&bs->rescue_lock);
				391
				392	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				393	}
				394
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	395	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	396	* bio_alloc_bioset - allocate a bio for I/O
				397	* @gfp_mask: the GFP_ mask given to the slab allocator
				398	* @nr_iovecs: number of iovecs to pre-allocate
Jaak Ristioja	db18efa	2010-01-15 12:05:07 +0200	[diff] [blame]	399	* @bs: the bio_set to allocate from.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	400	*
				401	* Description:
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	402	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				403	* backed by the @bs's mempool.
				404	*
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	405	* When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
				406	* always be able to allocate a bio. This is due to the mempool guarantees.
				407	* To make this work, callers must never allocate more than 1 bio at a time
				408	* from this pool. Callers that need to allocate more than 1 bio must always
				409	* submit the previously allocated bio for IO before attempting to allocate
				410	* a new one. Failure to do so can cause deadlocks under memory pressure.
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	411	*
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	412	* Note that when running under generic_make_request() (i.e. any block
				413	* driver), bios are not submitted until after you return - see the code in
				414	* generic_make_request() that converts recursion into iteration, to prevent
				415	* stack overflows.
				416	*
				417	* This would normally mean allocating multiple bios under
				418	* generic_make_request() would be susceptible to deadlocks, but we have
				419	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				420	* thread.
				421	*
				422	* However, we do not guarantee forward progress for allocations from other
				423	* mempools. Doing multiple allocations from the same mempool under
				424	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				425	* for per bio allocations.
				426	*
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	427	* RETURNS:
				428	* Pointer to new bio on success, NULL on failure.
				429	*/
Al Viro	dd0fc66	2005-10-07 07:46:04 +0100	[diff] [blame]	430	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	431	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	432	gfp_t saved_gfp = gfp_mask;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	433	unsigned front_pad;
				434	unsigned inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	435	struct bio_vec *bvl = NULL;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	436	struct bio *bio;
				437	void *p;
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	438
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	439	if (!bs) {
				440	if (nr_iovecs > UIO_MAXIOV)
				441	return NULL;
				442
				443	p = kmalloc(sizeof(struct bio) +
				444	nr_iovecs * sizeof(struct bio_vec),
				445	gfp_mask);
				446	front_pad = 0;
				447	inline_vecs = nr_iovecs;
				448	} else {
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	449	/* should not use nobvec bioset for nr_iovecs > 0 */
				450	if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
				451	return NULL;
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	452	/*
				453	* generic_make_request() converts recursion to iteration; this
				454	* means if we're running beneath it, any bios we allocate and
				455	* submit will not be submitted (and thus freed) until after we
				456	* return.
				457	*
				458	* This exposes us to a potential deadlock if we allocate
				459	* multiple bios from the same bio_set() while running
				460	* underneath generic_make_request(). If we were to allocate
				461	* multiple bios (say a stacking block driver that was splitting
				462	* bios), we would deadlock if we exhausted the mempool's
				463	* reserve.
				464	*
				465	* We solve this, and guarantee forward progress, with a rescuer
				466	* workqueue per bio_set. If we go to allocate and there are
				467	* bios on current->bio_list, we first try the allocation
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	468	* without __GFP_DIRECT_RECLAIM; if that fails, we punt those
				469	* bios we would be blocking to the rescuer workqueue before
				470	* we retry with the original gfp_flags.
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	471	*/
				472
NeilBrown	f5fe1b5	2017-03-10 17:00:47 +1100	[diff] [blame]	473	if (current->bio_list &&
				474	(!bio_list_empty(&current->bio_list[0]) \|\|
				475	!bio_list_empty(&current->bio_list[1])))
Mel Gorman	d0164ad	2015-11-06 16:28:21 -0800	[diff] [blame]	476	gfp_mask &= ~__GFP_DIRECT_RECLAIM;
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	477
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	478	p = mempool_alloc(bs->bio_pool, gfp_mask);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	479	if (!p && gfp_mask != saved_gfp) {
				480	punt_bios_to_rescuer(bs);
				481	gfp_mask = saved_gfp;
				482	p = mempool_alloc(bs->bio_pool, gfp_mask);
				483	}
				484
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	485	front_pad = bs->front_pad;
				486	inline_vecs = BIO_INLINE_VECS;
				487	}
				488
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	489	if (unlikely(!p))
				490	return NULL;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	491
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	492	bio = p + front_pad;
Ming Lei	3a83f46	2016-11-22 08:57:21 -0700	[diff] [blame]	493	bio_init(bio, NULL, 0);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	494
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	495	if (nr_iovecs > inline_vecs) {
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	496	unsigned long idx = 0;
				497
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	498	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	499	if (!bvl && gfp_mask != saved_gfp) {
				500	punt_bios_to_rescuer(bs);
				501	gfp_mask = saved_gfp;
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	502	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	503	}
				504
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	505	if (unlikely(!bvl))
				506	goto err_free;
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	507
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	508	bio->bi_flags \|= idx << BVEC_POOL_OFFSET;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	509	} else if (nr_iovecs) {
				510	bvl = bio->bi_inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	511	}
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	512
				513	bio->bi_pool = bs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	514	bio->bi_max_vecs = nr_iovecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	515	bio->bi_io_vec = bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	516	return bio;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	517
				518	err_free:
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	519	mempool_free(p, bs->bio_pool);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	520	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	521	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	522	EXPORT_SYMBOL(bio_alloc_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	523
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	void zero_fill_bio(struct bio *bio)
				525	{
				526	unsigned long flags;
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	527	struct bio_vec bv;
				528	struct bvec_iter iter;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	529
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	530	bio_for_each_segment(bv, bio, iter) {
				531	char *data = bvec_kmap_irq(&bv, &flags);
				532	memset(data, 0, bv.bv_len);
				533	flush_dcache_page(bv.bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	534	bvec_kunmap_irq(data, &flags);
				535	}
				536	}
				537	EXPORT_SYMBOL(zero_fill_bio);
				538
				539	/**
				540	* bio_put - release a reference to a bio
				541	* @bio: bio to release reference to
				542	*
				543	* Description:
				544	* Put a reference to a &struct bio, either one you have gotten with
Alberto Bertogli	ad0bf11	2009-11-02 11:39:22 +0100	[diff] [blame]	545	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	546	**/
				547	void bio_put(struct bio *bio)
				548	{
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	549	if (!bio_flagged(bio, BIO_REFFED))
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	550	bio_free(bio);
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	551	else {
				552	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
				553
				554	/*
				555	* last put frees it
				556	*/
				557	if (atomic_dec_and_test(&bio->__bi_cnt))
				558	bio_free(bio);
				559	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	561	EXPORT_SYMBOL(bio_put);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	562
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	563	inline int bio_phys_segments(struct request_queue q, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	564	{
				565	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				566	blk_recount_segments(q, bio);
				567
				568	return bio->bi_phys_segments;
				569	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	570	EXPORT_SYMBOL(bio_phys_segments);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	571
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	572	/**
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	573	* __bio_clone_fast - clone a bio that shares the original bio's biovec
				574	* @bio: destination bio
				575	* @bio_src: bio to clone
				576	*
				577	* Clone a &bio. Caller will own the returned bio, but not
				578	* the actual data it points to. Reference count of returned
				579	* bio will be one.
				580	*
				581	* Caller must ensure that @bio_src is not freed before @bio.
				582	*/
				583	void __bio_clone_fast(struct bio bio, struct bio bio_src)
				584	{
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	585	BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	586
				587	/*
				588	* most users will be overriding ->bi_bdev with a new target,
				589	* so we don't set nor calculate new physical/hw segment counts here
				590	*/
				591	bio->bi_bdev = bio_src->bi_bdev;
Jens Axboe	b7c44ed	2015-07-24 12:37:59 -0600	[diff] [blame]	592	bio_set_flag(bio, BIO_CLONED);
Jens Axboe	1eff9d3	2016-08-05 15:35:16 -0600	[diff] [blame]	593	bio->bi_opf = bio_src->bi_opf;
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	594	bio->bi_iter = bio_src->bi_iter;
				595	bio->bi_io_vec = bio_src->bi_io_vec;
Paolo Valente	20bd723	2016-07-27 07:22:05 +0200	[diff] [blame]	596
				597	bio_clone_blkcg_association(bio, bio_src);
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	598	}
				599	EXPORT_SYMBOL(__bio_clone_fast);
				600
				601	/**
				602	* bio_clone_fast - clone a bio that shares the original bio's biovec
				603	* @bio: bio to clone
				604	* @gfp_mask: allocation priority
				605	* @bs: bio_set to allocate from
				606	*
				607	* Like __bio_clone_fast, only also allocates the returned bio
				608	*/
				609	struct bio bio_clone_fast(struct bio bio, gfp_t gfp_mask, struct bio_set *bs)
				610	{
				611	struct bio *b;
				612
				613	b = bio_alloc_bioset(gfp_mask, 0, bs);
				614	if (!b)
				615	return NULL;
				616
				617	__bio_clone_fast(b, bio);
				618
				619	if (bio_integrity(bio)) {
				620	int ret;
				621
				622	ret = bio_integrity_clone(b, bio, gfp_mask);
				623
				624	if (ret < 0) {
				625	bio_put(b);
				626	return NULL;
				627	}
				628	}
				629
				630	return b;
				631	}
				632	EXPORT_SYMBOL(bio_clone_fast);
				633
Shaohua Li	f459587	2017-03-24 10:34:43 -0700	[diff] [blame^]	634	/**
				635	* bio_clone_bioset - clone a bio
				636	* @bio_src: bio to clone
				637	* @gfp_mask: allocation priority
				638	* @bs: bio_set to allocate from
				639	*
				640	* Clone bio. Caller will own the returned bio, but not the actual data it
				641	* points to. Reference count of returned bio will be one.
				642	*/
				643	struct bio bio_clone_bioset(struct bio bio_src, gfp_t gfp_mask,
				644	struct bio_set *bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	645	{
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	646	struct bvec_iter iter;
				647	struct bio_vec bv;
				648	struct bio *bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	649
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	650	/*
				651	* Pre immutable biovecs, __bio_clone() used to just do a memcpy from
				652	* bio_src->bi_io_vec to bio->bi_io_vec.
				653	*
				654	* We can't do that anymore, because:
				655	*
				656	* - The point of cloning the biovec is to produce a bio with a biovec
				657	* the caller can modify: bi_idx and bi_bvec_done should be 0.
				658	*
				659	* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
				660	* we tried to clone the whole thing bio_alloc_bioset() would fail.
				661	* But the clone should succeed as long as the number of biovecs we
				662	* actually need to allocate is fewer than BIO_MAX_PAGES.
				663	*
				664	* - Lastly, bi_vcnt should not be looked at or relied upon by code
				665	* that does not own the bio - reason being drivers don't use it for
				666	* iterating over the biovec anymore, so expecting it to be kept up
				667	* to date (i.e. for clones that share the parent biovec) is just
				668	* asking for trouble and would force extra work on
				669	* __bio_clone_fast() anyways.
				670	*/
				671
Shaohua Li	f459587	2017-03-24 10:34:43 -0700	[diff] [blame^]	672	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	673	if (!bio)
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	674	return NULL;
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	675	bio->bi_bdev = bio_src->bi_bdev;
Jens Axboe	1eff9d3	2016-08-05 15:35:16 -0600	[diff] [blame]	676	bio->bi_opf = bio_src->bi_opf;
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	677	bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
				678	bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	679
Adrian Hunter	7afafc8	2016-08-16 10:59:35 +0300	[diff] [blame]	680	switch (bio_op(bio)) {
				681	case REQ_OP_DISCARD:
				682	case REQ_OP_SECURE_ERASE:
Chaitanya Kulkarni	a6f0788	2016-11-30 12:28:59 -0800	[diff] [blame]	683	case REQ_OP_WRITE_ZEROES:
Adrian Hunter	7afafc8	2016-08-16 10:59:35 +0300	[diff] [blame]	684	break;
				685	case REQ_OP_WRITE_SAME:
Kent Overstreet	8423ae3	2014-02-10 17:45:50 -0800	[diff] [blame]	686	bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
Adrian Hunter	7afafc8	2016-08-16 10:59:35 +0300	[diff] [blame]	687	break;
				688	default:
Shaohua Li	f459587	2017-03-24 10:34:43 -0700	[diff] [blame^]	689	bio_for_each_segment(bv, bio_src, iter)
Adrian Hunter	7afafc8	2016-08-16 10:59:35 +0300	[diff] [blame]	690	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				691	break;
Kent Overstreet	8423ae3	2014-02-10 17:45:50 -0800	[diff] [blame]	692	}
				693
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	694	if (bio_integrity(bio_src)) {
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	695	int ret;
				696
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	697	ret = bio_integrity_clone(bio, bio_src, gfp_mask);
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	698	if (ret < 0) {
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	699	bio_put(bio);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	700	return NULL;
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	701	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	702	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	703
Paolo Valente	20bd723	2016-07-27 07:22:05 +0200	[diff] [blame]	704	bio_clone_blkcg_association(bio, bio_src);
				705
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	706	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	707	}
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	708	EXPORT_SYMBOL(bio_clone_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	709
				710	/**
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	711	* bio_add_pc_page - attempt to add page to bio
				712	* @q: the target queue
				713	* @bio: destination bio
				714	* @page: page to add
				715	* @len: vec entry length
				716	* @offset: vec entry offset
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	717	*
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	718	* Attempt to add a page to the bio_vec maplist. This can fail for a
				719	* number of reasons, such as the bio being full or target block device
				720	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				721	* so it is always possible to add a single page to an empty bio.
				722	*
				723	* This should only be used by REQ_PC bios.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	724	*/
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	725	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page
				726	*page, unsigned int len, unsigned int offset)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	727	{
				728	int retried_segments = 0;
				729	struct bio_vec *bvec;
				730
				731	/*
				732	* cloned bio must not modify vec list
				733	*/
				734	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				735	return 0;
				736
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	737	if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	738	return 0;
				739
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	740	/*
				741	* For filesystems with a blocksize smaller than the pagesize
				742	* we will often be called with the same page as last time and
				743	* a consecutive offset. Optimize this special case.
				744	*/
				745	if (bio->bi_vcnt > 0) {
				746	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				747
				748	if (page == prev->bv_page &&
				749	offset == prev->bv_offset + prev->bv_len) {
				750	prev->bv_len += len;
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	751	bio->bi_iter.bi_size += len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	752	goto done;
				753	}
Jens Axboe	66cb45a	2014-06-24 16:22:24 -0600	[diff] [blame]	754
				755	/*
				756	* If the queue doesn't support SG gaps and adding this
				757	* offset would create a gap, disallow it.
				758	*/
Keith Busch	03100aa	2015-08-19 14:24:05 -0700	[diff] [blame]	759	if (bvec_gap_to_prev(q, prev, offset))
Jens Axboe	66cb45a	2014-06-24 16:22:24 -0600	[diff] [blame]	760	return 0;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	761	}
				762
				763	if (bio->bi_vcnt >= bio->bi_max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	764	return 0;
				765
				766	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	767	* setup the new entry, we might clear it again later if we
				768	* cannot add the page
				769	*/
				770	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				771	bvec->bv_page = page;
				772	bvec->bv_len = len;
				773	bvec->bv_offset = offset;
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	774	bio->bi_vcnt++;
				775	bio->bi_phys_segments++;
				776	bio->bi_iter.bi_size += len;
				777
				778	/*
				779	* Perform a recount if the number of segments is greater
				780	* than queue_max_segments(q).
				781	*/
				782
				783	while (bio->bi_phys_segments > queue_max_segments(q)) {
				784
				785	if (retried_segments)
				786	goto failed;
				787
				788	retried_segments = 1;
				789	blk_recount_segments(q, bio);
				790	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	791
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	792	/* If we may be able to merge these biovecs, force a recount */
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	793	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
Jens Axboe	b7c44ed	2015-07-24 12:37:59 -0600	[diff] [blame]	794	bio_clear_flag(bio, BIO_SEG_VALID);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	795
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	796	done:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	797	return len;
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	798
				799	failed:
				800	bvec->bv_page = NULL;
				801	bvec->bv_len = 0;
				802	bvec->bv_offset = 0;
				803	bio->bi_vcnt--;
				804	bio->bi_iter.bi_size -= len;
				805	blk_recount_segments(q, bio);
				806	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	807	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	808	EXPORT_SYMBOL(bio_add_pc_page);
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	809
				810	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	811	* bio_add_page - attempt to add page to bio
				812	* @bio: destination bio
				813	* @page: page to add
				814	* @len: vec entry length
				815	* @offset: vec entry offset
				816	*
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	817	* Attempt to add a page to the bio_vec maplist. This will only fail
				818	* if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	819	*/
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	820	int bio_add_page(struct bio bio, struct page page,
				821	unsigned int len, unsigned int offset)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	822	{
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	823	struct bio_vec *bv;
Jens Axboe	762380a	2014-06-05 13:38:39 -0600	[diff] [blame]	824
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	825	/*
				826	* cloned bio must not modify vec list
				827	*/
				828	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
				829	return 0;
Jens Axboe	58a4915	2014-06-10 12:53:56 -0600	[diff] [blame]	830
Kent Overstreet	c66a14d	2013-11-23 22:30:22 -0800	[diff] [blame]	831	/*
				832	* For filesystems with a blocksize smaller than the pagesize
				833	* we will often be called with the same page as last time and
				834	* a consecutive offset. Optimize this special case.
				835	*/
				836	if (bio->bi_vcnt > 0) {
				837	bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
				838
				839	if (page == bv->bv_page &&
				840	offset == bv->bv_offset + bv->bv_len) {
				841	bv->bv_len += len;
				842	goto done;
				843	}
				844	}
				845
				846	if (bio->bi_vcnt >= bio->bi_max_vecs)
				847	return 0;
				848
				849	bv = &bio->bi_io_vec[bio->bi_vcnt];
				850	bv->bv_page = page;
				851	bv->bv_len = len;
				852	bv->bv_offset = offset;
				853
				854	bio->bi_vcnt++;
				855	done:
				856	bio->bi_iter.bi_size += len;
				857	return len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	858	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	859	EXPORT_SYMBOL(bio_add_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	860
Kent Overstreet	2cefe4d	2016-10-31 11:59:24 -0600	[diff] [blame]	861	/**
				862	* bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
				863	* @bio: bio to add pages to
				864	* @iter: iov iterator describing the region to be mapped
				865	*
				866	* Pins as many pages from *iter and appends them to @bio's bvec array. The
				867	* pages will have to be released using put_page() when done.
				868	*/
				869	int bio_iov_iter_get_pages(struct bio bio, struct iov_iter iter)
				870	{
				871	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
				872	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
				873	struct page pages = (struct page )bv;
				874	size_t offset, diff;
				875	ssize_t size;
				876
				877	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
				878	if (unlikely(size <= 0))
				879	return size ? size : -EFAULT;
				880	nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
				881
				882	/*
				883	* Deep magic below: We need to walk the pinned pages backwards
				884	* because we are abusing the space allocated for the bio_vecs
				885	* for the page array. Because the bio_vecs are larger than the
				886	* page pointers by definition this will always work. But it also
				887	* means we can't use bio_add_page, so any changes to it's semantics
				888	* need to be reflected here as well.
				889	*/
				890	bio->bi_iter.bi_size += size;
				891	bio->bi_vcnt += nr_pages;
				892
				893	diff = (nr_pages * PAGE_SIZE - offset) - size;
				894	while (nr_pages--) {
				895	bv[nr_pages].bv_page = pages[nr_pages];
				896	bv[nr_pages].bv_len = PAGE_SIZE;
				897	bv[nr_pages].bv_offset = 0;
				898	}
				899
				900	bv[0].bv_offset += offset;
				901	bv[0].bv_len -= offset;
				902	if (diff)
				903	bv[bio->bi_vcnt - 1].bv_len -= diff;
				904
				905	iov_iter_advance(iter, size);
				906	return 0;
				907	}
				908	EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
				909
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	910	struct submit_bio_ret {
				911	struct completion event;
				912	int error;
				913	};
				914
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	915	static void submit_bio_wait_endio(struct bio *bio)
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	916	{
				917	struct submit_bio_ret *ret = bio->bi_private;
				918
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	919	ret->error = bio->bi_error;
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	920	complete(&ret->event);
				921	}
				922
				923	/**
				924	* submit_bio_wait - submit a bio, and wait until it completes
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	925	* @bio: The &struct bio which describes the I/O
				926	*
				927	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				928	* bio_endio() on failure.
				929	*/
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	930	int submit_bio_wait(struct bio *bio)
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	931	{
				932	struct submit_bio_ret ret;
				933
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	934	init_completion(&ret.event);
				935	bio->bi_private = &ret;
				936	bio->bi_end_io = submit_bio_wait_endio;
Jens Axboe	1eff9d3	2016-08-05 15:35:16 -0600	[diff] [blame]	937	bio->bi_opf \|= REQ_SYNC;
Mike Christie	4e49ea4	2016-06-05 14:31:41 -0500	[diff] [blame]	938	submit_bio(bio);
Stephane Gasparini	d57d611	2016-02-09 17:07:38 +0100	[diff] [blame]	939	wait_for_completion_io(&ret.event);
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	940
				941	return ret.error;
				942	}
				943	EXPORT_SYMBOL(submit_bio_wait);
				944
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	945	/**
				946	* bio_advance - increment/complete a bio by some number of bytes
				947	* @bio: bio to advance
				948	* @bytes: number of bytes to complete
				949	*
				950	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				951	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				952	* be updated on the last bvec as well.
				953	*
				954	* @bio will then represent the remaining, uncompleted portion of the io.
				955	*/
				956	void bio_advance(struct bio *bio, unsigned bytes)
				957	{
				958	if (bio_integrity(bio))
				959	bio_integrity_advance(bio, bytes);
				960
Kent Overstreet	4550dd6	2013-08-07 14:26:21 -0700	[diff] [blame]	961	bio_advance_iter(bio, &bio->bi_iter, bytes);
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	962	}
				963	EXPORT_SYMBOL(bio_advance);
				964
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	965	/**
Kent Overstreet	a078760	2012-09-10 14:03:28 -0700	[diff] [blame]	966	* bio_alloc_pages - allocates a single page for each bvec in a bio
				967	* @bio: bio to allocate pages for
				968	* @gfp_mask: flags for allocation
				969	*
				970	* Allocates pages up to @bio->bi_vcnt.
				971	*
				972	* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
				973	* freed.
				974	*/
				975	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
				976	{
				977	int i;
				978	struct bio_vec *bv;
				979
				980	bio_for_each_segment_all(bv, bio, i) {
				981	bv->bv_page = alloc_page(gfp_mask);
				982	if (!bv->bv_page) {
				983	while (--bv >= bio->bi_io_vec)
				984	__free_page(bv->bv_page);
				985	return -ENOMEM;
				986	}
				987	}
				988
				989	return 0;
				990	}
				991	EXPORT_SYMBOL(bio_alloc_pages);
				992
Ming Lei	6f88028	2017-03-17 00:12:29 +0800	[diff] [blame]	993	static void __bio_copy_data(struct bio dst, struct bio src,
				994	int offset, int size)
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	995	{
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	996	struct bvec_iter src_iter, dst_iter;
				997	struct bio_vec src_bv, dst_bv;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	998	void src_p, dst_p;
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	999	unsigned bytes;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1000
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1001	src_iter = src->bi_iter;
				1002	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1003
Ming Lei	6f88028	2017-03-17 00:12:29 +0800	[diff] [blame]	1004	/* for supporting partial copy */
				1005	if (offset \|\| size != src->bi_iter.bi_size) {
				1006	bio_advance_iter(src, &src_iter, offset);
				1007	src_iter.bi_size = size;
				1008	}
				1009
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1010	while (1) {
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1011	if (!src_iter.bi_size) {
				1012	src = src->bi_next;
				1013	if (!src)
				1014	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1015
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1016	src_iter = src->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1017	}
				1018
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1019	if (!dst_iter.bi_size) {
				1020	dst = dst->bi_next;
				1021	if (!dst)
				1022	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1023
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1024	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1025	}
				1026
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1027	src_bv = bio_iter_iovec(src, src_iter);
				1028	dst_bv = bio_iter_iovec(dst, dst_iter);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1029
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1030	bytes = min(src_bv.bv_len, dst_bv.bv_len);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1031
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1032	src_p = kmap_atomic(src_bv.bv_page);
				1033	dst_p = kmap_atomic(dst_bv.bv_page);
				1034
				1035	memcpy(dst_p + dst_bv.bv_offset,
				1036	src_p + src_bv.bv_offset,
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1037	bytes);
				1038
				1039	kunmap_atomic(dst_p);
				1040	kunmap_atomic(src_p);
				1041
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1042	bio_advance_iter(src, &src_iter, bytes);
				1043	bio_advance_iter(dst, &dst_iter, bytes);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1044	}
				1045	}
Ming Lei	6f88028	2017-03-17 00:12:29 +0800	[diff] [blame]	1046
				1047	/**
				1048	* bio_copy_data - copy contents of data buffers from one chain of bios to
				1049	* another
				1050	* @src: source bio list
				1051	* @dst: destination bio list
				1052	*
				1053	* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
				1054	* @src and @dst as linked lists of bios.
				1055	*
				1056	* Stops when it reaches the end of either @src or @dst - that is, copies
				1057	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				1058	*/
				1059	void bio_copy_data(struct bio dst, struct bio src)
				1060	{
				1061	__bio_copy_data(dst, src, 0, src->bi_iter.bi_size);
				1062	}
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1063	EXPORT_SYMBOL(bio_copy_data);
				1064
/**
 * bio_copy_data_partial - copy partial contents of data buffers from one
 * chain of bios to another
 * @dst: destination bio list
 * @src: source bio list
 * @offset: starting copy from the offset
 * @size: how many bytes to copy
 *
 * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
 * @src and @dst as linked lists of bios.
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data_partial(struct bio *dst, struct bio *src,
			   int offset, int size)
{
	__bio_copy_data(dst, src, offset, size);
}
EXPORT_SYMBOL(bio_copy_data_partial);
				1086
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1087	struct bio_map_data {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1088	int is_our_pages;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1089	struct iov_iter iter;
				1090	struct iovec iov[];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1091	};
				1092
Fabian Frederick	7410b3c	2014-04-22 15:09:07 -0600	[diff] [blame]	1093	static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1094	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1095	{
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	1096	if (iov_count > UIO_MAXIOV)
				1097	return NULL;
				1098
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1099	return kmalloc(sizeof(struct bio_map_data) +
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1100	sizeof(struct iovec) * iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1101	}
				1102
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1103	/**
				1104	* bio_copy_from_iter - copy all pages from iov_iter to bio
				1105	* @bio: The &struct bio which describes the I/O as destination
				1106	* @iter: iov_iter as source
				1107	*
				1108	* Copy all pages from iov_iter to bio.
				1109	* Returns 0 on success, or error on failure.
				1110	*/
				1111	static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1112	{
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1113	int i;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1114	struct bio_vec *bvec;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1115
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1116	bio_for_each_segment_all(bvec, bio, i) {
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1117	ssize_t ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1118
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1119	ret = copy_page_from_iter(bvec->bv_page,
				1120	bvec->bv_offset,
				1121	bvec->bv_len,
				1122	&iter);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1123
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1124	if (!iov_iter_count(&iter))
				1125	break;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1126
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1127	if (ret < bvec->bv_len)
				1128	return -EFAULT;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1129	}
				1130
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1131	return 0;
				1132	}
				1133
				1134	/**
				1135	* bio_copy_to_iter - copy all pages from bio to iov_iter
				1136	* @bio: The &struct bio which describes the I/O as source
				1137	* @iter: iov_iter as destination
				1138	*
				1139	* Copy all pages from bio to iov_iter.
				1140	* Returns 0 on success, or error on failure.
				1141	*/
				1142	static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
				1143	{
				1144	int i;
				1145	struct bio_vec *bvec;
				1146
				1147	bio_for_each_segment_all(bvec, bio, i) {
				1148	ssize_t ret;
				1149
				1150	ret = copy_page_to_iter(bvec->bv_page,
				1151	bvec->bv_offset,
				1152	bvec->bv_len,
				1153	&iter);
				1154
				1155	if (!iov_iter_count(&iter))
				1156	break;
				1157
				1158	if (ret < bvec->bv_len)
				1159	return -EFAULT;
				1160	}
				1161
				1162	return 0;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1163	}
				1164
Guoqing Jiang	491221f	2016-09-22 03:10:01 -0400	[diff] [blame]	1165	void bio_free_pages(struct bio *bio)
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1166	{
				1167	struct bio_vec *bvec;
				1168	int i;
				1169
				1170	bio_for_each_segment_all(bvec, bio, i)
				1171	__free_page(bvec->bv_page);
				1172	}
Guoqing Jiang	491221f	2016-09-22 03:10:01 -0400	[diff] [blame]	1173	EXPORT_SYMBOL(bio_free_pages);
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1174
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1175	/**
				1176	* bio_uncopy_user - finish previously mapped bio
				1177	* @bio: bio being terminated
				1178	*
Christoph Hellwig	ddad8dd	2015-01-18 16:16:29 +0100	[diff] [blame]	1179	* Free pages allocated from bio_copy_user_iov() and write back data
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1180	* to user space in case of a read.
				1181	*/
				1182	int bio_uncopy_user(struct bio *bio)
				1183	{
				1184	struct bio_map_data *bmd = bio->bi_private;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1185	int ret = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1186
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1187	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1188	/*
				1189	* if we're in a workqueue, the request is orphaned, so
Hannes Reinecke	2d99b55	2016-02-12 09:39:15 +0100	[diff] [blame]	1190	* don't copy into a random user address space, just free
				1191	* and return -EINTR so user space doesn't expect any data.
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1192	*/
Hannes Reinecke	2d99b55	2016-02-12 09:39:15 +0100	[diff] [blame]	1193	if (!current->mm)
				1194	ret = -EINTR;
				1195	else if (bio_data_dir(bio) == READ)
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1196	ret = bio_copy_to_iter(bio, bmd->iter);
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1197	if (bmd->is_our_pages)
				1198	bio_free_pages(bio);
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1199	}
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1200	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1201	bio_put(bio);
				1202	return ret;
				1203	}
				1204
				1205	/**
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1206	* bio_copy_user_iov - copy user data to bio
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1207	* @q: destination block queue
				1208	* @map_data: pointer to the rq_map_data holding pages (if necessary)
				1209	* @iter: iovec iterator
				1210	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1211	*
				1212	* Prepares and returns a bio for indirect user io, bouncing data
				1213	* to/from kernel pages as necessary. Must be paired with
				1214	* call bio_uncopy_user() on io completion.
				1215	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1216	struct bio bio_copy_user_iov(struct request_queue q,
				1217	struct rq_map_data *map_data,
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1218	const struct iov_iter *iter,
				1219	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1220	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1221	struct bio_map_data *bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1222	struct page *page;
				1223	struct bio *bio;
				1224	int i, ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1225	int nr_pages = 0;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1226	unsigned int len = iter->count;
Geliang Tang	bd5cece	2015-11-21 17:27:31 +0800	[diff] [blame]	1227	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1228
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1229	for (i = 0; i < iter->nr_segs; i++) {
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1230	unsigned long uaddr;
				1231	unsigned long end;
				1232	unsigned long start;
				1233
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1234	uaddr = (unsigned long) iter->iov[i].iov_base;
				1235	end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
				1236	>> PAGE_SHIFT;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1237	start = uaddr >> PAGE_SHIFT;
				1238
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1239	/*
				1240	* Overflow, abort
				1241	*/
				1242	if (end < start)
				1243	return ERR_PTR(-EINVAL);
				1244
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1245	nr_pages += end - start;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1246	}
				1247
FUJITA Tomonori	6983872	2009-04-28 20:24:29 +0200	[diff] [blame]	1248	if (offset)
				1249	nr_pages++;
				1250
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1251	bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1252	if (!bmd)
				1253	return ERR_PTR(-ENOMEM);
				1254
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1255	/*
				1256	* We need to do a deep copy of the iov_iter including the iovecs.
				1257	* The caller provided iov might point to an on-stack or otherwise
				1258	* shortlived one.
				1259	*/
				1260	bmd->is_our_pages = map_data ? 0 : 1;
				1261	memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
				1262	iov_iter_init(&bmd->iter, iter->type, bmd->iov,
				1263	iter->nr_segs, iter->count);
				1264
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1265	ret = -ENOMEM;
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1266	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1267	if (!bio)
				1268	goto out_bmd;
				1269
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1270	ret = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1271
				1272	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1273	nr_pages = 1 << map_data->page_order;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1274	i = map_data->offset / PAGE_SIZE;
				1275	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1276	while (len) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1277	unsigned int bytes = PAGE_SIZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1278
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1279	bytes -= offset;
				1280
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1281	if (bytes > len)
				1282	bytes = len;
				1283
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1284	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1285	if (i == map_data->nr_entries * nr_pages) {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1286	ret = -ENOMEM;
				1287	break;
				1288	}
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1289
				1290	page = map_data->pages[i / nr_pages];
				1291	page += (i % nr_pages);
				1292
				1293	i++;
				1294	} else {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1295	page = alloc_page(q->bounce_gfp \| gfp_mask);
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1296	if (!page) {
				1297	ret = -ENOMEM;
				1298	break;
				1299	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1300	}
				1301
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1302	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1303	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1304
				1305	len -= bytes;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1306	offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1307	}
				1308
				1309	if (ret)
				1310	goto cleanup;
				1311
				1312	/*
				1313	* success
				1314	*/
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1315	if (((iter->type & WRITE) && (!map_data \|\| !map_data->null_mapped)) \|\|
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1316	(map_data && map_data->from_user)) {
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1317	ret = bio_copy_from_iter(bio, *iter);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1318	if (ret)
				1319	goto cleanup;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1320	}
				1321
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1322	bio->bi_private = bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1323	return bio;
				1324	cleanup:
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1325	if (!map_data)
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1326	bio_free_pages(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1327	bio_put(bio);
				1328	out_bmd:
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1329	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1330	return ERR_PTR(ret);
				1331	}
				1332
Christoph Hellwig	37f19e5	2015-01-18 16:16:33 +0100	[diff] [blame]	1333	/**
				1334	* bio_map_user_iov - map user iovec into bio
				1335	* @q: the struct request_queue for the bio
				1336	* @iter: iovec iterator
				1337	* @gfp_mask: memory allocation flags
				1338	*
				1339	* Map the user space address into a bio suitable for io to a block
				1340	* device. Returns an error pointer in case of error.
				1341	*/
				1342	struct bio bio_map_user_iov(struct request_queue q,
				1343	const struct iov_iter *iter,
				1344	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1345	{
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1346	int j;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1347	int nr_pages = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1348	struct page **pages;
				1349	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1350	int cur_page = 0;
				1351	int ret, offset;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1352	struct iov_iter i;
				1353	struct iovec iov;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1354
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1355	iov_for_each(iov, i, *iter) {
				1356	unsigned long uaddr = (unsigned long) iov.iov_base;
				1357	unsigned long len = iov.iov_len;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1358	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1359	unsigned long start = uaddr >> PAGE_SHIFT;
				1360
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1361	/*
				1362	* Overflow, abort
				1363	*/
				1364	if (end < start)
				1365	return ERR_PTR(-EINVAL);
				1366
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1367	nr_pages += end - start;
				1368	/*
Linus Walleij	a441b0d	2016-09-14 14:32:52 +0200	[diff] [blame]	1369	* buffer must be aligned to at least logical block size for now
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1370	*/
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1371	if (uaddr & queue_dma_alignment(q))
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1372	return ERR_PTR(-EINVAL);
				1373	}
				1374
				1375	if (!nr_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1376	return ERR_PTR(-EINVAL);
				1377
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1378	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1379	if (!bio)
				1380	return ERR_PTR(-ENOMEM);
				1381
				1382	ret = -ENOMEM;
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1383	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1384	if (!pages)
				1385	goto out;
				1386
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1387	iov_for_each(iov, i, *iter) {
				1388	unsigned long uaddr = (unsigned long) iov.iov_base;
				1389	unsigned long len = iov.iov_len;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1390	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1391	unsigned long start = uaddr >> PAGE_SHIFT;
				1392	const int local_nr_pages = end - start;
				1393	const int page_limit = cur_page + local_nr_pages;
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1394
Nick Piggin	f5dd33c	2008-07-25 19:45:25 -0700	[diff] [blame]	1395	ret = get_user_pages_fast(uaddr, local_nr_pages,
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1396	(iter->type & WRITE) != WRITE,
				1397	&pages[cur_page]);
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1398	if (ret < local_nr_pages) {
				1399	ret = -EFAULT;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1400	goto out_unmap;
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1401	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1402
Geliang Tang	bd5cece	2015-11-21 17:27:31 +0800	[diff] [blame]	1403	offset = offset_in_page(uaddr);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1404	for (j = cur_page; j < page_limit; j++) {
				1405	unsigned int bytes = PAGE_SIZE - offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1406
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1407	if (len <= 0)
				1408	break;
				1409
				1410	if (bytes > len)
				1411	bytes = len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1412
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1413	/*
				1414	* sorry...
				1415	*/
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1416	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1417	bytes)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1418	break;
				1419
				1420	len -= bytes;
				1421	offset = 0;
				1422	}
				1423
				1424	cur_page = j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1425	/*
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1426	* release the pages we didn't map into the bio, if any
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1427	*/
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1428	while (j < page_limit)
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1429	put_page(pages[j++]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1430	}
				1431
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1432	kfree(pages);
				1433
Jens Axboe	b7c44ed	2015-07-24 12:37:59 -0600	[diff] [blame]	1434	bio_set_flag(bio, BIO_USER_MAPPED);
Christoph Hellwig	37f19e5	2015-01-18 16:16:33 +0100	[diff] [blame]	1435
				1436	/*
Bart Van Assche	5fad1b6	2017-02-01 08:20:08 -0800	[diff] [blame]	1437	* subtle -- if bio_map_user_iov() ended up bouncing a bio,
Christoph Hellwig	37f19e5	2015-01-18 16:16:33 +0100	[diff] [blame]	1438	* it would normally disappear when its bi_end_io is run.
				1439	* however, we need it for the unmap, so grab an extra
				1440	* reference to it
				1441	*/
				1442	bio_get(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1443	return bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1444
				1445	out_unmap:
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1446	for (j = 0; j < nr_pages; j++) {
				1447	if (!pages[j])
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1448	break;
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1449	put_page(pages[j]);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1450	}
				1451	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1452	kfree(pages);
				1453	bio_put(bio);
				1454	return ERR_PTR(ret);
				1455	}
				1456
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1457	static void __bio_unmap_user(struct bio *bio)
				1458	{
				1459	struct bio_vec *bvec;
				1460	int i;
				1461
				1462	/*
				1463	* make sure we dirty pages we wrote to
				1464	*/
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1465	bio_for_each_segment_all(bvec, bio, i) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1466	if (bio_data_dir(bio) == READ)
				1467	set_page_dirty_lock(bvec->bv_page);
				1468
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1469	put_page(bvec->bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1470	}
				1471
				1472	bio_put(bio);
				1473	}
				1474
				1475	/**
				1476	* bio_unmap_user - unmap a bio
				1477	* @bio: the bio being unmapped
				1478	*
Bart Van Assche	5fad1b6	2017-02-01 08:20:08 -0800	[diff] [blame]	1479	* Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
				1480	* process context.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1481	*
				1482	* bio_unmap_user() may sleep.
				1483	*/
				1484	void bio_unmap_user(struct bio *bio)
				1485	{
				1486	__bio_unmap_user(bio);
				1487	bio_put(bio);
				1488	}
				1489
/* Completion handler installed by bio_map_kern(): just drops the bio. */
static void bio_map_kern_endio(struct bio *bio)
{
	bio_put(bio);
}
				1494
Christoph Hellwig	75c72b8	2015-01-18 16:16:32 +0100	[diff] [blame]	1495	/**
				1496	* bio_map_kern - map kernel address into bio
				1497	* @q: the struct request_queue for the bio
				1498	* @data: pointer to buffer to map
				1499	* @len: length in bytes
				1500	* @gfp_mask: allocation flags for bio allocation
				1501	*
				1502	* Map the kernel address into a bio suitable for io to a block
				1503	* device. Returns an error pointer in case of error.
				1504	*/
				1505	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
				1506	gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1507	{
				1508	unsigned long kaddr = (unsigned long)data;
				1509	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1510	unsigned long start = kaddr >> PAGE_SHIFT;
				1511	const int nr_pages = end - start;
				1512	int offset, i;
				1513	struct bio *bio;
				1514
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1515	bio = bio_kmalloc(gfp_mask, nr_pages);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1516	if (!bio)
				1517	return ERR_PTR(-ENOMEM);
				1518
				1519	offset = offset_in_page(kaddr);
				1520	for (i = 0; i < nr_pages; i++) {
				1521	unsigned int bytes = PAGE_SIZE - offset;
				1522
				1523	if (len <= 0)
				1524	break;
				1525
				1526	if (bytes > len)
				1527	bytes = len;
				1528
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1529	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
Christoph Hellwig	75c72b8	2015-01-18 16:16:32 +0100	[diff] [blame]	1530	offset) < bytes) {
				1531	/* we don't support partial mappings */
				1532	bio_put(bio);
				1533	return ERR_PTR(-EINVAL);
				1534	}
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1535
				1536	data += bytes;
				1537	len -= bytes;
				1538	offset = 0;
				1539	}
				1540
Jens Axboe	b823825	2005-06-20 14:05:27 +0200	[diff] [blame]	1541	bio->bi_end_io = bio_map_kern_endio;
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1542	return bio;
				1543	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1544	EXPORT_SYMBOL(bio_map_kern);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1545
/*
 * Completion handler for bio_copy_kern() when not reading: frees the
 * pages attached to the bio, then drops the bio.
 */
static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}
				1551
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1552	static void bio_copy_kern_endio_read(struct bio *bio)
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1553	{
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1554	char *p = bio->bi_private;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1555	struct bio_vec *bvec;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1556	int i;
				1557
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1558	bio_for_each_segment_all(bvec, bio, i) {
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1559	memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1560	p += bvec->bv_len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1561	}
				1562
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1563	bio_copy_kern_endio(bio);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1564	}
				1565
				1566	/**
				1567	* bio_copy_kern - copy kernel address into bio
				1568	* @q: the struct request_queue for the bio
				1569	* @data: pointer to buffer to copy
				1570	* @len: length in bytes
				1571	* @gfp_mask: allocation flags for bio and page allocation
Randy Dunlap	ffee025	2008-04-30 09:08:54 +0200	[diff] [blame]	1572	* @reading: data direction is READ
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1573	*
				1574	* copy the kernel address into a bio suitable for io to a block
				1575	* device. Returns an error pointer in case of error.
				1576	*/
				1577	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1578	gfp_t gfp_mask, int reading)
				1579	{
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1580	unsigned long kaddr = (unsigned long)data;
				1581	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1582	unsigned long start = kaddr >> PAGE_SHIFT;
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1583	struct bio *bio;
				1584	void *p = data;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1585	int nr_pages = 0;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1586
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1587	/*
				1588	* Overflow, abort
				1589	*/
				1590	if (end < start)
				1591	return ERR_PTR(-EINVAL);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1592
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1593	nr_pages = end - start;
				1594	bio = bio_kmalloc(gfp_mask, nr_pages);
				1595	if (!bio)
				1596	return ERR_PTR(-ENOMEM);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1597
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1598	while (len) {
				1599	struct page *page;
				1600	unsigned int bytes = PAGE_SIZE;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1601
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1602	if (bytes > len)
				1603	bytes = len;
				1604
				1605	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1606	if (!page)
				1607	goto cleanup;
				1608
				1609	if (!reading)
				1610	memcpy(page_address(page), p, bytes);
				1611
				1612	if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
				1613	break;
				1614
				1615	len -= bytes;
				1616	p += bytes;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1617	}
				1618
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1619	if (reading) {
				1620	bio->bi_end_io = bio_copy_kern_endio_read;
				1621	bio->bi_private = data;
				1622	} else {
				1623	bio->bi_end_io = bio_copy_kern_endio;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1624	}
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1625
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1626	return bio;
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1627
				1628	cleanup:
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1629	bio_free_pages(bio);
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1630	bio_put(bio);
				1631	return ERR_PTR(-ENOMEM);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1632	}
				1633
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1634	/*
				1635	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1636	* for performing direct-IO in BIOs.
				1637	*
				1638	* The problem is that we cannot run set_page_dirty() from interrupt context
				1639	* because the required locks are not interrupt-safe. So what we can do is to
				1640	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1641	* check that the pages are still dirty. If so, fine. If not, redirty them
				1642	* in process context.
				1643	*
				1644	* We special-case compound pages here: normally this means reads into hugetlb
				1645	* pages. The logic in here doesn't really work right for compound pages
				1646	* because the VM does not uniformly chase down the head page in all cases.
				1647	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1648	* handle them at all. So we skip compound pages here at an early stage.
				1649	*
				1650	* Note that this code is very hard to test under normal circumstances because
				1651	* direct-io pins the pages with get_user_pages(). This makes
				1652	* is_page_cache_freeable return false, and the VM will not clean the pages.
Artem Bityutskiy	0d5c3eb	2012-07-25 18:12:08 +0300	[diff] [blame]	1653	* But other code (eg, flusher threads) could clean the pages if they are mapped
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1654	* pagecache.
				1655	*
				1656	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1657	* deferred bio dirtying paths.
				1658	*/
				1659
				1660	/*
				1661	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1662	*/
				1663	void bio_set_pages_dirty(struct bio *bio)
				1664	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1665	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1666	int i;
				1667
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1668	bio_for_each_segment_all(bvec, bio, i) {
				1669	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1670
				1671	if (page && !PageCompound(page))
				1672	set_page_dirty_lock(page);
				1673	}
				1674	}
				1675
Adrian Bunk	86b6c7a	2008-02-18 13:48:32 +0100	[diff] [blame]	1676	static void bio_release_pages(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1677	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1678	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1679	int i;
				1680
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1681	bio_for_each_segment_all(bvec, bio, i) {
				1682	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1683
				1684	if (page)
				1685	put_page(page);
				1686	}
				1687	}
				1688
				1689	/*
				1690	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1691	* If they are, then fine. If, however, some pages are clean then they must
				1692	* have been written out during the direct-IO read. So we take another ref on
				1693	* the BIO and the offending pages and re-dirty the pages in process context.
				1694	*
				1695	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
Kirill A. Shutemov	ea1754a	2016-04-01 15:29:48 +0300	[diff] [blame]	1696	* here on. It will run one put_page() against each page and will run one
				1697	* bio_put() against the BIO.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1698	*/
				1699
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1700	static void bio_dirty_fn(struct work_struct *work);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1701
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1702	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1703	static DEFINE_SPINLOCK(bio_dirty_lock);
				1704	static struct bio *bio_dirty_list;
				1705
				1706	/*
				1707	* This runs in process context
				1708	*/
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1709	static void bio_dirty_fn(struct work_struct *work)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1710	{
				1711	unsigned long flags;
				1712	struct bio *bio;
				1713
				1714	spin_lock_irqsave(&bio_dirty_lock, flags);
				1715	bio = bio_dirty_list;
				1716	bio_dirty_list = NULL;
				1717	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1718
				1719	while (bio) {
				1720	struct bio *next = bio->bi_private;
				1721
				1722	bio_set_pages_dirty(bio);
				1723	bio_release_pages(bio);
				1724	bio_put(bio);
				1725	bio = next;
				1726	}
				1727	}
				1728
				1729	void bio_check_pages_dirty(struct bio *bio)
				1730	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1731	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1732	int nr_clean_pages = 0;
				1733	int i;
				1734
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1735	bio_for_each_segment_all(bvec, bio, i) {
				1736	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1737
				1738	if (PageDirty(page) \|\| PageCompound(page)) {
Kirill A. Shutemov	09cbfea	2016-04-01 15:29:47 +0300	[diff] [blame]	1739	put_page(page);
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1740	bvec->bv_page = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1741	} else {
				1742	nr_clean_pages++;
				1743	}
				1744	}
				1745
				1746	if (nr_clean_pages) {
				1747	unsigned long flags;
				1748
				1749	spin_lock_irqsave(&bio_dirty_lock, flags);
				1750	bio->bi_private = bio_dirty_list;
				1751	bio_dirty_list = bio;
				1752	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1753	schedule_work(&bio_dirty_work);
				1754	} else {
				1755	bio_put(bio);
				1756	}
				1757	}
				1758
Gu Zheng	394ffa5	2014-11-24 11:05:22 +0800	[diff] [blame]	1759	void generic_start_io_acct(int rw, unsigned long sectors,
				1760	struct hd_struct *part)
				1761	{
				1762	int cpu = part_stat_lock();
				1763
				1764	part_round_stats(cpu, part);
				1765	part_stat_inc(cpu, part, ios[rw]);
				1766	part_stat_add(cpu, part, sectors[rw], sectors);
				1767	part_inc_in_flight(part, rw);
				1768
				1769	part_stat_unlock();
				1770	}
				1771	EXPORT_SYMBOL(generic_start_io_acct);
				1772
				1773	void generic_end_io_acct(int rw, struct hd_struct *part,
				1774	unsigned long start_time)
				1775	{
				1776	unsigned long duration = jiffies - start_time;
				1777	int cpu = part_stat_lock();
				1778
				1779	part_stat_add(cpu, part, ticks[rw], duration);
				1780	part_round_stats(cpu, part);
				1781	part_dec_in_flight(part, rw);
				1782
				1783	part_stat_unlock();
				1784	}
				1785	EXPORT_SYMBOL(generic_end_io_acct);
				1786
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/* Call flush_dcache_page() on the page of every segment of @bi. */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bvec_iter pos;
	struct bio_vec seg;

	bio_for_each_segment(seg, bi, pos) {
		flush_dcache_page(seg.bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
				1798
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1799	static inline bool bio_remaining_done(struct bio *bio)
				1800	{
				1801	/*
				1802	* If we're not chaining, then ->__bi_remaining is always 1 and
				1803	* we always end io on the first invocation.
				1804	*/
				1805	if (!bio_flagged(bio, BIO_CHAIN))
				1806	return true;
				1807
				1808	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
				1809
Mike Snitzer	326e1db	2015-05-22 09:14:03 -0400	[diff] [blame]	1810	if (atomic_dec_and_test(&bio->__bi_remaining)) {
Jens Axboe	b7c44ed	2015-07-24 12:37:59 -0600	[diff] [blame]	1811	bio_clear_flag(bio, BIO_CHAIN);
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1812	return true;
Mike Snitzer	326e1db	2015-05-22 09:14:03 -0400	[diff] [blame]	1813	}
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1814
				1815	return false;
				1816	}
				1817
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1818	/**
				1819	* bio_endio - end I/O on a bio
				1820	* @bio: bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1821	*
				1822	* Description:
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1823	* bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
				1824	* way to end I/O on a bio. No one should call bi_end_io() directly on a
				1825	* bio unless they own it and thus know that it has an end_io function.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1826	**/
Christoph Hellwig	4246a0b	2015-07-20 15:29:37 +0200	[diff] [blame]	1827	void bio_endio(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1828	{
Christoph Hellwig	ba8c696	2016-03-11 17:34:52 +0100	[diff] [blame]	1829	again:
Christoph Hellwig	2b88551	2016-03-11 17:34:53 +0100	[diff] [blame]	1830	if (!bio_remaining_done(bio))
Christoph Hellwig	ba8c696	2016-03-11 17:34:52 +0100	[diff] [blame]	1831	return;
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1832
Christoph Hellwig	ba8c696	2016-03-11 17:34:52 +0100	[diff] [blame]	1833	/*
				1834	* Need to have a real endio function for chained bios, otherwise
				1835	* various corner cases will break (like stacking block devices that
				1836	* save/restore bi_end_io) - however, we want to avoid unbounded
				1837	* recursion and blowing the stack. Tail call optimization would
				1838	* handle this, but compiling with frame pointers also disables
				1839	* gcc's sibling call optimization.
				1840	*/
				1841	if (bio->bi_end_io == bio_chain_endio) {
				1842	bio = __bio_chain_endio(bio);
				1843	goto again;
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1844	}
Christoph Hellwig	ba8c696	2016-03-11 17:34:52 +0100	[diff] [blame]	1845
				1846	if (bio->bi_end_io)
				1847	bio->bi_end_io(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1848	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1849	EXPORT_SYMBOL(bio_endio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1850
Kent Overstreet	196d38bc	2013-11-23 18:34:15 -0800	[diff] [blame]	1851	/**
Kent Overstreet	20d0189	2013-11-23 18:21:01 -0800	[diff] [blame]	1852	* bio_split - split a bio
				1853	* @bio: bio to split
				1854	* @sectors: number of sectors to split from the front of @bio
				1855	* @gfp: gfp mask
				1856	* @bs: bio set to allocate from
				1857	*
				1858	* Allocates and returns a new bio which represents @sectors from the start of
				1859	* @bio, and updates @bio to represent the remaining sectors.
				1860	*
Martin K. Petersen	f3f5da6	2015-07-22 07:57:12 -0400	[diff] [blame]	1861	* Unless this is a discard request the newly allocated bio will point
				1862	* to @bio's bi_io_vec; it is the caller's responsibility to ensure that
				1863	* @bio is not freed before the split.
Kent Overstreet	20d0189	2013-11-23 18:21:01 -0800	[diff] [blame]	1864	*/
				1865	struct bio bio_split(struct bio bio, int sectors,
				1866	gfp_t gfp, struct bio_set *bs)
				1867	{
				1868	struct bio *split = NULL;
				1869
				1870	BUG_ON(sectors <= 0);
				1871	BUG_ON(sectors >= bio_sectors(bio));
				1872
Christoph Hellwig	f9d03f9	2016-12-08 15:20:32 -0700	[diff] [blame]	1873	split = bio_clone_fast(bio, gfp, bs);
Kent Overstreet	20d0189	2013-11-23 18:21:01 -0800	[diff] [blame]	1874	if (!split)
				1875	return NULL;
				1876
				1877	split->bi_iter.bi_size = sectors << 9;
				1878
				1879	if (bio_integrity(split))
				1880	bio_integrity_trim(split, 0, sectors);
				1881
				1882	bio_advance(bio, split->bi_iter.bi_size);
				1883
				1884	return split;
				1885	}
				1886	EXPORT_SYMBOL(bio_split);
				1887
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1888	/**
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1889	* bio_trim - trim a bio
				1890	* @bio: bio to trim
				1891	* @offset: number of sectors to trim from the front of @bio
				1892	* @size: size we want to trim @bio to, in sectors
				1893	*/
				1894	void bio_trim(struct bio *bio, int offset, int size)
				1895	{
				1896	/* 'bio' is a cloned bio which we need to trim to match
				1897	* the given offset and size.
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1898	*/
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1899
				1900	size <<= 9;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1901	if (offset == 0 && size == bio->bi_iter.bi_size)
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1902	return;
				1903
Jens Axboe	b7c44ed	2015-07-24 12:37:59 -0600	[diff] [blame]	1904	bio_clear_flag(bio, BIO_SEG_VALID);
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1905
				1906	bio_advance(bio, offset << 9);
				1907
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1908	bio->bi_iter.bi_size = size;
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1909	}
				1910	EXPORT_SYMBOL_GPL(bio_trim);
				1911
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1912	/*
				1913	* create memory pools for biovec's in a bio_set.
				1914	* use the global biovec slabs created for general use.
				1915	*/
Fabian Frederick	a6c39cb4f	2014-04-22 15:09:05 -0600	[diff] [blame]	1916	mempool_t *biovec_create_pool(int pool_entries)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1917	{
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	1918	struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1919
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1920	return mempool_create_slab_pool(pool_entries, bp->slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1921	}
				1922
				1923	void bioset_free(struct bio_set *bs)
				1924	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1925	if (bs->rescue_workqueue)
				1926	destroy_workqueue(bs->rescue_workqueue);
				1927
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1928	if (bs->bio_pool)
				1929	mempool_destroy(bs->bio_pool);
				1930
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1931	if (bs->bvec_pool)
				1932	mempool_destroy(bs->bvec_pool);
				1933
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1934	bioset_integrity_free(bs);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1935	bio_put_slab(bs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1936
				1937	kfree(bs);
				1938	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1939	EXPORT_SYMBOL(bioset_free);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1940
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1941	static struct bio_set *__bioset_create(unsigned int pool_size,
				1942	unsigned int front_pad,
				1943	bool create_bvec_pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1944	{
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1945	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1946	struct bio_set *bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1947
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1948	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1949	if (!bs)
				1950	return NULL;
				1951
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1952	bs->front_pad = front_pad;
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1953
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1954	spin_lock_init(&bs->rescue_lock);
				1955	bio_list_init(&bs->rescue_list);
				1956	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1957
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1958	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1959	if (!bs->bio_slab) {
				1960	kfree(bs);
				1961	return NULL;
				1962	}
				1963
				1964	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1965	if (!bs->bio_pool)
				1966	goto bad;
				1967
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1968	if (create_bvec_pool) {
				1969	bs->bvec_pool = biovec_create_pool(pool_size);
				1970	if (!bs->bvec_pool)
				1971	goto bad;
				1972	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1973
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1974	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1975	if (!bs->rescue_workqueue)
				1976	goto bad;
				1977
				1978	return bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1979	bad:
				1980	bioset_free(bs);
				1981	return NULL;
				1982	}
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1983
				1984	/**
				1985	* bioset_create - Create a bio_set
				1986	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1987	* @front_pad: Number of bytes to allocate in front of the returned bio
				1988	*
				1989	* Description:
				1990	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1991	* to ask for a number of bytes to be allocated in front of the bio.
				1992	* Front pad allocation is useful for embedding the bio inside
				1993	* another structure, to avoid allocating extra data to go with the bio.
				1994	* Note that the bio must be embedded at the END of that structure always,
				1995	* or things will break badly.
				1996	*/
				1997	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
				1998	{
				1999	return __bioset_create(pool_size, front_pad, true);
				2000	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	2001	EXPORT_SYMBOL(bioset_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2002
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	2003	/**
				2004	* bioset_create_nobvec - Create a bio_set without bio_vec mempool
				2005	* @pool_size: Number of bio to cache in the mempool
				2006	* @front_pad: Number of bytes to allocate in front of the returned bio
				2007	*
				2008	* Description:
				2009	* Same functionality as bioset_create() except that mempool is not
				2010	* created for bio_vecs. Saving some memory for bio_clone_fast() users.
				2011	*/
				2012	struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
				2013	{
				2014	return __bioset_create(pool_size, front_pad, false);
				2015	}
				2016	EXPORT_SYMBOL(bioset_create_nobvec);
				2017
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2018	#ifdef CONFIG_BLK_CGROUP
Tejun Heo	1d933cf	2015-05-22 17:13:24 -0400	[diff] [blame]	2019
				2020	/**
				2021	* bio_associate_blkcg - associate a bio with the specified blkcg
				2022	* @bio: target bio
				2023	* @blkcg_css: css of the blkcg to associate
				2024	*
				2025	* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
				2026	* treat @bio as if it were issued by a task which belongs to the blkcg.
				2027	*
				2028	* This function takes an extra reference of @blkcg_css which will be put
				2029	* when @bio is released. The caller must own @bio and is responsible for
				2030	* synchronizing calls to this function.
				2031	*/
				2032	int bio_associate_blkcg(struct bio bio, struct cgroup_subsys_state blkcg_css)
				2033	{
				2034	if (unlikely(bio->bi_css))
				2035	return -EBUSY;
				2036	css_get(blkcg_css);
				2037	bio->bi_css = blkcg_css;
				2038	return 0;
				2039	}
Tejun Heo	5aa2a96	2015-07-23 14:27:09 -0400	[diff] [blame]	2040	EXPORT_SYMBOL_GPL(bio_associate_blkcg);
Tejun Heo	1d933cf	2015-05-22 17:13:24 -0400	[diff] [blame]	2041
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2042	/**
				2043	* bio_associate_current - associate a bio with %current
				2044	* @bio: target bio
				2045	*
				2046	* Associate @bio with %current if it hasn't been associated yet. Block
				2047	* layer will treat @bio as if it were issued by %current no matter which
				2048	* task actually issues it.
				2049	*
				2050	* This function takes an extra reference of @task's io_context and blkcg
				2051	* which will be put when @bio is released. The caller must own @bio,
				2052	* ensure %current->io_context exists, and is responsible for synchronizing
				2053	* calls to this function.
				2054	*/
				2055	int bio_associate_current(struct bio *bio)
				2056	{
				2057	struct io_context *ioc;
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2058
Tejun Heo	1d933cf	2015-05-22 17:13:24 -0400	[diff] [blame]	2059	if (bio->bi_css)
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2060	return -EBUSY;
				2061
				2062	ioc = current->io_context;
				2063	if (!ioc)
				2064	return -ENOENT;
				2065
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2066	get_io_context_active(ioc);
				2067	bio->bi_ioc = ioc;
Tejun Heo	c165b3e	2015-08-18 14:55:29 -0700	[diff] [blame]	2068	bio->bi_css = task_get_css(current, io_cgrp_id);
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2069	return 0;
				2070	}
Tejun Heo	5aa2a96	2015-07-23 14:27:09 -0400	[diff] [blame]	2071	EXPORT_SYMBOL_GPL(bio_associate_current);
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2072
				2073	/**
				2074	* bio_disassociate_task - undo bio_associate_current()
				2075	* @bio: target bio
				2076	*/
				2077	void bio_disassociate_task(struct bio *bio)
				2078	{
				2079	if (bio->bi_ioc) {
				2080	put_io_context(bio->bi_ioc);
				2081	bio->bi_ioc = NULL;
				2082	}
				2083	if (bio->bi_css) {
				2084	css_put(bio->bi_css);
				2085	bio->bi_css = NULL;
				2086	}
				2087	}
				2088
Paolo Valente	20bd723	2016-07-27 07:22:05 +0200	[diff] [blame]	2089	/**
				2090	* bio_clone_blkcg_association - clone blkcg association from src to dst bio
				2091	* @dst: destination bio
				2092	* @src: source bio
				2093	*/
				2094	void bio_clone_blkcg_association(struct bio dst, struct bio src)
				2095	{
				2096	if (src->bi_css)
				2097	WARN_ON(bio_associate_blkcg(dst, src->bi_css));
				2098	}
				2099
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2100	#endif /* CONFIG_BLK_CGROUP */
				2101
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2102	static void __init biovec_init_slabs(void)
				2103	{
				2104	int i;
				2105
Christoph Hellwig	ed996a5	2016-07-19 11:28:42 +0200	[diff] [blame]	2106	for (i = 0; i < BVEC_POOL_NR; i++) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2107	int size;
				2108	struct biovec_slab *bvs = bvec_slabs + i;
				2109
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2110	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2111	bvs->slab = NULL;
				2112	continue;
				2113	}
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2114
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2115	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2116	bvs->slab = kmem_cache_create(bvs->name, size, 0,
Paul Mundt	20c2df8	2007-07-20 10:11:58 +0900	[diff] [blame]	2117	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2118	}
				2119	}
				2120
				2121	static int __init init_bio(void)
				2122	{
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2123	bio_slab_max = 2;
				2124	bio_slab_nr = 0;
				2125	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				2126	if (!bio_slabs)
				2127	panic("bio: can't allocate bios\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2128
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	2129	bio_integrity_init();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2130	biovec_init_slabs();
				2131
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2132	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2133	if (!fs_bio_set)
				2134	panic("bio: can't allocate bios\n");
				2135
Martin K. Petersen	a91a278	2011-03-17 11:11:05 +0100	[diff] [blame]	2136	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				2137	panic("bio: can't create integrity pool\n");
				2138
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2139	return 0;
				2140	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2141	subsys_initcall(init_bio);