Blame - block/bio.c - kernel/msm-4.9

blob: 2a00d349cd6883cba32d9fd477251889a1c58081 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Jens Axboe	0fe2347	2006-09-04 15:41:16 +0200	[diff] [blame]	2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
Kent Overstreet	a27bb33	2013-05-07 16:19:08 -0700	[diff] [blame]	22	#include <linux/uio.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	23	#include <linux/iocontext.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
Paul Gortmaker	630d9c4	2011-11-16 23:57:37 -0500	[diff] [blame]	27	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	30	#include <linux/cgroup.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31
Li Zefan	5578213	2009-06-09 13:43:05 +0800	[diff] [blame]	32	#include <trace/events/block.h>
Ingo Molnar	0bfc245	2008-11-26 11:59:56 +0100	[diff] [blame]	33
/*
 * Number of bio_vec entries kept inline in a bio that comes from a bio_set,
 * so that a small bio needs one mempool allocation instead of two.
 */
#define BIO_INLINE_VECS		4
				39
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	40	/*
				41	* if you change this list, also change bvec_alloc or things will
				42	* break badly! cannot be bigger than what you can fit into an
				43	* unsigned short
				44	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	45	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
Martin K. Petersen	df67714	2011-03-08 08:28:01 +0100	[diff] [blame]	46	static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	47	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				48	};
				49	#undef BV
				50
				51	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	* fs_bio_set is the bio_set containing bio and iovec memory pools used by
				53	* IO code that does not need private memory pools.
				54	*/
Martin K. Petersen	51d654e	2008-06-17 18:59:56 +0200	[diff] [blame]	55	struct bio_set *fs_bio_set;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	56	EXPORT_SYMBOL(fs_bio_set);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	57
/*
 * Our slab pool management.
 *
 * One bio_slab describes one kmem cache of a given object size; entries are
 * looked up, reference counted and released by bio_find_or_create_slab()
 * and bio_put_slab().
 */
struct bio_slab {
	struct kmem_cache *slab;	/* the cache; NULL marks a free slot */
	unsigned int slab_ref;		/* users currently holding @slab */
	unsigned int slab_size;		/* object size @slab was created for */
	char name[8];			/* cache name, formatted as "bio-%d" */
};

/* bio_slab_lock is held whenever the table below is read or modified. */
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr;
static unsigned int bio_slab_max;
				70
				71	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				72	{
				73	unsigned int sz = sizeof(struct bio) + extra_size;
				74	struct kmem_cache *slab = NULL;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	75	struct bio_slab bslab, new_bio_slabs;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	76	unsigned int new_bio_slab_max;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	77	unsigned int i, entry = -1;
				78
				79	mutex_lock(&bio_slab_lock);
				80
				81	i = 0;
				82	while (i < bio_slab_nr) {
Thiago Farina	f06f135	2010-01-19 14:07:09 +0100	[diff] [blame]	83	bslab = &bio_slabs[i];
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	84
				85	if (!bslab->slab && entry == -1)
				86	entry = i;
				87	else if (bslab->slab_size == sz) {
				88	slab = bslab->slab;
				89	bslab->slab_ref++;
				90	break;
				91	}
				92	i++;
				93	}
				94
				95	if (slab)
				96	goto out_unlock;
				97
				98	if (bio_slab_nr == bio_slab_max && entry == -1) {
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	99	new_bio_slab_max = bio_slab_max << 1;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	100	new_bio_slabs = krealloc(bio_slabs,
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	101	new_bio_slab_max * sizeof(struct bio_slab),
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	102	GFP_KERNEL);
				103	if (!new_bio_slabs)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	104	goto out_unlock;
Anna Leuschner	386bc35	2012-10-22 21:53:36 +0200	[diff] [blame]	105	bio_slab_max = new_bio_slab_max;
Alexey Khoroshilov	389d7b2	2012-08-09 15:19:25 +0200	[diff] [blame]	106	bio_slabs = new_bio_slabs;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	107	}
				108	if (entry == -1)
				109	entry = bio_slab_nr++;
				110
				111	bslab = &bio_slabs[entry];
				112
				113	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
Mikulas Patocka	6a24148	2014-03-28 15:51:55 -0400	[diff] [blame]	114	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				115	SLAB_HWCACHE_ALIGN, NULL);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	116	if (!slab)
				117	goto out_unlock;
				118
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	119	bslab->slab = slab;
				120	bslab->slab_ref = 1;
				121	bslab->slab_size = sz;
				122	out_unlock:
				123	mutex_unlock(&bio_slab_lock);
				124	return slab;
				125	}
				126
				127	static void bio_put_slab(struct bio_set *bs)
				128	{
				129	struct bio_slab *bslab = NULL;
				130	unsigned int i;
				131
				132	mutex_lock(&bio_slab_lock);
				133
				134	for (i = 0; i < bio_slab_nr; i++) {
				135	if (bs->bio_slab == bio_slabs[i].slab) {
				136	bslab = &bio_slabs[i];
				137	break;
				138	}
				139	}
				140
				141	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				142	goto out;
				143
				144	WARN_ON(!bslab->slab_ref);
				145
				146	if (--bslab->slab_ref)
				147	goto out;
				148
				149	kmem_cache_destroy(bslab->slab);
				150	bslab->slab = NULL;
				151
				152	out:
				153	mutex_unlock(&bio_slab_lock);
				154	}
				155
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	156	unsigned int bvec_nr_vecs(unsigned short idx)
				157	{
				158	return bvec_slabs[idx].nr_vecs;
				159	}
				160
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	161	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	162	{
				163	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
				164
				165	if (idx == BIOVEC_MAX_IDX)
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	166	mempool_free(bv, pool);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	167	else {
				168	struct biovec_slab *bvs = bvec_slabs + idx;
				169
				170	kmem_cache_free(bvs->slab, bv);
				171	}
				172	}
				173
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	174	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				175	mempool_t *pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	176	{
				177	struct bio_vec *bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	178
				179	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	180	* see comment near bvec_array define!
				181	*/
				182	switch (nr) {
				183	case 1:
				184	*idx = 0;
				185	break;
				186	case 2 ... 4:
				187	*idx = 1;
				188	break;
				189	case 5 ... 16:
				190	*idx = 2;
				191	break;
				192	case 17 ... 64:
				193	*idx = 3;
				194	break;
				195	case 65 ... 128:
				196	*idx = 4;
				197	break;
				198	case 129 ... BIO_MAX_PAGES:
				199	*idx = 5;
				200	break;
				201	default:
				202	return NULL;
				203	}
				204
				205	/*
				206	* idx now points to the pool we want to allocate from. only the
				207	* 1-vec entry pool is mempool backed.
				208	*/
				209	if (*idx == BIOVEC_MAX_IDX) {
				210	fallback:
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	211	bvl = mempool_alloc(pool, gfp_mask);
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	212	} else {
				213	struct biovec_slab bvs = bvec_slabs + idx;
				214	gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT \| __GFP_IO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	215
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	216	/*
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	217	* Make this allocation restricted and don't dump info on
				218	* allocation failures, since we'll fallback to the mempool
				219	* in case of failure.
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	220	*/
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	221	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				222
				223	/*
				224	* Try a slab allocation. If this fails and __GFP_WAIT
				225	* is set, retry with the 1-entry mempool
				226	*/
				227	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				228	if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
				229	*idx = BIOVEC_MAX_IDX;
				230	goto fallback;
				231	}
				232	}
				233
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	234	return bvl;
				235	}
				236
/*
 * Release what is attached to @bio without freeing the bio itself: the
 * task association, and the integrity payload when one is present.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	244
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	245	static void bio_free(struct bio *bio)
				246	{
				247	struct bio_set *bs = bio->bi_pool;
				248	void *p;
				249
				250	__bio_free(bio);
				251
				252	if (bs) {
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	253	if (bio_flagged(bio, BIO_OWNS_VEC))
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	254	bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	255
				256	/*
				257	* If we have front padding, adjust the bio pointer before freeing
				258	*/
				259	p = bio;
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	260	p -= bs->front_pad;
				261
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	262	mempool_free(p, bs->bio_pool);
				263	} else {
				264	/* Bio was allocated by bio_kmalloc() */
				265	kfree(bio);
				266	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	267	}
				268
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	269	void bio_init(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	270	{
Jens Axboe	2b94de5	2007-07-18 13:14:03 +0200	[diff] [blame]	271	memset(bio, 0, sizeof(*bio));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	272	bio->bi_flags = 1 << BIO_UPTODATE;
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	273	atomic_set(&bio->__bi_remaining, 1);
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	274	atomic_set(&bio->__bi_cnt, 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	276	EXPORT_SYMBOL(bio_init);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	277
				278	/**
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	279	* bio_reset - reinitialize a bio
				280	* @bio: bio to reset
				281	*
				282	* Description:
				283	* After calling bio_reset(), @bio will be in the same state as a freshly
				284	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				285	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				286	* comment in struct bio.
				287	*/
				288	void bio_reset(struct bio *bio)
				289	{
				290	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				291
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	292	__bio_free(bio);
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	293
				294	memset(bio, 0, BIO_RESET_BYTES);
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	295	bio->bi_flags = flags \| (1 << BIO_UPTODATE);
				296	atomic_set(&bio->__bi_remaining, 1);
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	297	}
				298	EXPORT_SYMBOL(bio_reset);
				299
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	300	static void bio_chain_endio(struct bio *bio, int error)
				301	{
				302	bio_endio(bio->bi_private, error);
				303	bio_put(bio);
				304	}
				305
Mike Snitzer	326e1db	2015-05-22 09:14:03 -0400	[diff] [blame]	306	/*
				307	* Increment chain count for the bio. Make sure the CHAIN flag update
				308	* is visible before the raised count.
				309	*/
				310	static inline void bio_inc_remaining(struct bio *bio)
				311	{
				312	bio->bi_flags \|= (1 << BIO_CHAIN);
				313	smp_mb__before_atomic();
				314	atomic_inc(&bio->__bi_remaining);
				315	}
				316
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	317	/**
				318	* bio_chain - chain bio completions
Randy Dunlap	1051a90	2014-04-20 16:03:31 -0700	[diff] [blame]	319	* @bio: the target bio
				320	* @parent: the @bio's parent bio
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	321	*
				322	* The caller won't have a bi_end_io called when @bio completes - instead,
				323	* @parent's bi_end_io won't be called until both @parent and @bio have
				324	* completed; the chained bio will also be freed when it completes.
				325	*
				326	* The caller must not set bi_private or bi_end_io in @bio.
				327	*/
				328	void bio_chain(struct bio bio, struct bio parent)
				329	{
				330	BUG_ON(bio->bi_private \|\| bio->bi_end_io);
				331
				332	bio->bi_private = parent;
				333	bio->bi_end_io = bio_chain_endio;
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	334	bio_inc_remaining(parent);
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	335	}
				336	EXPORT_SYMBOL(bio_chain);
				337
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	338	static void bio_alloc_rescue(struct work_struct *work)
				339	{
				340	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				341	struct bio *bio;
				342
				343	while (1) {
				344	spin_lock(&bs->rescue_lock);
				345	bio = bio_list_pop(&bs->rescue_list);
				346	spin_unlock(&bs->rescue_lock);
				347
				348	if (!bio)
				349	break;
				350
				351	generic_make_request(bio);
				352	}
				353	}
				354
				355	static void punt_bios_to_rescuer(struct bio_set *bs)
				356	{
				357	struct bio_list punt, nopunt;
				358	struct bio *bio;
				359
				360	/*
				361	* In order to guarantee forward progress we must punt only bios that
				362	* were allocated from this bio_set; otherwise, if there was a bio on
				363	* there for a stacking driver higher up in the stack, processing it
				364	* could require allocating bios from this bio_set, and doing that from
				365	* our own rescuer would be bad.
				366	*
				367	* Since bio lists are singly linked, pop them all instead of trying to
				368	* remove from the middle of the list:
				369	*/
				370
				371	bio_list_init(&punt);
				372	bio_list_init(&nopunt);
				373
				374	while ((bio = bio_list_pop(current->bio_list)))
				375	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				376
				377	*current->bio_list = nopunt;
				378
				379	spin_lock(&bs->rescue_lock);
				380	bio_list_merge(&bs->rescue_list, &punt);
				381	spin_unlock(&bs->rescue_lock);
				382
				383	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				384	}
				385
Kent Overstreet	f44b48c	2012-09-06 15:34:58 -0700	[diff] [blame]	386	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	387	* bio_alloc_bioset - allocate a bio for I/O
				388	* @gfp_mask: the GFP_ mask given to the slab allocator
				389	* @nr_iovecs: number of iovecs to pre-allocate
Jaak Ristioja	db18efa	2010-01-15 12:05:07 +0200	[diff] [blame]	390	* @bs: the bio_set to allocate from.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	391	*
				392	* Description:
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	393	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				394	* backed by the @bs's mempool.
				395	*
				396	* When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
				397	* able to allocate a bio. This is due to the mempool guarantees. To make this
				398	* work, callers must never allocate more than 1 bio at a time from this pool.
				399	* Callers that need to allocate more than 1 bio must always submit the
				400	* previously allocated bio for IO before attempting to allocate a new one.
				401	* Failure to do so can cause deadlocks under memory pressure.
				402	*
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	403	* Note that when running under generic_make_request() (i.e. any block
				404	* driver), bios are not submitted until after you return - see the code in
				405	* generic_make_request() that converts recursion into iteration, to prevent
				406	* stack overflows.
				407	*
				408	* This would normally mean allocating multiple bios under
				409	* generic_make_request() would be susceptible to deadlocks, but we have
				410	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				411	* thread.
				412	*
				413	* However, we do not guarantee forward progress for allocations from other
				414	* mempools. Doing multiple allocations from the same mempool under
				415	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				416	* for per bio allocations.
				417	*
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	418	* RETURNS:
				419	* Pointer to new bio on success, NULL on failure.
				420	*/
Al Viro	dd0fc66	2005-10-07 07:46:04 +0100	[diff] [blame]	421	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	422	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	423	gfp_t saved_gfp = gfp_mask;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	424	unsigned front_pad;
				425	unsigned inline_vecs;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	426	unsigned long idx = BIO_POOL_NONE;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	427	struct bio_vec *bvl = NULL;
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	428	struct bio *bio;
				429	void *p;
Jens Axboe	0a0d96b	2008-09-11 13:17:37 +0200	[diff] [blame]	430
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	431	if (!bs) {
				432	if (nr_iovecs > UIO_MAXIOV)
				433	return NULL;
				434
				435	p = kmalloc(sizeof(struct bio) +
				436	nr_iovecs * sizeof(struct bio_vec),
				437	gfp_mask);
				438	front_pad = 0;
				439	inline_vecs = nr_iovecs;
				440	} else {
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	441	/* should not use nobvec bioset for nr_iovecs > 0 */
				442	if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
				443	return NULL;
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	444	/*
				445	* generic_make_request() converts recursion to iteration; this
				446	* means if we're running beneath it, any bios we allocate and
				447	* submit will not be submitted (and thus freed) until after we
				448	* return.
				449	*
				450	* This exposes us to a potential deadlock if we allocate
				451	* multiple bios from the same bio_set() while running
				452	* underneath generic_make_request(). If we were to allocate
				453	* multiple bios (say a stacking block driver that was splitting
				454	* bios), we would deadlock if we exhausted the mempool's
				455	* reserve.
				456	*
				457	* We solve this, and guarantee forward progress, with a rescuer
				458	* workqueue per bio_set. If we go to allocate and there are
				459	* bios on current->bio_list, we first try the allocation
				460	* without __GFP_WAIT; if that fails, we punt those bios we
				461	* would be blocking to the rescuer workqueue before we retry
				462	* with the original gfp_flags.
				463	*/
				464
				465	if (current->bio_list && !bio_list_empty(current->bio_list))
				466	gfp_mask &= ~__GFP_WAIT;
				467
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	468	p = mempool_alloc(bs->bio_pool, gfp_mask);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	469	if (!p && gfp_mask != saved_gfp) {
				470	punt_bios_to_rescuer(bs);
				471	gfp_mask = saved_gfp;
				472	p = mempool_alloc(bs->bio_pool, gfp_mask);
				473	}
				474
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	475	front_pad = bs->front_pad;
				476	inline_vecs = BIO_INLINE_VECS;
				477	}
				478
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	479	if (unlikely(!p))
				480	return NULL;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	481
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	482	bio = p + front_pad;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	483	bio_init(bio);
				484
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	485	if (nr_iovecs > inline_vecs) {
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	486	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	487	if (!bvl && gfp_mask != saved_gfp) {
				488	punt_bios_to_rescuer(bs);
				489	gfp_mask = saved_gfp;
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	490	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	491	}
				492
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	493	if (unlikely(!bvl))
				494	goto err_free;
Kent Overstreet	a38352e	2012-05-25 13:03:11 -0700	[diff] [blame]	495
				496	bio->bi_flags \|= 1 << BIO_OWNS_VEC;
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	497	} else if (nr_iovecs) {
				498	bvl = bio->bi_inline_vecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	499	}
Kent Overstreet	3f86a82	2012-09-06 15:35:01 -0700	[diff] [blame]	500
				501	bio->bi_pool = bs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	502	bio->bi_flags \|= idx << BIO_POOL_OFFSET;
				503	bio->bi_max_vecs = nr_iovecs;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	504	bio->bi_io_vec = bvl;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505	return bio;
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	506
				507	err_free:
Tejun Heo	451a9eb	2009-04-15 19:50:51 +0200	[diff] [blame]	508	mempool_free(p, bs->bio_pool);
Ingo Molnar	3405397	2009-02-21 11:16:36 +0100	[diff] [blame]	509	return NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	510	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	511	EXPORT_SYMBOL(bio_alloc_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	512
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	513	void zero_fill_bio(struct bio *bio)
				514	{
				515	unsigned long flags;
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	516	struct bio_vec bv;
				517	struct bvec_iter iter;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	518
Kent Overstreet	7988613	2013-11-23 17:19:00 -0800	[diff] [blame]	519	bio_for_each_segment(bv, bio, iter) {
				520	char *data = bvec_kmap_irq(&bv, &flags);
				521	memset(data, 0, bv.bv_len);
				522	flush_dcache_page(bv.bv_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	523	bvec_kunmap_irq(data, &flags);
				524	}
				525	}
				526	EXPORT_SYMBOL(zero_fill_bio);
				527
				528	/**
				529	* bio_put - release a reference to a bio
				530	* @bio: bio to release reference to
				531	*
				532	* Description:
				533	* Put a reference to a &struct bio, either one you have gotten with
Alberto Bertogli	ad0bf11	2009-11-02 11:39:22 +0100	[diff] [blame]	534	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	535	**/
				536	void bio_put(struct bio *bio)
				537	{
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	538	if (!bio_flagged(bio, BIO_REFFED))
Kent Overstreet	4254bba	2012-09-06 15:35:00 -0700	[diff] [blame]	539	bio_free(bio);
Jens Axboe	dac5621	2015-04-17 16:23:59 -0600	[diff] [blame]	540	else {
				541	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
				542
				543	/*
				544	* last put frees it
				545	*/
				546	if (atomic_dec_and_test(&bio->__bi_cnt))
				547	bio_free(bio);
				548	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	549	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	550	EXPORT_SYMBOL(bio_put);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	551
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	552	inline int bio_phys_segments(struct request_queue q, struct bio bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	553	{
				554	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				555	blk_recount_segments(q, bio);
				556
				557	return bio->bi_phys_segments;
				558	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	559	EXPORT_SYMBOL(bio_phys_segments);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	560
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	561	/**
Kent Overstreet	59d276f	2013-11-23 18:19:27 -0800	[diff] [blame]	562	* __bio_clone_fast - clone a bio that shares the original bio's biovec
				563	* @bio: destination bio
				564	* @bio_src: bio to clone
				565	*
				566	* Clone a &bio. Caller will own the returned bio, but not
				567	* the actual data it points to. Reference count of returned
				568	* bio will be one.
				569	*
				570	* Caller must ensure that @bio_src is not freed before @bio.
				571	*/
				572	void __bio_clone_fast(struct bio bio, struct bio bio_src)
				573	{
				574	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
				575
				576	/*
				577	* most users will be overriding ->bi_bdev with a new target,
				578	* so we don't set nor calculate new physical/hw segment counts here
				579	*/
				580	bio->bi_bdev = bio_src->bi_bdev;
				581	bio->bi_flags \|= 1 << BIO_CLONED;
				582	bio->bi_rw = bio_src->bi_rw;
				583	bio->bi_iter = bio_src->bi_iter;
				584	bio->bi_io_vec = bio_src->bi_io_vec;
				585	}
				586	EXPORT_SYMBOL(__bio_clone_fast);
				587
				588	/**
				589	* bio_clone_fast - clone a bio that shares the original bio's biovec
				590	* @bio: bio to clone
				591	* @gfp_mask: allocation priority
				592	* @bs: bio_set to allocate from
				593	*
				594	* Like __bio_clone_fast, only also allocates the returned bio
				595	*/
				596	struct bio bio_clone_fast(struct bio bio, gfp_t gfp_mask, struct bio_set *bs)
				597	{
				598	struct bio *b;
				599
				600	b = bio_alloc_bioset(gfp_mask, 0, bs);
				601	if (!b)
				602	return NULL;
				603
				604	__bio_clone_fast(b, bio);
				605
				606	if (bio_integrity(bio)) {
				607	int ret;
				608
				609	ret = bio_integrity_clone(b, bio, gfp_mask);
				610
				611	if (ret < 0) {
				612	bio_put(b);
				613	return NULL;
				614	}
				615	}
				616
				617	return b;
				618	}
				619	EXPORT_SYMBOL(bio_clone_fast);
				620
				621	/**
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	622	* bio_clone_bioset - clone a bio
				623	* @bio_src: bio to clone
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	624	* @gfp_mask: allocation priority
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	625	* @bs: bio_set to allocate from
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	626	*
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	627	* Clone bio. Caller will own the returned bio, but not the actual data it
				628	* points to. Reference count of returned bio will be one.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	629	*/
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	630	struct bio bio_clone_bioset(struct bio bio_src, gfp_t gfp_mask,
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	631	struct bio_set *bs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	632	{
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	633	struct bvec_iter iter;
				634	struct bio_vec bv;
				635	struct bio *bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	637	/*
				638	* Pre immutable biovecs, __bio_clone() used to just do a memcpy from
				639	* bio_src->bi_io_vec to bio->bi_io_vec.
				640	*
				641	* We can't do that anymore, because:
				642	*
				643	* - The point of cloning the biovec is to produce a bio with a biovec
				644	* the caller can modify: bi_idx and bi_bvec_done should be 0.
				645	*
				646	* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
				647	* we tried to clone the whole thing bio_alloc_bioset() would fail.
				648	* But the clone should succeed as long as the number of biovecs we
				649	* actually need to allocate is fewer than BIO_MAX_PAGES.
				650	*
				651	* - Lastly, bi_vcnt should not be looked at or relied upon by code
				652	* that does not own the bio - reason being drivers don't use it for
				653	* iterating over the biovec anymore, so expecting it to be kept up
				654	* to date (i.e. for clones that share the parent biovec) is just
				655	* asking for trouble and would force extra work on
				656	* __bio_clone_fast() anyways.
				657	*/
				658
Kent Overstreet	8423ae3	2014-02-10 17:45:50 -0800	[diff] [blame]	659	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	660	if (!bio)
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	661	return NULL;
				662
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	663	bio->bi_bdev = bio_src->bi_bdev;
				664	bio->bi_rw = bio_src->bi_rw;
				665	bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
				666	bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	667
Kent Overstreet	8423ae3	2014-02-10 17:45:50 -0800	[diff] [blame]	668	if (bio->bi_rw & REQ_DISCARD)
				669	goto integrity_clone;
				670
				671	if (bio->bi_rw & REQ_WRITE_SAME) {
				672	bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
				673	goto integrity_clone;
				674	}
				675
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	676	bio_for_each_segment(bv, bio_src, iter)
				677	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				678
Kent Overstreet	8423ae3	2014-02-10 17:45:50 -0800	[diff] [blame]	679	integrity_clone:
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	680	if (bio_integrity(bio_src)) {
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	681	int ret;
				682
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	683	ret = bio_integrity_clone(bio, bio_src, gfp_mask);
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	684	if (ret < 0) {
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	685	bio_put(bio);
Martin K. Petersen	7ba1ba1	2008-06-30 20:04:41 +0200	[diff] [blame]	686	return NULL;
Li Zefan	059ea33	2009-03-09 10:42:45 +0100	[diff] [blame]	687	}
Peter Osterlund	3676347	2005-09-06 15:16:42 -0700	[diff] [blame]	688	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	689
Kent Overstreet	bdb5320	2013-11-23 17:26:46 -0800	[diff] [blame]	690	return bio;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	691	}
Kent Overstreet	bf800ef	2012-09-06 15:35:02 -0700	[diff] [blame]	692	EXPORT_SYMBOL(bio_clone_bioset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	693
				694	/**
				695	* bio_get_nr_vecs - return approx number of vecs
				696	* @bdev: I/O target
				697	*
				698	* Return the approximate number of pages we can send to this target.
				699	* There's no guarantee that you will be able to fit this number of pages
				700	* into a bio, it does not account for dynamic restrictions that vary
				701	* on offset.
				702	*/
				703	int bio_get_nr_vecs(struct block_device *bdev)
				704	{
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	705	struct request_queue *q = bdev_get_queue(bdev);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	706	int nr_pages;
				707
				708	nr_pages = min_t(unsigned,
Kent Overstreet	5abebfd	2012-02-08 22:07:18 +0100	[diff] [blame]	709	queue_max_segments(q),
				710	queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
Bernd Schubert	f908ee9	2012-05-11 16:36:44 +0200	[diff] [blame]	711
				712	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
				713
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	714	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	715	EXPORT_SYMBOL(bio_get_nr_vecs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	716
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	717	static int __bio_add_page(struct request_queue q, struct bio bio, struct page
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	718	*page, unsigned int len, unsigned int offset,
Akinobu Mita	34f2fd8	2013-11-18 22:11:42 +0900	[diff] [blame]	719	unsigned int max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	720	{
				721	int retried_segments = 0;
				722	struct bio_vec *bvec;
				723
				724	/*
				725	* cloned bio must not modify vec list
				726	*/
				727	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				728	return 0;
				729
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	730	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	731	return 0;
				732
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	733	/*
				734	* For filesystems with a blocksize smaller than the pagesize
				735	* we will often be called with the same page as last time and
				736	* a consecutive offset. Optimize this special case.
				737	*/
				738	if (bio->bi_vcnt > 0) {
				739	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				740
				741	if (page == prev->bv_page &&
				742	offset == prev->bv_offset + prev->bv_len) {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	743	unsigned int prev_bv_len = prev->bv_len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	744	prev->bv_len += len;
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	745
				746	if (q->merge_bvec_fn) {
				747	struct bvec_merge_data bvm = {
Dmitry Monakhov	1d61658	2010-01-27 22:44:36 +0300	[diff] [blame]	748	/* prev_bvec is already charged in
				749	bi_size, discharge it in order to
				750	simulate merging updated prev_bvec
				751	as new bvec. */
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	752	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	753	.bi_sector = bio->bi_iter.bi_sector,
				754	.bi_size = bio->bi_iter.bi_size -
				755	prev_bv_len,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	756	.bi_rw = bio->bi_rw,
				757	};
				758
Dmitry Monakhov	8bf8c37	2010-03-03 06:28:06 +0300	[diff] [blame]	759	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	760	prev->bv_len -= len;
				761	return 0;
				762	}
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	763	}
				764
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	765	bio->bi_iter.bi_size += len;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	766	goto done;
				767	}
Jens Axboe	66cb45a	2014-06-24 16:22:24 -0600	[diff] [blame]	768
				769	/*
				770	* If the queue doesn't support SG gaps and adding this
				771	* offset would create a gap, disallow it.
				772	*/
				773	if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
				774	bvec_gap_to_prev(prev, offset))
				775	return 0;
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	776	}
				777
				778	if (bio->bi_vcnt >= bio->bi_max_vecs)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	779	return 0;
				780
				781	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	782	* setup the new entry, we might clear it again later if we
				783	* cannot add the page
				784	*/
				785	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				786	bvec->bv_page = page;
				787	bvec->bv_len = len;
				788	bvec->bv_offset = offset;
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	789	bio->bi_vcnt++;
				790	bio->bi_phys_segments++;
				791	bio->bi_iter.bi_size += len;
				792
				793	/*
				794	* Perform a recount if the number of segments is greater
				795	* than queue_max_segments(q).
				796	*/
				797
				798	while (bio->bi_phys_segments > queue_max_segments(q)) {
				799
				800	if (retried_segments)
				801	goto failed;
				802
				803	retried_segments = 1;
				804	blk_recount_segments(q, bio);
				805	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	806
				807	/*
				808	* if queue has other restrictions (eg varying max sector size
				809	* depending on offset), it can specify a merge_bvec_fn in the
				810	* queue to get further control
				811	*/
				812	if (q->merge_bvec_fn) {
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	813	struct bvec_merge_data bvm = {
				814	.bi_bdev = bio->bi_bdev,
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	815	.bi_sector = bio->bi_iter.bi_sector,
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	816	.bi_size = bio->bi_iter.bi_size - len,
Alasdair G Kergon	cc371e6	2008-07-03 09:53:43 +0200	[diff] [blame]	817	.bi_rw = bio->bi_rw,
				818	};
				819
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	820	/*
				821	* merge_bvec_fn() returns number of bytes it can accept
				822	* at this offset
				823	*/
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	824	if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
				825	goto failed;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	826	}
				827
				828	/* If we may be able to merge these biovecs, force a recount */
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	829	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	830	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
				831
Jens Axboe	80cfd54	2006-01-06 09:43:28 +0100	[diff] [blame]	832	done:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	833	return len;
Maurizio Lombardi	fcbf6a0	2014-12-10 14:16:53 -0800	[diff] [blame]	834
				835	failed:
				836	bvec->bv_page = NULL;
				837	bvec->bv_len = 0;
				838	bvec->bv_offset = 0;
				839	bio->bi_vcnt--;
				840	bio->bi_iter.bi_size -= len;
				841	blk_recount_segments(q, bio);
				842	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	843	}
				844
				845	/**
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	846	* bio_add_pc_page - attempt to add page to bio
Jens Axboe	fddfdea	2006-01-31 15:24:34 +0100	[diff] [blame]	847	* @q: the target queue
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	848	* @bio: destination bio
				849	* @page: page to add
				850	* @len: vec entry length
				851	* @offset: vec entry offset
				852	*
				853	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	854	* number of reasons, such as the bio being full or target block device
				855	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				856	* so it is always possible to add a single page to an empty bio.
				857	*
				858	* This should only be used by REQ_PC bios.
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	859	*/
Jens Axboe	165125e	2007-07-24 09:28:11 +0200	[diff] [blame]	860	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page *page,
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	861	unsigned int len, unsigned int offset)
				862	{
Martin K. Petersen	ae03bf6	2009-05-22 17:17:50 -0400	[diff] [blame]	863	return __bio_add_page(q, bio, page, len, offset,
				864	queue_max_hw_sectors(q));
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	865	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	866	EXPORT_SYMBOL(bio_add_pc_page);
Mike Christie	6e68af6	2005-11-11 05:30:27 -0600	[diff] [blame]	867
				868	/**
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	869	* bio_add_page - attempt to add page to bio
				870	* @bio: destination bio
				871	* @page: page to add
				872	* @len: vec entry length
				873	* @offset: vec entry offset
				874	*
				875	* Attempt to add a page to the bio_vec maplist. This can fail for a
Andreas Gruenbacher	c642808	2011-05-27 14:52:09 +0200	[diff] [blame]	876	* number of reasons, such as the bio being full or target block device
				877	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				878	* so it is always possible to add a single page to an empty bio.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	879	*/
				880	int bio_add_page(struct bio bio, struct page page, unsigned int len,
				881	unsigned int offset)
				882	{
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	883	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
Jens Axboe	58a4915	2014-06-10 12:53:56 -0600	[diff] [blame]	884	unsigned int max_sectors;
Jens Axboe	762380a	2014-06-05 13:38:39 -0600	[diff] [blame]	885
Jens Axboe	58a4915	2014-06-10 12:53:56 -0600	[diff] [blame]	886	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
				887	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
				888	max_sectors = len >> 9;
				889
				890	return __bio_add_page(q, bio, page, len, offset, max_sectors);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	891	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	892	EXPORT_SYMBOL(bio_add_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	893
Kent Overstreet	9e88224	2012-09-10 14:41:12 -0700	[diff] [blame]	894	struct submit_bio_ret {
				895	struct completion event;
				896	int error;
				897	};
				898
				899	static void submit_bio_wait_endio(struct bio *bio, int error)
				900	{
				901	struct submit_bio_ret *ret = bio->bi_private;
				902
				903	ret->error = error;
				904	complete(&ret->event);
				905	}
				906
				907	/**
				908	* submit_bio_wait - submit a bio, and wait until it completes
				909	* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
				910	* @bio: The &struct bio which describes the I/O
				911	*
				912	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				913	* bio_endio() on failure.
				914	*/
				915	int submit_bio_wait(int rw, struct bio *bio)
				916	{
				917	struct submit_bio_ret ret;
				918
				919	rw \|= REQ_SYNC;
				920	init_completion(&ret.event);
				921	bio->bi_private = &ret;
				922	bio->bi_end_io = submit_bio_wait_endio;
				923	submit_bio(rw, bio);
				924	wait_for_completion(&ret.event);
				925
				926	return ret.error;
				927	}
				928	EXPORT_SYMBOL(submit_bio_wait);
				929
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	930	/**
				931	* bio_advance - increment/complete a bio by some number of bytes
				932	* @bio: bio to advance
				933	* @bytes: number of bytes to complete
				934	*
				935	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				936	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				937	* be updated on the last bvec as well.
				938	*
				939	* @bio will then represent the remaining, uncompleted portion of the io.
				940	*/
				941	void bio_advance(struct bio *bio, unsigned bytes)
				942	{
				943	if (bio_integrity(bio))
				944	bio_integrity_advance(bio, bytes);
				945
Kent Overstreet	4550dd6	2013-08-07 14:26:21 -0700	[diff] [blame]	946	bio_advance_iter(bio, &bio->bi_iter, bytes);
Kent Overstreet	054bdf6	2012-09-28 13:17:55 -0700	[diff] [blame]	947	}
				948	EXPORT_SYMBOL(bio_advance);
				949
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	950	/**
Kent Overstreet	a078760	2012-09-10 14:03:28 -0700	[diff] [blame]	951	* bio_alloc_pages - allocates a single page for each bvec in a bio
				952	* @bio: bio to allocate pages for
				953	* @gfp_mask: flags for allocation
				954	*
				955	* Allocates pages up to @bio->bi_vcnt.
				956	*
				957	* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
				958	* freed.
				959	*/
				960	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
				961	{
				962	int i;
				963	struct bio_vec *bv;
				964
				965	bio_for_each_segment_all(bv, bio, i) {
				966	bv->bv_page = alloc_page(gfp_mask);
				967	if (!bv->bv_page) {
				968	while (--bv >= bio->bi_io_vec)
				969	__free_page(bv->bv_page);
				970	return -ENOMEM;
				971	}
				972	}
				973
				974	return 0;
				975	}
				976	EXPORT_SYMBOL(bio_alloc_pages);
				977
				978	/**
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	979	* bio_copy_data - copy contents of data buffers from one chain of bios to
				980	* another
				981	* @src: source bio list
				982	* @dst: destination bio list
				983	*
				984	* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
				985	* @src and @dst as linked lists of bios.
				986	*
				987	* Stops when it reaches the end of either @src or @dst - that is, copies
				988	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				989	*/
				990	void bio_copy_data(struct bio dst, struct bio src)
				991	{
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	992	struct bvec_iter src_iter, dst_iter;
				993	struct bio_vec src_bv, dst_bv;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	994	void src_p, dst_p;
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	995	unsigned bytes;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	996
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	997	src_iter = src->bi_iter;
				998	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	999
				1000	while (1) {
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1001	if (!src_iter.bi_size) {
				1002	src = src->bi_next;
				1003	if (!src)
				1004	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1005
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1006	src_iter = src->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1007	}
				1008
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1009	if (!dst_iter.bi_size) {
				1010	dst = dst->bi_next;
				1011	if (!dst)
				1012	break;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1013
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1014	dst_iter = dst->bi_iter;
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1015	}
				1016
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1017	src_bv = bio_iter_iovec(src, src_iter);
				1018	dst_bv = bio_iter_iovec(dst, dst_iter);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1019
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1020	bytes = min(src_bv.bv_len, dst_bv.bv_len);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1021
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1022	src_p = kmap_atomic(src_bv.bv_page);
				1023	dst_p = kmap_atomic(dst_bv.bv_page);
				1024
				1025	memcpy(dst_p + dst_bv.bv_offset,
				1026	src_p + src_bv.bv_offset,
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1027	bytes);
				1028
				1029	kunmap_atomic(dst_p);
				1030	kunmap_atomic(src_p);
				1031
Kent Overstreet	1cb9dda	2013-08-07 14:26:39 -0700	[diff] [blame]	1032	bio_advance_iter(src, &src_iter, bytes);
				1033	bio_advance_iter(dst, &dst_iter, bytes);
Kent Overstreet	16ac3d6	2012-09-10 13:57:51 -0700	[diff] [blame]	1034	}
				1035	}
				1036	EXPORT_SYMBOL(bio_copy_data);
				1037
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1038	struct bio_map_data {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1039	int is_our_pages;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1040	struct iov_iter iter;
				1041	struct iovec iov[];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1042	};
				1043
Fabian Frederick	7410b3c	2014-04-22 15:09:07 -0600	[diff] [blame]	1044	static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1045	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1046	{
Jens Axboe	f3f63c1	2010-10-29 11:46:56 -0600	[diff] [blame]	1047	if (iov_count > UIO_MAXIOV)
				1048	return NULL;
				1049
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1050	return kmalloc(sizeof(struct bio_map_data) +
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1051	sizeof(struct iovec) * iov_count, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1052	}
				1053
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1054	/**
				1055	* bio_copy_from_iter - copy all pages from iov_iter to bio
				1056	* @bio: The &struct bio which describes the I/O as destination
				1057	* @iter: iov_iter as source
				1058	*
				1059	* Copy all pages from iov_iter to bio.
				1060	* Returns 0 on success, or error on failure.
				1061	*/
				1062	static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1063	{
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1064	int i;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1065	struct bio_vec *bvec;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1066
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1067	bio_for_each_segment_all(bvec, bio, i) {
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1068	ssize_t ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1069
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1070	ret = copy_page_from_iter(bvec->bv_page,
				1071	bvec->bv_offset,
				1072	bvec->bv_len,
				1073	&iter);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1074
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1075	if (!iov_iter_count(&iter))
				1076	break;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1077
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1078	if (ret < bvec->bv_len)
				1079	return -EFAULT;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1080	}
				1081
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1082	return 0;
				1083	}
				1084
				1085	/**
				1086	* bio_copy_to_iter - copy all pages from bio to iov_iter
				1087	* @bio: The &struct bio which describes the I/O as source
				1088	* @iter: iov_iter as destination
				1089	*
				1090	* Copy all pages from bio to iov_iter.
				1091	* Returns 0 on success, or error on failure.
				1092	*/
				1093	static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
				1094	{
				1095	int i;
				1096	struct bio_vec *bvec;
				1097
				1098	bio_for_each_segment_all(bvec, bio, i) {
				1099	ssize_t ret;
				1100
				1101	ret = copy_page_to_iter(bvec->bv_page,
				1102	bvec->bv_offset,
				1103	bvec->bv_len,
				1104	&iter);
				1105
				1106	if (!iov_iter_count(&iter))
				1107	break;
				1108
				1109	if (ret < bvec->bv_len)
				1110	return -EFAULT;
				1111	}
				1112
				1113	return 0;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1114	}
				1115
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1116	static void bio_free_pages(struct bio *bio)
				1117	{
				1118	struct bio_vec *bvec;
				1119	int i;
				1120
				1121	bio_for_each_segment_all(bvec, bio, i)
				1122	__free_page(bvec->bv_page);
				1123	}
				1124
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1125	/**
				1126	* bio_uncopy_user - finish previously mapped bio
				1127	* @bio: bio being terminated
				1128	*
Christoph Hellwig	ddad8dd	2015-01-18 16:16:29 +0100	[diff] [blame]	1129	* Free pages allocated from bio_copy_user_iov() and write back data
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1130	* to user space in case of a read.
				1131	*/
				1132	int bio_uncopy_user(struct bio *bio)
				1133	{
				1134	struct bio_map_data *bmd = bio->bi_private;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1135	int ret = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1136
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1137	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1138	/*
				1139	* if we're in a workqueue, the request is orphaned, so
				1140	* don't copy into a random user address space, just free.
				1141	*/
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1142	if (current->mm && bio_data_dir(bio) == READ)
				1143	ret = bio_copy_to_iter(bio, bmd->iter);
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1144	if (bmd->is_our_pages)
				1145	bio_free_pages(bio);
Roland Dreier	35dc248	2013-08-05 17:55:01 -0700	[diff] [blame]	1146	}
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1147	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1148	bio_put(bio);
				1149	return ret;
				1150	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1151	EXPORT_SYMBOL(bio_uncopy_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1152
				1153	/**
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1154	* bio_copy_user_iov - copy user data to bio
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1155	* @q: destination block queue
				1156	* @map_data: pointer to the rq_map_data holding pages (if necessary)
				1157	* @iter: iovec iterator
				1158	* @gfp_mask: memory allocation flags
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1159	*
				1160	* Prepares and returns a bio for indirect user io, bouncing data
				1161	* to/from kernel pages as necessary. Must be paired with
				1162	* call bio_uncopy_user() on io completion.
				1163	*/
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1164	struct bio bio_copy_user_iov(struct request_queue q,
				1165	struct rq_map_data *map_data,
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1166	const struct iov_iter *iter,
				1167	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1168	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1169	struct bio_map_data *bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1170	struct page *page;
				1171	struct bio *bio;
				1172	int i, ret;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1173	int nr_pages = 0;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1174	unsigned int len = iter->count;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1175	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1176
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1177	for (i = 0; i < iter->nr_segs; i++) {
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1178	unsigned long uaddr;
				1179	unsigned long end;
				1180	unsigned long start;
				1181
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1182	uaddr = (unsigned long) iter->iov[i].iov_base;
				1183	end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
				1184	>> PAGE_SHIFT;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1185	start = uaddr >> PAGE_SHIFT;
				1186
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1187	/*
				1188	* Overflow, abort
				1189	*/
				1190	if (end < start)
				1191	return ERR_PTR(-EINVAL);
				1192
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1193	nr_pages += end - start;
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1194	}
				1195
FUJITA Tomonori	6983872	2009-04-28 20:24:29 +0200	[diff] [blame]	1196	if (offset)
				1197	nr_pages++;
				1198
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1199	bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1200	if (!bmd)
				1201	return ERR_PTR(-ENOMEM);
				1202
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1203	/*
				1204	* We need to do a deep copy of the iov_iter including the iovecs.
				1205	* The caller provided iov might point to an on-stack or otherwise
				1206	* shortlived one.
				1207	*/
				1208	bmd->is_our_pages = map_data ? 0 : 1;
				1209	memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
				1210	iov_iter_init(&bmd->iter, iter->type, bmd->iov,
				1211	iter->nr_segs, iter->count);
				1212
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1213	ret = -ENOMEM;
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1214	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1215	if (!bio)
				1216	goto out_bmd;
				1217
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1218	if (iter->type & WRITE)
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1219	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1220
				1221	ret = 0;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1222
				1223	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1224	nr_pages = 1 << map_data->page_order;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1225	i = map_data->offset / PAGE_SIZE;
				1226	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1227	while (len) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1228	unsigned int bytes = PAGE_SIZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1229
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1230	bytes -= offset;
				1231
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1232	if (bytes > len)
				1233	bytes = len;
				1234
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1235	if (map_data) {
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1236	if (i == map_data->nr_entries * nr_pages) {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1237	ret = -ENOMEM;
				1238	break;
				1239	}
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1240
				1241	page = map_data->pages[i / nr_pages];
				1242	page += (i % nr_pages);
				1243
				1244	i++;
				1245	} else {
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1246	page = alloc_page(q->bounce_gfp \| gfp_mask);
FUJITA Tomonori	e623ddb	2008-12-18 14:49:36 +0900	[diff] [blame]	1247	if (!page) {
				1248	ret = -ENOMEM;
				1249	break;
				1250	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1251	}
				1252
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1253	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1254	break;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1255
				1256	len -= bytes;
FUJITA Tomonori	56c451f	2008-12-18 14:49:37 +0900	[diff] [blame]	1257	offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1258	}
				1259
				1260	if (ret)
				1261	goto cleanup;
				1262
				1263	/*
				1264	* success
				1265	*/
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1266	if (((iter->type & WRITE) && (!map_data \|\| !map_data->null_mapped)) \|\|
FUJITA Tomonori	ecb554a	2009-07-09 14:46:53 +0200	[diff] [blame]	1267	(map_data && map_data->from_user)) {
Dongsu Park	9124d3f	2015-01-18 16:16:34 +0100	[diff] [blame]	1268	ret = bio_copy_from_iter(bio, *iter);
FUJITA Tomonori	c5dec1c	2008-04-11 12:56:49 +0200	[diff] [blame]	1269	if (ret)
				1270	goto cleanup;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1271	}
				1272
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1273	bio->bi_private = bmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1274	return bio;
				1275	cleanup:
FUJITA Tomonori	152e283	2008-08-28 16:17:06 +0900	[diff] [blame]	1276	if (!map_data)
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1277	bio_free_pages(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1278	bio_put(bio);
				1279	out_bmd:
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1280	kfree(bmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1281	return ERR_PTR(ret);
				1282	}
				1283
Christoph Hellwig	37f19e5	2015-01-18 16:16:33 +0100	[diff] [blame]	1284	/**
				1285	* bio_map_user_iov - map user iovec into bio
				1286	* @q: the struct request_queue for the bio
				1287	* @iter: iovec iterator
				1288	* @gfp_mask: memory allocation flags
				1289	*
				1290	* Map the user space address into a bio suitable for io to a block
				1291	* device. Returns an error pointer in case of error.
				1292	*/
				1293	struct bio bio_map_user_iov(struct request_queue q,
				1294	const struct iov_iter *iter,
				1295	gfp_t gfp_mask)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1296	{
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1297	int j;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1298	int nr_pages = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1299	struct page **pages;
				1300	struct bio *bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1301	int cur_page = 0;
				1302	int ret, offset;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1303	struct iov_iter i;
				1304	struct iovec iov;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1305
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1306	iov_for_each(iov, i, *iter) {
				1307	unsigned long uaddr = (unsigned long) iov.iov_base;
				1308	unsigned long len = iov.iov_len;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1309	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1310	unsigned long start = uaddr >> PAGE_SHIFT;
				1311
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1312	/*
				1313	* Overflow, abort
				1314	*/
				1315	if (end < start)
				1316	return ERR_PTR(-EINVAL);
				1317
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1318	nr_pages += end - start;
				1319	/*
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1320	* buffer must be aligned to at least hardsector size for now
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1321	*/
Mike Christie	ad2d722	2006-12-01 10:40:20 +0100	[diff] [blame]	1322	if (uaddr & queue_dma_alignment(q))
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1323	return ERR_PTR(-EINVAL);
				1324	}
				1325
				1326	if (!nr_pages)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1327	return ERR_PTR(-EINVAL);
				1328
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1329	bio = bio_kmalloc(gfp_mask, nr_pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1330	if (!bio)
				1331	return ERR_PTR(-ENOMEM);
				1332
				1333	ret = -ENOMEM;
FUJITA Tomonori	a3bce90	2008-08-28 16:17:05 +0900	[diff] [blame]	1334	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1335	if (!pages)
				1336	goto out;
				1337
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1338	iov_for_each(iov, i, *iter) {
				1339	unsigned long uaddr = (unsigned long) iov.iov_base;
				1340	unsigned long len = iov.iov_len;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1341	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1342	unsigned long start = uaddr >> PAGE_SHIFT;
				1343	const int local_nr_pages = end - start;
				1344	const int page_limit = cur_page + local_nr_pages;
Jens Axboe	cb4644c	2010-11-10 14:36:25 +0100	[diff] [blame]	1345
Nick Piggin	f5dd33c	2008-07-25 19:45:25 -0700	[diff] [blame]	1346	ret = get_user_pages_fast(uaddr, local_nr_pages,
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1347	(iter->type & WRITE) != WRITE,
				1348	&pages[cur_page]);
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1349	if (ret < local_nr_pages) {
				1350	ret = -EFAULT;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1351	goto out_unmap;
Jens Axboe	9917215	2006-06-16 13:02:29 +0200	[diff] [blame]	1352	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1353
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1354	offset = uaddr & ~PAGE_MASK;
				1355	for (j = cur_page; j < page_limit; j++) {
				1356	unsigned int bytes = PAGE_SIZE - offset;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1357
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1358	if (len <= 0)
				1359	break;
				1360
				1361	if (bytes > len)
				1362	bytes = len;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1363
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1364	/*
				1365	* sorry...
				1366	*/
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1367	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1368	bytes)
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1369	break;
				1370
				1371	len -= bytes;
				1372	offset = 0;
				1373	}
				1374
				1375	cur_page = j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1376	/*
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1377	* release the pages we didn't map into the bio, if any
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1378	*/
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1379	while (j < page_limit)
				1380	page_cache_release(pages[j++]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1381	}
				1382
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1383	kfree(pages);
				1384
				1385	/*
				1386	* set data direction, and check if mapped pages need bouncing
				1387	*/
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1388	if (iter->type & WRITE)
Christoph Hellwig	7b6d91d	2010-08-07 18:20:39 +0200	[diff] [blame]	1389	bio->bi_rw \|= REQ_WRITE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1390
				1391	bio->bi_flags \|= (1 << BIO_USER_MAPPED);
Christoph Hellwig	37f19e5	2015-01-18 16:16:33 +0100	[diff] [blame]	1392
				1393	/*
				1394	* subtle -- if __bio_map_user() ended up bouncing a bio,
				1395	* it would normally disappear when its bi_end_io is run.
				1396	* however, we need it for the unmap, so grab an extra
				1397	* reference to it
				1398	*/
				1399	bio_get(bio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1400	return bio;
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1401
				1402	out_unmap:
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1403	for (j = 0; j < nr_pages; j++) {
				1404	if (!pages[j])
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1405	break;
Kent Overstreet	26e49cf	2015-01-18 16:16:31 +0100	[diff] [blame]	1406	page_cache_release(pages[j]);
James Bottomley	f1970ba	2005-06-20 14:06:52 +0200	[diff] [blame]	1407	}
				1408	out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1409	kfree(pages);
				1410	bio_put(bio);
				1411	return ERR_PTR(ret);
				1412	}
				1413
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1414	static void __bio_unmap_user(struct bio *bio)
				1415	{
				1416	struct bio_vec *bvec;
				1417	int i;
				1418
				1419	/*
				1420	* make sure we dirty pages we wrote to
				1421	*/
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1422	bio_for_each_segment_all(bvec, bio, i) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1423	if (bio_data_dir(bio) == READ)
				1424	set_page_dirty_lock(bvec->bv_page);
				1425
				1426	page_cache_release(bvec->bv_page);
				1427	}
				1428
				1429	bio_put(bio);
				1430	}
				1431
				1432	/**
				1433	* bio_unmap_user - unmap a bio
				1434	* @bio: the bio being unmapped
				1435	*
				1436	* Unmap a bio previously mapped by bio_map_user(). Must be called with
				1437	* a process context.
				1438	*
				1439	* bio_unmap_user() may sleep.
				1440	*/
				1441	void bio_unmap_user(struct bio *bio)
				1442	{
				1443	__bio_unmap_user(bio);
				1444	bio_put(bio);
				1445	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1446	EXPORT_SYMBOL(bio_unmap_user);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1447
/* Completion handler for bios built by bio_map_kern(): drop the bio. */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
				1452
Christoph Hellwig	75c72b8	2015-01-18 16:16:32 +0100	[diff] [blame]	1453	/**
				1454	* bio_map_kern - map kernel address into bio
				1455	* @q: the struct request_queue for the bio
				1456	* @data: pointer to buffer to map
				1457	* @len: length in bytes
				1458	* @gfp_mask: allocation flags for bio allocation
				1459	*
				1460	* Map the kernel address into a bio suitable for io to a block
				1461	* device. Returns an error pointer in case of error.
				1462	*/
				1463	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
				1464	gfp_t gfp_mask)
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1465	{
				1466	unsigned long kaddr = (unsigned long)data;
				1467	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1468	unsigned long start = kaddr >> PAGE_SHIFT;
				1469	const int nr_pages = end - start;
				1470	int offset, i;
				1471	struct bio *bio;
				1472
Tejun Heo	a9e9dc2	2009-04-15 22:10:27 +0900	[diff] [blame]	1473	bio = bio_kmalloc(gfp_mask, nr_pages);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1474	if (!bio)
				1475	return ERR_PTR(-ENOMEM);
				1476
				1477	offset = offset_in_page(kaddr);
				1478	for (i = 0; i < nr_pages; i++) {
				1479	unsigned int bytes = PAGE_SIZE - offset;
				1480
				1481	if (len <= 0)
				1482	break;
				1483
				1484	if (bytes > len)
				1485	bytes = len;
				1486
Mike Christie	defd94b	2005-12-05 02:37:06 -0600	[diff] [blame]	1487	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
Christoph Hellwig	75c72b8	2015-01-18 16:16:32 +0100	[diff] [blame]	1488	offset) < bytes) {
				1489	/* we don't support partial mappings */
				1490	bio_put(bio);
				1491	return ERR_PTR(-EINVAL);
				1492	}
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1493
				1494	data += bytes;
				1495	len -= bytes;
				1496	offset = 0;
				1497	}
				1498
Jens Axboe	b823825	2005-06-20 14:05:27 +0200	[diff] [blame]	1499	bio->bi_end_io = bio_map_kern_endio;
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1500	return bio;
				1501	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1502	EXPORT_SYMBOL(bio_map_kern);
Mike Christie	df46b9a	2005-06-20 14:04:44 +0200	[diff] [blame]	1503
/*
 * Completion handler for bio_copy_kern() writes: free the pages attached
 * to the bio, then drop the bio.
 */
static void bio_copy_kern_endio(struct bio *bio, int err)
{
	bio_free_pages(bio);
	bio_put(bio);
}
				1509
				1510	static void bio_copy_kern_endio_read(struct bio *bio, int err)
				1511	{
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1512	char *p = bio->bi_private;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1513	struct bio_vec *bvec;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1514	int i;
				1515
Kent Overstreet	d74c6d5	2013-02-06 12:23:11 -0800	[diff] [blame]	1516	bio_for_each_segment_all(bvec, bio, i) {
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1517	memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
Kent Overstreet	c8db444	2013-11-22 19:39:06 -0800	[diff] [blame]	1518	p += bvec->bv_len;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1519	}
				1520
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1521	bio_copy_kern_endio(bio, err);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1522	}
				1523
				1524	/**
				1525	* bio_copy_kern - copy kernel address into bio
				1526	* @q: the struct request_queue for the bio
				1527	* @data: pointer to buffer to copy
				1528	* @len: length in bytes
				1529	* @gfp_mask: allocation flags for bio and page allocation
Randy Dunlap	ffee025	2008-04-30 09:08:54 +0200	[diff] [blame]	1530	* @reading: data direction is READ
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1531	*
				1532	* copy the kernel address into a bio suitable for io to a block
				1533	* device. Returns an error pointer in case of error.
				1534	*/
				1535	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1536	gfp_t gfp_mask, int reading)
				1537	{
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1538	unsigned long kaddr = (unsigned long)data;
				1539	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1540	unsigned long start = kaddr >> PAGE_SHIFT;
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1541	struct bio *bio;
				1542	void *p = data;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1543	int nr_pages = 0;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1544
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1545	/*
				1546	* Overflow, abort
				1547	*/
				1548	if (end < start)
				1549	return ERR_PTR(-EINVAL);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1550
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1551	nr_pages = end - start;
				1552	bio = bio_kmalloc(gfp_mask, nr_pages);
				1553	if (!bio)
				1554	return ERR_PTR(-ENOMEM);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1555
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1556	while (len) {
				1557	struct page *page;
				1558	unsigned int bytes = PAGE_SIZE;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1559
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1560	if (bytes > len)
				1561	bytes = len;
				1562
				1563	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1564	if (!page)
				1565	goto cleanup;
				1566
				1567	if (!reading)
				1568	memcpy(page_address(page), p, bytes);
				1569
				1570	if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
				1571	break;
				1572
				1573	len -= bytes;
				1574	p += bytes;
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1575	}
				1576
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1577	if (reading) {
				1578	bio->bi_end_io = bio_copy_kern_endio_read;
				1579	bio->bi_private = data;
				1580	} else {
				1581	bio->bi_end_io = bio_copy_kern_endio;
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1582	bio->bi_rw \|= REQ_WRITE;
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1583	}
FUJITA Tomonori	76029ff	2008-08-25 20:36:08 +0200	[diff] [blame]	1584
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1585	return bio;
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1586
				1587	cleanup:
Christoph Hellwig	1dfa0f6	2015-01-18 16:16:30 +0100	[diff] [blame]	1588	bio_free_pages(bio);
Christoph Hellwig	42d2683	2015-01-18 16:16:28 +0100	[diff] [blame]	1589	bio_put(bio);
				1590	return ERR_PTR(-ENOMEM);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1591	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1592	EXPORT_SYMBOL(bio_copy_kern);
FUJITA Tomonori	68154e9	2008-04-25 12:47:50 +0200	[diff] [blame]	1593
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1594	/*
				1595	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1596	* for performing direct-IO in BIOs.
				1597	*
				1598	* The problem is that we cannot run set_page_dirty() from interrupt context
				1599	* because the required locks are not interrupt-safe. So what we can do is to
				1600	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1601	* check that the pages are still dirty. If so, fine. If not, redirty them
				1602	* in process context.
				1603	*
				1604	* We special-case compound pages here: normally this means reads into hugetlb
				1605	* pages. The logic in here doesn't really work right for compound pages
				1606	* because the VM does not uniformly chase down the head page in all cases.
				1607	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1608	* handle them at all. So we skip compound pages here at an early stage.
				1609	*
				1610	* Note that this code is very hard to test under normal circumstances because
				1611	* direct-io pins the pages with get_user_pages(). This makes
				1612	* is_page_cache_freeable return false, and the VM will not clean the pages.
Artem Bityutskiy	0d5c3eb	2012-07-25 18:12:08 +0300	[diff] [blame]	1613	* But other code (eg, flusher threads) could clean the pages if they are mapped
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1614	* pagecache.
				1615	*
				1616	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1617	* deferred bio dirtying paths.
				1618	*/
				1619
				1620	/*
				1621	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1622	*/
				1623	void bio_set_pages_dirty(struct bio *bio)
				1624	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1625	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1626	int i;
				1627
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1628	bio_for_each_segment_all(bvec, bio, i) {
				1629	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1630
				1631	if (page && !PageCompound(page))
				1632	set_page_dirty_lock(page);
				1633	}
				1634	}
				1635
Adrian Bunk	86b6c7a	2008-02-18 13:48:32 +0100	[diff] [blame]	1636	static void bio_release_pages(struct bio *bio)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1637	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1638	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1639	int i;
				1640
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1641	bio_for_each_segment_all(bvec, bio, i) {
				1642	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1643
				1644	if (page)
				1645	put_page(page);
				1646	}
				1647	}
				1648
				1649	/*
				1650	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1651	* If they are, then fine. If, however, some pages are clean then they must
				1652	* have been written out during the direct-IO read. So we take another ref on
				1653	* the BIO and the offending pages and re-dirty the pages in process context.
				1654	*
				1655	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1656	* here on. It will run one page_cache_release() against each page and will
				1657	* run one bio_put() against the BIO.
				1658	*/
				1659
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1660	static void bio_dirty_fn(struct work_struct *work);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1661
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1662	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1663	static DEFINE_SPINLOCK(bio_dirty_lock);
				1664	static struct bio *bio_dirty_list;
				1665
				1666	/*
				1667	* This runs in process context
				1668	*/
David Howells	65f27f3	2006-11-22 14:55:48 +0000	[diff] [blame]	1669	static void bio_dirty_fn(struct work_struct *work)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1670	{
				1671	unsigned long flags;
				1672	struct bio *bio;
				1673
				1674	spin_lock_irqsave(&bio_dirty_lock, flags);
				1675	bio = bio_dirty_list;
				1676	bio_dirty_list = NULL;
				1677	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1678
				1679	while (bio) {
				1680	struct bio *next = bio->bi_private;
				1681
				1682	bio_set_pages_dirty(bio);
				1683	bio_release_pages(bio);
				1684	bio_put(bio);
				1685	bio = next;
				1686	}
				1687	}
				1688
				1689	void bio_check_pages_dirty(struct bio *bio)
				1690	{
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1691	struct bio_vec *bvec;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1692	int nr_clean_pages = 0;
				1693	int i;
				1694
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1695	bio_for_each_segment_all(bvec, bio, i) {
				1696	struct page *page = bvec->bv_page;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1697
				1698	if (PageDirty(page) \|\| PageCompound(page)) {
				1699	page_cache_release(page);
Kent Overstreet	cb34e05	2012-09-05 15:22:02 -0700	[diff] [blame]	1700	bvec->bv_page = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1701	} else {
				1702	nr_clean_pages++;
				1703	}
				1704	}
				1705
				1706	if (nr_clean_pages) {
				1707	unsigned long flags;
				1708
				1709	spin_lock_irqsave(&bio_dirty_lock, flags);
				1710	bio->bi_private = bio_dirty_list;
				1711	bio_dirty_list = bio;
				1712	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1713	schedule_work(&bio_dirty_work);
				1714	} else {
				1715	bio_put(bio);
				1716	}
				1717	}
				1718
Gu Zheng	394ffa5	2014-11-24 11:05:22 +0800	[diff] [blame]	1719	void generic_start_io_acct(int rw, unsigned long sectors,
				1720	struct hd_struct *part)
				1721	{
				1722	int cpu = part_stat_lock();
				1723
				1724	part_round_stats(cpu, part);
				1725	part_stat_inc(cpu, part, ios[rw]);
				1726	part_stat_add(cpu, part, sectors[rw], sectors);
				1727	part_inc_in_flight(part, rw);
				1728
				1729	part_stat_unlock();
				1730	}
				1731	EXPORT_SYMBOL(generic_start_io_acct);
				1732
				1733	void generic_end_io_acct(int rw, struct hd_struct *part,
				1734	unsigned long start_time)
				1735	{
				1736	unsigned long duration = jiffies - start_time;
				1737	int cpu = part_stat_lock();
				1738
				1739	part_stat_add(cpu, part, ticks[rw], duration);
				1740	part_round_stats(cpu, part);
				1741	part_dec_in_flight(part, rw);
				1742
				1743	part_stat_unlock();
				1744	}
				1745	EXPORT_SYMBOL(generic_end_io_acct);
				1746
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/* Call flush_dcache_page() on the page of every segment of @bi. */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bvec_iter pos;
	struct bio_vec seg;

	bio_for_each_segment(seg, bi, pos) {
		flush_dcache_page(seg.bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
				1758
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1759	static inline bool bio_remaining_done(struct bio *bio)
				1760	{
				1761	/*
				1762	* If we're not chaining, then ->__bi_remaining is always 1 and
				1763	* we always end io on the first invocation.
				1764	*/
				1765	if (!bio_flagged(bio, BIO_CHAIN))
				1766	return true;
				1767
				1768	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
				1769
Mike Snitzer	326e1db	2015-05-22 09:14:03 -0400	[diff] [blame]	1770	if (atomic_dec_and_test(&bio->__bi_remaining)) {
				1771	clear_bit(BIO_CHAIN, &bio->bi_flags);
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1772	return true;
Mike Snitzer	326e1db	2015-05-22 09:14:03 -0400	[diff] [blame]	1773	}
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1774
				1775	return false;
				1776	}
				1777
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1778	/**
				1779	* bio_endio - end I/O on a bio
				1780	* @bio: bio
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1781	* @error: error, if any
				1782	*
				1783	* Description:
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1784	* bio_endio() will end I/O on the whole bio. bio_endio() is the
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1785	* preferred way to end I/O on a bio, it takes care of clearing
				1786	* BIO_UPTODATE on error. @error is 0 on success, and and one of the
				1787	* established -Exxxx (-EIO, for instance) error values in case
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	1788	* something went wrong. No one should call bi_end_io() directly on a
NeilBrown	5bb23a6	2007-09-27 12:46:13 +0200	[diff] [blame]	1789	* bio unless they own it and thus know that it has an end_io
				1790	* function.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1791	**/
NeilBrown	6712ecf	2007-09-27 12:47:43 +0200	[diff] [blame]	1792	void bio_endio(struct bio *bio, int error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1793	{
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	1794	while (bio) {
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	1795	if (error)
				1796	clear_bit(BIO_UPTODATE, &bio->bi_flags);
				1797	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
				1798	error = -EIO;
				1799
Jens Axboe	c4cf526	2015-04-17 16:15:18 -0600	[diff] [blame]	1800	if (unlikely(!bio_remaining_done(bio)))
				1801	break;
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	1802
				1803	/*
				1804	* Need to have a real endio function for chained bios,
				1805	* otherwise various corner cases will break (like stacking
				1806	* block devices that save/restore bi_end_io) - however, we want
				1807	* to avoid unbounded recursion and blowing the stack. Tail call
				1808	* optimization would handle this, but compiling with frame
				1809	* pointers also disables gcc's sibling call optimization.
				1810	*/
				1811	if (bio->bi_end_io == bio_chain_endio) {
				1812	struct bio *parent = bio->bi_private;
				1813	bio_put(bio);
				1814	bio = parent;
				1815	} else {
				1816	if (bio->bi_end_io)
				1817	bio->bi_end_io(bio, error);
				1818	bio = NULL;
				1819	}
				1820	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1821	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1822	EXPORT_SYMBOL(bio_endio);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1823
Kent Overstreet	196d38b	2013-11-23 18:34:15 -0800	[diff] [blame]	1824	/**
Kent Overstreet	20d0189	2013-11-23 18:21:01 -0800	[diff] [blame]	1825	* bio_split - split a bio
				1826	* @bio: bio to split
				1827	* @sectors: number of sectors to split from the front of @bio
				1828	* @gfp: gfp mask
				1829	* @bs: bio set to allocate from
				1830	*
				1831	* Allocates and returns a new bio which represents @sectors from the start of
				1832	* @bio, and updates @bio to represent the remaining sectors.
				1833	*
				1834	* The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
				1835	* responsibility to ensure that @bio is not freed before the split.
				1836	*/
				1837	struct bio bio_split(struct bio bio, int sectors,
				1838	gfp_t gfp, struct bio_set *bs)
				1839	{
				1840	struct bio *split = NULL;
				1841
				1842	BUG_ON(sectors <= 0);
				1843	BUG_ON(sectors >= bio_sectors(bio));
				1844
				1845	split = bio_clone_fast(bio, gfp, bs);
				1846	if (!split)
				1847	return NULL;
				1848
				1849	split->bi_iter.bi_size = sectors << 9;
				1850
				1851	if (bio_integrity(split))
				1852	bio_integrity_trim(split, 0, sectors);
				1853
				1854	bio_advance(bio, split->bi_iter.bi_size);
				1855
				1856	return split;
				1857	}
				1858	EXPORT_SYMBOL(bio_split);
				1859
Martin K. Petersen	ad3316b	2008-10-01 22:42:53 -0400	[diff] [blame]	1860	/**
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1861	* bio_trim - trim a bio
				1862	* @bio: bio to trim
				1863	* @offset: number of sectors to trim from the front of @bio
				1864	* @size: size we want to trim @bio to, in sectors
				1865	*/
				1866	void bio_trim(struct bio *bio, int offset, int size)
				1867	{
				1868	/* 'bio' is a cloned bio which we need to trim to match
				1869	* the given offset and size.
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1870	*/
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1871
				1872	size <<= 9;
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1873	if (offset == 0 && size == bio->bi_iter.bi_size)
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1874	return;
				1875
				1876	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
				1877
				1878	bio_advance(bio, offset << 9);
				1879
Kent Overstreet	4f024f3	2013-10-11 15:44:27 -0700	[diff] [blame]	1880	bio->bi_iter.bi_size = size;
Kent Overstreet	6678d83	2013-08-07 11:14:32 -0700	[diff] [blame]	1881	}
				1882	EXPORT_SYMBOL_GPL(bio_trim);
				1883
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1884	/*
				1885	* create memory pools for biovec's in a bio_set.
				1886	* use the global biovec slabs created for general use.
				1887	*/
Fabian Frederick	a6c39cb4f	2014-04-22 15:09:05 -0600	[diff] [blame]	1888	mempool_t *biovec_create_pool(int pool_entries)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1889	{
Jens Axboe	7ff9345	2008-12-11 11:53:43 +0100	[diff] [blame]	1890	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1891
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1892	return mempool_create_slab_pool(pool_entries, bp->slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1893	}
				1894
				1895	void bioset_free(struct bio_set *bs)
				1896	{
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1897	if (bs->rescue_workqueue)
				1898	destroy_workqueue(bs->rescue_workqueue);
				1899
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1900	if (bs->bio_pool)
				1901	mempool_destroy(bs->bio_pool);
				1902
Kent Overstreet	9f060e2	2012-10-12 15:29:33 -0700	[diff] [blame]	1903	if (bs->bvec_pool)
				1904	mempool_destroy(bs->bvec_pool);
				1905
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	1906	bioset_integrity_free(bs);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1907	bio_put_slab(bs);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1908
				1909	kfree(bs);
				1910	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1911	EXPORT_SYMBOL(bioset_free);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1912
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1913	static struct bio_set *__bioset_create(unsigned int pool_size,
				1914	unsigned int front_pad,
				1915	bool create_bvec_pool)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1916	{
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1917	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1918	struct bio_set *bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1919
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1920	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1921	if (!bs)
				1922	return NULL;
				1923
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1924	bs->front_pad = front_pad;
Jens Axboe	1b43449	2008-10-22 20:32:58 +0200	[diff] [blame]	1925
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1926	spin_lock_init(&bs->rescue_lock);
				1927	bio_list_init(&bs->rescue_list);
				1928	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1929
Jens Axboe	392ddc3	2008-12-23 12:42:54 +0100	[diff] [blame]	1930	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	1931	if (!bs->bio_slab) {
				1932	kfree(bs);
				1933	return NULL;
				1934	}
				1935
				1936	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1937	if (!bs->bio_pool)
				1938	goto bad;
				1939
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1940	if (create_bvec_pool) {
				1941	bs->bvec_pool = biovec_create_pool(pool_size);
				1942	if (!bs->bvec_pool)
				1943	goto bad;
				1944	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1945
Kent Overstreet	df2cb6d	2012-09-10 14:33:46 -0700	[diff] [blame]	1946	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1947	if (!bs->rescue_workqueue)
				1948	goto bad;
				1949
				1950	return bs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1951	bad:
				1952	bioset_free(bs);
				1953	return NULL;
				1954	}
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1955
				1956	/**
				1957	* bioset_create - Create a bio_set
				1958	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1959	* @front_pad: Number of bytes to allocate in front of the returned bio
				1960	*
				1961	* Description:
				1962	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1963	* to ask for a number of bytes to be allocated in front of the bio.
				1964	* Front pad allocation is useful for embedding the bio inside
				1965	* another structure, to avoid allocating extra data to go with the bio.
				1966	* Note that the bio must be embedded at the END of that structure always,
				1967	* or things will break badly.
				1968	*/
				1969	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
				1970	{
				1971	return __bioset_create(pool_size, front_pad, true);
				1972	}
H Hartley Sweeten	a112a71	2009-09-26 16:19:21 +0200	[diff] [blame]	1973	EXPORT_SYMBOL(bioset_create);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1974
Junichi Nomura	d8f429e	2014-10-03 17:27:12 -0400	[diff] [blame]	1975	/**
				1976	* bioset_create_nobvec - Create a bio_set without bio_vec mempool
				1977	* @pool_size: Number of bio to cache in the mempool
				1978	* @front_pad: Number of bytes to allocate in front of the returned bio
				1979	*
				1980	* Description:
				1981	* Same functionality as bioset_create() except that mempool is not
				1982	* created for bio_vecs. Saving some memory for bio_clone_fast() users.
				1983	*/
				1984	struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
				1985	{
				1986	return __bioset_create(pool_size, front_pad, false);
				1987	}
				1988	EXPORT_SYMBOL(bioset_create_nobvec);
				1989
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	1990	#ifdef CONFIG_BLK_CGROUP
Tejun Heo	1d933cf	2015-05-22 17:13:24 -0400	[diff] [blame]	1991
				1992	/**
				1993	* bio_associate_blkcg - associate a bio with the specified blkcg
				1994	* @bio: target bio
				1995	* @blkcg_css: css of the blkcg to associate
				1996	*
				1997	* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
				1998	* treat @bio as if it were issued by a task which belongs to the blkcg.
				1999	*
				2000	* This function takes an extra reference of @blkcg_css which will be put
				2001	* when @bio is released. The caller must own @bio and is responsible for
				2002	* synchronizing calls to this function.
				2003	*/
				2004	int bio_associate_blkcg(struct bio bio, struct cgroup_subsys_state blkcg_css)
				2005	{
				2006	if (unlikely(bio->bi_css))
				2007	return -EBUSY;
				2008	css_get(blkcg_css);
				2009	bio->bi_css = blkcg_css;
				2010	return 0;
				2011	}
				2012
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2013	/**
				2014	* bio_associate_current - associate a bio with %current
				2015	* @bio: target bio
				2016	*
				2017	* Associate @bio with %current if it hasn't been associated yet. Block
				2018	* layer will treat @bio as if it were issued by %current no matter which
				2019	* task actually issues it.
				2020	*
				2021	* This function takes an extra reference of @task's io_context and blkcg
				2022	* which will be put when @bio is released. The caller must own @bio,
				2023	* ensure %current->io_context exists, and is responsible for synchronizing
				2024	* calls to this function.
				2025	*/
				2026	int bio_associate_current(struct bio *bio)
				2027	{
				2028	struct io_context *ioc;
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2029
Tejun Heo	1d933cf	2015-05-22 17:13:24 -0400	[diff] [blame]	2030	if (bio->bi_css)
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2031	return -EBUSY;
				2032
				2033	ioc = current->io_context;
				2034	if (!ioc)
				2035	return -ENOENT;
				2036
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2037	get_io_context_active(ioc);
				2038	bio->bi_ioc = ioc;
Tejun Heo	ec43869	2015-05-22 17:13:22 -0400	[diff] [blame]	2039	bio->bi_css = task_get_css(current, blkio_cgrp_id);
Tejun Heo	852c788	2012-03-05 13:15:27 -0800	[diff] [blame]	2040	return 0;
				2041	}
				2042
				2043	/**
				2044	* bio_disassociate_task - undo bio_associate_current()
				2045	* @bio: target bio
				2046	*/
				2047	void bio_disassociate_task(struct bio *bio)
				2048	{
				2049	if (bio->bi_ioc) {
				2050	put_io_context(bio->bi_ioc);
				2051	bio->bi_ioc = NULL;
				2052	}
				2053	if (bio->bi_css) {
				2054	css_put(bio->bi_css);
				2055	bio->bi_css = NULL;
				2056	}
				2057	}
				2058
				2059	#endif /* CONFIG_BLK_CGROUP */
				2060
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2061	static void __init biovec_init_slabs(void)
				2062	{
				2063	int i;
				2064
				2065	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
				2066	int size;
				2067	struct biovec_slab *bvs = bvec_slabs + i;
				2068
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2069	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2070	bvs->slab = NULL;
				2071	continue;
				2072	}
Jens Axboe	a7fcd37	2008-12-05 16:10:29 +0100	[diff] [blame]	2073
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2074	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2075	bvs->slab = kmem_cache_create(bvs->name, size, 0,
Paul Mundt	20c2df8	2007-07-20 10:11:58 +0900	[diff] [blame]	2076	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2077	}
				2078	}
				2079
				2080	static int __init init_bio(void)
				2081	{
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2082	bio_slab_max = 2;
				2083	bio_slab_nr = 0;
				2084	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				2085	if (!bio_slabs)
				2086	panic("bio: can't allocate bios\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2087
Martin K. Petersen	7878cba	2009-06-26 15:37:49 +0200	[diff] [blame]	2088	bio_integrity_init();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2089	biovec_init_slabs();
				2090
Jens Axboe	bb799ca	2008-12-10 15:35:05 +0100	[diff] [blame]	2091	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2092	if (!fs_bio_set)
				2093	panic("bio: can't allocate bios\n");
				2094
Martin K. Petersen	a91a278	2011-03-17 11:11:05 +0100	[diff] [blame]	2095	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				2096	panic("bio: can't create integrity pool\n");
				2097
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2098	return 0;
				2099	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2100	subsys_initcall(init_bio);