bio: add support for inlining a number of bio_vecs inside the bio

When we allocate a bio for IO, we actually do two allocations: one
for the bio itself, and one for the bi_io_vec array that holds the
actual pages we are interested in.

This feature inlines a definable number of io vecs inside the bio
itself, so we eliminate the bio_vec array allocation for IOs up
to a certain size. It defaults to 4 vecs, which is typically 16k
of IO (4 vecs of one 4k page each).

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fs/bio.c b/fs/bio.c
index 0146f80..75e6be1 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,6 +31,12 @@
 
 DEFINE_TRACE(block_split);
 
+/*
+ * Test patch to inline a certain number of bio_vecs inside the bio
+ * itself, shrinking a bio data allocation from two mempool calls to one.
+ */
+#define BIO_INLINE_VECS		4
+
 static mempool_t *bio_split_pool __read_mostly;
 
 /*
@@ -241,7 +247,7 @@
 {
 	void *p;
 
-	if (bio->bi_io_vec)
+	if (bio_has_allocated_vec(bio))
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
@@ -267,7 +273,8 @@
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
-	kfree(bio->bi_io_vec);
+	if (bio_has_allocated_vec(bio))
+		kfree(bio->bi_io_vec);
 	kfree(bio);
 }
 
@@ -314,7 +321,16 @@
 		if (likely(nr_iovecs)) {
 			unsigned long uninitialized_var(idx);
 
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+			if (nr_iovecs <= BIO_INLINE_VECS) {
+				idx = 0;
+				bvl = bio->bi_inline_vecs;
+				nr_iovecs = BIO_INLINE_VECS;
+				memset(bvl, 0, BIO_INLINE_VECS * sizeof(*bvl));
+			} else {
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+							bs);
+				nr_iovecs = bvec_nr_vecs(idx);
+			}
 			if (unlikely(!bvl)) {
 				if (bs)
 					mempool_free(bio, bs->bio_pool);
@@ -324,7 +340,7 @@
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_nr_vecs(idx);
+			bio->bi_max_vecs = nr_iovecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -1525,6 +1541,7 @@
  */
 struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
+	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
 
 	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
@@ -1533,7 +1550,7 @@
 
 	bs->front_pad = front_pad;
 
-	bs->bio_slab = bio_find_or_create_slab(front_pad);
+	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
 	if (!bs->bio_slab) {
 		kfree(bs);
 		return NULL;