block: reduce stack footprint of blk_recount_segments()
blk_recalc_rq_segments() requires a request structure passed in, which
we don't have from blk_recount_segments(). So the latter allocates one on
the stack, using > 400 bytes of stack for that. This can cause us to spill
over one page of stack from ext4 at least:
0) 4560 400 blk_recount_segments+0x43/0x62
1) 4160 32 bio_phys_segments+0x1c/0x24
2) 4128 32 blk_rq_bio_prep+0x2a/0xf9
3) 4096 32 init_request_from_bio+0xf9/0xfe
4) 4064 112 __make_request+0x33c/0x3f6
5) 3952 144 generic_make_request+0x2d1/0x321
6) 3808 64 submit_bio+0xb9/0xc3
7) 3744 48 submit_bh+0xea/0x10e
8) 3696 368 ext4_mb_init_cache+0x257/0xa6a [ext4]
9) 3328 288 ext4_mb_regular_allocator+0x421/0xcd9 [ext4]
10) 3040 160 ext4_mb_new_blocks+0x211/0x4b4 [ext4]
11) 2880 336 ext4_ext_get_blocks+0xb61/0xd45 [ext4]
12) 2544 96 ext4_get_blocks_wrap+0xf2/0x200 [ext4]
13) 2448 80 ext4_da_get_block_write+0x6e/0x16b [ext4]
14) 2368 352 mpage_da_map_blocks+0x7e/0x4b3 [ext4]
15) 2016 352 ext4_da_writepages+0x2ce/0x43c [ext4]
16) 1664 32 do_writepages+0x2d/0x3c
17) 1632 144 __writeback_single_inode+0x162/0x2cd
18) 1488 96 generic_sync_sb_inodes+0x1e3/0x32b
19) 1392 16 sync_sb_inodes+0xe/0x10
20) 1376 48 writeback_inodes+0x69/0xb3
21) 1328 208 balance_dirty_pages_ratelimited_nr+0x187/0x2f9
22) 1120 224 generic_file_buffered_write+0x1d4/0x2c4
23) 896 176 __generic_file_aio_write_nolock+0x35f/0x393
24) 720 80 generic_file_aio_write+0x6c/0xc8
25) 640 80 ext4_file_write+0xa9/0x137 [ext4]
26) 560 320 do_sync_write+0xf0/0x137
27) 240 48 vfs_write+0xb3/0x13c
28) 192 64 sys_write+0x4c/0x74
29) 128 128 system_call_fastpath+0x16/0x1b
Split the segment counting out into a __blk_recalc_rq_segments() helper
to avoid allocating an onstack request just for checking the physical
segment count.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b92f5b0..a104593 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -38,72 +38,84 @@
}
}
-void blk_recalc_rq_segments(struct request *rq)
+static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
+ struct bio *bio,
+ unsigned int *seg_size_ptr)
{
- int nr_phys_segs;
unsigned int phys_size;
struct bio_vec *bv, *bvprv = NULL;
- int seg_size;
- int cluster;
- struct req_iterator iter;
- int high, highprv = 1;
- struct request_queue *q = rq->q;
+ int cluster, i, high, highprv = 1;
+ unsigned int seg_size, nr_phys_segs;
+ struct bio *fbio;
- if (!rq->bio)
- return;
+ if (!bio)
+ return 0;
+ fbio = bio;
cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
seg_size = 0;
phys_size = nr_phys_segs = 0;
- rq_for_each_segment(bv, rq, iter) {
- /*
- * the trick here is making sure that a high page is never
- * considered part of another segment, since that might
- * change with the bounce page.
- */
- high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
- if (high || highprv)
- goto new_segment;
- if (cluster) {
- if (seg_size + bv->bv_len > q->max_segment_size)
+ for_each_bio(bio) {
+ bio_for_each_segment(bv, bio, i) {
+ /*
+ * the trick here is making sure that a high page is
+ * never considered part of another segment, since that
+ * might change with the bounce page.
+ */
+ high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+ if (high || highprv)
goto new_segment;
- if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
- goto new_segment;
- if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
- goto new_segment;
+ if (cluster) {
+ if (seg_size + bv->bv_len > q->max_segment_size)
+ goto new_segment;
+ if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+ goto new_segment;
+ if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+ goto new_segment;
- seg_size += bv->bv_len;
- bvprv = bv;
- continue;
- }
+ seg_size += bv->bv_len;
+ bvprv = bv;
+ continue;
+ }
new_segment:
- if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
- rq->bio->bi_seg_front_size = seg_size;
+ if (nr_phys_segs == 1 && seg_size >
+ fbio->bi_seg_front_size)
+ fbio->bi_seg_front_size = seg_size;
- nr_phys_segs++;
- bvprv = bv;
- seg_size = bv->bv_len;
- highprv = high;
+ nr_phys_segs++;
+ bvprv = bv;
+ seg_size = bv->bv_len;
+ highprv = high;
+ }
}
- if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+ if (seg_size_ptr)
+ *seg_size_ptr = seg_size;
+
+ return nr_phys_segs;
+}
+
+void blk_recalc_rq_segments(struct request *rq)
+{
+ unsigned int seg_size = 0, phys_segs;
+
+ phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size);
+
+ if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
rq->bio->bi_seg_front_size = seg_size;
if (seg_size > rq->biotail->bi_seg_back_size)
rq->biotail->bi_seg_back_size = seg_size;
- rq->nr_phys_segments = nr_phys_segs;
+ rq->nr_phys_segments = phys_segs;
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- struct request rq;
struct bio *nxt = bio->bi_next;
- rq.q = q;
- rq.bio = rq.biotail = bio;
+
bio->bi_next = NULL;
- blk_recalc_rq_segments(&rq);
+ bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL);
bio->bi_next = nxt;
- bio->bi_phys_segments = rq.nr_phys_segments;
bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dcaa0fd..465d6ba 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -708,6 +708,8 @@
};
/* This should not be used directly - use rq_for_each_segment */
+#define for_each_bio(_bio) \
+ for (; _bio; _bio = _bio->bi_next)
#define __rq_for_each_bio(_bio, rq) \
if ((rq->bio)) \
for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)