blk-throttle: cap discard request size
discard request usually is very big and easily use all bandwidth budget
of a cgroup. discard request size doesn't really mean the size of data
written, so it doesn't make sense to account it into bandwidth budget.
Jens pointed out treating the size 0 doesn't make sense too, because
discard request does have cost. But it's not easy to find the actual
cost. This patch simply makes the size one sector.
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a7285bf..80f5481 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -382,6 +382,14 @@
} \
} while (0)
+static inline unsigned int throtl_bio_data_size(struct bio *bio)
+{
+ /* assume it's one sector */
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
+ return 512;
+ return bio->bi_iter.bi_size;
+}
+
static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
INIT_LIST_HEAD(&qn->node);
@@ -934,6 +942,7 @@
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+ unsigned int bio_size = throtl_bio_data_size(bio);
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
@@ -947,14 +956,14 @@
do_div(tmp, HZ);
bytes_allowed = tmp;
- if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
+ if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (wait)
*wait = 0;
return true;
}
/* Calc approx time to dispatch */
- extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
+ extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
if (!jiffy_wait)
@@ -1034,11 +1043,12 @@
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
+ unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- tg->bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->bytes_disp[rw] += bio_size;
tg->io_disp[rw]++;
- tg->last_bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->last_bytes_disp[rw] += bio_size;
tg->last_io_disp[rw]++;
/*