From f2c4902bd08b854a23c3c2ab352382fd7eef959f Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Tue, 6 May 2025 10:09:32 +0800 Subject: [PATCH] blk-throttle: Split the blkthrotl queue This patch splits the single queue into separate bps and iops queues. Now, an IO request must first pass through the bps queue, then the iops queue, and finally be dispatched. Due to the queue splitting, we need to modify the throtl add/peek/pop function. Additionally, the patch modifies the logic related to tg_dispatch_time(). If bio needs to wait for bps, function directly returns the bps wait time; otherwise, it charges bps and returns the iops wait time so that bio can be directly placed into the iops queue afterward. Note that this may lead to more frequent updates to disptime, but the overhead is negligible for the slow path. Signed-off-by: Zizhi Wo Reviewed-by: Yu Kuai Signed-off-by: Zizhi Wo Link: https://lore.kernel.org/r/20250506020935.655574-6-wozizhi@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-throttle.c | 49 ++++++++++++++++++++++++++++++-------------- block/blk-throttle.h | 3 ++- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ee4eeee8f21f..dc7c6b51c2cd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -143,7 +143,8 @@ static inline unsigned int throtl_bio_data_size(struct bio *bio) static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(&qn->node); - bio_list_init(&qn->bios); + bio_list_init(&qn->bios_bps); + bio_list_init(&qn->bios_iops); qn->tg = tg; } @@ -160,7 +161,11 @@ static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, struct list_head *queued) { - bio_list_add(&qn->bios, bio); + if (bio_flagged(bio, BIO_TG_BPS_THROTTLED)) + bio_list_add(&qn->bios_iops, bio); + else + bio_list_add(&qn->bios_bps, bio); + if (list_empty(&qn->node)) { list_add_tail(&qn->node, queued); blkg_get(tg_to_blkg(qn->tg)); @@ -170,6 +175,10 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn, /** * throtl_peek_queued - peek the first bio on a qnode list * @queued: the qnode list to peek + * + * Always take a bio from the head of the iops queue first. If the queue is + * empty, we then take it from the bps queue to maintain the overall idea of + * fetching bios from the head. */ static struct bio *throtl_peek_queued(struct list_head *queued) { @@ -180,7 +189,9 @@ static struct bio *throtl_peek_queued(struct list_head *queued) return NULL; qn = list_first_entry(queued, struct throtl_qnode, node); - bio = bio_list_peek(&qn->bios); + bio = bio_list_peek(&qn->bios_iops); + if (!bio) + bio = bio_list_peek(&qn->bios_bps); WARN_ON_ONCE(!bio); return bio; } @@ -190,9 +201,10 @@ static struct bio *throtl_peek_queued(struct list_head *queued) * @queued: the qnode list to pop a bio from * @tg_to_put: optional out argument for throtl_grp to put * - * Pop the first bio from the qnode list @queued. After popping, the first - * qnode is removed from @queued if empty or moved to the end of @queued so - * that the popping order is round-robin. + * Pop the first bio from the qnode list @queued. Note that we firstly focus on + * the iops list because bios are ultimately dispatched from it. After popping, + * the first qnode is removed from @queued if empty or moved to the end of + * @queued so that the popping order is round-robin. * * When the first qnode is removed, its associated throtl_grp should be put * too. If @tg_to_put is NULL, this function automatically puts it; @@ -209,10 +221,12 @@ static struct bio *throtl_pop_queued(struct list_head *queued, return NULL; qn = list_first_entry(queued, struct throtl_qnode, node); - bio = bio_list_pop(&qn->bios); + bio = bio_list_pop(&qn->bios_iops); + if (!bio) + bio = bio_list_pop(&qn->bios_bps); WARN_ON_ONCE(!bio); - if (bio_list_empty(&qn->bios)) { + if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) { list_del_init(&qn->node); if (tg_to_put) *tg_to_put = qn->tg; @@ -856,12 +870,12 @@ static unsigned long tg_dispatch_iops_time(struct throtl_grp *tg, struct bio *bi /* * Returns approx number of jiffies to wait before this bio is with-in IO rate - * and can be dispatched. + * and can be moved to other queue or dispatched. */ static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - unsigned long bps_wait, iops_wait; + unsigned long wait; /* * Currently whole state machine of group depends on first bio @@ -872,10 +886,17 @@ static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio) BUG_ON(tg->service_queue.nr_queued[rw] && bio != throtl_peek_queued(&tg->service_queue.queued[rw])); - bps_wait = tg_dispatch_bps_time(tg, bio); - iops_wait = tg_dispatch_iops_time(tg, bio); + wait = tg_dispatch_bps_time(tg, bio); + if (wait != 0) + return wait; - return max(bps_wait, iops_wait); + /* + * Charge bps here because @bio will be directly placed into the + * iops queue afterward. + */ + throtl_charge_bps_bio(tg, bio); + + return tg_dispatch_iops_time(tg, bio); } /** @@ -964,7 +985,6 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw) bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put); sq->nr_queued[rw]--; - throtl_charge_bps_bio(tg, bio); throtl_charge_iops_bio(tg, bio); /* @@ -1692,7 +1712,6 @@ bool __blk_throtl_bio(struct bio *bio) while (true) { if (tg_within_limit(tg, bio, rw)) { /* within limits, let's charge and dispatch directly */ - throtl_charge_bps_bio(tg, bio); throtl_charge_iops_bio(tg, bio); /* diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 2c1cfe091531..f21a2a2bf755 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -29,7 +29,8 @@ */ struct throtl_qnode { struct list_head node; /* service_queue->queued[] */ - struct bio_list bios; /* queued bios */ + struct bio_list bios_bps; /* queued bios for bps limit */ + struct bio_list bios_iops; /* queued bios for iops limit */ struct throtl_grp *tg; /* tg this qnode belongs to */ }; -- 2.50.1