{
        tg->bytes_disp[rw] = 0;
        tg->io_disp[rw] = 0;
+       tg->carryover_bytes[rw] = 0;
+       tg->carryover_ios[rw] = 0;
 
        /*
         * Previous slice has expired. We must have trimmed it after last
                   tg->slice_end[rw], jiffies);
 }
 
-static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
+static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
+                                         bool clear_carryover)
 {
        tg->bytes_disp[rw] = 0;
        tg->io_disp[rw] = 0;
        tg->slice_start[rw] = jiffies;
        tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+       if (clear_carryover) {
+               tg->carryover_bytes[rw] = 0;
+               tg->carryover_ios[rw] = 0;
+       }
 
        throtl_log(&tg->service_queue,
                   "[%c] new slice start=%lu end=%lu jiffies=%lu",
        return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
 }
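
For reference, the helper above computes bytes_allowed = bps_limit * jiffy_elapsed / HZ with a 128-bit intermediate so the multiplication cannot overflow. A minimal standalone sketch of that formula (userspace C; HZ=250 is assumed purely for illustration, and __uint128_t stands in for the kernel's mul_u64_u64_div_u64()):

#include <stdio.h>
#include <stdint.h>

#define HZ 250  /* assumed tick rate, for illustration only */

/* mirrors calculate_bytes_allowed(): bps_limit * jiffy_elapsed / HZ */
static uint64_t bytes_allowed(uint64_t bps_limit, unsigned long jiffy_elapsed)
{
        return (uint64_t)(((__uint128_t)bps_limit * jiffy_elapsed) / HZ);
}

int main(void)
{
        /* 1 MiB/s for 125 jiffies (half a second at HZ=250) -> 512 KiB */
        printf("%llu\n", (unsigned long long)bytes_allowed(1048576, 125));
        return 0;
}
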
 
+static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
+{
+       unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
+       u64 bps_limit = tg_bps_limit(tg, rw);
+       u32 iops_limit = tg_iops_limit(tg, rw);
+
+       /*
+        * If the config is updated while bios are still throttled, calculate
+        * and accumulate how many bytes/ios have been waited for across the
+        * update. carryover_bytes/ios will then be used to calculate the wait
+        * time under the new configuration.
+        */
+       if (bps_limit != U64_MAX)
+               tg->carryover_bytes[rw] +=
+                       calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
+                       tg->bytes_disp[rw];
+       if (iops_limit != UINT_MAX)
+               tg->carryover_ios[rw] +=
+                       calculate_io_allowed(iops_limit, jiffy_elapsed) -
+                       tg->io_disp[rw];
+}
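
The accumulation above can be exercised in isolation. A standalone sketch (HZ=250 assumed; the signed long long mirrors the width of carryover_bytes): budget that was allowed under the old limit but not yet dispatched is credited forward, and if the group dispatched more than it was allowed, the carryover goes negative and is debited under the new limit.

#include <stdio.h>
#include <stdint.h>

#define HZ 250  /* assumed tick rate, for illustration only */

int main(void)
{
        uint64_t bps_limit = 1048576;      /* old limit: 1 MiB/s */
        unsigned long jiffy_elapsed = 125; /* time spent in the current slice */
        uint64_t bytes_disp = 100000;      /* bytes dispatched so far */
        long long carryover_bytes = 0;

        /* allowed-but-undispatched budget survives the config change */
        carryover_bytes +=
                (long long)(((__uint128_t)bps_limit * jiffy_elapsed) / HZ) -
                (long long)bytes_disp;

        printf("carryover_bytes = %lld\n", carryover_bytes); /* 424288 */
        return 0;
}
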
+
+static void tg_update_carryover(struct throtl_grp *tg)
+{
+       if (tg->service_queue.nr_queued[READ])
+               __tg_update_carryover(tg, READ);
+       if (tg->service_queue.nr_queued[WRITE])
+               __tg_update_carryover(tg, WRITE);
+
+       /* see comments in struct throtl_grp for the meaning of these fields. */
+       throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
+                  tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
+                  tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
+}
+
 static bool tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
                                 u32 iops_limit, unsigned long *wait)
 {
 
        /* Round up to the next throttle slice, wait time must be nonzero */
        jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
-       io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd);
+       io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
+                    tg->carryover_ios[rw];
        if (tg->io_disp[rw] + 1 <= io_allowed) {
                if (wait)
                        *wait = 0;
                jiffy_elapsed_rnd = tg->td->throtl_slice;
 
        jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
-       bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
+       bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+                       tg->carryover_bytes[rw];
        if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
                if (wait)
                        *wait = 0;
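
The way the carried budget shifts this admission check can be sketched with a hypothetical within_bps_limit() helper (HZ=250 assumed; the real comparison lives in tg_within_bps_limit()): a positive carryover admits a bio that the per-slice budget alone would have throttled.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HZ 250  /* assumed tick rate, for illustration only */

/* hypothetical stand-in for tg_within_bps_limit()'s budget comparison */
static bool within_bps_limit(uint64_t bps_limit, unsigned long jiffy_elapsed_rnd,
                             uint64_t bytes_disp, long long carryover_bytes,
                             unsigned int bio_size)
{
        long long bytes_allowed =
                (long long)(((__uint128_t)bps_limit * jiffy_elapsed_rnd) / HZ) +
                carryover_bytes;

        return (long long)(bytes_disp + bio_size) <= bytes_allowed;
}

int main(void)
{
        /* 128 KiB bio vs. 25 jiffies of budget at 1 MiB/s (~104857 bytes) */
        printf("%d\n", within_bps_limit(1048576, 25, 0, 0, 131072));      /* 0 */
        /* the same bio passes once 128 KiB of carryover is credited */
        printf("%d\n", within_bps_limit(1048576, 25, 0, 131072, 131072)); /* 1 */
        return 0;
}
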
         * slice and it should be extended instead.
         */
        if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
-               throtl_start_new_slice(tg, rw);
+               throtl_start_new_slice(tg, rw, true);
        else {
                if (time_before(tg->slice_end[rw],
                    jiffies + tg->td->throtl_slice))
         * that a group's limits are dropped suddenly and we don't want to
         * account recently dispatched IO with the new low rate.
         */
-       throtl_start_new_slice(tg, READ);
-       throtl_start_new_slice(tg, WRITE);
+       throtl_start_new_slice(tg, READ, false);
+       throtl_start_new_slice(tg, WRITE, false);
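
A short standalone sketch of why clear_carryover is false on this path (HZ=250 assumed): the slice counters restart under the new limits, but the budget a queued bio already earned while waiting is preserved, so it is not re-throttled from scratch at a lower rate.

#include <stdio.h>
#include <stdint.h>

#define HZ 250  /* assumed tick rate, for illustration only */

int main(void)
{
        uint64_t old_bps = 1048576;  /* 1 MiB/s before the update */
        unsigned long elapsed = 125; /* jiffies the bio already waited */
        uint64_t disp = 0;           /* nothing dispatched yet */
        long long carryover = 0;

        /* tg_update_carryover(): settle the budget under the old limit */
        carryover += (long long)(old_bps * elapsed / HZ) - (long long)disp;

        /* throtl_start_new_slice(..., false): counters reset, budget kept */
        printf("budget carried into the new slice: %lld bytes\n", carryover);
        return 0;
}
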
 
        if (tg->flags & THROTL_TG_PENDING) {
                tg_update_disptime(tg);
                v = U64_MAX;
 
        tg = blkg_to_tg(ctx.blkg);
+       tg_update_carryover(tg);
 
        if (is_u64)
                *(u64 *)((void *)tg + of_cft(of)->private) = v;
                return ret;
 
        tg = blkg_to_tg(ctx.blkg);
+       tg_update_carryover(tg);
 
        v[0] = tg->bps_conf[READ][index];
        v[1] = tg->bps_conf[WRITE][index];