/* see __propagate_weights() and current_hweight() for details */
        u64                             child_active_sum;
        u64                             child_inuse_sum;
+       u64                             child_adjusted_sum;
        int                             hweight_gen;
        u32                             hweight_active;
        u32                             hweight_inuse;
+       u32                             hweight_donating;
        u32                             hweight_after_donation;
 
        struct list_head                walk_list;
        return usage;
 }
 
+/*
+ * For work-conservation, an iocg which isn't using all of its share should
+ * donate the leftover to other iocgs. There are two ways to achieve this - 1.
+ * bumping up vrate accordingly 2. lowering the donating iocg's inuse weight.
+ *
+ * #1 is mathematically simpler but has the drawback of requiring synchronous
+ * global hweight_inuse updates when idle iocg's get activated or inuse weights
+ * change due to donation snapbacks as it has the possibility of grossly
+ * overshooting what's allowed by the model and vrate.
+ *
+ * #2 is inherently safe with local operations. The donating iocg can easily
+ * snap back to higher weights when needed without worrying about impacts on
+ * other nodes as the impacts will be inherently correct. This also makes idle
+ * iocg activations safe. The only effect activations have is decreasing
+ * hweight_inuse of others, the right solution to which is for those iocgs to
+ * snap back to higher weights.
+ *
+ * So, we go with #2. The challenge is calculating how each donating iocg's
+ * inuse should be adjusted to achieve the target donation amounts. This is done
+ * using Andy's method described in the following pdf.
+ *
+ *   https://drive.google.com/file/d/1PsJwxPFtjUnwOY1QJ5AeICCcsL7BM3bo
+ *
+ * Given the weights and target after-donation hweight_inuse values, Andy's
+ * method determines how the proportional distribution should look like at each
+ * sibling level to maintain the relative relationship between all non-donating
+ * pairs. To roughly summarize, it divides the tree into donating and
+ * non-donating parts, calculates global donation rate which is used to
+ * determine the target hweight_inuse for each node, and then derives per-level
+ * proportions.
+ *
+ * The following pdf shows that global distribution calculated this way can be
+ * achieved by scaling inuse weights of donating leaves and propagating the
+ * adjustments upwards proportionally.
+ *
+ *   https://drive.google.com/file/d/1vONz1-fzVO7oY5DXXsLjSxEtYYQbOvsE
+ *
+ * Combining the above two, we can determine how each leaf iocg's inuse should
+ * be adjusted to achieve the target donation.
+ *
+ *   https://drive.google.com/file/d/1WcrltBOSPN0qXVdBgnKm4mdp9FhuEFQN
+ *
+ * The inline comments use symbols from the last pdf.
+ *
+ *   b is the sum of the absolute budgets in the subtree. 1 for the root node.
+ *   f is the sum of the absolute budgets of non-donating nodes in the subtree.
+ *   t is the sum of the absolute budgets of donating nodes in the subtree.
+ *   w is the weight of the node. w = w_f + w_t
+ *   w_f is the non-donating portion of w. w_f = w * f / b
+ *   w_b is the donating portion of w. w_t = w * t / b
+ *   s is the sum of all sibling weights. s = Sum(w) for siblings
+ *   s_f and s_t are the non-donating and donating portions of s.
+ *
+ * Subscript p denotes the parent's counterpart and ' the adjusted value - e.g.
+ * w_pt is the donating portion of the parent's weight and w'_pt the same value
+ * after adjustments. Subscript r denotes the root node's values.
+ */
 static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
 {
-       struct ioc_gq *iocg;
+       LIST_HEAD(over_hwa);
+       LIST_HEAD(inner_walk);
+       struct ioc_gq *iocg, *tiocg, *root_iocg;
+       u32 after_sum, over_sum, over_target, gamma;
+
+       /*
+        * It's pretty unlikely but possible for the total sum of
+        * hweight_after_donation's to be higher than WEIGHT_ONE, which will
+        * confuse the following calculations. If such condition is detected,
+        * scale down everyone over its full share equally to keep the sum below
+        * WEIGHT_ONE.
+        */
+       after_sum = 0;
+       over_sum = 0;
+       list_for_each_entry(iocg, surpluses, surplus_list) {
+               u32 hwa;
+
+               current_hweight(iocg, &hwa, NULL);
+               after_sum += iocg->hweight_after_donation;
+
+               if (iocg->hweight_after_donation > hwa) {
+                       over_sum += iocg->hweight_after_donation;
+                       list_add(&iocg->walk_list, &over_hwa);
+               }
+       }
+
+       if (after_sum >= WEIGHT_ONE) {
+               /*
+                * The delta should be deducted from the over_sum, calculate
+                * target over_sum value.
+                */
+               u32 over_delta = after_sum - (WEIGHT_ONE - 1);
+               WARN_ON_ONCE(over_sum <= over_delta);
+               over_target = over_sum - over_delta;
+       } else {
+               over_target = 0;
+       }
+
+       list_for_each_entry_safe(iocg, tiocg, &over_hwa, walk_list) {
+               if (over_target)
+                       iocg->hweight_after_donation =
+                               div_u64((u64)iocg->hweight_after_donation *
+                                       over_target, over_sum);
+               list_del_init(&iocg->walk_list);
+       }
+
+       /*
+        * Build pre-order inner node walk list and prepare for donation
+        * adjustment calculations.
+        */
+       list_for_each_entry(iocg, surpluses, surplus_list) {
+               iocg_build_inner_walk(iocg, &inner_walk);
+       }
+
+       root_iocg = list_first_entry(&inner_walk, struct ioc_gq, walk_list);
+       WARN_ON_ONCE(root_iocg->level > 0);
 
+       list_for_each_entry(iocg, &inner_walk, walk_list) {
+               iocg->child_adjusted_sum = 0;
+               iocg->hweight_donating = 0;
+               iocg->hweight_after_donation = 0;
+       }
+
+       /*
+        * Propagate the donating budget (b_t) and after donation budget (b'_t)
+        * up the hierarchy.
+        */
        list_for_each_entry(iocg, surpluses, surplus_list) {
-               u32 old_hwi, new_hwi, new_inuse;
+               struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
+
+               parent->hweight_donating += iocg->hweight_donating;
+               parent->hweight_after_donation += iocg->hweight_after_donation;
+       }
 
-               current_hweight(iocg, NULL, &old_hwi);
-               new_hwi = iocg->hweight_after_donation;
+       list_for_each_entry_reverse(iocg, &inner_walk, walk_list) {
+               if (iocg->level > 0) {
+                       struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
 
-               new_inuse = DIV64_U64_ROUND_UP((u64)iocg->inuse * new_hwi,
-                                              old_hwi);
-               __propagate_weights(iocg, iocg->weight, new_inuse);
+                       parent->hweight_donating += iocg->hweight_donating;
+                       parent->hweight_after_donation += iocg->hweight_after_donation;
+               }
+       }
+
+       /*
+        * Calculate inner hwa's (b) and make sure the donation values are
+        * within the accepted ranges as we're doing low res calculations with
+        * roundups.
+        */
+       list_for_each_entry(iocg, &inner_walk, walk_list) {
+               if (iocg->level) {
+                       struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
+
+                       iocg->hweight_active = DIV64_U64_ROUND_UP(
+                               (u64)parent->hweight_active * iocg->active,
+                               parent->child_active_sum);
+
+               }
+
+               iocg->hweight_donating = min(iocg->hweight_donating,
+                                            iocg->hweight_active);
+               iocg->hweight_after_donation = min(iocg->hweight_after_donation,
+                                                  iocg->hweight_donating - 1);
+               if (WARN_ON_ONCE(iocg->hweight_active <= 1 ||
+                                iocg->hweight_donating <= 1 ||
+                                iocg->hweight_after_donation == 0)) {
+                       pr_warn("iocg: invalid donation weights in ");
+                       pr_cont_cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup);
+                       pr_cont(": active=%u donating=%u after=%u\n",
+                               iocg->hweight_active, iocg->hweight_donating,
+                               iocg->hweight_after_donation);
+               }
        }
+
+       /*
+        * Calculate the global donation rate (gamma) - the rate to adjust
+        * non-donating budgets by. No need to use 64bit multiplication here as
+        * the first operand is guaranteed to be smaller than WEIGHT_ONE
+        * (1<<16).
+        *
+        * gamma = (1 - t_r') / (1 - t_r)
+        */
+       gamma = DIV_ROUND_UP(
+               (WEIGHT_ONE - root_iocg->hweight_after_donation) * WEIGHT_ONE,
+               WEIGHT_ONE - root_iocg->hweight_donating);
+
+       /*
+        * Calculate adjusted hwi, child_adjusted_sum and inuse for the inner
+        * nodes.
+        */
+       list_for_each_entry(iocg, &inner_walk, walk_list) {
+               struct ioc_gq *parent;
+               u32 inuse, wpt, wptp;
+               u64 st, sf;
+
+               if (iocg->level == 0) {
+                       /* adjusted weight sum for 1st level: s' = s * b_pf / b'_pf */
+                       iocg->child_adjusted_sum = DIV64_U64_ROUND_UP(
+                               iocg->child_active_sum * (WEIGHT_ONE - iocg->hweight_donating),
+                               WEIGHT_ONE - iocg->hweight_after_donation);
+                       continue;
+               }
+
+               parent = iocg->ancestors[iocg->level - 1];
+
+               /* b' = gamma * b_f + b_t' */
+               iocg->hweight_inuse = DIV64_U64_ROUND_UP(
+                       (u64)gamma * (iocg->hweight_active - iocg->hweight_donating),
+                       WEIGHT_ONE) + iocg->hweight_after_donation;
+
+               /* w' = s' * b' / b'_p */
+               inuse = DIV64_U64_ROUND_UP(
+                       (u64)parent->child_adjusted_sum * iocg->hweight_inuse,
+                       parent->hweight_inuse);
+
+               /* adjusted weight sum for children: s' = s_f + s_t * w'_pt / w_pt */
+               st = DIV64_U64_ROUND_UP(
+                       iocg->child_active_sum * iocg->hweight_donating,
+                       iocg->hweight_active);
+               sf = iocg->child_active_sum - st;
+               wpt = DIV64_U64_ROUND_UP(
+                       (u64)iocg->active * iocg->hweight_donating,
+                       iocg->hweight_active);
+               wptp = DIV64_U64_ROUND_UP(
+                       (u64)inuse * iocg->hweight_after_donation,
+                       iocg->hweight_inuse);
+
+               iocg->child_adjusted_sum = sf + DIV64_U64_ROUND_UP(st * wptp, wpt);
+       }
+
+       /*
+        * All inner nodes now have ->hweight_inuse and ->child_adjusted_sum and
+        * we can finally determine leaf adjustments.
+        */
+       list_for_each_entry(iocg, surpluses, surplus_list) {
+               struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
+               u32 inuse;
+
+               /* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */
+               inuse = DIV64_U64_ROUND_UP(
+                       parent->child_adjusted_sum * iocg->hweight_after_donation,
+                       parent->hweight_inuse);
+               __propagate_weights(iocg, iocg->active, inuse);
+       }
+
+       /* walk list should be dissolved after use */
+       list_for_each_entry_safe(iocg, tiocg, &inner_walk, walk_list)
+               list_del_init(&iocg->walk_list);
 }
 
 static void ioc_timer_fn(struct timer_list *timer)
                if (hw_inuse < hw_active ||
                    (!waitqueue_active(&iocg->waitq) &&
                     time_before64(vtime, now.vnow - ioc->margins.max))) {
-                       u32 hwm, new_hwi;
+                       u32 hwa, hwm, new_hwi;
 
                        /*
                         * Already donating or accumulated enough to start.
                         * Determine the donation amount.
                         */
+                       current_hweight(iocg, &hwa, NULL);
                        hwm = current_hweight_max(iocg);
                        new_hwi = hweight_after_donation(iocg, hwm, usage,
                                                         &now);
                        if (new_hwi < hwm) {
+                               iocg->hweight_donating = hwa;
                                iocg->hweight_after_donation = new_hwi;
                                list_add(&iocg->surplus_list, &surpluses);
                        } else {