WRITE_ONCE(*ptr, res);                                  \
 } while (0)
 
+/*
+ * Remove and clamp on negative, from a local variable.
+ *
+ * A variant of sub_positive(), which does not use explicit load-store
+ * and is thus optimized for local variable updates.
+ */
+#define lsub_positive(_ptr, _val) do {                         \
+       typeof(_ptr) ptr = (_ptr);                              \
+       *ptr -= min_t(typeof(*ptr), *ptr, _val);                \
+} while (0)
+
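
A minimal user-space sketch of what the new helper does, for readers without the kernel tree at hand. The demo_min()/demo_lsub_positive() names are illustrative stand-ins for the kernel's min_t() and the lsub_positive() added above, not the kernel definitions themselves:

#include <stdio.h>

/* Illustrative stand-in for the kernel's min_t(); evaluates args twice,
 * which is fine for a demo. */
#define demo_min(type, x, y)  ((type)(x) < (type)(y) ? (type)(x) : (type)(y))

/* Re-creation of the lsub_positive() pattern: subtract, never go below 0. */
#define demo_lsub_positive(_ptr, _val) do {                     \
        __typeof__(_ptr) ptr = (_ptr);                          \
        *ptr -= demo_min(__typeof__(*ptr), *ptr, _val);         \
} while (0)

int main(void)
{
        unsigned long runtime = 100;

        /* Normal case: enough remains, plain subtraction. */
        demo_lsub_positive(&runtime, 40);
        printf("after removing 40:  %lu\n", runtime);   /* 60 */

        /* Underflow case: removing more than remains clamps to 0. */
        demo_lsub_positive(&runtime, 500);
        printf("after removing 500: %lu\n", runtime);   /* 0 */

        return 0;
}

Unlike sub_positive() (whose tail is visible at the top of this hunk), there is no READ_ONCE()/WRITE_ONCE() pair: the macro is meant for values that are not concurrently updated, so the explicit load-store is unnecessary.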
 #ifdef CONFIG_SMP
 static inline void
 enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
                cfs_b->distribute_running = 0;
                throttled = !list_empty(&cfs_b->throttled_cfs_rq);
 
-               cfs_b->runtime -= min(runtime, cfs_b->runtime);
+               lsub_positive(&cfs_b->runtime, runtime);
        }
 
        /*
 
        raw_spin_lock(&cfs_b->lock);
        if (expires == cfs_b->runtime_expires)
-               cfs_b->runtime -= min(runtime, cfs_b->runtime);
+               lsub_positive(&cfs_b->runtime, runtime);
        cfs_b->distribute_running = 0;
        raw_spin_unlock(&cfs_b->lock);
 }
        util = READ_ONCE(cfs_rq->avg.util_avg);
 
        /* Discount task's util from CPU's util */
-       util -= min_t(unsigned int, util, task_util(p));
+       lsub_positive(&util, task_util(p));
 
        /*
         * Covered cases:
                 * properly fix the execl regression and it helps in further
                 * reducing the chances for the above race.
                 */
-               if (unlikely(task_on_rq_queued(p) || current == p)) {
-                       estimated -= min_t(unsigned int, estimated,
-                                          _task_util_est(p));
-               }
+               if (unlikely(task_on_rq_queued(p) || current == p))
+                       lsub_positive(&estimated, _task_util_est(p));
+
                util = max(util, estimated);
        }
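
The clamp matters in these utilization paths because util, estimated and cfs_b->runtime are unsigned: a plain subtraction would wrap around whenever the removed amount exceeds the current value. A stand-alone sketch of that hazard, with made-up values and variable names chosen only for illustration:

#include <stdio.h>

int main(void)
{
        unsigned int util = 100;        /* pretend CPU utilization       */
        unsigned int task_util = 300;   /* pretend departing task's util */

        /* Plain subtraction wraps around on unsigned underflow ... */
        unsigned int wrapped = util - task_util;

        /* ... while the clamped form used above bottoms out at zero. */
        unsigned int clamped = util;
        clamped -= (task_util < clamped) ? task_util : clamped;

        printf("wrapped: %u\n", wrapped);   /* huge bogus value */
        printf("clamped: %u\n", clamped);   /* 0                */

        return 0;
}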