www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
sched_ext: Fix lock imbalance in dispatch_to_local_dsq()
author Andrea Righi <arighi@nvidia.com>
Mon, 27 Jan 2025 22:06:16 +0000 (23:06 +0100)
committer Tejun Heo <tj@kernel.org>
Mon, 27 Jan 2025 22:41:12 +0000 (12:41 -1000)
While performing the rq locking dance in dispatch_to_local_dsq(), we may
trigger the following lock imbalance condition, in particular when
multiple tasks are rapidly changing CPU affinity (e.g., when running
`stress-ng --race-sched 0`):

[   13.413579] =====================================
[   13.413660] WARNING: bad unlock balance detected!
[   13.413729] 6.13.0-virtme #15 Not tainted
[   13.413792] -------------------------------------
[   13.413859] kworker/1:1/80 is trying to release lock (&rq->__lock) at:
[   13.413954] [<ffffffff873c6c48>] dispatch_to_local_dsq+0x108/0x1a0
[   13.414111] but there are no more locks to release!
[   13.414176]
[   13.414176] other info that might help us debug this:
[   13.414258] 1 lock held by kworker/1:1/80:
[   13.414318]  #0: ffff8b66feb41698 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x20/0x90
[   13.414612]
[   13.414612] stack backtrace:
[   13.415255] CPU: 1 UID: 0 PID: 80 Comm: kworker/1:1 Not tainted 6.13.0-virtme #15
[   13.415505] Workqueue:  0x0 (events)
[   13.415567] Sched_ext: dsp_local_on (enabled+all), task: runnable_at=-2ms
[   13.415570] Call Trace:
[   13.415700]  <TASK>
[   13.415744]  dump_stack_lvl+0x78/0xe0
[   13.415806]  ? dispatch_to_local_dsq+0x108/0x1a0
[   13.415884]  print_unlock_imbalance_bug+0x11b/0x130
[   13.415965]  ? dispatch_to_local_dsq+0x108/0x1a0
[   13.416226]  lock_release+0x231/0x2c0
[   13.416326]  _raw_spin_unlock+0x1b/0x40
[   13.416422]  dispatch_to_local_dsq+0x108/0x1a0
[   13.416554]  flush_dispatch_buf+0x199/0x1d0
[   13.416652]  balance_one+0x194/0x370
[   13.416751]  balance_scx+0x61/0x1e0
[   13.416848]  prev_balance+0x43/0xb0
[   13.416947]  __pick_next_task+0x6b/0x1b0
[   13.417052]  __schedule+0x20d/0x1740

This happens because dispatch_to_local_dsq() is racing with
dispatch_dequeue() and, when the latter wins, we incorrectly assume that
the task has been moved to dst_rq.

Fix by properly tracking the currently locked rq.

Fixes: 4d3ca89bdd31 ("sched_ext: Refactor consume_remote_task()")
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/sched/ext.c

index c7b159f4883436d4c69f48cb62aa4387f6a46239..a6d6d6dadde51d6c552da4572cbbd1ff30eab7ee 100644 (file)
@@ -2575,6 +2575,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
 {
        struct rq *src_rq = task_rq(p);
        struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
+#ifdef CONFIG_SMP
+       struct rq *locked_rq = rq;
+#endif
 
        /*
         * We're synchronized against dequeue through DISPATCHING. As @p can't
@@ -2611,8 +2614,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
        atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
 
        /* switch to @src_rq lock */
-       if (rq != src_rq) {
-               raw_spin_rq_unlock(rq);
+       if (locked_rq != src_rq) {
+               raw_spin_rq_unlock(locked_rq);
+               locked_rq = src_rq;
                raw_spin_rq_lock(src_rq);
        }
 
@@ -2630,6 +2634,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
                } else {
                        move_remote_task_to_local_dsq(p, enq_flags,
                                                      src_rq, dst_rq);
+                       /* task has been moved to dst_rq, which is now locked */
+                       locked_rq = dst_rq;
                }
 
                /* if the destination CPU is idle, wake it up */
@@ -2638,8 +2644,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
        }
 
        /* switch back to @rq lock */
-       if (rq != dst_rq) {
-               raw_spin_rq_unlock(dst_rq);
+       if (locked_rq != rq) {
+               raw_spin_rq_unlock(locked_rq);
                raw_spin_rq_lock(rq);
        }
 #else  /* CONFIG_SMP */