sched_ext: Implement auto local dispatching of migration disabled tasks
author     Tejun Heo <tj@kernel.org>
           Fri, 7 Feb 2025 20:58:23 +0000 (10:58 -1000)
committer  Tejun Heo <tj@kernel.org>
           Sun, 9 Feb 2025 06:32:54 +0000 (20:32 -1000)
Migration disabled tasks are special and pinned to their previous CPUs. They
tripped up some unsuspecting BPF schedulers as their ->nr_cpus_allowed may
not agree with the bits set in ->cpus_ptr. Make it easier for BPF schedulers
by automatically dispatching them to the pinned local DSQs by default. If a
BPF scheduler wants to handle migration disabled tasks explicitly, it can
set SCX_OPS_ENQ_MIGRATION_DISABLED.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Andrea Righi <arighi@nvidia.com>
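
For illustration, opting in just means setting the new flag in the scheduler's sched_ext_ops. Below is a minimal sketch, assuming the conventions of the in-tree tools/sched_ext examples (common.bpf.h, the BPF_STRUCT_OPS() helper and scx_bpf_dsq_insert()); the "sketch" names are made up for this example, and the actual handling of migration disabled tasks is sketched further down under the first hunk.

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

void BPF_STRUCT_OPS(sketch_enqueue, struct task_struct *p, u64 enq_flags)
{
	/*
	 * With SCX_OPS_ENQ_MIGRATION_DISABLED set below, migration disabled
	 * tasks are delivered here instead of being auto-dispatched to their
	 * CPU's local DSQ by the core.
	 */
	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

SEC(".struct_ops.link")
struct sched_ext_ops sketch_ops = {
	.enqueue	= (void *)sketch_enqueue,
	.flags		= SCX_OPS_ENQ_MIGRATION_DISABLED,
	.name		= "sketch",
};
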
kernel/sched/ext.c

index a6d6d6dadde51d6c552da4572cbbd1ff30eab7ee..efdbf4d85a2159980f8df71796c681b51ecf6585 100644 (file)
@@ -122,6 +122,19 @@ enum scx_ops_flags {
         */
        SCX_OPS_SWITCH_PARTIAL  = 1LLU << 3,
 
+       /*
+        * A migration disabled task can only execute on its current CPU. By
+        * default, such tasks are automatically put on the CPU's local DSQ with
+        * the default slice on enqueue. If this ops flag is set, they also go
+        * through ops.enqueue().
+        *
+        * A migration disabled task never invokes ops.select_cpu() as it can
+        * only select the current CPU. Also, p->cpus_ptr will only contain its
+        * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
+        * and thus may disagree with cpumask_weight(p->cpus_ptr).
+        */
+       SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4,
+
        /*
         * CPU cgroup support flags
         */
@@ -130,6 +143,7 @@ enum scx_ops_flags {
        SCX_OPS_ALL_FLAGS       = SCX_OPS_KEEP_BUILTIN_IDLE |
                                  SCX_OPS_ENQ_LAST |
                                  SCX_OPS_ENQ_EXITING |
+                                 SCX_OPS_ENQ_MIGRATION_DISABLED |
                                  SCX_OPS_SWITCH_PARTIAL |
                                  SCX_OPS_HAS_CGROUP_WEIGHT,
 };
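
The comment above also spells out the detection pitfall an opted-in scheduler has to keep in mind: while migration is disabled, p->cpus_ptr is narrowed to the current CPU but p->nr_cpus_allowed keeps following p->user_cpus_ptr, so a nr_cpus_allowed == 1 test alone will not catch these tasks. The following is a fuller, still hypothetical version of the enqueue callback from the sketch above; it checks task_struct's migration_disabled counter (present on CONFIG_SMP kernels) and pins such tasks to their CPU's local DSQ.

#include <scx/common.bpf.h>

/* Illustrative helper, not part of this patch. */
static __always_inline bool sketch_task_pinned(const struct task_struct *p)
{
	return p->migration_disabled || p->nr_cpus_allowed == 1;
}

void BPF_STRUCT_OPS(sketch_enqueue, struct task_struct *p, u64 enq_flags)
{
	if (sketch_task_pinned(p)) {
		/* The task may only run where it already is; queue it there. */
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | scx_bpf_task_cpu(p),
				   SCX_SLICE_DFL, enq_flags);
		return;
	}

	/* Everything else can be spread through a shared queue. */
	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}
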
@@ -882,6 +896,7 @@ static bool scx_warned_zero_slice;
 
 static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last);
 static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
+static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled);
 static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
 static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);
 
@@ -2014,6 +2029,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
            unlikely(p->flags & PF_EXITING))
                goto local;
 
+       /* see %SCX_OPS_ENQ_MIGRATION_DISABLED */
+       if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) &&
+           is_migration_disabled(p))
+               goto local;
+
        if (!SCX_HAS_OP(enqueue))
                goto global;
 
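
Rather than testing ops->flags on every enqueue, the flag's state is baked into the code with a static key that scx_ops_enable()/scx_ops_disable_workfn() flip (hunks below), the same pattern already used for scx_ops_enq_last and scx_ops_enq_exiting. A generic, self-contained sketch of that jump-label idiom, with illustrative names:

#include <linux/jump_label.h>

/* Defaults to false; the branch direction is patched into the code. */
static DEFINE_STATIC_KEY_FALSE(sketch_feature_enabled);

/* Hot path: no memory load of a flags word, just a patched branch. */
static bool sketch_feature_active(void)
{
	return static_branch_unlikely(&sketch_feature_enabled);
}

/* Slow path: runtime code patching, called when the feature is toggled. */
static void sketch_feature_set(bool on)
{
	if (on)
		static_branch_enable(&sketch_feature_enabled);
	else
		static_branch_disable(&sketch_feature_enabled);
}
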
@@ -5052,6 +5072,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
                static_branch_disable(&scx_has_op[i]);
        static_branch_disable(&scx_ops_enq_last);
        static_branch_disable(&scx_ops_enq_exiting);
+       static_branch_disable(&scx_ops_enq_migration_disabled);
        static_branch_disable(&scx_ops_cpu_preempt);
        static_branch_disable(&scx_builtin_idle_enabled);
        synchronize_rcu();
@@ -5661,6 +5682,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 
        if (ops->flags & SCX_OPS_ENQ_EXITING)
                static_branch_enable(&scx_ops_enq_exiting);
+       if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED)
+               static_branch_enable(&scx_ops_enq_migration_disabled);
        if (scx_ops.cpu_acquire || scx_ops.cpu_release)
                static_branch_enable(&scx_ops_cpu_preempt);
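
For completeness, the opt-in can also be made a load-time decision by editing the ops flags through the libbpf skeleton before the struct_ops map is loaded, much as some in-tree tools toggle SCX_OPS_SWITCH_PARTIAL from the command line. A hypothetical loader fragment follows (skeleton and map names invented for this sketch, error handling trimmed; real tools resolve the flag value from kernel BTF rather than hard-coding it):

#include <bpf/libbpf.h>
#include "sketch.bpf.skel.h"	/* hypothetical skeleton for the BPF sketch above */

#ifndef SCX_OPS_ENQ_MIGRATION_DISABLED
/* Value taken from the hunk above; in-tree tools resolve kernel enums from
 * BTF instead of hard-coding them. */
#define SCX_OPS_ENQ_MIGRATION_DISABLED	(1LLU << 4)
#endif

int main(void)
{
	struct sketch *skel;
	struct bpf_link *link;

	skel = sketch__open();
	if (!skel)
		return 1;

	/* Opt in before load; the kernel validates ops->flags at registration. */
	skel->struct_ops.sketch_ops->flags |= SCX_OPS_ENQ_MIGRATION_DISABLED;

	if (sketch__load(skel))
		return 1;

	/* Registering the struct_ops map starts the BPF scheduler. */
	link = bpf_map__attach_struct_ops(skel->maps.sketch_ops);
	if (!link)
		return 1;

	/* ... run until told to stop ... */

	bpf_link__destroy(link);
	sketch__destroy(skel);
	return 0;
}
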