sched_ext: Move global_dsqs into scx_sched
author     Tejun Heo <tj@kernel.org>
           Tue, 29 Apr 2025 18:40:10 +0000 (08:40 -1000)
committer  Tejun Heo <tj@kernel.org>
           Tue, 29 Apr 2025 18:40:10 +0000 (08:40 -1000)
Global DSQs are going to become per-scheduler-instance. Move global_dsqs
into scx_sched. find_global_dsq() already takes a task_struct pointer as an
argument and should later be able to determine the scx_sched to use from it.
For now, assume scx_root.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Acked-by: Changwoo Min <changwoo@igalia.com>
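
For context, the indirection this patch sets up works as follows: the global
DSQ is an array with one entry per NUMA node, now hung off the scheduler
instance instead of a file-scope static. A minimal sketch of the lookup
(illustrative only; global_dsq_for_cpu() is a hypothetical helper, while
scx_sched, scx_dispatch_q, and cpu_to_node() are the kernel's):

	/*
	 * Pick the global DSQ local to @cpu's NUMA node. Splitting the
	 * global DSQ per node keeps CPUs on different nodes off the same
	 * DSQ lock, which matters in bypass mode where every CPU consumes
	 * from the global DSQ.
	 */
	static struct scx_dispatch_q *global_dsq_for_cpu(struct scx_sched *sch,
							 int cpu)
	{
		return sch->global_dsqs[cpu_to_node(cpu)];
	}

find_global_dsq() in the diff below is exactly this lookup, keyed off
task_cpu(p) and, for now, hard-wired to scx_root.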
kernel/sched/ext.c

index 62ec2d1b195e1ea1f0dc5440b89023510f1c036d..c6aa5fcf0f7db00a4a53cfdce0f0e48d1507b455 100644
@@ -770,7 +770,17 @@ struct scx_sched {
        struct sched_ext_ops    ops;
        DECLARE_BITMAP(has_op, SCX_OPI_END);
 
+       /*
+        * Dispatch queues.
+        *
+        * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+        * This is to avoid live-locking in bypass mode where all tasks are
+        * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+        * per-node split isn't sufficient, it can be further split.
+        */
        struct rhashtable       dsq_hash;
+       struct scx_dispatch_q   **global_dsqs;
+
        bool                    warned_zero_slice;
 
        atomic_t                exit_kind;
@@ -1001,16 +1011,6 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
  */
 static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
 
-/*
- * Dispatch queues.
- *
- * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
- * to avoid live-locking in bypass mode where all tasks are dispatched to
- * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
- * sufficient, it can be further split.
- */
-static struct scx_dispatch_q **global_dsqs;
-
 static const struct rhashtable_params dsq_hash_params = {
        .key_len                = sizeof_field(struct scx_dispatch_q, id),
        .key_offset             = offsetof(struct scx_dispatch_q, id),
@@ -1111,7 +1111,9 @@ static bool u32_before(u32 a, u32 b)
 
 static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
 {
-       return global_dsqs[cpu_to_node(task_cpu(p))];
+       struct scx_sched *sch = scx_root;
+
+       return sch->global_dsqs[cpu_to_node(task_cpu(p))];
 }
 
 static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2788,11 +2790,11 @@ retry:
        return false;
 }
 
-static bool consume_global_dsq(struct rq *rq)
+static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
 {
        int node = cpu_to_node(cpu_of(rq));
 
-       return consume_dispatch_q(rq, global_dsqs[node]);
+       return consume_dispatch_q(rq, sch->global_dsqs[node]);
 }
 
 /**
@@ -3038,7 +3040,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
        if (rq->scx.local_dsq.nr)
                goto has_tasks;
 
-       if (consume_global_dsq(rq))
+       if (consume_global_dsq(sch, rq))
                goto has_tasks;
 
        if (unlikely(!SCX_HAS_OP(scx_root, dispatch)) ||
@@ -3068,7 +3070,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
                }
                if (rq->scx.local_dsq.nr)
                        goto has_tasks;
-               if (consume_global_dsq(rq))
+               if (consume_global_dsq(sch, rq))
                        goto has_tasks;
 
                /*
@@ -4409,6 +4411,11 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
        struct scx_sched *sch = container_of(rcu_work, struct scx_sched, rcu_work);
        struct rhashtable_iter rht_iter;
        struct scx_dispatch_q *dsq;
+       int node;
+
+       for_each_node_state(node, N_POSSIBLE)
+               kfree(sch->global_dsqs[node]);
+       kfree(sch->global_dsqs);
 
        rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
        do {
@@ -5247,7 +5254,7 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
 static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 {
        struct scx_sched *sch;
-       int ret;
+       int node, ret;
 
        sch = kzalloc(sizeof(*sch), GFP_KERNEL);
        if (!sch)
@@ -5263,6 +5270,26 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
        if (ret < 0)
                goto err_free_ei;
 
+       sch->global_dsqs = kcalloc(nr_node_ids, sizeof(sch->global_dsqs[0]),
+                                  GFP_KERNEL);
+       if (!sch->global_dsqs) {
+               ret = -ENOMEM;
+               goto err_free_hash;
+       }
+
+       for_each_node_state(node, N_POSSIBLE) {
+               struct scx_dispatch_q *dsq;
+
+               dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
+               if (!dsq) {
+                       ret = -ENOMEM;
+                       goto err_free_gdsqs;
+               }
+
+               init_dsq(dsq, SCX_DSQ_GLOBAL);
+               sch->global_dsqs[node] = dsq;
+       }
+
        atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
        sch->ops = *ops;
        ops->priv = sch;
@@ -5270,10 +5297,14 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
        sch->kobj.kset = scx_kset;
        ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
        if (ret < 0)
-               goto err_free_hash;
+               goto err_free_gdsqs;
 
        return sch;
 
+err_free_gdsqs:
+       for_each_node_state(node, N_POSSIBLE)
+               kfree(sch->global_dsqs[node]);
+       kfree(sch->global_dsqs);
 err_free_hash:
        rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
 err_free_ei:
@@ -5335,7 +5366,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
        struct scx_task_iter sti;
        struct task_struct *p;
        unsigned long timeout;
-       int i, cpu, node, ret;
+       int i, cpu, ret;
 
        if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
                           cpu_possible_mask)) {
@@ -5362,34 +5393,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
                }
        }
 
-       if (!global_dsqs) {
-               struct scx_dispatch_q **dsqs;
-
-               dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
-               if (!dsqs) {
-                       ret = -ENOMEM;
-                       goto err_unlock;
-               }
-
-               for_each_node_state(node, N_POSSIBLE) {
-                       struct scx_dispatch_q *dsq;
-
-                       dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
-                       if (!dsq) {
-                               for_each_node_state(node, N_POSSIBLE)
-                                       kfree(dsqs[node]);
-                               kfree(dsqs);
-                               ret = -ENOMEM;
-                               goto err_unlock;
-                       }
-
-                       init_dsq(dsq, SCX_DSQ_GLOBAL);
-                       dsqs[node] = dsq;
-               }
-
-               global_dsqs = dsqs;
-       }
-
        if (scx_enable_state() != SCX_DISABLED) {
                ret = -EBUSY;
                goto err_unlock;
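
The allocation moved into scx_alloc_and_add_sched() follows the standard
allocate-then-unwind pattern: kcalloc() the per-node pointer array, allocate
one node-local DSQ per possible node, and free everything already allocated
on any failure. A userspace analog of the same pattern (a sketch under
assumed names; plain calloc()/free() stand in for kcalloc()/kzalloc_node()/
kfree(), and a flat node count stands in for nr_node_ids):

	#include <stdlib.h>

	struct dsq { unsigned long long id; };

	/*
	 * Allocate one dsq per "node". On failure, free whatever was
	 * already allocated and return NULL, mirroring the err_free_gdsqs
	 * unwind in the patch.
	 */
	static struct dsq **alloc_global_dsqs(int nr_nodes)
	{
		struct dsq **dsqs = calloc(nr_nodes, sizeof(dsqs[0]));
		int node;

		if (!dsqs)
			return NULL;

		for (node = 0; node < nr_nodes; node++) {
			dsqs[node] = calloc(1, sizeof(*dsqs[node]));
			if (!dsqs[node]) {
				while (node-- > 0)
					free(dsqs[node]);
				free(dsqs);
				return NULL;
			}
		}
		return dsqs;
	}

Note that the kernel version can unwind with an unconditional
for_each_node_state() loop over every possible node because the array came
from kcalloc() and is zero-filled, and kfree(NULL) is a no-op.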