From: Tejun Heo
Date: Tue, 29 Apr 2025 18:40:10 +0000 (-1000)
Subject: sched_ext: Move global_dsqs into scx_sched
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=8409b800a0b1f143be4eba38ad67d4c1d403519e;p=users%2Fjedix%2Flinux-maple.git

sched_ext: Move global_dsqs into scx_sched

Global DSQs are going to become per scheduler instance. Move global_dsqs
into scx_sched. find_global_dsq() already takes a task_struct pointer as
an argument and should later be able to determine the scx_sched to use
from that. For now, assume scx_root.

Signed-off-by: Tejun Heo
Reviewed-by: Andrea Righi
Acked-by: Changwoo Min
---

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 62ec2d1b195e1..c6aa5fcf0f7db 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -770,7 +770,17 @@ struct scx_sched {
 	struct sched_ext_ops	ops;
 	DECLARE_BITMAP(has_op, SCX_OPI_END);
 
+	/*
+	 * Dispatch queues.
+	 *
+	 * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+	 * This is to avoid live-locking in bypass mode where all tasks are
+	 * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+	 * per-node split isn't sufficient, it can be further split.
+	 */
 	struct rhashtable	dsq_hash;
+	struct scx_dispatch_q	**global_dsqs;
+
 	bool			warned_zero_slice;
 
 	atomic_t		exit_kind;
@@ -1001,16 +1011,6 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
  */
 static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
 
-/*
- * Dispatch queues.
- *
- * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
- * to avoid live-locking in bypass mode where all tasks are dispatched to
- * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
- * sufficient, it can be further split.
- */
-static struct scx_dispatch_q **global_dsqs;
-
 static const struct rhashtable_params dsq_hash_params = {
 	.key_len		= sizeof_field(struct scx_dispatch_q, id),
 	.key_offset		= offsetof(struct scx_dispatch_q, id),
@@ -1111,7 +1111,9 @@ static bool u32_before(u32 a, u32 b)
 
 static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
 {
-	return global_dsqs[cpu_to_node(task_cpu(p))];
+	struct scx_sched *sch = scx_root;
+
+	return sch->global_dsqs[cpu_to_node(task_cpu(p))];
 }
 
 static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2788,11 +2790,11 @@ retry:
 	return false;
 }
 
-static bool consume_global_dsq(struct rq *rq)
+static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
 {
 	int node = cpu_to_node(cpu_of(rq));
 
-	return consume_dispatch_q(rq, global_dsqs[node]);
+	return consume_dispatch_q(rq, sch->global_dsqs[node]);
 }
 
 /**
@@ -3038,7 +3040,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	if (rq->scx.local_dsq.nr)
 		goto has_tasks;
 
-	if (consume_global_dsq(rq))
+	if (consume_global_dsq(sch, rq))
 		goto has_tasks;
 
 	if (unlikely(!SCX_HAS_OP(scx_root, dispatch)) ||
@@ -3068,7 +3070,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	}
 	if (rq->scx.local_dsq.nr)
 		goto has_tasks;
-	if (consume_global_dsq(rq))
+	if (consume_global_dsq(sch, rq))
 		goto has_tasks;
 
 	/*
@@ -4409,6 +4411,11 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
 	struct scx_sched *sch = container_of(rcu_work, struct scx_sched, rcu_work);
 	struct rhashtable_iter rht_iter;
 	struct scx_dispatch_q *dsq;
+	int node;
+
+	for_each_node_state(node, N_POSSIBLE)
+		kfree(sch->global_dsqs[node]);
+	kfree(sch->global_dsqs);
 
 	rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
 	do {
@@ -5247,7 +5254,7 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
 static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 {
 	struct scx_sched *sch;
-	int ret;
+	int node, ret;
 
 	sch = kzalloc(sizeof(*sch), GFP_KERNEL);
 	if (!sch)
@@ -5263,6 +5270,26 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	if (ret < 0)
 		goto err_free_ei;
 
+	sch->global_dsqs = kcalloc(nr_node_ids, sizeof(sch->global_dsqs[0]),
+				   GFP_KERNEL);
+	if (!sch->global_dsqs) {
+		ret = -ENOMEM;
+		goto err_free_hash;
+	}
+
+	for_each_node_state(node, N_POSSIBLE) {
+		struct scx_dispatch_q *dsq;
+
+		dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
+		if (!dsq) {
+			ret = -ENOMEM;
+			goto err_free_gdsqs;
+		}
+
+		init_dsq(dsq, SCX_DSQ_GLOBAL);
+		sch->global_dsqs[node] = dsq;
+	}
+
 	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
 	sch->ops = *ops;
 	ops->priv = sch;
@@ -5270,10 +5297,14 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 	sch->kobj.kset = scx_kset;
 	ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
 	if (ret < 0)
-		goto err_free_hash;
+		goto err_free_gdsqs;
 
 	return sch;
 
+err_free_gdsqs:
+	for_each_node_state(node, N_POSSIBLE)
+		kfree(sch->global_dsqs[node]);
+	kfree(sch->global_dsqs);
 err_free_hash:
 	rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
 err_free_ei:
@@ -5335,7 +5366,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	struct scx_task_iter sti;
 	struct task_struct *p;
 	unsigned long timeout;
-	int i, cpu, node, ret;
+	int i, cpu, ret;
 
 	if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
 			   cpu_possible_mask)) {
@@ -5362,34 +5393,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		}
 	}
 
-	if (!global_dsqs) {
-		struct scx_dispatch_q **dsqs;
-
-		dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
-		if (!dsqs) {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-
-		for_each_node_state(node, N_POSSIBLE) {
-			struct scx_dispatch_q *dsq;
-
-			dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
-			if (!dsq) {
-				for_each_node_state(node, N_POSSIBLE)
-					kfree(dsqs[node]);
-				kfree(dsqs);
-				ret = -ENOMEM;
-				goto err_unlock;
-			}
-
-			init_dsq(dsq, SCX_DSQ_GLOBAL);
-			dsqs[node] = dsq;
-		}
-
-		global_dsqs = dsqs;
-	}
-
 	if (scx_enable_state() != SCX_DISABLED) {
 		ret = -EBUSY;
 		goto err_unlock;
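
Note: the commit message says find_global_dsq() should later be able to determine
the scx_sched to use from the task it is passed instead of assuming scx_root. A
rough sketch of that direction is below; it is not part of this patch, and the
p->scx.sched back-pointer is a hypothetical field used only for illustration:

	/* Hypothetical follow-up sketch, not part of this patch. */
	static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
	{
		/* p->scx.sched is an assumed per-task back-pointer, not an existing field */
		struct scx_sched *sch = p->scx.sched ?: scx_root;

		/* global DSQs stay split per NUMA node for scalability */
		return sch->global_dsqs[cpu_to_node(task_cpu(p))];
	}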