From 29b49be6c97ef45e709c7d7676ecaa5dfdd0f1b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 4 Apr 2025 10:10:30 -1000 Subject: [PATCH 01/16] sched_ext: Drop "ops" from SCX_OPS_TASK_ITER_BATCH The tag "ops" is used for two different purposes. First, to indicate that the entity is directly related to the operations such as flags carried in sched_ext_ops. Second, to indicate that the entity applies to something global such as enable or bypass states. The second usage is historical and causes confusion rather than clarifying anything. For example, scx_ops_enable_state enums are named SCX_OPS_* and thus conflict with scx_ops_flags. Let's drop the second usages. Drop "ops" from SCX_OPS_TASK_ITER_BATCH. Signed-off-by: Tejun Heo Suggested-and-acked-by: Andrea Righi --- kernel/sched/ext.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d15b45b3eb71..6781e6da059b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -26,7 +26,7 @@ enum scx_consts { * Iterating all tasks may take a while. Periodically drop * scx_tasks_lock to avoid causing e.g. CSD and RCU stalls. */ - SCX_OPS_TASK_ITER_BATCH = 32, + SCX_TASK_ITER_BATCH = 32, }; enum scx_exit_kind { @@ -1401,15 +1401,15 @@ static void scx_task_iter_stop(struct scx_task_iter *iter) * @iter: iterator to walk * * Visit the next task. See scx_task_iter_start() for details. Locks are dropped - * and re-acquired every %SCX_OPS_TASK_ITER_BATCH iterations to avoid causing - * stalls by holding scx_tasks_lock for too long. + * and re-acquired every %SCX_TASK_ITER_BATCH iterations to avoid causing stalls + * by holding scx_tasks_lock for too long. */ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter) { struct list_head *cursor = &iter->cursor.tasks_node; struct sched_ext_entity *pos; - if (!(++iter->cnt % SCX_OPS_TASK_ITER_BATCH)) { + if (!(++iter->cnt % SCX_TASK_ITER_BATCH)) { scx_task_iter_unlock(iter); cond_resched(); scx_task_iter_relock(iter); -- 2.51.0 From 29f512f555ec16446525c11aa7422ae09236ab32 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 5 Apr 2025 15:39:21 +0200 Subject: [PATCH 02/16] sched_ext: idle: Extend topology optimizations to all tasks The built-in idle selection policy, scx_select_cpu_dfl(), always prioritizes picking idle CPUs within the same LLC or NUMA node, but these optimizations are currently applied only when a task has no CPU affinity constraints. This is done primarily for efficiency, as it avoids the overhead of updating a cpumask every time we need to select an idle CPU (which can be costly in large SMP systems). However, this approach limits the effectiveness of the built-in idle policy and results in inconsistent behavior, as affinity-restricted tasks don't benefit from topology-aware optimizations. To address this, modify the policy to apply LLC and NUMA-aware optimizations even when a task is constrained to a subset of CPUs. We can still avoid updating the cpumasks by checking if the subset of LLC and node CPUs are contained in the subset of allowed CPUs usable by the task (which is true in most of the cases - for tasks that don't have affinity constratints). Moreover, use temporary local per-CPU cpumasks to determine the LLC and node subsets, minimizing potential overhead even on large SMP systems. 
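In sketch form, the LLC-level logic becomes the following (the NUMA case is analogous; pick_llc_mask() is only an illustrative helper name, the authoritative version is the scx_select_cpu_dfl() hunk below):

    /*
     * Unrestricted tasks use the LLC cpumask as-is; restricted tasks
     * intersect it into a preallocated per-CPU scratch cpumask, so no
     * allocation happens in the wakeup path.
     */
    static const struct cpumask *pick_llc_mask(struct task_struct *p,
                                               const struct cpumask *llc_mask,
                                               struct cpumask *scratch)
    {
            if (p->nr_cpus_allowed >= num_possible_cpus())
                    return llc_mask;
            if (llc_mask && cpumask_and(scratch, p->cpus_ptr, llc_mask))
                    return scratch;
            return NULL;    /* no overlap: fall back to the flat path */
    }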
Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 73 ++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index cce2746f9f34..ed37fb8e4518 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -46,6 +46,12 @@ static struct scx_idle_cpus scx_idle_global_masks; */ static struct scx_idle_cpus **scx_idle_node_masks; +/* + * Local per-CPU cpumasks (used to generate temporary idle cpumasks). + */ +static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask); +static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask); + /* * Return the idle masks associated to a target @node. * @@ -391,6 +397,14 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) static_branch_disable_cpuslocked(&scx_selcpu_topo_numa); } +/* + * Return true if @p can run on all possible CPUs, false otherwise. + */ +static inline bool task_affinity_all(const struct task_struct *p) +{ + return p->nr_cpus_allowed >= num_possible_cpus(); +} + /* * Built-in CPU idle selection policy: * @@ -426,8 +440,7 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) */ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags) { - const struct cpumask *llc_cpus = NULL; - const struct cpumask *numa_cpus = NULL; + const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL; int node = scx_cpu_node_if_enabled(prev_cpu); s32 cpu; @@ -437,22 +450,30 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 rcu_read_lock(); /* - * Determine the scheduling domain only if the task is allowed to run - * on all CPUs. + * Determine the subset of CPUs that the task can use in its + * current LLC and node. * - * This is done primarily for efficiency, as it avoids the overhead of - * updating a cpumask every time we need to select an idle CPU (which - * can be costly in large SMP systems), but it also aligns logically: - * if a task's scheduling domain is restricted by user-space (through - * CPU affinity), the task will simply use the flat scheduling domain - * defined by user-space. + * If the task can run on all CPUs, use the node and LLC cpumasks + * directly. 
*/ - if (p->nr_cpus_allowed >= num_possible_cpus()) { - if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) - numa_cpus = numa_span(prev_cpu); + if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) { + struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_numa_idle_cpumask); + const struct cpumask *cpus = numa_span(prev_cpu); + + if (task_affinity_all(p)) + numa_cpus = cpus; + else if (cpus && cpumask_and(local_cpus, p->cpus_ptr, cpus)) + numa_cpus = local_cpus; + } - if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) - llc_cpus = llc_span(prev_cpu); + if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) { + struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_llc_idle_cpumask); + const struct cpumask *cpus = llc_span(prev_cpu); + + if (task_affinity_all(p)) + llc_cpus = cpus; + else if (cpus && cpumask_and(local_cpus, p->cpus_ptr, cpus)) + llc_cpus = local_cpus; } /* @@ -596,7 +617,7 @@ out_unlock: */ void scx_idle_init_masks(void) { - int node; + int i; /* Allocate global idle cpumasks */ BUG_ON(!alloc_cpumask_var(&scx_idle_global_masks.cpu, GFP_KERNEL)); @@ -607,13 +628,21 @@ void scx_idle_init_masks(void) sizeof(*scx_idle_node_masks), GFP_KERNEL); BUG_ON(!scx_idle_node_masks); - for_each_node(node) { - scx_idle_node_masks[node] = kzalloc_node(sizeof(**scx_idle_node_masks), - GFP_KERNEL, node); - BUG_ON(!scx_idle_node_masks[node]); + for_each_node(i) { + scx_idle_node_masks[i] = kzalloc_node(sizeof(**scx_idle_node_masks), + GFP_KERNEL, i); + BUG_ON(!scx_idle_node_masks[i]); + + BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[i]->cpu, GFP_KERNEL, i)); + BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[i]->smt, GFP_KERNEL, i)); + } - BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[node]->cpu, GFP_KERNEL, node)); - BUG_ON(!alloc_cpumask_var_node(&scx_idle_node_masks[node]->smt, GFP_KERNEL, node)); + /* Allocate local per-cpu idle cpumasks */ + for_each_possible_cpu(i) { + BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i), + GFP_KERNEL, cpu_to_node(i))); + BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i), + GFP_KERNEL, cpu_to_node(i))); } } -- 2.51.0 From 23c63a965275ce5d6268075bbfe7ce8b6ffe9a35 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 5 Apr 2025 15:39:22 +0200 Subject: [PATCH 03/16] sched_ext: idle: Explicitly pass allowed cpumask to scx_select_cpu_dfl() Modify scx_select_cpu_dfl() to take the allowed cpumask as an explicit argument, instead of implicitly using @p->cpus_ptr. This prepares for future changes where arbitrary cpumasks may be passed to the built-in idle CPU selection policy. This is a pure refactoring with no functional changes. 
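The calling convention after this change, in sketch form (allowed_mask is only a placeholder for whatever cpumask a later patch may pass):

    /* NULL preserves the current behavior of using @p->cpus_ptr: */
    cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, NULL, 0);

    /* future callers can restrict the search to an arbitrary mask: */
    cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, allowed_mask, 0);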
Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- kernel/sched/ext_idle.c | 20 +++++++++++--------- kernel/sched/ext_idle.h | 3 ++- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6781e6da059b..ac3fd7a409e9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3392,7 +3392,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag } else { s32 cpu; - cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0); + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, NULL, 0); if (cpu >= 0) { p->scx.slice = SCX_SLICE_DFL; p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL; diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index ed37fb8e4518..5d6253c6ed90 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -438,9 +438,11 @@ static inline bool task_affinity_all(const struct task_struct *p) * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because * we never call ops.select_cpu() for them, see select_task_rq(). */ -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags) +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + const struct cpumask *cpus_allowed, u64 flags) { const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL; + const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr; int node = scx_cpu_node_if_enabled(prev_cpu); s32 cpu; @@ -460,9 +462,9 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_numa_idle_cpumask); const struct cpumask *cpus = numa_span(prev_cpu); - if (task_affinity_all(p)) + if (allowed == p->cpus_ptr && task_affinity_all(p)) numa_cpus = cpus; - else if (cpus && cpumask_and(local_cpus, p->cpus_ptr, cpus)) + else if (cpus && cpumask_and(local_cpus, allowed, cpus)) numa_cpus = local_cpus; } @@ -470,9 +472,9 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_llc_idle_cpumask); const struct cpumask *cpus = llc_span(prev_cpu); - if (task_affinity_all(p)) + if (allowed == p->cpus_ptr && task_affinity_all(p)) llc_cpus = cpus; - else if (cpus && cpumask_and(local_cpus, p->cpus_ptr, cpus)) + else if (cpus && cpumask_and(local_cpus, allowed, cpus)) llc_cpus = local_cpus; } @@ -511,7 +513,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 cpu_rq(cpu)->scx.local_dsq.nr == 0 && (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) && !cpumask_empty(idle_cpumask(waker_node)->cpu)) { - if (cpumask_test_cpu(cpu, p->cpus_ptr)) + if (cpumask_test_cpu(cpu, allowed)) goto out_unlock; } } @@ -556,7 +558,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 * begin in prev_cpu's node and proceed to other nodes in * order of increasing distance. */ - cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE); + cpu = scx_pick_idle_cpu(allowed, node, flags | SCX_PICK_IDLE_CORE); if (cpu >= 0) goto out_unlock; @@ -604,7 +606,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 * in prev_cpu's node and proceed to other nodes in order of * increasing distance. 
*/ - cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags); + cpu = scx_pick_idle_cpu(allowed, node, flags); out_unlock: rcu_read_unlock(); @@ -858,7 +860,7 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, goto prev_cpu; #ifdef CONFIG_SMP - cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0); + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, NULL, 0); if (cpu >= 0) { *is_idle = true; return cpu; diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h index 511cc2221f7a..37be78a7502b 100644 --- a/kernel/sched/ext_idle.h +++ b/kernel/sched/ext_idle.h @@ -27,7 +27,8 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node } #endif /* CONFIG_SMP */ -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags); +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + const struct cpumask *cpus_allowed, u64 flags); void scx_idle_enable(struct sched_ext_ops *ops); void scx_idle_disable(void); int scx_idle_init(void); -- 2.51.0 From c2d8b2a57cd461f99eb8ebfaf08cded812d4396f Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 5 Apr 2025 15:39:23 +0200 Subject: [PATCH 04/16] sched_ext: idle: Accept an arbitrary cpumask in scx_select_cpu_dfl() Many scx schedulers implement their own hard or soft-affinity rules to support topology characteristics, such as heterogeneous architectures (e.g., big.LITTLE, P-cores/E-cores), or to categorize tasks based on specific properties (e.g., running certain tasks only in a subset of CPUs). Currently, there is no mechanism that allows to use the built-in idle CPU selection policy to an arbitrary subset of CPUs. As a result, schedulers often implement their own idle CPU selection policies, which are typically similar to one another, leading to a lot of code duplication. To address this, modify scx_select_cpu_dfl() to accept an arbitrary cpumask, that can be used by the BPF schedulers to apply the existent built-in idle CPU selection policy to a subset of allowed CPUs. With this concept the idle CPU selection policy becomes the following: - always prioritize CPUs from fully idle SMT cores (if SMT is enabled), - select the same CPU if it's idle and in the allowed CPUs, - select an idle CPU within the same LLC, if the LLC cpumask is a subset of the allowed CPUs, - select an idle CPU within the same node, if the node cpumask is a subset of the allowed CPUs, - select an idle CPU within the allowed CPUs. This functionality will be exposed through a dedicated kfunc in a separate patch. Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext_idle.c | 48 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 5d6253c6ed90..f39b34fbb8a6 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -49,6 +49,7 @@ static struct scx_idle_cpus **scx_idle_node_masks; /* * Local per-CPU cpumasks (used to generate temporary idle cpumasks). */ +static DEFINE_PER_CPU(cpumask_var_t, local_idle_cpumask); static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask); static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask); @@ -417,13 +418,15 @@ static inline bool task_affinity_all(const struct task_struct *p) * branch prediction optimizations. * * 3. Pick a CPU within the same LLC (Last-Level Cache): - * - if the above conditions aren't met, pick a CPU that shares the same LLC - * to maintain cache locality. 
+ * - if the above conditions aren't met, pick a CPU that shares the same + * LLC, if the LLC domain is a subset of @cpus_allowed, to maintain + * cache locality. * * 4. Pick a CPU within the same NUMA node, if enabled: - * - choose a CPU from the same NUMA node to reduce memory access latency. + * - choose a CPU from the same NUMA node, if the node cpumask is a + * subset of @cpus_allowed, to reduce memory access latency. * - * 5. Pick any idle CPU usable by the task. + * 5. Pick any idle CPU within the @cpus_allowed domain. * * Step 3 and 4 are performed only if the system has, respectively, * multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and @@ -446,6 +449,39 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, int node = scx_cpu_node_if_enabled(prev_cpu); s32 cpu; + preempt_disable(); + + /* + * Determine the subset of CPUs usable by @p within @cpus_allowed. + */ + if (allowed != p->cpus_ptr) { + struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_idle_cpumask); + + if (task_affinity_all(p)) { + allowed = cpus_allowed; + } else if (cpumask_and(local_cpus, cpus_allowed, p->cpus_ptr)) { + allowed = local_cpus; + } else { + cpu = -EBUSY; + goto out_enable; + } + + /* + * If @prev_cpu is not in the allowed CPUs, skip topology + * optimizations and try to pick any idle CPU usable by the + * task. + * + * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize + * the current node, as it may optimize some waker->wakee + * workloads. + */ + if (!cpumask_test_cpu(prev_cpu, allowed)) { + node = scx_cpu_node_if_enabled(smp_processor_id()); + cpu = scx_pick_idle_cpu(allowed, node, flags); + goto out_enable; + } + } + /* * This is necessary to protect llc_cpus. */ @@ -610,6 +646,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, out_unlock: rcu_read_unlock(); +out_enable: + preempt_enable(); return cpu; } @@ -641,6 +679,8 @@ void scx_idle_init_masks(void) /* Allocate local per-cpu idle cpumasks */ for_each_possible_cpu(i) { + BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_idle_cpumask, i), + GFP_KERNEL, cpu_to_node(i))); BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i), GFP_KERNEL, cpu_to_node(i))); BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i), -- 2.51.0 From 683d2d0faba12a0e7d4c3b85a62ac8298977e17b Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 5 Apr 2025 15:39:24 +0200 Subject: [PATCH 05/16] sched_ext: idle: Introduce scx_bpf_select_cpu_and() Provide a new kfunc, scx_bpf_select_cpu_and(), that can be used to apply the built-in idle CPU selection policy to a subset of allowed CPU. This new helper is basically an extension of scx_bpf_select_cpu_dfl(). However, when an idle CPU can't be found, it returns a negative value instead of @prev_cpu, aligning its behavior more closely with scx_bpf_pick_idle_cpu(). It also accepts %SCX_PICK_IDLE_* flags, which can be used to enforce strict selection to @prev_cpu's node (%SCX_PICK_IDLE_IN_NODE), or to request only a full-idle SMT core (%SCX_PICK_IDLE_CORE), while applying the built-in selection logic. With this helper, BPF schedulers can apply the built-in idle CPU selection policy restricted to any arbitrary subset of CPUs. 
Example usage ============= Possible usage in ops.select_cpu(): s32 BPF_STRUCT_OPS(foo_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { const struct cpumask *cpus = task_allowed_cpus(p) ?: p->cpus_ptr; s32 cpu; cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, cpus, 0); if (cpu >= 0) { scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); return cpu; } return prev_cpu; } Results ======= Load distribution on a 4 sockets, 4 cores per socket system, simulated using virtme-ng, running a modified version of scx_bpfland that uses scx_bpf_select_cpu_and() with 0xff00 as the allowed subset of CPUs: $ vng --cpu 16,sockets=4,cores=4,threads=1 ... $ stress-ng -c 16 ... $ htop ... 0[ 0.0%] 8[||||||||||||||||||||||||100.0%] 1[ 0.0%] 9[||||||||||||||||||||||||100.0%] 2[ 0.0%] 10[||||||||||||||||||||||||100.0%] 3[ 0.0%] 11[||||||||||||||||||||||||100.0%] 4[ 0.0%] 12[||||||||||||||||||||||||100.0%] 5[ 0.0%] 13[||||||||||||||||||||||||100.0%] 6[ 0.0%] 14[||||||||||||||||||||||||100.0%] 7[ 0.0%] 15[||||||||||||||||||||||||100.0%] With scx_bpf_select_cpu_dfl() tasks would be distributed evenly across all the available CPUs. Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 1 + kernel/sched/ext_idle.c | 55 ++++++++++++++++++++++++ tools/sched_ext/include/scx/common.bpf.h | 2 + 3 files changed, 58 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ac3fd7a409e9..eae16108f8d6 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -465,6 +465,7 @@ struct sched_ext_ops { * idle CPU tracking and the following helpers become unavailable: * * - scx_bpf_select_cpu_dfl() + * - scx_bpf_select_cpu_and() * - scx_bpf_test_and_clear_cpu_idle() * - scx_bpf_pick_idle_cpu() * diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index f39b34fbb8a6..023ae6df5e8c 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -912,6 +912,60 @@ prev_cpu: return prev_cpu; } +/** + * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p, + * prioritizing those in @cpus_allowed + * @p: task_struct to select a CPU for + * @prev_cpu: CPU @p was on previously + * @wake_flags: %SCX_WAKE_* flags + * @cpus_allowed: cpumask of allowed CPUs + * @flags: %SCX_PICK_IDLE* flags + * + * Can only be called from ops.select_cpu() or ops.enqueue() if the + * built-in CPU selection is enabled: ops.update_idle() is missing or + * %SCX_OPS_KEEP_BUILTIN_IDLE is set. + * + * @p, @prev_cpu and @wake_flags match ops.select_cpu(). + * + * Returns the selected idle CPU, which will be automatically awakened upon + * returning from ops.select_cpu() and can be used for direct dispatch, or + * a negative value if no idle CPU is available. + */ +__bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + const struct cpumask *cpus_allowed, u64 flags) +{ + s32 cpu; + + if (!ops_cpu_valid(prev_cpu, NULL)) + return -EINVAL; + + if (!check_builtin_idle_enabled()) + return -EBUSY; + + if (!scx_kf_allowed(SCX_KF_SELECT_CPU | SCX_KF_ENQUEUE)) + return -EPERM; + +#ifdef CONFIG_SMP + /* + * This may also be called from ops.enqueue(), so we need to handle + * per-CPU tasks as well. For these tasks, we can skip all idle CPU + * selection optimizations and simply check whether the previously + * used CPU is idle and within the allowed cpumask. 
+ */ + if (p->nr_cpus_allowed == 1) { + if (cpumask_test_cpu(prev_cpu, cpus_allowed) && + scx_idle_test_and_clear_cpu(prev_cpu)) + return prev_cpu; + return -EBUSY; + } + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, cpus_allowed, flags); +#else + cpu = -EBUSY; +#endif + + return cpu; +} + /** * scx_bpf_get_idle_cpumask_node - Get a referenced kptr to the * idle-tracking per-CPU cpumask of a target NUMA node. @@ -1220,6 +1274,7 @@ static const struct btf_kfunc_id_set scx_kfunc_set_idle = { BTF_KFUNCS_START(scx_kfunc_ids_select_cpu) BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_select_cpu) static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = { diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 8787048c6762..d4e21558e982 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -48,6 +48,8 @@ static inline void ___vmlinux_h_sanity_check___(void) s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; +s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + const struct cpumask *cpus_allowed, u64 flags) __ksym __weak; void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; -- 2.51.0 From 01d541baedd74530831c8c98b946622cb7487f30 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Sat, 5 Apr 2025 15:39:25 +0200 Subject: [PATCH 06/16] selftests/sched_ext: Add test for scx_bpf_select_cpu_and() Add a selftest to validate the behavior of the built-in idle CPU selection policy applied to a subset of allowed CPUs, using scx_bpf_select_cpu_and(). Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/Makefile | 1 + .../selftests/sched_ext/allowed_cpus.bpf.c | 121 ++++++++++++++++++ .../selftests/sched_ext/allowed_cpus.c | 57 +++++++++ 3 files changed, 179 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/allowed_cpus.bpf.c create mode 100644 tools/testing/selftests/sched_ext/allowed_cpus.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index f4531327b8e7..e9d5bc575f80 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -173,6 +173,7 @@ auto-test-targets := \ maybe_null \ minimal \ numa \ + allowed_cpus \ prog_run \ reload_loop \ select_cpu_dfl \ diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c new file mode 100644 index 000000000000..39d57f7f7409 --- /dev/null +++ b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that validates the behavior of scx_bpf_select_cpu_and() by + * selecting idle CPUs strictly within a subset of allowed CPUs. 
+ * + * Copyright (c) 2025 Andrea Righi + */ + +#include + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +private(PREF_CPUS) struct bpf_cpumask __kptr * allowed_cpumask; + +static void +validate_idle_cpu(const struct task_struct *p, const struct cpumask *allowed, s32 cpu) +{ + if (scx_bpf_test_and_clear_cpu_idle(cpu)) + scx_bpf_error("CPU %d should be marked as busy", cpu); + + if (bpf_cpumask_subset(allowed, p->cpus_ptr) && + !bpf_cpumask_test_cpu(cpu, allowed)) + scx_bpf_error("CPU %d not in the allowed domain for %d (%s)", + cpu, p->pid, p->comm); +} + +s32 BPF_STRUCT_OPS(allowed_cpus_select_cpu, + struct task_struct *p, s32 prev_cpu, u64 wake_flags) +{ + const struct cpumask *allowed; + s32 cpu; + + allowed = cast_mask(allowed_cpumask); + if (!allowed) { + scx_bpf_error("allowed domain not initialized"); + return -EINVAL; + } + + /* + * Select an idle CPU strictly within the allowed domain. + */ + cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, allowed, 0); + if (cpu >= 0) { + validate_idle_cpu(p, allowed, cpu); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + + return cpu; + } + + return prev_cpu; +} + +void BPF_STRUCT_OPS(allowed_cpus_enqueue, struct task_struct *p, u64 enq_flags) +{ + const struct cpumask *allowed; + s32 prev_cpu = scx_bpf_task_cpu(p), cpu; + + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + + allowed = cast_mask(allowed_cpumask); + if (!allowed) { + scx_bpf_error("allowed domain not initialized"); + return; + } + + /* + * Use scx_bpf_select_cpu_and() to proactively kick an idle CPU + * within @allowed_cpumask, usable by @p. + */ + cpu = scx_bpf_select_cpu_and(p, prev_cpu, 0, allowed, 0); + if (cpu >= 0) { + validate_idle_cpu(p, allowed, cpu); + scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); + } +} + +s32 BPF_STRUCT_OPS_SLEEPABLE(allowed_cpus_init) +{ + struct bpf_cpumask *mask; + + mask = bpf_cpumask_create(); + if (!mask) + return -ENOMEM; + + mask = bpf_kptr_xchg(&allowed_cpumask, mask); + if (mask) + bpf_cpumask_release(mask); + + bpf_rcu_read_lock(); + + /* + * Assign the first online CPU to the allowed domain. 
+ */ + mask = allowed_cpumask; + if (mask) { + const struct cpumask *online = scx_bpf_get_online_cpumask(); + + bpf_cpumask_set_cpu(bpf_cpumask_first(online), mask); + scx_bpf_put_cpumask(online); + } + + bpf_rcu_read_unlock(); + + return 0; +} + +void BPF_STRUCT_OPS(allowed_cpus_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops allowed_cpus_ops = { + .select_cpu = (void *)allowed_cpus_select_cpu, + .enqueue = (void *)allowed_cpus_enqueue, + .init = (void *)allowed_cpus_init, + .exit = (void *)allowed_cpus_exit, + .name = "allowed_cpus", +}; diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.c b/tools/testing/selftests/sched_ext/allowed_cpus.c new file mode 100644 index 000000000000..a001a3a0e9f1 --- /dev/null +++ b/tools/testing/selftests/sched_ext/allowed_cpus.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Andrea Righi + */ +#include +#include +#include +#include +#include "allowed_cpus.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status setup(void **ctx) +{ + struct allowed_cpus *skel; + + skel = allowed_cpus__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(allowed_cpus__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static enum scx_test_status run(void *ctx) +{ + struct allowed_cpus *skel = ctx; + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(skel->maps.allowed_cpus_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + /* Just sleeping is fine, plenty of scheduling events happening */ + sleep(1); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct allowed_cpus *skel = ctx; + + allowed_cpus__destroy(skel); +} + +struct scx_test allowed_cpus = { + .name = "allowed_cpus", + .description = "Verify scx_bpf_select_cpu_and()", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&allowed_cpus) -- 2.51.0 From 47068309b5777313b6ac84a77d8d10dc7312260a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Apr 2025 09:50:42 -0700 Subject: [PATCH 07/16] sched_ext: Use kvzalloc for large exit_dump allocation Replace kzalloc with kvzalloc for the exit_dump buffer allocation, which can require large contiguous memory depending on the implementation. This change prevents allocation failures by allowing the system to fall back to vmalloc when contiguous memory allocation fails. Since this buffer is only used for debugging purposes, physical memory contiguity is not required, making vmalloc a suitable alternative. 
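In sketch form, this is the allocation pattern the patch switches to (dump_len stands in for the actual buffer size used in ext.c):

    /* try kmalloc first, transparently fall back to vmalloc: */
    char *dump = kvzalloc(dump_len, GFP_KERNEL);

    if (!dump)
            return -ENOMEM;
    /* ... fill the buffer with debug output ... */
    kvfree(dump);    /* frees both kmalloc'd and vmalloc'd buffers */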
Cc: stable@vger.kernel.org Fixes: 07814a9439a3b0 ("sched_ext: Print debug dump after an error exit") Suggested-by: Rik van Riel Signed-off-by: Breno Leitao Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 66bcd40a28ca..db9af6a3c04f 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4623,7 +4623,7 @@ unlock: static void free_exit_info(struct scx_exit_info *ei) { - kfree(ei->dump); + kvfree(ei->dump); kfree(ei->msg); kfree(ei->bt); kfree(ei); @@ -4639,7 +4639,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len) ei->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(ei->bt[0]), GFP_KERNEL); ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL); - ei->dump = kzalloc(exit_dump_len, GFP_KERNEL); + ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL); if (!ei->bt || !ei->msg || !ei->dump) { free_exit_info(ei); -- 2.51.0 From e776b26e3701945e0d20a11dc30ecd4da98e8d67 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Apr 2025 13:20:04 -1000 Subject: [PATCH 08/16] sched_ext: Remove cpu.weight / cpu.idle unimplemented warnings sched_ext generates warnings when cpu.weight / cpu.idle are set to non-default values if the BPF scheduler doesn't implement weight support. These warnings don't provide much value while adding constant annoyance. A BPF scheduler may not implement any particular behavior and there's nothing particularly special about missing cgroup weight support. Drop the warnings. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index db9af6a3c04f..21eaf081d336 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3899,35 +3899,6 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; -static bool cgroup_warned_missing_weight; -static bool cgroup_warned_missing_idle; - -static void scx_cgroup_warn_missing_weight(struct task_group *tg) -{ - if (scx_ops_enable_state() == SCX_OPS_DISABLED || - cgroup_warned_missing_weight) - return; - - if ((scx_ops.flags & SCX_OPS_HAS_CGROUP_WEIGHT) || !tg->css.parent) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.weight\n", - scx_ops.name); - cgroup_warned_missing_weight = true; -} - -static void scx_cgroup_warn_missing_idle(struct task_group *tg) -{ - if (!scx_cgroup_enabled || cgroup_warned_missing_idle) - return; - - if (!tg->idle) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.idle\n", - scx_ops.name); - cgroup_warned_missing_idle = true; -} int scx_tg_online(struct task_group *tg) { @@ -3937,8 +3908,6 @@ int scx_tg_online(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_weight(tg); - if (scx_cgroup_enabled) { if (SCX_HAS_OP(cgroup_init)) { struct scx_cgroup_init_args args = @@ -4076,9 +4045,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) void scx_group_set_idle(struct task_group *tg, bool idle) { - percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_idle(tg); - percpu_up_read(&scx_cgroup_rwsem); + /* TODO: Implement ops->cgroup_set_idle() */ } static void scx_cgroup_lock(void) @@ -4272,9 +4239,6 @@ static int scx_cgroup_init(void) percpu_rwsem_assert_held(&scx_cgroup_rwsem); - cgroup_warned_missing_weight = false; - cgroup_warned_missing_idle = false; - /* * scx_tg_on/offline() 
are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. @@ -4284,9 +4248,6 @@ static int scx_cgroup_init(void) struct task_group *tg = css_tg(css); struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - scx_cgroup_warn_missing_weight(tg); - scx_cgroup_warn_missing_idle(tg); - if ((tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) continue; -- 2.51.0 From bc08b15b54b8aadbc8a8f413271c07a3f4bead87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Apr 2025 08:31:12 -1000 Subject: [PATCH 09/16] sched_ext: Mark SCX_OPS_HAS_CGROUP_WEIGHT for deprecation SCX_OPS_HAS_CGROUP_WEIGHT was only used to suppress the missing cgroup weight support warnings. Now that the warnings are removed, the flag doesn't do anything. Mark it for deprecation and remove its usage from scx_flatcg. v2: Actually include the scx_flatcg update. Signed-off-by: Tejun Heo Suggested-and-reviewed-by: Andrea Righi --- kernel/sched/ext.c | 5 ++++- tools/sched_ext/scx_flatcg.bpf.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 21eaf081d336..fdbf249d1c68 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -163,7 +163,7 @@ enum scx_ops_flags { /* * CPU cgroup support flags */ - SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */ + SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | @@ -5213,6 +5213,9 @@ static int validate_ops(const struct sched_ext_ops *ops) return -EINVAL; } + if (ops->flags & SCX_OPS_HAS_CGROUP_WEIGHT) + pr_warn("SCX_OPS_HAS_CGROUP_WEIGHT is deprecated and a noop\n"); + return 0; } diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad5..fdc7170639e6 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); -- 2.51.0 From d75ee2d6781f4c04e56a467d88f710a758ad8b81 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2025 09:05:59 -1000 Subject: [PATCH 10/16] sched_ext: Indentation updates Purely cosmetic. 
Signed-off-by: Tejun Heo Acked-by: Changwoo Min Acked-by: Andrea Righi --- kernel/sched/ext.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 3cc5d7b5e3a9..022343518f99 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -163,16 +163,16 @@ enum scx_ops_flags { /* * CPU cgroup support flags */ - SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ - - SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | - SCX_OPS_ENQ_LAST | - SCX_OPS_ENQ_EXITING | - SCX_OPS_ENQ_MIGRATION_DISABLED | - SCX_OPS_ALLOW_QUEUED_WAKEUP | - SCX_OPS_SWITCH_PARTIAL | - SCX_OPS_BUILTIN_IDLE_PER_NODE | - SCX_OPS_HAS_CGROUP_WEIGHT, + SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ + + SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | + SCX_OPS_ENQ_LAST | + SCX_OPS_ENQ_EXITING | + SCX_OPS_ENQ_MIGRATION_DISABLED | + SCX_OPS_ALLOW_QUEUED_WAKEUP | + SCX_OPS_SWITCH_PARTIAL | + SCX_OPS_BUILTIN_IDLE_PER_NODE | + SCX_OPS_HAS_CGROUP_WEIGHT, }; /* argument container for ops.init_task() */ -- 2.51.0 From cc39454c341e02b929e565f8a37cfe9b18c7a0a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2025 09:05:59 -1000 Subject: [PATCH 11/16] sched_ext: Remove scx_ops_enq_* static_keys scx_ops_enq_last/exiting/migration_disabled are used to encode the corresponding SCX_OPS_ flags into static_keys. These flags aren't hot enough for static_key usage to make any meaningful difference and are made static_keys mostly because there was no reason not to. However, global static_keys can't work with the planned hierarchical multiple scheduler support. Remove the static_keys and test the ops flags directly. In repeated hackbench runs before and after static_keys removal on an AMD Ryzen 3900X, I couldn't tell any measurable performance difference. Signed-off-by: Tejun Heo Acked-by: Changwoo Min Acked-by: Andrea Righi --- kernel/sched/ext.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 022343518f99..1e685e77b5e4 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -924,9 +924,6 @@ static struct sched_ext_ops scx_ops; static bool scx_warned_zero_slice; DEFINE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup); -static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last); -static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting); -static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled); static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt); static struct static_key_false scx_has_op[SCX_OPI_END] = @@ -2144,14 +2141,14 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, goto direct; /* see %SCX_OPS_ENQ_EXITING */ - if (!static_branch_unlikely(&scx_ops_enq_exiting) && + if (!(scx_ops.flags & SCX_OPS_ENQ_EXITING) && unlikely(p->flags & PF_EXITING)) { __scx_add_event(SCX_EV_ENQ_SKIP_EXITING, 1); goto local; } /* see %SCX_OPS_ENQ_MIGRATION_DISABLED */ - if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) && + if (!(scx_ops.flags & SCX_OPS_ENQ_MIGRATION_DISABLED) && is_migration_disabled(p)) { __scx_add_event(SCX_EV_ENQ_SKIP_MIGRATION_DISABLED, 1); goto local; @@ -3022,8 +3019,8 @@ no_tasks: * Didn't find another task to run. Keep running @prev unless * %SCX_OPS_ENQ_LAST is in effect. 
*/ - if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) || - scx_rq_bypassing(rq))) { + if (prev_on_rq && + (!(scx_ops.flags & SCX_OPS_ENQ_LAST) || scx_rq_bypassing(rq))) { rq->scx.flags |= SCX_RQ_BAL_KEEP; __scx_add_event(SCX_EV_DISPATCH_KEEP_LAST, 1); goto has_tasks; @@ -3228,7 +3225,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, * which should trigger an explicit follow-up scheduling event. */ if (sched_class_above(&ext_sched_class, next->sched_class)) { - WARN_ON_ONCE(!static_branch_unlikely(&scx_ops_enq_last)); + WARN_ON_ONCE(!(scx_ops.flags & SCX_OPS_ENQ_LAST)); do_enqueue_task(rq, p, SCX_ENQ_LAST, -1); } else { do_enqueue_task(rq, p, 0, -1); @@ -4728,9 +4725,6 @@ static void scx_disable_workfn(struct kthread_work *work) for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) static_branch_disable(&scx_has_op[i]); static_branch_disable(&scx_ops_allow_queued_wakeup); - static_branch_disable(&scx_ops_enq_last); - static_branch_disable(&scx_ops_enq_exiting); - static_branch_disable(&scx_ops_enq_migration_disabled); static_branch_disable(&scx_ops_cpu_preempt); scx_idle_disable(); synchronize_rcu(); @@ -5372,12 +5366,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (ops->flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) static_branch_enable(&scx_ops_allow_queued_wakeup); - if (ops->flags & SCX_OPS_ENQ_LAST) - static_branch_enable(&scx_ops_enq_last); - if (ops->flags & SCX_OPS_ENQ_EXITING) - static_branch_enable(&scx_ops_enq_exiting); - if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED) - static_branch_enable(&scx_ops_enq_migration_disabled); if (scx_ops.cpu_acquire || scx_ops.cpu_release) static_branch_enable(&scx_ops_cpu_preempt); -- 2.51.0 From 54d2e717bc5f419b111915adfdec7ecc1ca8cf90 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2025 09:06:00 -1000 Subject: [PATCH 12/16] sched_ext: Remove scx_ops_cpu_preempt static_key scx_ops_cpu_preempt is used to encode whether ops.cpu_acquire/release() are implemented into a static_key. These tests aren't hot enough for static_key usage to make any meaningful difference and are made to use a static_key mostly because there was no reason not to. However, global static_keys can't work with the planned hierarchical multiple scheduler support. Remove the static_key and instead use an internal ops flag SCX_OPS_HAS_CPU_PREEMPT to record and test whether ops.cpu_acquire/release() are implemented. In repeated hackbench runs before and after static_keys removal on an AMD Ryzen 3900X, I couldn't tell any measurable performance difference. Signed-off-by: Tejun Heo Acked-by: Changwoo Min Acked-by: Andrea Righi --- kernel/sched/ext.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1e685e77b5e4..1adf5c299cce 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -173,6 +173,11 @@ enum scx_ops_flags { SCX_OPS_SWITCH_PARTIAL | SCX_OPS_BUILTIN_IDLE_PER_NODE | SCX_OPS_HAS_CGROUP_WEIGHT, + + /* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */ + __SCX_OPS_INTERNAL_MASK = 0xffLLU << 56, + + SCX_OPS_HAS_CPU_PREEMPT = 1LLU << 56, }; /* argument container for ops.init_task() */ @@ -924,7 +929,6 @@ static struct sched_ext_ops scx_ops; static bool scx_warned_zero_slice; DEFINE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup); -static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt); static struct static_key_false scx_has_op[SCX_OPI_END] = { [0 ... 
SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT }; @@ -2931,7 +2935,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) rq->scx.flags |= SCX_RQ_IN_BALANCE; rq->scx.flags &= ~(SCX_RQ_BAL_PENDING | SCX_RQ_BAL_KEEP); - if (static_branch_unlikely(&scx_ops_cpu_preempt) && + if ((scx_ops.flags & SCX_OPS_HAS_CPU_PREEMPT) && unlikely(rq->scx.cpu_released)) { /* * If the previous sched_class for the current CPU was not SCX, @@ -3160,7 +3164,7 @@ static void switch_class(struct rq *rq, struct task_struct *next) */ smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1); #endif - if (!static_branch_unlikely(&scx_ops_cpu_preempt)) + if (!(scx_ops.flags & SCX_OPS_HAS_CPU_PREEMPT)) return; /* @@ -4725,7 +4729,6 @@ static void scx_disable_workfn(struct kthread_work *work) for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) static_branch_disable(&scx_has_op[i]); static_branch_disable(&scx_ops_allow_queued_wakeup); - static_branch_disable(&scx_ops_cpu_preempt); scx_idle_disable(); synchronize_rcu(); @@ -5367,7 +5370,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (ops->flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) static_branch_enable(&scx_ops_allow_queued_wakeup); if (scx_ops.cpu_acquire || scx_ops.cpu_release) - static_branch_enable(&scx_ops_cpu_preempt); + scx_ops.flags |= SCX_OPS_HAS_CPU_PREEMPT; /* * Lock out forks, cgroup on/offlining and moves before opening the -- 2.51.0 From 743354e3bb7238552cda70a5f39ba6e946a899fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2025 09:06:00 -1000 Subject: [PATCH 13/16] sched_ext: Remove scx_ops_allow_queued_wakeup static_key scx_ops_allow_queued_wakeup is used to encode SCX_OPS_ALLOW_QUEUED_WAKEUP into a static_key. The test is gated behind scx_enabled(), and, even when sched_ext is enabled, is unlikely for the static_key usage to make any meaningful difference. It is made to use a static_key mostly because there was no reason not to. However, global static_keys can't work with the planned hierarchical multiple scheduler support. Remove the static_key and instead test SCX_OPS_ALLOW_QUEUED_WAKEUP directly. In repeated hackbench runs before and after static_keys removal on an AMD Ryzen 3900X, I couldn't tell any measurable performance difference. Signed-off-by: Tejun Heo Acked-by: Changwoo Min Acked-by: Andrea Righi --- kernel/sched/ext.c | 12 +++++++----- kernel/sched/ext.h | 8 +------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1adf5c299cce..f0ed0cec4c98 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -928,8 +928,6 @@ DEFINE_STATIC_KEY_FALSE(__scx_switched_all); static struct sched_ext_ops scx_ops; static bool scx_warned_zero_slice; -DEFINE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup); - static struct static_key_false scx_has_op[SCX_OPI_END] = { [0 ... 
SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT }; @@ -4414,6 +4412,13 @@ bool task_should_scx(int policy) return policy == SCHED_EXT; } +bool scx_allow_ttwu_queue(const struct task_struct *p) +{ + return !scx_enabled() || + (scx_ops.flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) || + p->sched_class != &ext_sched_class; +} + /** * scx_softlockup - sched_ext softlockup handler * @dur_s: number of seconds of CPU stuck due to soft lockup @@ -4728,7 +4733,6 @@ static void scx_disable_workfn(struct kthread_work *work) static_branch_disable(&__scx_enabled); for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) static_branch_disable(&scx_has_op[i]); - static_branch_disable(&scx_ops_allow_queued_wakeup); scx_idle_disable(); synchronize_rcu(); @@ -5367,8 +5371,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (((void (**)(void))ops)[i]) static_branch_enable(&scx_has_op[i]); - if (ops->flags & SCX_OPS_ALLOW_QUEUED_WAKEUP) - static_branch_enable(&scx_ops_allow_queued_wakeup); if (scx_ops.cpu_acquire || scx_ops.cpu_release) scx_ops.flags |= SCX_OPS_HAS_CPU_PREEMPT; diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 1bda96b19a1b..3053cdd61eb9 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -21,6 +21,7 @@ void scx_rq_activate(struct rq *rq); void scx_rq_deactivate(struct rq *rq); int scx_check_setscheduler(struct task_struct *p, int policy); bool task_should_scx(int policy); +bool scx_allow_ttwu_queue(const struct task_struct *p); void init_sched_ext_class(void); static inline u32 scx_cpuperf_target(s32 cpu) @@ -36,13 +37,6 @@ static inline bool task_on_scx(const struct task_struct *p) return scx_enabled() && p->sched_class == &ext_sched_class; } -static inline bool scx_allow_ttwu_queue(const struct task_struct *p) -{ - return !scx_enabled() || - static_branch_likely(&scx_ops_allow_queued_wakeup) || - p->sched_class != &ext_sched_class; -} - #ifdef CONFIG_SCHED_CORE bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, bool in_fi); -- 2.51.0 From 0b304617936094b1e55c0b3e5903ce7d4f1bc32b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Apr 2025 09:06:00 -1000 Subject: [PATCH 14/16] sched_ext: Make scx_has_op a bitmap scx_has_op is used to encode which ops are implemented by the BPF scheduler into an array of static_keys. While this saves a bit of branching overhead, that is unlikely to be noticeable compared to the overall cost. As the global static_keys can't work with the planned hierarchical multiple scheduler support, replace the static_key array with a bitmap. In repeated hackbench runs before and after static_keys removal on an AMD Ryzen 3900X, I couldn't tell any measurable performance difference. Signed-off-by: Tejun Heo Acked-by: Changwoo Min Acked-by: Andrea Righi --- kernel/sched/ext.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f0ed0cec4c98..8ae85ec6d9a2 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -928,8 +928,7 @@ DEFINE_STATIC_KEY_FALSE(__scx_switched_all); static struct sched_ext_ops scx_ops; static bool scx_warned_zero_slice; -static struct static_key_false scx_has_op[SCX_OPI_END] = - { [0 ... SCX_OPI_END-1] = STATIC_KEY_FALSE_INIT }; +static DECLARE_BITMAP(scx_has_op, SCX_OPI_END); static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE); static struct scx_exit_info *scx_exit_info; @@ -1055,7 +1054,7 @@ static __printf(3, 4) void __scx_exit(enum scx_exit_kind kind, s64 exit_code, #define scx_error(fmt, args...) 
\ __scx_error(SCX_EXIT_ERROR, fmt, ##args) -#define SCX_HAS_OP(op) static_branch_likely(&scx_has_op[SCX_OP_IDX(op)]) +#define SCX_HAS_OP(op) test_bit(SCX_OP_IDX(op), scx_has_op) static long jiffies_delta_msecs(unsigned long at, unsigned long now) { @@ -1774,7 +1773,7 @@ static void touch_core_sched_dispatch(struct rq *rq, struct task_struct *p) lockdep_assert_rq_held(rq); #ifdef CONFIG_SCHED_CORE - if (SCX_HAS_OP(core_sched_before)) + if (unlikely(SCX_HAS_OP(core_sched_before))) touch_core_sched(rq, p); #endif } @@ -2156,7 +2155,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, goto local; } - if (!SCX_HAS_OP(enqueue)) + if (unlikely(!SCX_HAS_OP(enqueue))) goto global; /* DSQ bypass didn't trigger, enqueue on the BPF scheduler */ @@ -2972,7 +2971,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) if (consume_global_dsq(rq)) goto has_tasks; - if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq)) + if (unlikely(!SCX_HAS_OP(dispatch)) || scx_rq_bypassing(rq) || !scx_rq_online(rq)) goto no_tasks; dspc->rq = rq; @@ -3373,7 +3372,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag return prev_cpu; rq_bypass = scx_rq_bypassing(task_rq(p)); - if (SCX_HAS_OP(select_cpu) && !rq_bypass) { + if (likely(SCX_HAS_OP(select_cpu)) && !rq_bypass) { s32 cpu; struct task_struct **ddsp_taskp; @@ -4638,7 +4637,7 @@ static void scx_disable_workfn(struct kthread_work *work) struct task_struct *p; struct rhashtable_iter rht_iter; struct scx_dispatch_q *dsq; - int i, kind, cpu; + int kind, cpu; kind = atomic_read(&scx_exit_kind); while (true) { @@ -4731,8 +4730,7 @@ static void scx_disable_workfn(struct kthread_work *work) /* no task is on scx, turn off all the switches and flush in-progress calls */ static_branch_disable(&__scx_enabled); - for (i = SCX_OPI_BEGIN; i < SCX_OPI_END; i++) - static_branch_disable(&scx_has_op[i]); + bitmap_zero(scx_has_op, SCX_OPI_END); scx_idle_disable(); synchronize_rcu(); @@ -5328,7 +5326,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) for (i = SCX_OPI_CPU_HOTPLUG_BEGIN; i < SCX_OPI_CPU_HOTPLUG_END; i++) if (((void (**)(void))ops)[i]) - static_branch_enable_cpuslocked(&scx_has_op[i]); + set_bit(i, scx_has_op); check_hotplug_seq(ops); scx_idle_update_selcpu_topology(ops); @@ -5369,7 +5367,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++) if (((void (**)(void))ops)[i]) - static_branch_enable(&scx_has_op[i]); + set_bit(i, scx_has_op); if (scx_ops.cpu_acquire || scx_ops.cpu_release) scx_ops.flags |= SCX_OPS_HAS_CPU_PREEMPT; -- 2.51.0 From 6d65f682a9f20ed26c822812129f191455002f1c Mon Sep 17 00:00:00 2001 From: yangsonghua Date: Mon, 14 Apr 2025 16:14:36 +0800 Subject: [PATCH 15/16] sched_ext: Improve cross-compilation support in Makefile Modify the tools/sched_ext/Makefile to better handle cross-compilation environments by: 1. Fix host tools build directory structure by separating obj/ from output (HOST_BUILD_DIR now points to $(OBJ_DIR)/host/obj) 2. Properly propagate CROSS_COMPILE to libbpf sub-make invocation 3. Add missing $(HOST_BPFOBJ) build rule with proper host toolchain flags (ARCH=, CROSS_COMPILE=, explicit HOSTCC/HOSTLD) 4. Consistently quote $(HOSTCC) in bpftool build rule 5. Change LDFLAGS assignment to += to allow external extensions The changes ensure proper cross-compilation behavior while maintaining backward compatibility with native builds. 
Host tools are now correctly built with the host toolchain while target binaries use the cross-toolchain. Signed-off-by: yangsonghua Signed-off-by: Tejun Heo --- tools/sched_ext/Makefile | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile index ca3815e572d8..d68780e2e03d 100644 --- a/tools/sched_ext/Makefile +++ b/tools/sched_ext/Makefile @@ -61,8 +61,8 @@ SCXOBJ_DIR := $(OBJ_DIR)/sched_ext BINDIR := $(OUTPUT_DIR)/bin BPFOBJ := $(BPFOBJ_DIR)/libbpf.a ifneq ($(CROSS_COMPILE),) -HOST_BUILD_DIR := $(OBJ_DIR)/host -HOST_OUTPUT_DIR := host-tools +HOST_BUILD_DIR := $(OBJ_DIR)/host/obj +HOST_OUTPUT_DIR := $(OBJ_DIR)/host HOST_INCLUDE_DIR := $(HOST_OUTPUT_DIR)/include else HOST_BUILD_DIR := $(OBJ_DIR) @@ -98,7 +98,7 @@ ifneq ($(LLVM),) CFLAGS += -Wno-unused-command-line-argument endif -LDFLAGS = -lelf -lz -lpthread +LDFLAGS += -lelf -lz -lpthread IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - Date: Fri, 18 Apr 2025 11:26:02 +0800 Subject: [PATCH 16/16] sched_ext: change the variable name for slice refill event SCX_EV_ENQ_SLICE_DFL gives the impression that the event only occurs when the tasks were enqueued, which seems not accurate. What it actually means is the refilling with defalt slice, and this can occur either when enqueue or pick_task. Let's change the variable to SCX_EV_REFILL_SLICE_DFL. Signed-off-by: Honglei Wang Acked-by: Changwoo Min Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 22 +++++++++++----------- tools/sched_ext/scx_qmap.bpf.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8ae85ec6d9a2..4e96f437d388 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1515,10 +1515,10 @@ struct scx_event_stats { s64 SCX_EV_ENQ_SKIP_MIGRATION_DISABLED; /* - * The total number of tasks enqueued (or pick_task-ed) with a - * default time slice (SCX_SLICE_DFL). + * Total number of times a task's time slice was refilled with the + * default value (SCX_SLICE_DFL). */ - s64 SCX_EV_ENQ_SLICE_DFL; + s64 SCX_EV_REFILL_SLICE_DFL; /* * The total duration of bypass modes in nanoseconds. 
@@ -2193,7 +2193,7 @@ local: */ touch_core_sched(rq, p); p->scx.slice = SCX_SLICE_DFL; - __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + __scx_add_event(SCX_EV_REFILL_SLICE_DFL, 1); local_norefill: dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags); return; @@ -2201,7 +2201,7 @@ local_norefill: global: touch_core_sched(rq, p); /* see the comment in local: */ p->scx.slice = SCX_SLICE_DFL; - __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + __scx_add_event(SCX_EV_REFILL_SLICE_DFL, 1); dispatch_enqueue(find_global_dsq(p), p, enq_flags); } @@ -3292,7 +3292,7 @@ static struct task_struct *pick_task_scx(struct rq *rq) p = prev; if (!p->scx.slice) { p->scx.slice = SCX_SLICE_DFL; - __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + __scx_add_event(SCX_EV_REFILL_SLICE_DFL, 1); } } else { p = first_local_task(rq); @@ -3309,7 +3309,7 @@ static struct task_struct *pick_task_scx(struct rq *rq) scx_warned_zero_slice = true; } p->scx.slice = SCX_SLICE_DFL; - __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + __scx_add_event(SCX_EV_REFILL_SLICE_DFL, 1); } } @@ -3395,7 +3395,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag if (cpu >= 0) { p->scx.slice = SCX_SLICE_DFL; p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL; - __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + __scx_add_event(SCX_EV_REFILL_SLICE_DFL, 1); } else { cpu = prev_cpu; } @@ -4368,7 +4368,7 @@ static ssize_t scx_attr_events_show(struct kobject *kobj, at += scx_attr_event_show(buf, at, &events, SCX_EV_DISPATCH_KEEP_LAST); at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_EXITING); at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED); - at += scx_attr_event_show(buf, at, &events, SCX_EV_ENQ_SLICE_DFL); + at += scx_attr_event_show(buf, at, &events, SCX_EV_REFILL_SLICE_DFL); at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DURATION); at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_DISPATCH); at += scx_attr_event_show(buf, at, &events, SCX_EV_BYPASS_ACTIVATE); @@ -5099,7 +5099,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST); scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING); scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED); - scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL); + scx_dump_event(s, &events, SCX_EV_REFILL_SLICE_DFL); scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION); scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH); scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE); @@ -7252,7 +7252,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events, scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST); scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING); scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_MIGRATION_DISABLED); - scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL); + scx_agg_event(&e_sys, e_cpu, SCX_EV_REFILL_SLICE_DFL); scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION); scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH); scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE); diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 26c40ca4f36c..c3cd9a17d48e 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -784,8 +784,8 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer) scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST)); bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING", scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING)); - bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL", - 
scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL)); + bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL", + scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL)); bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION", scx_read_event(&events, SCX_EV_BYPASS_DURATION)); bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH", -- 2.51.0