]> www.infradead.org Git - nvme.git/commitdiff
sched_ext: Introduce LLC awareness to the default idle selection policy
authorAndrea Righi <arighi@nvidia.com>
Tue, 22 Oct 2024 23:47:18 +0000 (01:47 +0200)
committerTejun Heo <tj@kernel.org>
Wed, 23 Oct 2024 19:25:26 +0000 (09:25 -1000)
Rely on the scheduler topology information to implement basic LLC
awareness in the sched_ext built-in idle selection policy.

This allows schedulers using the built-in policy to make more informed
decisions when selecting an idle CPU in systems with multiple LLCs, such
as NUMA systems or chiplet-based architectures, and it helps keep tasks
within the same LLC domain, thereby improving cache locality.

For efficiency, LLC awareness is applied only to tasks that can run on
all the CPUs in the system for now. If a task's affinity is modified
from user space, it's the responsibility of user space to choose the
appropriate optimized scheduling domain.

Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/sched/ext.c

index 97231a65487f007e6e8aae31c53479af7ce3b800..d7ae816db6f222b978728dbad553334dea15341b 100644 (file)
@@ -3124,9 +3124,39 @@ found:
                goto retry;
 }
 
+#ifdef CONFIG_SCHED_MC
+/*
+ * Return the LLC span of @cpu if @p can run on all possible CPUs, else NULL
+ * (affinity restricted or no LLC domain). Caller must hold rcu_read_lock().
+ */
+static const struct cpumask *llc_domain(const struct task_struct *p, s32 cpu)
+{
+       struct sched_domain *sd = rcu_dereference(per_cpu(sd_llc, cpu));
+       const struct cpumask *llc_cpus = sd ? sched_domain_span(sd) : NULL;
+
+       /*
+        * nr_cpu_ids is the highest possible CPU id + 1 and overcounts when
+        * cpu_possible_mask is sparse; compare against the actual CPU count.
+        */
+       return p->nr_cpus_allowed == num_possible_cpus() ? llc_cpus : NULL;
+}
+#else /* CONFIG_SCHED_MC */
+static inline const struct cpumask *llc_domain(const struct task_struct *p, s32 cpu)
+{
+       return NULL;    /* LLC awareness is compiled out without CONFIG_SCHED_MC */
+}
+#endif /* CONFIG_SCHED_MC */
+
+/*
+ * Built-in cpu idle selection policy.
+ *
+ * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
+ * we never call ops.select_cpu() for them, see select_task_rq().
+ */
 static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
                              u64 wake_flags, bool *found)
 {
+       const struct cpumask *llc_cpus = llc_domain(p, prev_cpu);
        s32 cpu;
 
        *found = false;
@@ -3178,22 +3208,52 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
         * partially idle @prev_cpu.
         */
        if (sched_smt_active()) {
+               /*
+                * Keep using @prev_cpu if it's part of a fully idle core.
+                */
                if (cpumask_test_cpu(prev_cpu, idle_masks.smt) &&
                    test_and_clear_cpu_idle(prev_cpu)) {
                        cpu = prev_cpu;
                        goto cpu_found;
                }
 
+               /*
+                * Search for any fully idle core in the same LLC domain.
+                */
+               if (llc_cpus) {
+                       cpu = scx_pick_idle_cpu(llc_cpus, SCX_PICK_IDLE_CORE);
+                       if (cpu >= 0)
+                               goto cpu_found;
+               }
+
+               /*
+        * Search for any fully idle core usable by the task.
+                */
                cpu = scx_pick_idle_cpu(p->cpus_ptr, SCX_PICK_IDLE_CORE);
                if (cpu >= 0)
                        goto cpu_found;
        }
 
+       /*
+        * Use @prev_cpu if it's idle.
+        */
        if (test_and_clear_cpu_idle(prev_cpu)) {
                cpu = prev_cpu;
                goto cpu_found;
        }
 
+       /*
+        * Search for any idle CPU in the same LLC domain.
+        */
+       if (llc_cpus) {
+               cpu = scx_pick_idle_cpu(llc_cpus, 0);
+               if (cpu >= 0)
+                       goto cpu_found;
+       }
+
+       /*
+        * Search for any idle CPU usable by the task.
+        */
        cpu = scx_pick_idle_cpu(p->cpus_ptr, 0);
        if (cpu >= 0)
                goto cpu_found;