From 6d3f8fb4b2d47b7b6581a1a5a6ce35700f1416e6 Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Fri, 7 Feb 2025 15:40:51 +0900 Subject: [PATCH] sched_ext: Add an event, SCX_EV_ENQ_SLICE_DFL Add a core event, SCX_EV_ENQ_SLICE_DFL, which represents how many tasks have been enqueued (or pick_task-ed or select_cpu-ed) with a default time slice (SCX_SLICE_DFL). Unintentionally scheduling a task with SCX_SLICE_DFL can be a source of latency spikes because SCX_SLICE_DFL is relatively long (20 msec). Thus, a rapidly increasing SCX_EV_ENQ_SLICE_DFL value can be a sign of a BPF scheduler bug that causes latency spikes, especially when ops.select_cpu() is provided. __scx_add_event() is used since the caller holds an rq lock or p->pi_lock, so preemption is already disabled. Signed-off-by: Changwoo Min Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8a9a30895381a..5ef90d9bcdd2d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1468,6 +1468,12 @@ struct scx_event_stats { */ u64 SCX_EV_ENQ_SKIP_EXITING; + /* + * The total number of tasks enqueued (or pick_task-ed) with a + * default time slice (SCX_SLICE_DFL). + */ + u64 SCX_EV_ENQ_SLICE_DFL; + /* * The total duration of bypass modes in nanoseconds.
*/ @@ -2134,6 +2140,7 @@ local: */ touch_core_sched(rq, p); p->scx.slice = SCX_SLICE_DFL; + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); local_norefill: dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags); return; @@ -2141,6 +2148,7 @@ local_norefill: global: touch_core_sched(rq, p); /* see the comment in local: */ p->scx.slice = SCX_SLICE_DFL; + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); dispatch_enqueue(find_global_dsq(p), p, enq_flags); } @@ -3202,8 +3210,10 @@ static struct task_struct *pick_task_scx(struct rq *rq) */ if (keep_prev) { p = prev; - if (!p->scx.slice) + if (!p->scx.slice) { p->scx.slice = SCX_SLICE_DFL; + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); + } } else { p = first_local_task(rq); if (!p) { @@ -3219,6 +3229,7 @@ static struct task_struct *pick_task_scx(struct rq *rq) scx_warned_zero_slice = true; } p->scx.slice = SCX_SLICE_DFL; + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); } } @@ -3306,6 +3317,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag if (found) { p->scx.slice = SCX_SLICE_DFL; p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL; + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1); } if (rq_bypass) @@ -5023,6 +5035,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE); scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST); scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING); + scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL); scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION); scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH); scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE); @@ -7163,6 +7176,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events, scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE); scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST); scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING); + scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL); scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION); 
scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH); scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE); -- 2.50.1