From: Tianchen Ding
Date: Wed, 26 Jun 2024 02:35:05 +0000 (+0800)
Subject: sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy
X-Git-Tag: nvme-6.12-2024-10-18~97^2~59
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=faa42d29419def58d3c3e5b14ad4037f0af3b496;p=nvme.git

sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy

Consider the following cgroup:

                       root
                        |
             ------------------------
             |                      |
       normal_cgroup            idle_cgroup
             |                      |
   SCHED_IDLE task_A           SCHED_NORMAL task_B

According to the cgroup hierarchy, A should preempt B. But current
check_preempt_wakeup_fair() treats cgroup se and task separately, so B
will preempt A unexpectedly.
Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE of
a task a relative policy that is effective only within its own cgroup,
similar to the behavior of NICE.

Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED.

Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support")
Signed-off-by: Tianchen Ding
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Josh Don
Reviewed-by: Vincent Guittot
Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@linux.alibaba.com
---

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02694fc5fce9..99c80abdbaaa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -511,7 +511,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq) static int se_is_idle(struct sched_entity *se) { - return 0; + return task_has_idle_policy(task_of(se)); } #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -8381,16 +8381,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int if (test_tsk_need_resched(curr)) return; - /* Idle tasks are by definition preempted by non-idle tasks. 
*/ - if (unlikely(task_has_idle_policy(curr)) && - likely(!task_has_idle_policy(p))) - goto preempt; - - /* - * Batch and idle tasks do not preempt non-idle tasks (their preemption - * is driven by the tick): - */ - if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION)) + if (!sched_feat(WAKEUP_PREEMPTION)) return; find_matching_se(&se, &pse); @@ -8400,7 +8391,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int pse_is_idle = se_is_idle(pse); /* - * Preempt an idle group in favor of a non-idle group (and don't preempt + * Preempt an idle entity in favor of a non-idle entity (and don't preempt * in the inverse case). */ if (cse_is_idle && !pse_is_idle) @@ -8408,9 +8399,14 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int if (cse_is_idle != pse_is_idle) return; + /* + * BATCH and IDLE tasks do not preempt others. + */ + if (unlikely(p->policy != SCHED_NORMAL)) + return; + cfs_rq = cfs_rq_of(se); update_curr(cfs_rq); - /* * XXX pick_eevdf(cfs_rq) != se ? */