www.infradead.org Git - nvme.git/commitdiff
Merge tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 Sep 2024 13:55:58 +0000 (15:55 +0200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 Sep 2024 13:55:58 +0000 (15:55 +0200)
Pull scheduler updates from Ingo Molnar:

 - Implement the SCHED_DEADLINE server infrastructure - Daniel Bristot
   de Oliveira's last major contribution to the kernel:

     "SCHED_DEADLINE servers can help fixing starvation issues of low
      priority tasks (e.g., SCHED_OTHER) when higher priority tasks
      monopolize CPU cycles. Today we have RT Throttling; DEADLINE
      servers should be able to replace and improve that."

   (Daniel Bristot de Oliveira, Peter Zijlstra, Joel Fernandes, Youssef
   Esmat, Huang Shijie)

 - Preparatory changes for sched_ext integration:
     - Use set_next_task(.first) where required
     - Fix up set_next_task() implementations
     - Clean up DL server vs. core sched
     - Split up put_prev_task_balance()
     - Rework pick_next_task()
     - Combine the last put_prev_task() and the first set_next_task()
     - Rework dl_server
     - Add put_prev_task(.next)

   (Peter Zijlstra, with a fix by Tejun Heo)

 - Complete the EEVDF transition and refine EEVDF scheduling:
     - Implement delayed dequeue
     - Allow shorter slices to wakeup-preempt
     - Use sched_attr::sched_runtime to set request/slice suggestion
     - Document the new feature flags
     - Remove unused and duplicate-functionality fields
     - Simplify & unify pick_next_task_fair()
     - Misc debuggability enhancements

   (Peter Zijlstra, with fixes/cleanups by Dietmar Eggemann, Valentin
   Schneider and Chuyi Zhou)

 - Initialize the vruntime of a new task when it is first enqueued,
   resulting in significant decrease in latency of newly woken tasks
   (Zhang Qiao)

 - Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
   (K Prateek Nayak, Peter Zijlstra)

 - Clean up and clarify the usage of rt_task()
   (Qais Yousef)

 - Preempt SCHED_IDLE entities in strict cgroup hierarchies
   (Tianchen Ding)

 - Clarify the documentation of time units for deadline scheduler
   parameters (Christian Loehle)

 - Remove the HZ_BW chicken-bit feature flag introduced a year ago,
   the original change seems to be working fine (Phil Auld)

 - Misc fixes and cleanups (Chen Yu, Dan Carpenter, Huang Shijie,
   Peilin He, Qais Yousef and Vincent Guittot)

* tag 'sched-core-2024-09-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
  sched/cpufreq: Use NSEC_PER_MSEC for deadline task
  cpufreq/cppc: Use NSEC_PER_MSEC for deadline task
  sched/deadline: Clarify nanoseconds in uapi
  sched/deadline: Convert schedtool example to chrt
  sched/debug: Fix the runnable tasks output
  sched: Fix sched_delayed vs sched_core
  kernel/sched: Fix util_est accounting for DELAY_DEQUEUE
  kthread: Fix task state in kthread worker if being frozen
  sched/pelt: Use rq_clock_task() for hw_pressure
  sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c
  sched/core: Introduce SM_IDLE and an idle re-entry fast-path in __schedule()
  sched: Add put_prev_task(.next)
  sched: Rework dl_server
  sched: Combine the last put_prev_task() and the first set_next_task()
  sched: Rework pick_next_task()
  sched: Split up put_prev_task_balance()
  sched: Clean up DL server vs core sched
  sched: Fixup set_next_task() implementations
  sched: Use set_next_task(.first) where required
  sched/fair: Properly deactivate sched_delayed task upon class change
  ...

1  2 
fs/proc/base.c
include/linux/sched.h
kernel/locking/rtmutex.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/syscalls.c
kernel/sys.c
kernel/time/hrtimer.c
mm/page_alloc.c

diff --cc fs/proc/base.c
index e7810f3bd522d580ef96ef0522d1394c76b2f30d,72a1acd03675cc77da7320a14426f40e6fc9513f..b31283d81c52ea2a984519dac166d9bcbb7c99a8
@@@ -2626,11 -2569,10 +2626,11 @@@ static ssize_t timerslack_ns_write(stru
        }
  
        task_lock(p);
-       if (task_is_realtime(p))
 -      if (slack_ns == 0)
 -              p->timer_slack_ns = p->default_timer_slack_ns;
 -      else
 -              p->timer_slack_ns = slack_ns;
++      if (rt_or_dl_task_policy(p))
 +              slack_ns = 0;
 +      else if (slack_ns == 0)
 +              slack_ns = p->default_timer_slack_ns;
 +      p->timer_slack_ns = slack_ns;
        task_unlock(p);
  
  out:
Simple merge
Simple merge
Simple merge
Simple merge
index 195d2f2834a9758ebc8c855336688d2e2520600e,c62acf509b748599c5dd92cdbbae6da510c40964..cb03c790c27a590d64c7c07acf51180331818393
@@@ -401,19 -300,21 +300,29 @@@ static void __setscheduler_params(struc
  
        p->policy = policy;
  
-       if (dl_policy(policy))
+       if (dl_policy(policy)) {
                __setparam_dl(p, attr);
-       else if (fair_policy(policy))
+       } else if (fair_policy(policy)) {
                p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+               if (attr->sched_runtime) {
+                       p->se.custom_slice = 1;
+                       p->se.slice = clamp_t(u64, attr->sched_runtime,
+                                             NSEC_PER_MSEC/10,   /* HZ=1000 * 10 */
+                                             NSEC_PER_MSEC*100); /* HZ=100  / 10 */
+               } else {
+                       p->se.custom_slice = 0;
+                       p->se.slice = sysctl_sched_base_slice;
+               }
+       }
  
-       if (task_is_realtime(p)) {
 +      /* rt-policy tasks do not have a timerslack */
++      if (rt_or_dl_task_policy(p)) {
 +              p->timer_slack_ns = 0;
 +      } else if (p->timer_slack_ns == 0) {
 +              /* when switching back to non-rt policy, restore timerslack */
 +              p->timer_slack_ns = p->default_timer_slack_ns;
 +      }
 +
        /*
         * __sched_setscheduler() ensures attr->sched_priority == 0 when
         * !rt_policy. Always setting this ensures that things like
diff --cc kernel/sys.c
index e3c4cffb520ceec414ff20cd769a2a6bfe19df2c,3a2df1bd9f640ebbd63ceda94dd8cc1c51018f1b..b7e096e1c3a13d808731f0f4e55a16a5524dcb81
@@@ -2557,8 -2557,6 +2557,8 @@@ SYSCALL_DEFINE5(prctl, int, option, uns
                        error = current->timer_slack_ns;
                break;
        case PR_SET_TIMERSLACK:
-               if (task_is_realtime(current))
++              if (rt_or_dl_task_policy(current))
 +                      break;
                if (arg2 <= 0)
                        current->timer_slack_ns =
                                        current->default_timer_slack_ns;
Simple merge
diff --cc mm/page_alloc.c
Simple merge