sched: Allow per-cpu kernel threads to run on online && !active

author Peter Zijlstra (Intel) <peterz@infradead.org>

Thu, 10 Mar 2016 11:54:08 +0000 (12:54 +0100)

committer Thomas Gleixner <tglx@linutronix.de>

Fri, 6 May 2016 12:58:22 +0000 (14:58 +0200)
author Peter Zijlstra (Intel) <peterz@infradead.org>
Thu, 10 Mar 2016 11:54:08 +0000 (12:54 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Fri, 6 May 2016 12:58:22 +0000 (14:58 +0200)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index 8cac1eb414661ad6e3340a8469361fe9a885d117..55c924b65f71aa4dfc7c58f9e4a446669c5416e0 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
                 smp_ops->give_timebase();
  
         /* Wait until cpu puts itself in the online & active maps */
-       while (!cpu_online(cpu) || !cpu_active(cpu))
+       while (!cpu_online(cpu))
                 cpu_relax();
  
         return 0;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c

index 40a6b4f9c36cedd5400898572f32173c90bf0240..7b89a757210031e1b312603b1630b44216531347 100644 (file)
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
         pcpu_attach_task(pcpu, tidle);
         pcpu_start_fn(pcpu, smp_start_secondary, NULL);
         /* Wait until cpu puts itself in the online & active maps */
-       while (!cpu_online(cpu) || !cpu_active(cpu))
+       while (!cpu_online(cpu))
                 cpu_relax();
         return 0;
  }
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h

index 40cee6b77a93618d74785c97e4339ba3e243758e..e828cf65d7dfd5645c0c61f53795f4f4aba11453 100644 (file)
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
  static inline void
  set_cpu_online(unsigned int cpu, bool online)
  {
-       if (online) {
+       if (online)
                 cpumask_set_cpu(cpu, &__cpu_online_mask);
-               cpumask_set_cpu(cpu, &__cpu_active_mask);
-       } else {
+       else
                 cpumask_clear_cpu(cpu, &__cpu_online_mask);
-       }
  }
  
  static inline void
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 8b489fcac37bd9d829439feb08d8bac1354c7e71..8bfd7d4f1c211f50e7eab01ce3596c02116c288c 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1082,13 +1082,21 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  static int __set_cpus_allowed_ptr(struct task_struct *p,
                                   const struct cpumask *new_mask, bool check)
  {
+       const struct cpumask *cpu_valid_mask = cpu_active_mask;
+       unsigned int dest_cpu;
         unsigned long flags;
         struct rq *rq;
-       unsigned int dest_cpu;
         int ret = 0;
  
         rq = task_rq_lock(p, &flags);
  
+       if (p->flags & PF_KTHREAD) {
+               /*
+                * Kernel threads are allowed on online && !active CPUs
+                */
+               cpu_valid_mask = cpu_online_mask;
+       }
+
         /*
          * Must re-check here, to close a race against __kthread_bind(),
          * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1101,18 +1109,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
         if (cpumask_equal(&p->cpus_allowed, new_mask))
                 goto out;
  
-       if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+       if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
                 ret = -EINVAL;
                 goto out;
         }
  
         do_set_cpus_allowed(p, new_mask);
  
+       if (p->flags & PF_KTHREAD) {
+               /*
+                * For kernel threads that do indeed end up on online &&
+                * !active we want to ensure they are strict per-cpu threads.
+                */
+               WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+                       !cpumask_intersects(new_mask, cpu_active_mask) &&
+                       p->nr_cpus_allowed != 1);
+       }
+
         /* Can the task run on the task's current CPU? If so, we're done */
         if (cpumask_test_cpu(task_cpu(p), new_mask))
                 goto out;
  
-       dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+       dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
         if (task_running(rq, p) || p->state == TASK_WAKING) {
                 struct migration_arg arg = { p, dest_cpu };
                 /* Need help from migration thread: drop lock and wait. */
@@ -1431,6 +1449,25 @@ EXPORT_SYMBOL_GPL(kick_process);
  
  /*
   * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    prevent the load balancer from placing new tasks on the
+ *    to-be-removed cpu. Existing tasks will remain running there and
+ *    will be taken off.
+ *
+ * This means that fallback selection must not select !active CPUs,
+ * and can assume that any active CPU must be online. Conversely,
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
   */
  static int select_fallback_rq(int cpu, struct task_struct *p)
  {
@@ -1449,8 +1486,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  
                 /* Look for allowed, online CPU in same node. */
                 for_each_cpu(dest_cpu, nodemask) {
-                       if (!cpu_online(dest_cpu))
-                               continue;
                         if (!cpu_active(dest_cpu))
                                 continue;
                         if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1461,8 +1496,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
         for (;;) {
                 /* Any allowed, online CPU? */
                 for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-                       if (!cpu_online(dest_cpu))
-                               continue;
                         if (!cpu_active(dest_cpu))
                                 continue;
                         goto out;
@@ -1514,6 +1547,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
  
         if (p->nr_cpus_allowed > 1)
                 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+       else
+               cpu = cpumask_any(tsk_cpus_allowed(p));
  
         /*
          * In order not to call set_task_cpu() on a blocking task we need
author	Peter Zijlstra (Intel) <peterz@infradead.org>
	Thu, 10 Mar 2016 11:54:08 +0000 (12:54 +0100)
committer	Thomas Gleixner <tglx@linutronix.de>
	Fri, 6 May 2016 12:58:22 +0000 (14:58 +0200)
arch/powerpc/kernel/smp.c		patch | blob | history
arch/s390/kernel/smp.c		patch | blob | history
include/linux/cpumask.h		patch | blob | history
kernel/sched/core.c		patch | blob | history