sched,lockdep: Employ lock pinning

author Peter Zijlstra <peterz@infradead.org>

Thu, 11 Jun 2015 12:46:54 +0000 (14:46 +0200)

committer Thomas Gleixner <tglx@linutronix.de>

Thu, 18 Jun 2015 22:25:27 +0000 (00:25 +0200)
author Peter Zijlstra <peterz@infradead.org>
Thu, 11 Jun 2015 12:46:54 +0000 (14:46 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 18 Jun 2015 22:25:27 +0000 (00:25 +0200)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 1ddc129c5f669d1acad9db10b5f28696f9e2e2f3..c74191aa4e6acdec19409c73dcb71758b5238b2c 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1201,8 +1201,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
                 tlb_migrate_finish(p->mm);
                 return 0;
-       } else if (task_on_rq_queued(p))
+       } else if (task_on_rq_queued(p)) {
+               /*
+                * OK, since we're going to drop the lock immediately
+                * afterwards anyway.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 rq = move_queued_task(rq, p, dest_cpu);
+               lockdep_pin_lock(&rq->lock);
+       }
  out:
         task_rq_unlock(rq, p, &flags);
  
@@ -1562,6 +1569,8 @@ out:
  static inline
  int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
  {
+       lockdep_assert_held(&p->pi_lock);
+
         if (p->nr_cpus_allowed > 1)
                 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
  
@@ -1652,9 +1661,12 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
  #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
-                * XXX can drop rq->lock; most likely ok.
+                * Our task @p is fully woken up and running; so it's safe to
+                * drop the rq->lock, hereafter rq is only used for statistics.
                  */
+               lockdep_unpin_lock(&rq->lock);
                 p->sched_class->task_woken(rq, p);
+               lockdep_pin_lock(&rq->lock);
         }
  
         if (rq->idle_stamp) {
@@ -1674,6 +1686,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
  static void
  ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
  {
+       lockdep_assert_held(&rq->lock);
+
  #ifdef CONFIG_SMP
         if (p->sched_contributes_to_load)
                 rq->nr_uninterruptible--;
@@ -1718,6 +1732,7 @@ void sched_ttwu_pending(void)
                 return;
  
         raw_spin_lock_irqsave(&rq->lock, flags);
+       lockdep_pin_lock(&rq->lock);
  
         while (llist) {
                 p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1725,6 +1740,7 @@ void sched_ttwu_pending(void)
                 ttwu_do_activate(rq, p, 0);
         }
  
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
  }
  
@@ -1821,7 +1837,9 @@ static void ttwu_queue(struct task_struct *p, int cpu)
  #endif
  
         raw_spin_lock(&rq->lock);
+       lockdep_pin_lock(&rq->lock);
         ttwu_do_activate(rq, p, 0);
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock(&rq->lock);
  }
  
@@ -1916,9 +1934,17 @@ static void try_to_wake_up_local(struct task_struct *p)
         lockdep_assert_held(&rq->lock);
  
         if (!raw_spin_trylock(&p->pi_lock)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we've
+                * not yet picked a replacement task.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 raw_spin_unlock(&rq->lock);
                 raw_spin_lock(&p->pi_lock);
                 raw_spin_lock(&rq->lock);
+               lockdep_pin_lock(&rq->lock);
         }
  
         if (!(p->state & TASK_NORMAL))
@@ -2530,6 +2556,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
          * of the scheduler it's an obvious special-case), so we
          * do an early lockdep release here:
          */
+       lockdep_unpin_lock(&rq->lock);
         spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
  
         context_tracking_task_switch(prev, next);
@@ -2953,6 +2980,7 @@ static void __sched __schedule(void)
          */
         smp_mb__before_spinlock();
         raw_spin_lock_irq(&rq->lock);
+       lockdep_pin_lock(&rq->lock);
  
         rq->clock_skip_update <<= 1; /* promote REQ to ACT */
  
@@ -2995,8 +3023,10 @@ static void __sched __schedule(void)
  
                 rq = context_switch(rq, prev, next); /* unlocks the rq */
                 cpu = cpu_of(rq);
-       } else
+       } else {
+               lockdep_unpin_lock(&rq->lock);
                 raw_spin_unlock_irq(&rq->lock);
+       }
  
         balance_callback(rq);
  }
@@ -5065,6 +5095,11 @@ static void migrate_tasks(struct rq *dead_rq)
                 if (rq->nr_running == 1)
                         break;
  
+               /*
+                * Ensure rq->lock covers the entire task selection
+                * until the migration.
+                */
+               lockdep_pin_lock(&rq->lock);
                 next = pick_next_task(rq, &fake_task);
                 BUG_ON(!next);
                 next->sched_class->put_prev_task(rq, next);
@@ -5072,6 +5107,7 @@ static void migrate_tasks(struct rq *dead_rq)
                 /* Find suitable destination for @next, with force if needed. */
                 dest_cpu = select_fallback_rq(dead_rq->cpu, next);
  
+               lockdep_unpin_lock(&rq->lock);
                 rq = __migrate_task(rq, next, dest_cpu);
                 if (rq != dead_rq) {
                         raw_spin_unlock(&rq->lock);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index 6318f43971c95112344e410945f68575df90ecc2..e8146415a688ff49c10543e8c033b5f15333ca12 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1151,7 +1151,15 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
         dl_rq = &rq->dl;
  
         if (need_pull_dl_task(rq, prev)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we're
+                * being very careful to re-start the picking loop.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 pull_dl_task(rq);
+               lockdep_pin_lock(&rq->lock);
                 /*
                  * pull_rt_task() can drop (and re-acquire) rq->lock; this
                  * means a stop task can slip in, in which case we need to
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 7210ae84890934b9d8c5466e97eb906e28662fe6..509ef63d0d6ffab9193489651107c65353cc2b48 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5392,7 +5392,15 @@ simple:
         return p;
  
  idle:
+       /*
+        * This is OK, because current is on_cpu, which avoids it being picked
+        * for load-balance and preemption/IRQs are still disabled avoiding
+        * further scheduler activity on it and we're being very careful to
+        * re-start the picking loop.
+        */
+       lockdep_unpin_lock(&rq->lock);
         new_tasks = idle_balance(rq);
+       lockdep_pin_lock(&rq->lock);
         /*
          * Because idle_balance() releases (and re-acquires) rq->lock, it is
          * possible for any higher priority task to appear. In that case we
@@ -7426,9 +7434,6 @@ static int idle_balance(struct rq *this_rq)
                 goto out;
         }
  
-       /*
-        * Drop the rq->lock, but keep IRQ/preempt disabled.
-        */
         raw_spin_unlock(&this_rq->lock);
  
         update_blocked_averages(this_cpu);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c

index 460f85888b74d52453e62025a337a3d9b60aabe7..0d193a243e96dce029c952a47858b2f0edfb8491 100644 (file)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1478,7 +1478,15 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev)
         struct rt_rq *rt_rq = &rq->rt;
  
         if (need_pull_rt_task(rq, prev)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we're
+                * being very careful to re-start the picking loop.
+                */
+               lockdep_unpin_lock(&rq->lock);
                 pull_rt_task(rq);
+               lockdep_pin_lock(&rq->lock);
                 /*
                  * pull_rt_task() can drop (and re-acquire) rq->lock; this
                  * means a dl or stop task can slip in, in which case we need
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 62949ab06bc2157e05417043497e85146140551f..ef02d11654cd5dcd3952c4a2c0409402a6dec528 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1439,8 +1439,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
         for (;;) {
                 rq = task_rq(p);
                 raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                       lockdep_pin_lock(&rq->lock);
                         return rq;
+               }
                 raw_spin_unlock(&rq->lock);
  
                 while (unlikely(task_on_rq_migrating(p)))
@@ -1477,8 +1479,10 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
                  * If we observe the new cpu in task_rq_lock, the acquire will
                  * pair with the WMB to ensure we must then also see migrating.
                  */
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+                       lockdep_pin_lock(&rq->lock);
                         return rq;
+               }
                 raw_spin_unlock(&rq->lock);
                 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  
@@ -1490,6 +1494,7 @@ static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flag
  static inline void __task_rq_unlock(struct rq *rq)
         __releases(rq->lock)
  {
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock(&rq->lock);
  }
  
@@ -1498,6 +1503,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
         __releases(rq->lock)
         __releases(p->pi_lock)
  {
+       lockdep_unpin_lock(&rq->lock);
         raw_spin_unlock(&rq->lock);
         raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
  }
author	Peter Zijlstra <peterz@infradead.org>
	Thu, 11 Jun 2015 12:46:54 +0000 (14:46 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
	Thu, 18 Jun 2015 22:25:27 +0000 (00:25 +0200)
kernel/sched/core.c		patch \| blob \| history
kernel/sched/deadline.c		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/sched/rt.c		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history