RB_CLEAR_NODE(&waiter->pi_tree_entry);
 }
 
-/*
- * Must hold both p->pi_lock and task_rq(p)->lock.
- */
-void rt_mutex_update_top_task(struct task_struct *p)
-{
-       if (!task_has_pi_waiters(p)) {
-               p->pi_top_task = NULL;
-               return;
-       }
-
-       p->pi_top_task = task_top_pi_waiter(p)->task;
-}
-
-/*
- * Calculate task priority from the waiter tree priority
- *
- * Return task->normal_prio when the waiter tree is empty or when
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
-{
-       if (likely(!task_has_pi_waiters(task)))
-               return task->normal_prio;
-
-       return min(task_top_pi_waiter(task)->prio,
-                  task->normal_prio);
-}
-
-/*
- * Must hold either p->pi_lock or task_rq(p)->lock.
- */
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
-       return task->pi_top_task;
-}
-
-/*
- * Called by sched_setscheduler() to get the priority which will be
- * effective after the change.
- */
-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
+static void rt_mutex_adjust_prio(struct task_struct *p)
 {
-       struct task_struct *top_task = rt_mutex_get_top_task(task);
+       struct task_struct *pi_task = NULL;
 
-       if (!top_task)
-               return newprio;
+       lockdep_assert_held(&p->pi_lock);
 
-       return min(top_task->prio, newprio);
-}
+       if (task_has_pi_waiters(p))
+               pi_task = task_top_pi_waiter(p)->task;
 
-/*
- * Adjust the priority of a task, after its pi_waiters got modified.
- *
- * This can be both boosting and unboosting. task->pi_lock must be held.
- */
-static void __rt_mutex_adjust_prio(struct task_struct *task)
-{
-       int prio = rt_mutex_getprio(task);
-
-       if (task->prio != prio || dl_prio(prio))
-               rt_mutex_setprio(task, prio);
+       rt_mutex_setprio(p, pi_task);
 }
 
 /*
                 */
                rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
                rt_mutex_enqueue_pi(task, waiter);
-               __rt_mutex_adjust_prio(task);
+               rt_mutex_adjust_prio(task);
 
        } else if (prerequeue_top_waiter == waiter) {
                /*
                rt_mutex_dequeue_pi(task, waiter);
                waiter = rt_mutex_top_waiter(lock);
                rt_mutex_enqueue_pi(task, waiter);
-               __rt_mutex_adjust_prio(task);
+               rt_mutex_adjust_prio(task);
        } else {
                /*
                 * Nothing changed. No need to do any priority
                return -EDEADLK;
 
        raw_spin_lock(&task->pi_lock);
-       __rt_mutex_adjust_prio(task);
+       rt_mutex_adjust_prio(task);
        waiter->task = task;
        waiter->lock = lock;
        waiter->prio = task->prio;
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
 
-               __rt_mutex_adjust_prio(owner);
+               rt_mutex_adjust_prio(owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
        } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
        waiter = rt_mutex_top_waiter(lock);
 
        /*
-        * Remove it from current->pi_waiters. We do not adjust a
-        * possible priority boost right now. We execute wakeup in the
-        * boosted mode and go back to normal after releasing
-        * lock->wait_lock.
+        * Remove it from current->pi_waiters and deboost.
+        *
+        * We must in fact deboost here in order to ensure we call
+        * rt_mutex_setprio() to update p->pi_top_task before the
+        * task unblocks.
         */
        rt_mutex_dequeue_pi(current, waiter);
-       __rt_mutex_adjust_prio(current);
+       rt_mutex_adjust_prio(current);
 
        /*
         * As we are waking up the top waiter, and the waiter stays
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
-       raw_spin_unlock(&current->pi_lock);
-
+       /*
+        * We deboosted before waking the top waiter task such that we don't
+        * run two tasks with the 'same' priority (and ensure the
+        * p->pi_top_task pointer points to a blocked task). This however can
+        * lead to priority inversion if we would get preempted after the
+        * deboost but before waking our donor task, hence the preempt_disable()
+        * before unlock.
+        *
+        * Pairs with preempt_enable() in rt_mutex_postunlock().
+        */
+       preempt_disable();
        wake_q_add(wake_q, waiter->task);
+       raw_spin_unlock(&current->pi_lock);
 }
 
 /*
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
 
-       __rt_mutex_adjust_prio(owner);
+       rt_mutex_adjust_prio(owner);
 
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
        raw_spin_lock_irqsave(&task->pi_lock, flags);
 
        waiter = task->pi_blocked_on;
-       if (!waiter || (waiter->prio == task->prio &&
-                       !dl_prio(task->prio))) {
+       if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
                raw_spin_unlock_irqrestore(&task->pi_lock, flags);
                return;
        }
         * Queue the next waiter for wakeup once we release the wait_lock.
         */
        mark_wakeup_next_waiter(wake_q, lock);
-
-       /*
-        * We should deboost before waking the top waiter task such that
-        * we don't run two tasks with the 'same' priority. This however
-        * can lead to prio-inversion if we would get preempted after
-        * the deboost but before waking our high-prio task, hence the
-        * preempt_disable before unlock. Pairs with preempt_enable() in
-        * rt_mutex_postunlock();
-        */
-       preempt_disable();
-
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
        return true; /* call rt_mutex_postunlock() */
 
 
 #ifdef CONFIG_RT_MUTEXES
 
+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
+{
+       if (pi_task)
+               prio = min(prio, pi_task->prio);
+
+       return prio;
+}
+
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+       struct task_struct *pi_task = rt_mutex_get_top_task(p);
+
+       return __rt_effective_prio(pi_task, prio);
+}
+
 /*
  * rt_mutex_setprio - set the current priority of a task
- * @p: task
- * @prio: prio value (kernel-internal form)
+ * @p: task to boost
+ * @pi_task: donor task
  *
  * This function changes the 'effective' priority of a task. It does
  * not touch ->normal_prio like __setscheduler().
  * Used by the rt_mutex code to implement priority inheritance
  * logic. Call site only calls if the priority of the task changed.
  */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
-       int oldprio, queued, running, queue_flag =
+       int prio, oldprio, queued, running, queue_flag =
                DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
        const struct sched_class *prev_class;
        struct rq_flags rf;
        struct rq *rq;
 
-       BUG_ON(prio > MAX_PRIO);
+       /* XXX used to be waiter->prio, not waiter->task->prio */
+       prio = __rt_effective_prio(pi_task, p->normal_prio);
+
+       /*
+        * If nothing changed; bail early.
+        */
+       if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
+               return;
 
        rq = __task_rq_lock(p, &rf);
        update_rq_clock(rq);
+       /*
+        * Set under pi_lock && rq->lock, such that the value can be used under
+        * either lock.
+        *
+        * Note that there is a lot of trickery to make this pointer cache work
+        * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
+        * ensure a task is de-boosted (pi_task is set to NULL) before the
+        * task is allowed to run again (and can exit). This ensures the pointer
+        * points to a blocked task -- which guarantees the task is present.
+        */
+       p->pi_top_task = pi_task;
+
+       /*
+        * For FIFO/RR we only need to set prio; if that matches we're done.
+        */
+       if (prio == p->prio && !dl_prio(prio))
+               goto out_unlock;
 
        /*
         * Idle task boosting is a nono in general. There is one
                goto out_unlock;
        }
 
-       rt_mutex_update_top_task(p);
-
-       trace_sched_pi_setprio(p, prio);
+       trace_sched_pi_setprio(p, prio); /* broken */
        oldprio = p->prio;
 
        if (oldprio == prio)
         *          running task
         */
        if (dl_prio(prio)) {
-               struct task_struct *pi_task = rt_mutex_get_top_task(p);
                if (!dl_prio(p->normal_prio) ||
                    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
                        p->dl.dl_boosted = 1;
        balance_callback(rq);
        preempt_enable();
 }
+#else
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+       return prio;
+}
 #endif
 
 void set_user_nice(struct task_struct *p, long nice)
         * Keep a potential priority boosting if called from
         * sched_setscheduler().
         */
+       p->prio = normal_prio(p);
        if (keep_boost)
-               p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
-       else
-               p->prio = normal_prio(p);
+               p->prio = rt_effective_prio(p, p->prio);
 
        if (dl_prio(p->prio))
                p->sched_class = &dl_sched_class;
                 * the runqueue. This will be done when the task deboost
                 * itself.
                 */
-               new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+               new_effective_prio = rt_effective_prio(p, newprio);
                if (new_effective_prio == oldprio)
                        queue_flags &= ~DEQUEUE_MOVE;
        }