John reported that push_rt_task() can end up invoking
find_lowest_rq(rq->curr) when curr is not an RT task (in this case a CFS
one), which causes mayhem down convert_prio().
This can happen when current gets demoted to e.g. CFS when releasing an
rt_mutex, and the local CPU gets hit with an rto_push_work irqwork before
getting the chance to reschedule. Exactly who triggers this work isn't
entirely clear to me - switched_from_rt() only invokes rt_queue_pull_task()
if there are no RT tasks on the local RQ, which means the local CPU can't
be in the rto_mask.
My current suspected sequence is something along the lines of the below,
with the demoted task being current.
  mark_wakeup_next_waiter()
    rt_mutex_adjust_prio()
      rt_mutex_setprio() // deboost originally-CFS task
	check_class_changed()
	  switched_from_rt() // Only rt_queue_pull_task() if !rq->rt.rt_nr_running
	  switched_to_fair() // Sets need_resched
      __balance_callbacks() // if pull_rt_task(), tell_cpu_to_push() can't select local CPU per the above
      raw_spin_rq_unlock(rq)
       // need_resched is set, so task_woken_rt() can't
       // invoke push_rt_tasks(). Best I can come up with is
       // local CPU has rt_nr_migratory >= 2 after the demotion, so stays
       // in the rto_mask, and then:
       <some other CPU running rto_push_irq_work_func() queues rto_push_work on this CPU>
	 push_rt_task()
	   // breakage follows here as rq->curr is CFS
Move an existing check to check rq->curr vs the next pushable task's
priority before getting anywhere near find_lowest_rq(). While at it, add an
explicit sched_class of rq->curr check prior to invoking
find_lowest_rq(rq->curr). Align the DL logic to also reschedule regardless
of next_task's migratability.
Fixes: a7c81556ec4d ("sched: Fix migrate_disable() vs rt/dl balancing")
Reported-by: John Keeping <john@metanate.com>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: John Keeping <john@metanate.com>
Link: https://lore.kernel.org/r/20220127154059.974729-1-valentin.schneider@arm.com
                return 0;
 
 retry:
-       if (is_migration_disabled(next_task))
-               return 0;
-
-       if (WARN_ON(next_task == rq->curr))
-               return 0;
-
        /*
         * If next_task preempts rq->curr, and rq->curr
         * can move away, it makes sense to just reschedule
                return 0;
        }
 
+       if (is_migration_disabled(next_task))
+               return 0;
+
+       if (WARN_ON(next_task == rq->curr))
+               return 0;
+
        /* We might release rq lock */
        get_task_struct(next_task);
 
 
                return 0;
 
 retry:
+       /*
+        * It's possible that the next_task slipped in of
+        * higher priority than current. If that's the case
+        * just reschedule current.
+        */
+       if (unlikely(next_task->prio < rq->curr->prio)) {
+               resched_curr(rq);
+               return 0;
+       }
+
        if (is_migration_disabled(next_task)) {
                struct task_struct *push_task = NULL;
                int cpu;
                if (!pull || rq->push_busy)
                        return 0;
 
+               /*
+                * Invoking find_lowest_rq() on anything but an RT task doesn't
+                * make sense. Per the above priority check, curr has to
+                * be of higher priority than next_task, so no need to
+                * reschedule when bailing out.
+                *
+                * Note that the stoppers are masqueraded as SCHED_FIFO
+                * (cf. sched_set_stop_task()), so we can't rely on rt_task().
+                */
+               if (rq->curr->sched_class != &rt_sched_class)
+                       return 0;
+
                cpu = find_lowest_rq(rq->curr);
                if (cpu == -1 || cpu == rq->cpu)
                        return 0;
        if (WARN_ON(next_task == rq->curr))
                return 0;
 
-       /*
-        * It's possible that the next_task slipped in of
-        * higher priority than current. If that's the case
-        * just reschedule current.
-        */
-       if (unlikely(next_task->prio < rq->curr->prio)) {
-               resched_curr(rq);
-               return 0;
-       }
-
        /* We might release rq lock */
        get_task_struct(next_task);