* PEBS requires this to provide PID/TID information. This requires we flush
  * all queued PEBS records before we context switch to a new task.
  */
+static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
+{
+       struct pmu *pmu;
+
+       pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
+
+       if (WARN_ON_ONCE(!pmu->sched_task))
+               return;
+
+       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+       perf_pmu_disable(pmu);
+
+       pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+       perf_pmu_enable(pmu);
+       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+
 static void perf_pmu_sched_task(struct task_struct *prev,
                                struct task_struct *next,
                                bool sched_in)
 {
        struct perf_cpu_context *cpuctx;
-       struct pmu *pmu;
 
        if (prev == next)
                return;
 
-       list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
-               pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
-
-               if (WARN_ON_ONCE(!pmu->sched_task))
-                       continue;
-
-               perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-               perf_pmu_disable(pmu);
-
-               pmu->sched_task(cpuctx->task_ctx, sched_in);
+       list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry)
+               __perf_pmu_sched_task(cpuctx, sched_in);
 
-               perf_pmu_enable(pmu);
-               perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-       }
 }
 
 static void perf_event_context_sched_in(struct perf_event_context *ctx,
                                        struct task_struct *task)
 {
        struct perf_cpu_context *cpuctx;
+       struct pmu *pmu = ctx->pmu;
 
        cpuctx = __get_cpu_context(ctx);
-       if (cpuctx->task_ctx == ctx)
+       if (cpuctx->task_ctx == ctx) {
+               if (cpuctx->sched_cb_usage)
+                       __perf_pmu_sched_task(cpuctx, true);
                return;
+       }
 
        perf_ctx_lock(cpuctx, ctx);
        /*
        if (!ctx->nr_events)
                goto unlock;
 
-       perf_pmu_disable(ctx->pmu);
+       perf_pmu_disable(pmu);
        /*
         * We want to keep the following priority order:
         * cpu pinned (that don't need to move), task pinned,
        if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
        perf_event_sched_in(cpuctx, ctx, task);
-       perf_pmu_enable(ctx->pmu);
+
+       if (cpuctx->sched_cb_usage && pmu->sched_task)
+               pmu->sched_task(cpuctx->task_ctx, true);
+
+       perf_pmu_enable(pmu);
 
 unlock:
        perf_ctx_unlock(cpuctx, ctx);
@@ ... @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
-
-       if (__this_cpu_read(perf_sched_cb_usages))
-               perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)