fini_debug_store_on_cpu(cpu);
 }
 
-static void intel_pmu_flush_branch_stack(void)
-{
-       /*
-        * Intel LBR does not tag entries with the
-        * PID of the current task, then we need to
-        * flush it on ctxsw
-        * For now, we simply reset it
-        */
-       if (x86_pmu.lbr_nr)
-               intel_pmu_lbr_reset();
-}
-
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
        .cpu_starting           = intel_pmu_cpu_starting,
        .cpu_dying              = intel_pmu_cpu_dying,
        .guest_get_msrs         = intel_guest_get_msrs,
-       .flush_branch_stack     = intel_pmu_flush_branch_stack,
+       .sched_task             = intel_pmu_lbr_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
 
                intel_pmu_lbr_reset_64();
 }
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       /*
+        * When sampling the branck stack in system-wide, it may be
+        * necessary to flush the stack on context switch. This happens
+        * when the branch stack does not tag its entries with the pid
+        * of the current task. Otherwise it becomes impossible to
+        * associate a branch entry with a task. This ambiguity is more
+        * likely to appear when the branch stack supports priv level
+        * filtering and the user sets it to monitor only at the user
+        * level (which could be a useful measurement in system-wide
+        * mode). In that case, the risk is high of having a branch
+        * stack with branch from multiple tasks.
+        */
+       if (sched_in) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = ctx;
+       }
+}
+
 void intel_pmu_lbr_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        cpuc->br_sel = event->hw.branch_reg.reg;
 
        cpuc->lbr_users++;
+       perf_sched_cb_inc(event->ctx->pmu);
 }
 
 void intel_pmu_lbr_disable(struct perf_event *event)
 
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
+       perf_sched_cb_dec(event->ctx->pmu);
 
        if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
 
  */
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
        if (is_cgroup_event(event))
                ctx->nr_cgroups++;
 
-       if (has_branch_stack(event))
-               ctx->nr_branch_stack++;
-
        list_add_rcu(&event->event_entry, &ctx->event_list);
        ctx->nr_events++;
        if (event->attr.inherit_stat)
                        cpuctx->cgrp = NULL;
        }
 
-       if (has_branch_stack(event))
-               ctx->nr_branch_stack--;
-
        ctx->nr_events--;
        if (event->attr.inherit_stat)
                ctx->nr_stat--;
        perf_ctx_unlock(cpuctx, ctx);
 }
 
-/*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
-                                      struct task_struct *task)
-{
-       struct perf_cpu_context *cpuctx;
-       struct pmu *pmu;
-       unsigned long flags;
-
-       /* no need to flush branch stack if not changing task */
-       if (prev == task)
-               return;
-
-       local_irq_save(flags);
-
-       rcu_read_lock();
-
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-               /*
-                * check if the context has at least one
-                * event using PERF_SAMPLE_BRANCH_STACK
-                */
-               if (cpuctx->ctx.nr_branch_stack > 0
-                   && pmu->flush_branch_stack) {
-
-                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
-                       perf_pmu_disable(pmu);
-
-                       pmu->flush_branch_stack();
-
-                       perf_pmu_enable(pmu);
-
-                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-               }
-       }
-
-       rcu_read_unlock();
-
-       local_irq_restore(flags);
-}
-
 /*
  * Called from scheduler to add the events of the current task
  * with interrupts disabled.
        if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
                perf_cgroup_sched_in(prev, task);
 
-       /* check for system-wide branch_stack events */
-       if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
-               perf_branch_stack_sched_in(prev, task);
-
        if (__this_cpu_read(perf_sched_cb_usages))
                perf_pmu_sched_task(prev, task, true);
 }
        if (event->parent)
                return;
 
-       if (has_branch_stack(event)) {
-               if (!(event->attach_state & PERF_ATTACH_TASK))
-                       atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
-       }
        if (is_cgroup_event(event))
                atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
        if (event->parent)
                return;
 
-       if (has_branch_stack(event)) {
-               if (!(event->attach_state & PERF_ATTACH_TASK))
-                       atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
-       }
        if (is_cgroup_event(event))
                atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }