intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
+static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
+                                   struct perf_event_context *next)
+{
+       intel_pmu_lbr_swap_task_ctx(prev, next);
+}
+
 static int intel_pmu_check_period(struct perf_event *event, u64 value)
 {
        return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
 
        .guest_get_msrs         = intel_guest_get_msrs,
        .sched_task             = intel_pmu_sched_task,
+       .swap_task_ctx          = intel_pmu_swap_task_ctx,
 
        .check_period           = intel_pmu_check_period,
 
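For drivers that keep private per-task state behind ctx->task_ctx_data, the new
hook lets the driver do the pointer exchange itself and synchronize any extra
bookkeeping in the same step; the generic context-switch path below falls back
to the old plain swap() when no hook is provided. The sketch here is only
illustrative: struct my_pmu_task_ctx, its users field and my_pmu_swap_task_ctx()
are hypothetical stand-ins, not the actual Intel LBR implementation (which is
delegated to intel_pmu_lbr_swap_task_ctx() above).

        /* Hypothetical driver-private per-task data, illustration only. */
        struct my_pmu_task_ctx {
                int users;      /* e.g. a call-stack users refcount */
        };

        static void my_pmu_swap_task_ctx(struct perf_event_context *prev,
                                         struct perf_event_context *next)
        {
                struct my_pmu_task_ctx *prev_data, *next_data;

                /* The pointer exchange the generic code would otherwise do. */
                swap(prev->task_ctx_data, next->task_ctx_data);

                prev_data = prev->task_ctx_data;
                next_data = next->task_ctx_data;

                /* Extra synchronization only matters if both sides have data. */
                if (!prev_data || !next_data)
                        return;

                /* Keep each per-task user count with its original context. */
                swap(prev_data->users, next_data->users);
        }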
 
                raw_spin_lock(&ctx->lock);
                raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
                if (context_equiv(ctx, next_ctx)) {
+                       struct pmu *pmu = ctx->pmu;
+
                        WRITE_ONCE(ctx->task, next);
                        WRITE_ONCE(next_ctx->task, task);
 
-                       swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+                       /*
+                        * PMU-specific parts of the task perf context can
+                        * require additional synchronization. As an example of
+                        * such synchronization, see the implementation details
+                        * of Intel LBR call stack data profiling.
+                        */
+                       if (pmu->swap_task_ctx)
+                               pmu->swap_task_ctx(ctx, next_ctx);
+                       else
+                               swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
 
                        /*
                         * RCU_INIT_POINTER here is safe because we've not