#include <asm/irq_regs.h>
 
+static struct workqueue_struct *perf_wq;
+
 struct remote_function_call {
        struct task_struct      *p;
        int                     (*func)(void *info);
                perf_event__header_size(tmp);
 }
 
+/*
+ * User event without the task.
+ */
+static bool is_orphaned_event(struct perf_event *event)
+{
+       return event && !is_kernel_event(event) && !event->owner;
+}
+
+/*
+ * Event has a parent but parent's task finished and it's
+ * alive only because of children holding refference.
+ */
+static bool is_orphaned_child(struct perf_event *event)
+{
+       return is_orphaned_event(event->parent);
+}
+
+static void orphans_remove_work(struct work_struct *work);
+
+static void schedule_orphans_remove(struct perf_event_context *ctx)
+{
+       if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
+               return;
+
+       if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
+               get_ctx(ctx);
+               ctx->orphans_remove_sched = true;
+       }
+}
+
+static int __init perf_workqueue_init(void)
+{
+       perf_wq = create_singlethread_workqueue("perf");
+       WARN(!perf_wq, "failed to create perf workqueue\n");
+       return perf_wq ? 0 : -1;
+}
+
+core_initcall(perf_workqueue_init);
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
 
+       if (is_orphaned_child(event))
+               schedule_orphans_remove(ctx);
+
        perf_pmu_enable(event->pmu);
 }
 
        if (event->attr.exclusive)
                cpuctx->exclusive = 1;
 
+       if (is_orphaned_child(event))
+               schedule_orphans_remove(ctx);
+
 out:
        perf_pmu_enable(event->pmu);
 
        INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
+       INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
        return 0;
 }
 
+/*
+ * Remove all orphanes events from the context.
+ */
+static void orphans_remove_work(struct work_struct *work)
+{
+       struct perf_event_context *ctx;
+       struct perf_event *event, *tmp;
+
+       ctx = container_of(work, struct perf_event_context,
+                          orphans_remove.work);
+
+       mutex_lock(&ctx->mutex);
+       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
+               struct perf_event *parent_event = event->parent;
+
+               if (!is_orphaned_child(event))
+                       continue;
+
+               perf_remove_from_context(event, true);
+
+               mutex_lock(&parent_event->child_mutex);
+               list_del_init(&event->child_list);
+               mutex_unlock(&parent_event->child_mutex);
+
+               free_event(event);
+               put_event(parent_event);
+       }
+
+       raw_spin_lock_irq(&ctx->lock);
+       ctx->orphans_remove_sched = false;
+       raw_spin_unlock_irq(&ctx->lock);
+       mutex_unlock(&ctx->mutex);
+
+       put_ctx(ctx);
+}
+
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
        if (IS_ERR(child_event))
                return child_event;
 
-       if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+       if (is_orphaned_event(parent_event) ||
+           !atomic_long_inc_not_zero(&parent_event->refcount)) {
                free_event(child_event);
                return NULL;
        }