+/*
+ * Hand a filled-in perf trace record over to perf_tp_event().
+ *
+ * This is a thin forwarding wrapper: every argument is passed through
+ * unchanged, only the argument order differs from perf_tp_event().
+ * @task is forwarded as-is and may be NULL (most callers pass NULL).
+ */
static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
-                      u64 count, struct pt_regs *regs, void *head)
+                      u64 count, struct pt_regs *regs, void *head,
+                      struct task_struct *task)
 {
-       perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
+       perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
 }
 #endif
 
 
 extern void perf_event_init(void);
+/*
+ * @task is optional.  When it is non-NULL and not the current task, the
+ * event is additionally delivered to the matching tracepoint events found
+ * in that task's software perf context.
+ */
 extern void perf_tp_event(u64 addr, u64 count, void *record,
                           int entry_size, struct pt_regs *regs,
-                         struct hlist_head *head, int rctx);
+                         struct hlist_head *head, int rctx,
+                         struct task_struct *task);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
 
                __entry->prio           = p->prio;
                __entry->success        = success;
                __entry->target_cpu     = task_cpu(p);
+       )
+       TP_perf_assign(
+               __perf_task(p);
        ),
 
        TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
        )
        TP_perf_assign(
                __perf_count(delay);
+               __perf_task(tsk);
        ),
 
        TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 
 #undef __perf_count
 #define __perf_count(c) __count = (c)
 
+/*
+ * __perf_task(t) stores @t in the __task variable of the code it expands
+ * in.  It is meant to be used inside TP_perf_assign(), for example
+ * TP_perf_assign(__perf_task(p);).
+ */
+#undef __perf_task
+#define __perf_task(t) __task = (t)
+
 #undef TP_perf_assign
 #define TP_perf_assign(args...) args
 
        struct ftrace_raw_##call *entry;                                \
        struct pt_regs __regs;                                          \
        u64 __addr = 0, __count = 1;                                    \
+       struct task_struct *__task = NULL;                              \
        struct hlist_head *head;                                        \
        int __entry_size;                                               \
        int __data_size;                                                \
                                                                        \
        head = this_cpu_ptr(event_call->perf_events);                   \
        perf_trace_buf_submit(entry, __entry_size, rctx, __addr,        \
-               __count, &__regs, head);                                \
+               __count, &__regs, head, __task);                        \
 }
 
 /*
 
        put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
 }
 
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
        int rctx;
        struct perf_callchain_entry *entry;
        }
 
        if (regs) {
+               /*
+                * Disallow cross-task user callchains.
+                */
+               if (event->ctx->task && event->ctx->task != current)
+                       goto exit_put;
+
                perf_callchain_store(entry, PERF_CONTEXT_USER);
                perf_callchain_user(entry, regs);
        }
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                int size = 1;
 
-               data->callchain = perf_callchain(regs);
+               data->callchain = perf_callchain(event, regs);
 
                if (data->callchain)
                        size += data->callchain->nr;
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-                  struct pt_regs *regs, struct hlist_head *head, int rctx)
+                  struct pt_regs *regs, struct hlist_head *head, int rctx,
+                  struct task_struct *task)
 {
        struct perf_sample_data data;
        struct perf_event *event;
                        perf_swevent_event(event, count, &data, regs);
        }
 
+       /*
+        * If a target task was specified, also iterate its context and
+        * deliver this event there too.
+        */
+       if (task && task != current) {
+               struct perf_event_context *ctx;
+               struct trace_entry *entry = record;
+
+               rcu_read_lock();
+               ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
+               if (!ctx)
+                       goto unlock;
+
+               list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+                       if (event->attr.type != PERF_TYPE_TRACEPOINT)
+                               continue;
+                       if (event->attr.config != entry->type)
+                               continue;
+                       if (perf_tp_event_match(event, &data, regs))
+                               perf_swevent_event(event, count, &data, regs);
+               }
+unlock:
+               rcu_read_unlock();
+       }
+
        perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
 }
 
 /* Callchain handling */
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+/*
+ * @event is the event the callchain is collected for.  perf_callchain()
+ * checks event->ctx->task and does not record a user callchain when that
+ * task is set and is not the current task.
+ */
+extern struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
 
 
        head = this_cpu_ptr(event_function.perf_events);
        perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
-                             1, &regs, head);
+                             1, &regs, head, NULL);
 
 #undef ENTRY_SIZE
 }
 
        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
        head = this_cpu_ptr(call->perf_events);
-       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
+       perf_trace_buf_submit(entry, size, rctx,
+                                       entry->ip, 1, regs, head, NULL);
 }
 
 /* Kretprobe profile handler */
        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
        head = this_cpu_ptr(call->perf_events);
-       perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
+       perf_trace_buf_submit(entry, size, rctx,
+                                       entry->ret_ip, 1, regs, head, NULL);
 }
 #endif /* CONFIG_PERF_EVENTS */
 
 
                               (unsigned long *)&rec->args);
 
        head = this_cpu_ptr(sys_data->enter_event->perf_events);
-       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
+       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
 int perf_sysenter_enable(struct ftrace_event_call *call)
        rec->ret = syscall_get_return_value(current, regs);
 
        head = this_cpu_ptr(sys_data->exit_event->perf_events);
-       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
+       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
 int perf_sysexit_enable(struct ftrace_event_call *call)
 
                call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
        head = this_cpu_ptr(call->perf_events);
-       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
+       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
 
  out:
        preempt_enable();