extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
-extern void *
-perf_trace_buf_prepare(int size, unsigned short type, int *rctxp,
-                        unsigned long *irq_flags);
+extern void *perf_trace_buf_prepare(int size, unsigned short type,
+                                   struct pt_regs *regs, int *rctxp);
 
+/*
+ * Deliver a prepared trace record to perf, then release the recursion
+ * context.
+ *
+ * raw_data is read as starting with a struct trace_entry; its type field
+ * is passed to perf_tp_event() together with addr, count, the record
+ * itself (raw_data/size), regs and event.  rctx is then handed back via
+ * perf_swevent_put_recursion_context().  This function no longer takes
+ * irq_flags and no longer restores the interrupt state.
+ */
 static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
-                      u64 count, unsigned long irq_flags, struct pt_regs *regs,
-                      void *event)
+                      u64 count, struct pt_regs *regs, void *event)
 {
        struct trace_entry *entry = raw_data;
 
        perf_tp_event(entry->type, addr, count, raw_data, size, regs, event);
        perf_swevent_put_recursion_context(rctx);
-       local_irq_restore(irq_flags);
 }
 #endif
 
 
        struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
        struct ftrace_raw_##call *entry;                                \
        u64 __addr = 0, __count = 1;                                    \
-       unsigned long irq_flags;                                        \
        int __entry_size;                                               \
        int __data_size;                                                \
        int rctx;                                                       \
        if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,               \
                      "profile buffer not large enough"))               \
                return;                                                 \
+                                                                       \
        entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(     \
-               __entry_size, event_call->id, &rctx, &irq_flags);       \
+               __entry_size, event_call->id, __regs, &rctx);           \
        if (!entry)                                                     \
                return;                                                 \
+                                                                       \
        tstruct                                                         \
                                                                        \
        { assign; }                                                     \
                                                                        \
        perf_trace_buf_submit(entry, __entry_size, rctx, __addr,        \
-                              __count, irq_flags, __regs,              \
-                             event_call->perf_data);                   \
+                              __count, __regs, event_call->perf_data); \
 }
 
 #undef DEFINE_EVENT
 static notrace void perf_trace_##call(proto)                           \
 {                                                                      \
        struct ftrace_event_call *event_call = &event_##call;           \
-       struct pt_regs *__regs = &get_cpu_var(perf_trace_regs);         \
-                                                                       \
-       perf_fetch_caller_regs(__regs, 1);                              \
-                                                                       \
-       perf_trace_templ_##template(event_call, __regs, args);          \
+       struct pt_regs __regs;                                          \
                                                                        \
-       put_cpu_var(perf_trace_regs);                                   \
+       perf_fetch_caller_regs(&__regs, 1);                             \
+       perf_trace_templ_##template(event_call, &__regs, args);         \
 }
 
 #undef DEFINE_EVENT_PRINT
 
 #include <linux/kprobes.h>
 #include "trace.h"
 
-DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
-EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
-
 EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
 
-static char *perf_trace_buf;
-static char *perf_trace_buf_nmi;
+static char *perf_trace_buf[4];
 
 /*
  * Force it to be aligned to unsigned long to avoid misaligned accesses
 
 static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
 {
-       char *buf;
        int ret = -ENOMEM;
 
        if (event->perf_refcount++ > 0) {
        }
 
        if (!total_ref_count) {
-               buf = (char *)alloc_percpu(perf_trace_t);
-               if (!buf)
-                       goto fail_buf;
-
-               rcu_assign_pointer(perf_trace_buf, buf);
+               char *buf;
+               int i;
 
-               buf = (char *)alloc_percpu(perf_trace_t);
-               if (!buf)
-                       goto fail_buf_nmi;
+               for (i = 0; i < 4; i++) {
+                       buf = (char *)alloc_percpu(perf_trace_t);
+                       if (!buf)
+                               goto fail_buf;
 
-               rcu_assign_pointer(perf_trace_buf_nmi, buf);
+                       rcu_assign_pointer(perf_trace_buf[i], buf);
+               }
        }
 
        ret = event->perf_event_enable(event);
                return 0;
        }
 
-fail_buf_nmi:
+fail_buf:
        if (!total_ref_count) {
-               free_percpu(perf_trace_buf_nmi);
-               free_percpu(perf_trace_buf);
-               perf_trace_buf_nmi = NULL;
-               perf_trace_buf = NULL;
+               int i;
+
+               for (i = 0; i < 4; i++) {
+                       free_percpu(perf_trace_buf[i]);
+                       perf_trace_buf[i] = NULL;
+               }
        }
-fail_buf:
        event->perf_refcount--;
 
        return ret;
 
 static void perf_trace_event_disable(struct ftrace_event_call *event)
 {
-       char *buf, *nmi_buf;
-
        if (--event->perf_refcount > 0)
                return;
 
        event->perf_event_disable(event);
 
        if (!--total_ref_count) {
-               buf = perf_trace_buf;
-               rcu_assign_pointer(perf_trace_buf, NULL);
+               char *buf[4];
+               int i;
 
-               nmi_buf = perf_trace_buf_nmi;
-               rcu_assign_pointer(perf_trace_buf_nmi, NULL);
+               for (i = 0; i < 4; i++) {
+                       buf[i] = perf_trace_buf[i];
+                       rcu_assign_pointer(perf_trace_buf[i], NULL);
+               }
 
                /*
                 * Ensure every events in profiling have finished before
                 */
                synchronize_sched();
 
-               free_percpu(buf);
-               free_percpu(nmi_buf);
+               for (i = 0; i < 4; i++)
+                       free_percpu(buf[i]);
        }
 }
 
 }
 
+/*
+ * perf_trace_buf_prepare - set up this CPU's perf trace buffer for a record
+ * @size:  record size in bytes; the trailing sizeof(u64) bytes are zeroed
+ * @type:  event type stored in the record's trace_entry header
+ * @regs:  register snapshot from the caller; part of the interface but not
+ *         read here, because the irq flags are sampled directly (see below)
+ * @rctxp: out: perf software-event recursion context, also used as the
+ *         index into perf_trace_buf[]
+ *
+ * Returns the per-CPU record buffer with its header filled in, or NULL if
+ * no recursion context could be taken or the buffer is not allocated.  On
+ * success the recursion context is still held; on failure it has already
+ * been released.
+ */
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
-                                      int *rctxp, unsigned long *irq_flags)
+                                      struct pt_regs *regs, int *rctxp)
 {
        struct trace_entry *entry;
        char *trace_buf, *raw_data;
-       int pc, cpu;
+       unsigned long flags;
+       int pc;
 
        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
        pc = preempt_count();
 
-       /* Protect the per cpu buffer, begin the rcu read side */
-       local_irq_save(*irq_flags);
-
        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                goto err_recursion;
 
-       cpu = smp_processor_id();
-
-       if (in_nmi())
-               trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
-       else
-               trace_buf = rcu_dereference_sched(perf_trace_buf);
-
+       /* One buffer per recursion context replaces the old normal/NMI pair */
+       trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
        if (!trace_buf)
                goto err;
 
-       raw_data = per_cpu_ptr(trace_buf, cpu);
+       raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
 
        /* zero the dead bytes from align to not leak stack to user */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
 
        entry = (struct trace_entry *)raw_data;
-       tracing_generic_entry_update(entry, *irq_flags, pc);
+       /*
+        * Do not read regs->flags: struct pt_regs only has a 'flags' member
+        * on x86, so that breaks the build elsewhere.  Sample the current
+        * irq state instead; interrupts are left untouched.
+        */
+       local_save_flags(flags);
+       tracing_generic_entry_update(entry, flags, pc);
        entry->type = type;
 
        return raw_data;
 err:
        perf_swevent_put_recursion_context(*rctxp);
 err_recursion:
-       local_irq_restore(*irq_flags);
        return NULL;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
 
        struct kprobe_trace_entry_head *entry;
        u8 *data;
        int size, __size, i;
-       unsigned long irq_flags;
        int rctx;
 
        __size = sizeof(*entry) + tp->size;
                     "profile buffer not large enough"))
                return;
 
-       entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
+       entry = perf_trace_buf_prepare(size, call->id, regs, &rctx);
        if (!entry)
                return;
 
        for (i = 0; i < tp->nr_args; i++)
                call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
 
-       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs, call->perf_data);
+       perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, call->perf_data);
 }
 
 /* Kretprobe profile handler */
        struct kretprobe_trace_entry_head *entry;
        u8 *data;
        int size, __size, i;
-       unsigned long irq_flags;
        int rctx;
 
        __size = sizeof(*entry) + tp->size;
                     "profile buffer not large enough"))
                return;
 
-       entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
+       entry = perf_trace_buf_prepare(size, call->id, regs, &rctx);
        if (!entry)
                return;
 
                call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
 
        perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
-                              irq_flags, regs, call->perf_data);
+                             regs, call->perf_data);
 }
 
 static int probe_perf_enable(struct ftrace_event_call *call)
 
 {
        struct syscall_metadata *sys_data;
        struct syscall_trace_enter *rec;
-       unsigned long flags;
        int syscall_nr;
        int rctx;
        int size;
                return;
 
        rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
-                               sys_data->enter_event->id, &rctx, &flags);
+                               sys_data->enter_event->id, regs, &rctx);
        if (!rec)
                return;
 
        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                               (unsigned long *)&rec->args);
-       perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs,
+       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
                        sys_data->enter_event->perf_data);
 }
 
 {
        struct syscall_metadata *sys_data;
        struct syscall_trace_exit *rec;
-       unsigned long flags;
        int syscall_nr;
        int rctx;
        int size;
                return;
 
        rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
-                               sys_data->exit_event->id, &rctx, &flags);
+                               sys_data->exit_event->id, regs, &rctx);
        if (!rec)
                return;
 
        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);
 
-       perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs,
+       perf_trace_buf_submit(rec, size, rctx, 0, 1, regs,
                        sys_data->exit_event->perf_data);
 }