]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
dtrace: restructuring for multi-arch support
author Kris Van Hees <kris.van.hees@oracle.com>
Mon, 22 Dec 2014 21:54:49 +0000 (16:54 -0500)
committer Nick Alcock <nick.alcock@oracle.com>
Tue, 21 Jul 2015 14:29:47 +0000 (15:29 +0100)
1) The cpu_info member of the cpu_core structure is now architecture
   dependent through the use of type cpuinfo_arch_t.  Architectures must
   provide a typedef for this type in asm/dtrace_cpuinfo.h.

2) Wherever an instruction address is used, the type is now sdt_instr_t,
   which architectures must define in asm/dtrace_sdt.h.

3) System calls that use an assembly language stub must be defined by
   architectures in asm/dtrace_syscall.h using the following syntax:

DTRACE_SYSCALL_STUB(all-caps-name, name)

   E.g.

DTRACE_SYSCALL_STUB(CLONE, clone)

4) The x86 architecture uses an InvalidOpcode TRAP as the probe firing
   mechanism for SDT.  This is machinery that is entirely architecture
   specific (not just in implementation) and the function prototypes for
   those functions have been moved to asm/dtrace_util.h.  The actual SDT
   implementation (which aside from the core framework is always going
   to be architecture specific) has been moved into the x86 specific
   part of the kernel tree.  The core framework has been split off into
   its own dtrace_sdt_core.c.

5) The system call tracing implementation (and assembly language stubs)
   are very architecture specific and have been moved into the x86
   specific part of the kernel tree.  The sys_call_ptr_t type has been
   discontinued because dt_sys_call_t serves the same purpose already.
   The syscall tracing specific part of dtrace_os.h has been split out
   into its own dtrace_syscall.h.

6) Aside from any architecture-specific support functions (that are not
   accessed from arch-independent code), an arch-specific dtrace_util.c
   must also implement the following functions:

void dtrace_skip_instruction(struct pt_regs *regs)

Adjust the CPU registers such that upon restoring of the
register set, execution commences with the instruction
AFTER the one currently referenced.

int dtrace_die_notifier(struct notifier_block *nb,
unsigned long val, void *args)

Handle faults that could cause a panic in cases of
invalid memory accesses, and verify whether DTrace has
requested that they be rendered non-fatal.  This may
also be used for other arch-specific things like the
InvalidOpcode TRAP on x86.

7) Since the linux/percpu.h include file occurs in different include
   chains dependent on the architecture, linux/dtrace_cpu_defines.h now
   includes it explicitly (as it should have done from the beginning)
   because it does depend on per_cpu() and friends.

8) The use of uprobes structures in linux/dtrace_os.h was conditional on
   a specific architecture (x86_64).  This has been changed to now be
   conditional on whether the fasttrap provider is being built.  Fasttrap
   itself depends on uprobes through the kconfig mechanism.

9) The DTrace facility has been a bool option in Kconfig for a while now,
   although some code still refers to the possibility that CONFIG_DTRACE_MODULE
   may be defined.  That has been changed to always check CONFIG_DTRACE.

10) Some architectures truly need strnlen_user() to be used when there
   are userspace addresses involved.

11) While on x86 we were using the dump stack facility to get stacktrace
   information, this is not available on other architectures.  We're now
   using save_stack_trace() instead.  Sadly, just for x86 we need to adjust
   the stack depth value by -1 because the x86 implementation adds a
   totally unnecessary sentinel value to the end of the stacktrace (and
   includes it in the nr_entries count).

Orabug: 20262965

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Acked-by: Nick Alcock <nick.alcock@oracle.com>
20 files changed:
arch/x86/include/asm/dtrace_cpuinfo.h [new file with mode: 0644]
arch/x86/include/asm/dtrace_sdt.h [new file with mode: 0644]
arch/x86/include/asm/dtrace_syscall.h [new file with mode: 0644]
arch/x86/include/asm/dtrace_util.h [new file with mode: 0644]
arch/x86/kernel/dtrace_sdt.c [new file with mode: 0644]
arch/x86/kernel/dtrace_syscall.c [new file with mode: 0644]
arch/x86/kernel/dtrace_syscall_stubs.S [moved from kernel/dtrace/dtrace_stubs_x86_64.S with 95% similarity]
arch/x86/kernel/dtrace_util.c [new file with mode: 0644]
include/linux/dtrace_cpu.h
include/linux/dtrace_cpu_defines.h
include/linux/dtrace_os.h
include/linux/dtrace_syscall.h [new file with mode: 0644]
include/linux/sdt.h
kernel/dtrace/Kconfig
kernel/dtrace/Makefile
kernel/dtrace/dtrace_cpu.c
kernel/dtrace/dtrace_os.c
kernel/dtrace/dtrace_sdt.c [deleted file]
kernel/dtrace/dtrace_sdt.h [new file with mode: 0644]
kernel/dtrace/dtrace_sdt_core.c [new file with mode: 0644]

diff --git a/arch/x86/include/asm/dtrace_cpuinfo.h b/arch/x86/include/asm/dtrace_cpuinfo.h
new file mode 100644 (file)
index 0000000..68ce09c
--- /dev/null
@@ -0,0 +1,12 @@
+/* Copyright (C) 2013-2014 Oracle, Inc. */
+
+#ifndef _ASM_X86_DTRACE_CPUINFO_H_
+#define _ASM_X86_DTRACE_CPUINFO_H_
+
+#include <asm/processor.h>
+
+typedef struct cpuinfo_x86     cpuinfo_arch_t;
+
+#define dtrace_cpuinfo_chip(ci)        ((ci)->phys_proc_id)
+
+#endif /* _ASM_X86_DTRACE_CPUINFO_H_ */
diff --git a/arch/x86/include/asm/dtrace_sdt.h b/arch/x86/include/asm/dtrace_sdt.h
new file mode 100644 (file)
index 0000000..96c7fa2
--- /dev/null
@@ -0,0 +1,8 @@
+/* Copyright (C) 2013-2014 Oracle, Inc. */
+
+#ifndef _X86_DTRACE_SDT_H
+#define _X86_DTRACE_SDT_H
+
+typedef uint8_t                sdt_instr_t;
+
+#endif /* _X86_DTRACE_SDT_H */
diff --git a/arch/x86/include/asm/dtrace_syscall.h b/arch/x86/include/asm/dtrace_syscall.h
new file mode 100644 (file)
index 0000000..1a8fe47
--- /dev/null
@@ -0,0 +1,8 @@
+/* Copyright (C) 2011-2014 Oracle, Inc. */
+
+DTRACE_SYSCALL_STUB(CLONE,             clone)
+DTRACE_SYSCALL_STUB(FORK,              fork)
+DTRACE_SYSCALL_STUB(VFORK,             vfork)
+DTRACE_SYSCALL_STUB(IOPL,              iopl)
+DTRACE_SYSCALL_STUB(EXECVE,            execve)
+DTRACE_SYSCALL_STUB(RT_SIGRETURN,      rt_sigreturn)
diff --git a/arch/x86/include/asm/dtrace_util.h b/arch/x86/include/asm/dtrace_util.h
new file mode 100644 (file)
index 0000000..a26840d
--- /dev/null
@@ -0,0 +1,14 @@
+/* Copyright (C) 2013-2014 Oracle, Inc. */
+
+#ifndef _X86_DTRACE_UTIL_H
+#define _X86_DTRACE_UTIL_H
+
+#include <asm/ptrace.h>
+
+extern int dtrace_invop_add(uint8_t (*func)(struct pt_regs *));
+extern void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *));
+
+extern void dtrace_invop_enable(uint8_t *);
+extern void dtrace_invop_disable(uint8_t *, uint8_t);
+
+#endif /* _X86_DTRACE_UTIL_H */
diff --git a/arch/x86/kernel/dtrace_sdt.c b/arch/x86/kernel/dtrace_sdt.c
new file mode 100644 (file)
index 0000000..86ed00b
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * FILE:        dtrace_sdt.c
+ * DESCRIPTION: Dynamic Tracing: SDT registration code (arch-specific)
+ *
+ * Copyright (C) 2010-2014 Oracle Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/module.h>
+#include <linux/dtrace_os.h>
+#include <linux/sdt.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/nmi.h>
+#include <asm/nops.h>
+#include <asm/dtrace_sdt.h>
+
+#define        SDT_NOP_SIZE    5
+
+uint8_t                        nops[SDT_NOP_SIZE];
+
+/* This code is based on apply_alternatives and text_poke_early.  It needs to
+ * run before SMP is initialized in order to avoid SMP problems with patching
+ * code that might be accessed on another CPU.
+ */
+void __init_or_module dtrace_sdt_nop_multi(sdt_instr_t **addrs, int cnt)
+{
+       int                     i;
+       sdt_instr_t             *addr;
+       unsigned long           flags;
+
+       stop_nmi();
+       local_irq_save(flags);
+
+       for (i = 0; i < cnt; i++) {
+               addr = addrs[i];
+               memcpy(addr, nops, sizeof(nops));
+       }
+
+       sync_core();
+       local_irq_restore(flags);
+       restart_nmi();
+}
+
+void dtrace_sdt_init_arch(void)
+{
+       /*
+        * A little unusual, but potentially necessary.  While we could use a
+        * single NOP sequence of length SDT_NOP_SIZE, we need to consider the
+        * fact that when a SDT probe point is enabled, a single invalid opcode
+        * is written on the first byte of this NOP sequence.  By using a
+        * sequence of a 1-byte NOP, followed by a (SDT_NOP_SIZE - 1) byte NOP
+        * sequence, we play it pretty safe.
+        */
+       add_nops(nops, 1);
+       add_nops(nops + 1, SDT_NOP_SIZE - 1);
+}
diff --git a/arch/x86/kernel/dtrace_syscall.c b/arch/x86/kernel/dtrace_syscall.c
new file mode 100644 (file)
index 0000000..85c9c10
--- /dev/null
@@ -0,0 +1,286 @@
+/*
+ * FILE:       dtrace_syscall.c
+ * DESCRIPTION:        Dynamic Tracing: system call tracing support (arch-specific)
+ *
+ * Copyright (C) 2010-2014 Oracle Corporation
+ */
+
+#include <linux/dtrace_cpu.h>
+#include <linux/dtrace_os.h>
+#include <linux/dtrace_syscall.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/insn.h>
+#include <asm/stacktrace.h>
+#include <asm/syscalls.h>
+
+/*---------------------------------------------------------------------------*\
+(* SYSTEM CALL TRACING SUPPORT                                               *)
+\*---------------------------------------------------------------------------*/
+void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+                      uintptr_t, uintptr_t);
+
+void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
+                  uintptr_t arg2, uintptr_t arg3, uintptr_t arg4,
+                  uintptr_t arg5)
+{
+}
+
+asmlinkage long systrace_syscall(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_clone(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_fork(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_vfork(uintptr_t, uintptr_t,
+                                 uintptr_t, uintptr_t,
+                                 uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_iopl(uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t,
+                                uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_execve(uintptr_t, uintptr_t,
+                                  uintptr_t, uintptr_t,
+                                  uintptr_t, uintptr_t);
+asmlinkage long dtrace_stub_rt_sigreturn(uintptr_t, uintptr_t,
+                                        uintptr_t, uintptr_t,
+                                        uintptr_t, uintptr_t);
+
+static systrace_info_t systrace_info =
+               {
+                       &systrace_probe,
+                       systrace_stub,
+                       systrace_syscall,
+                       {
+                           [SCE_CLONE] dtrace_stub_clone,
+                           [SCE_FORK] dtrace_stub_fork,
+                           [SCE_VFORK] dtrace_stub_vfork,
+                           [SCE_IOPL] dtrace_stub_iopl,
+                           [SCE_EXECVE] dtrace_stub_execve,
+                           [SCE_RT_SIGRETURN] dtrace_stub_rt_sigreturn,
+                       },
+                       {
+#define __SYSCALL_64(nr, sym, compat)          [nr] { __stringify(sym), },
+#define __SYSCALL_COMMON(nr, sym, compat)      __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_X32(nt, sym, compat)
+#include <asm/syscalls_64.h>
+                       }
+               };
+
+
+long systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
+                     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+{
+       long                    rc = 0;
+       unsigned long           sysnum;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       asm volatile("movq %%rax,%0" : "=m"(sysnum));
+
+       sc = &systrace_info.sysent[sysnum];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       if (sc->stsy_underlying != NULL)
+               rc = (*sc->stsy_underlying)(arg0, arg1, arg2, arg3, arg4,
+                                           arg5);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+systrace_info_t *dtrace_syscalls_init() {
+       int                     i;
+
+       for (i = 0; i < NR_syscalls; i++) {
+               systrace_info.sysent[i].stsy_tblent =
+                                       (dt_sys_call_t *)&sys_call_table[i];
+               systrace_info.sysent[i].stsy_underlying =
+                                       (dt_sys_call_t)sys_call_table[i];
+       }
+
+       return &systrace_info;
+}
+EXPORT_SYMBOL(dtrace_syscalls_init);
+
+long dtrace_sys_clone(unsigned long clone_flags, unsigned long newsp,
+                     int __user *parent_tidptr, int __user *child_tidptr,
+                 int tls_val)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_clone];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, clone_flags, newsp,
+                                 (uintptr_t)parent_tidptr,
+                                 (uintptr_t)child_tidptr, tls_val, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+long dtrace_sys_fork(void)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_fork];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(SIGCHLD, 0, 0, NULL, NULL);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+long dtrace_sys_vfork(void)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_vfork];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+long dtrace_sys_execve(const char __user *name,
+                      const char __user *const __user *argv,
+                      const char __user *const __user *envp)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_execve];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)name, (uintptr_t)argv,
+                                 (uintptr_t)envp, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = do_execve(getname(name), argv, envp);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+long dtrace_sys_iopl(unsigned int level, struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+       unsigned int            old = (regs->flags >> 12) & 3;
+       struct thread_struct    *t = &current->thread;
+
+       sc = &systrace_info.sysent[__NR_iopl];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)level, (uintptr_t)regs,
+                                 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       if (level > 3) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       /* Trying to gain more privileges? */
+       if (level > old) {
+               if (!capable(CAP_SYS_RAWIO)) {
+                       rc = -EPERM;
+                       goto out;
+               }
+       }
+
+       regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
+       t->iopl = level << 12;
+       set_iopl_mask(t->iopl);
+
+out:
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
+
+long dtrace_sys_rt_sigreturn(struct pt_regs *regs)
+{
+       long                    rc = 0;
+       dtrace_id_t             id;
+       dtrace_syscalls_t       *sc;
+
+       sc = &systrace_info.sysent[__NR_rt_sigreturn];
+
+       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)regs, 0, 0, 0, 0, 0);
+
+       /*
+        * FIXME: Add stop functionality for DTrace.
+        */
+
+       rc = sys_rt_sigreturn(regs);
+
+       if ((id = sc->stsy_return) != DTRACE_IDNONE)
+               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
+                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+
+       return rc;
+}
similarity index 95%
rename from kernel/dtrace/dtrace_stubs_x86_64.S
rename to arch/x86/kernel/dtrace_syscall_stubs.S
index 4efccdf5054b13da72c49704a60391a82498c633..b10a6a97847ad983cae52fdc458defcbe92f6427 100644 (file)
@@ -1,8 +1,8 @@
 /*
  * FILE:        dtrace_stubs_x86_64.S
- * DESCRIPTION: Dynamic Tracing: x86_64 specific stubs (based on entry_64.S)
+ * DESCRIPTION: Dynamic Tracing: Syscall tracing stubs (arch-specific)
  *
- * Copyright (C) 2010, 2011 Oracle Corporation
+ * Copyright (C) 2010-2014 Oracle Corporation
  */
 
 #include <linux/linkage.h>
@@ -167,7 +167,7 @@ ENTRY(dtrace_stub_\func)
        pushq   %r11                    /* put it back on stack */
        FIXUP_TOP_OF_STACK %r11, 8
        DEFAULT_FRAME 0 8               /* offset 8: return address */
-       call dtrace_\func
+       call dtrace_sys_\func
        RESTORE_TOP_OF_STACK %r11, 8
        ret $REST_SKIP                  /* pop extended registers */
        CFI_ENDPROC
@@ -177,7 +177,7 @@ END(dtrace_stub_\func)
         FORK_LIKE  clone
         FORK_LIKE  fork
         FORK_LIKE  vfork
-       PTREGSCALL dtrace_stub_iopl, dtrace_iopl, %rsi
+       PTREGSCALL dtrace_stub_iopl, dtrace_sys_iopl, %rsi
 
 ENTRY(dtrace_stub_execve)
        CFI_STARTPROC
@@ -185,7 +185,7 @@ ENTRY(dtrace_stub_execve)
        PARTIAL_FRAME 0
        SAVE_REST
        FIXUP_TOP_OF_STACK %r11
-       call dtrace_execve
+       call dtrace_sys_execve
        RESTORE_TOP_OF_STACK %r11
        movq %rax,RAX(%rsp)
        RESTORE_REST
@@ -204,7 +204,7 @@ ENTRY(dtrace_stub_rt_sigreturn)
        SAVE_REST
        movq %rsp,%rdi
        FIXUP_TOP_OF_STACK %r11
-       call dtrace_rt_sigreturn
+       call dtrace_sys_rt_sigreturn
        movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
        RESTORE_REST
        jmp int_ret_from_sys_call
diff --git a/arch/x86/kernel/dtrace_util.c b/arch/x86/kernel/dtrace_util.c
new file mode 100644 (file)
index 0000000..926b59e
--- /dev/null
@@ -0,0 +1,179 @@
+/*
+ * FILE:       dtrace_util.c
+ * DESCRIPTION:        Dynamic Tracing: Architecture utility functions
+ *
+ * Copyright (C) 2010-2014 Oracle Corporation
+ */
+
+#include <linux/dtrace_cpu.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <asm/insn.h>
+#include <asm/ptrace.h>
+
+/*
+ * Move the instruction pointer forward to the next instruction, effectively
+ * skipping the current one.
+ */
+void dtrace_skip_instruction(struct pt_regs *regs) {
+       struct insn             insn;
+
+       kernel_insn_init(&insn, (void *)regs->ip);
+       insn_get_length(&insn);
+
+       regs->ip += insn.length;
+}
+
+typedef struct dtrace_invop_hdlr {
+       uint8_t                         (*dtih_func)(struct pt_regs *);
+       struct dtrace_invop_hdlr        *dtih_next;
+} dtrace_invop_hdlr_t;
+
+static dtrace_invop_hdlr_t     *dtrace_invop_hdlrs;
+
+#define INVOP_TRAP_INSTR       0xf0
+
+/*
+ * Trap notification handler.
+ */
+int dtrace_die_notifier(struct notifier_block *nb, unsigned long val,
+                       void *args)
+{
+       struct die_args         *dargs = args;
+
+       switch (val) {
+       case DIE_PAGE_FAULT: {
+               unsigned long   addr = read_cr2();
+
+               if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT))
+                       return NOTIFY_DONE;
+
+               DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
+               this_cpu_core->cpuc_dtrace_illval = addr;
+
+               dtrace_skip_instruction(dargs->regs);
+
+               return NOTIFY_OK | NOTIFY_STOP_MASK;
+       }
+       case DIE_GPF: {
+               struct insn     insn;
+
+               kernel_insn_init(&insn, (void *)dargs->regs->ip);
+               insn_get_length(&insn);
+
+               /*
+                * It would seem that the invalid opcode generated by the LOCK
+                * prefix (0xF0) used for SDT probe points may get delivered as
+                * a general protection failure on Xen.  We need to ignore them
+                * as general protection failures...
+                */
+               if (insn.length != 5 || insn.prefixes.bytes[0] != 0xf0 ||
+                   insn.opcode.bytes[0] != 0x90) {
+                       if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT))
+                               return NOTIFY_DONE;
+
+                       DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
+
+                       dargs->regs->ip += insn.length;
+
+                       return NOTIFY_OK | NOTIFY_STOP_MASK;
+               }
+
+               /*
+                * ... and instead treat them as the SDT probe point traps that
+                * they are.
+                */
+               dargs->trapnr = 6;
+       }
+       case DIE_TRAP: {
+               dtrace_invop_hdlr_t     *hdlr;
+               int                     rval = 0;
+
+               if (dargs->trapnr != 6)
+                       return NOTIFY_DONE;
+
+               for (hdlr = dtrace_invop_hdlrs; hdlr != NULL;
+                    hdlr = hdlr->dtih_next) {
+                       if ((rval = hdlr->dtih_func(dargs->regs)) != 0)
+                               break;
+               }
+
+               if (rval != 0) {
+                       dtrace_skip_instruction(dargs->regs);
+
+                       return NOTIFY_OK | NOTIFY_STOP_MASK;
+               }
+       }
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
+/*
+ * Add an INVOP trap handler.
+ */
+int dtrace_invop_add(uint8_t (*func)(struct pt_regs *))
+{
+       dtrace_invop_hdlr_t     *hdlr;
+
+       hdlr = kmalloc(sizeof(dtrace_invop_hdlr_t), GFP_KERNEL);
+       if (hdlr == NULL) {
+               pr_warn("Failed to add invop handler: out of memory\n");
+               return -ENOMEM;
+       }
+
+       hdlr->dtih_func = func;
+       hdlr->dtih_next = dtrace_invop_hdlrs;
+       dtrace_invop_hdlrs = hdlr;
+
+       return 0;
+}
+EXPORT_SYMBOL(dtrace_invop_add);
+
+/*
+ * Remove an INVOP trap handler.
+ */
+void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *))
+{
+       dtrace_invop_hdlr_t     *hdlr = dtrace_invop_hdlrs, *prev = NULL;
+
+       for (;;) {
+               if (hdlr == NULL)
+                       return;
+
+               if (hdlr->dtih_func == func)
+                       break;
+
+               prev = hdlr;
+               hdlr = hdlr->dtih_next;
+       }
+
+       if (prev == NULL) {
+               dtrace_invop_hdlrs = hdlr->dtih_next;
+       } else
+               prev->dtih_next = hdlr->dtih_next;
+
+       kfree(hdlr);
+}
+EXPORT_SYMBOL(dtrace_invop_remove);
+
+/*
+ * Enable an INVOP-based probe, i.e. ensure that an INVOP trap is triggered at
+ * the specified address.
+ */
+void dtrace_invop_enable(uint8_t *addr)
+{
+       text_poke(addr, ((unsigned char []){INVOP_TRAP_INSTR}), 1);
+}
+EXPORT_SYMBOL(dtrace_invop_enable);
+
+/*
+ * Disable an INVOP-based probe.
+ */
+void dtrace_invop_disable(uint8_t *addr, uint8_t opcode)
+{
+       text_poke(addr, ((unsigned char []){opcode}), 1);
+}
+EXPORT_SYMBOL(dtrace_invop_disable);
index 4fa09946edf67cff18e332c629d3ebcc676aec31..d6aa57f08fc4f2832b8599129ab6ddf519813e08 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011, 2012, 2013 Oracle, Inc. */
+/* Copyright (C) 2011-2014 Oracle, Inc. */
 
 #ifndef _LINUX_DTRACE_CPU_H_
 #define _LINUX_DTRACE_CPU_H_
@@ -7,6 +7,7 @@
 #include <linux/mutex.h>
 #include <linux/rwlock.h>
 #include <linux/dtrace_cpu_defines.h>
+#include <asm/dtrace_cpuinfo.h>
 
 typedef struct cpu_core {
        uint16_t cpuc_dtrace_flags;
@@ -29,7 +30,7 @@ typedef struct cpuinfo {
        psetid_t cpu_pset;
        chipid_t cpu_chip;
        lgrp_id_t cpu_lgrp;
-       struct cpuinfo_x86 *cpu_info;
+       cpuinfo_arch_t *cpu_info;
 } cpuinfo_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(cpuinfo_t, dtrace_cpu_info);
index 24c425af829ed506a17256b1590ce79c32418c51..ca2bfb05180d3e409fdb501106661336ca42bfe2 100644 (file)
@@ -1,8 +1,10 @@
-/* Copyright (C) 2011, 2012, 2013 Oracle, Inc. */
+/* Copyright (C) 2011-2014 Oracle, Inc. */
 
 #ifndef _LINUX_DTRACE_CPU_DEFINES_H_
 #define _LINUX_DTRACE_CPU_DEFINES_H_
 
+#include <linux/percpu.h>
+
 #define CPUC_SIZE      (sizeof (uint16_t) + sizeof(uint8_t) + \
                         sizeof(uintptr_t) + sizeof(struct mutex))
 #define CPUC_PADSIZE   (192 - CPUC_SIZE)
index 063051766b8c3ea19892e9f116556c1d04d34ec0..74fa48609c3616ceb7320624b794993420c739f0 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011, 2012, 2013 Oracle, Inc. */
+/* Copyright (C) 2011-2014 Oracle, Inc. */
 
 #ifndef _LINUX_DTRACE_OS_H_
 #define _LINUX_DTRACE_OS_H_
@@ -9,21 +9,18 @@ typedef uint32_t dtrace_id_t;
 
 #ifdef CONFIG_DTRACE
 
+#include <linux/ktime.h>
+#include <linux/notifier.h>
+#if defined(CONFIG_DT_FASTTRAP) || defined(CONFIG_DT_FASTTRAP_MODULE)
 #include <linux/uprobes.h>
+#endif
+#include <asm/dtrace_util.h>
+#include <asm/unistd.h>
 #include <asm/asm-offsets.h>
 #include <linux/dtrace_cpu.h>
 
 #define DTRACE_IDNONE 0
 
-#define SCE_CLONE              0
-#define SCE_FORK               1
-#define SCE_VFORK              2
-#define SCE_SIGALTSTACK                3
-#define SCE_IOPL               4
-#define SCE_EXECVE             5
-#define SCE_RT_SIGRETURN       6
-#define SCE_nr_stubs           7
-
 extern struct module   *dtrace_kmod;
 
 extern void dtrace_os_init(void);
@@ -46,37 +43,9 @@ extern void dtrace_vtime_enable(void);
 extern void dtrace_vtime_disable(void);
 extern void dtrace_vtime_switch(struct task_struct *, struct task_struct *);
 
-extern int dtrace_invop_add(uint8_t (*func)(struct pt_regs *));
-extern void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *));
-
-extern void dtrace_invop_enable(uint8_t *);
-extern void dtrace_invop_disable(uint8_t *, uint8_t);
-
-typedef void (*sys_call_ptr_t)(void);
-typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
-                             uintptr_t, uintptr_t);
-
-typedef struct dtrace_syscalls {
-       const char      *name;
-       dtrace_id_t     stsy_entry;
-       dtrace_id_t     stsy_return;
-       dt_sys_call_t   stsy_underlying;
-       sys_call_ptr_t  *stsy_tblent;
-} dtrace_syscalls_t;
-
-typedef void (*dtrace_systrace_probe_t)(dtrace_id_t, uintptr_t, uintptr_t,
-                                       uintptr_t, uintptr_t, uintptr_t,
-                                       uintptr_t);
+extern void dtrace_skip_instruction(struct pt_regs *);
 
-typedef struct systrace_info {
-       dtrace_systrace_probe_t *probep;
-       dtrace_systrace_probe_t stub;
-       dt_sys_call_t           syscall;
-       dt_sys_call_t           stubs[SCE_nr_stubs];
-       dtrace_syscalls_t       sysent[NR_syscalls];
-} systrace_info_t;
-
-extern systrace_info_t *dtrace_syscalls_init(void);
+extern int dtrace_die_notifier(struct notifier_block *, unsigned long, void *);
 
 #define STACKTRACE_KERNEL      0x01
 #define STACKTRACE_USER                0x02
@@ -104,27 +73,31 @@ static inline int dtrace_no_pf(struct pt_regs *regs)
        return 0;
 }
 
-extern struct task_struct *register_pid_provider(pid_t);
-extern void unregister_pid_provider(pid_t);
 extern void dtrace_task_reinit(struct task_struct *);
 extern void dtrace_task_init(struct task_struct *);
 extern void dtrace_task_fork(struct task_struct *, struct task_struct *);
 extern void dtrace_task_cleanup(struct task_struct *);
 
+extern void (*dtrace_helpers_cleanup)(struct task_struct *);
+extern void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *);
+extern void (*dtrace_helpers_fork)(struct task_struct *,
+                                  struct task_struct *);
+
+#if defined(CONFIG_DT_FASTTRAP) || defined(CONFIG_DT_FASTTRAP_MODULE)
 typedef struct fasttrap_machtp {
        struct inode            *fmtp_ino;
        loff_t                  fmtp_off;
        struct uprobe_consumer  fmtp_cns;
 } fasttrap_machtp_t;
 
-extern void (*dtrace_helpers_cleanup)(struct task_struct *);
-extern void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *);
-extern void (*dtrace_helpers_fork)(struct task_struct *,
-                                  struct task_struct *);
 extern int (*dtrace_tracepoint_hit)(fasttrap_machtp_t *, struct pt_regs *);
 
+extern struct task_struct *register_pid_provider(pid_t);
+extern void unregister_pid_provider(pid_t);
+
 extern int dtrace_tracepoint_enable(pid_t, uintptr_t, fasttrap_machtp_t *);
 extern int dtrace_tracepoint_disable(pid_t, fasttrap_machtp_t *);
+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */
 
 #else
 
diff --git a/include/linux/dtrace_syscall.h b/include/linux/dtrace_syscall.h
new file mode 100644 (file)
index 0000000..03dc8e6
--- /dev/null
@@ -0,0 +1,50 @@
+/* Copyright (C) 2011-2014 Oracle, Inc. */
+
+#ifndef _LINUX_DTRACE_SYSCALL_H_
+#define _LINUX_DTRACE_SYSCALL_H_
+
+#include <linux/types.h>
+#include <linux/types.h>
+#include <linux/dtrace_os.h>
+#include <asm/syscall.h>
+
+#define DTRACE_SYSCALL_STUB(t, n)      SCE_##t,
+enum dtrace_sce_id {
+        SCE_NONE = 0,
+#include <asm/dtrace_syscall.h>
+       SCE_nr_stubs
+};
+#undef DTRACE_SYSCALL_STUB
+
+#define DTRACE_SYSCALL_STUB(t, n) \
+       asmlinkage long dtrace_stub_##n(uintptr_t, uintptr_t, uintptr_t, \
+                                       uintptr_t, uintptr_t, uintptr_t);
+#include <asm/dtrace_syscall.h>
+#undef DTRACE_SYSCALL_STUB
+
+typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+                             uintptr_t, uintptr_t);
+
+typedef struct dtrace_syscalls {
+       const char      *name;
+       dtrace_id_t     stsy_entry;
+       dtrace_id_t     stsy_return;
+       dt_sys_call_t   stsy_underlying;
+       dt_sys_call_t   *stsy_tblent;
+} dtrace_syscalls_t;
+
+typedef void (*dtrace_systrace_probe_t)(dtrace_id_t, uintptr_t, uintptr_t,
+                                       uintptr_t, uintptr_t, uintptr_t,
+                                       uintptr_t);
+
+typedef struct systrace_info {
+       dtrace_systrace_probe_t *probep;
+       dtrace_systrace_probe_t stub;
+       dt_sys_call_t           syscall;
+       dt_sys_call_t           stubs[SCE_nr_stubs];
+       dtrace_syscalls_t       sysent[NR_syscalls];
+} systrace_info_t;
+
+extern systrace_info_t *dtrace_syscalls_init(void);
+
+#endif /* _LINUX_DTRACE_SYSCALL_H_ */
index 57dcb93c4652dc694bd5a867a6ca4d248ddec7e7..a1f12e7a0c9d7717547b835f9e65fcbee837726b 100644 (file)
@@ -1,16 +1,16 @@
-/* Copyright (C) 2011, 2012, 2013 Oracle, Inc. */
+/* Copyright (C) 2011-2014 Oracle, Inc. */
 
-#ifndef _SDT_H_
-#define        _SDT_H_
+#ifndef _LINUX_SDT_H_
+#define        _LINUX_SDT_H_
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(CONFIG_DTRACE) || defined(CONFIG_DTRACE_MODULE)
+#ifdef CONFIG_DTRACE
 
 #ifndef __KERNEL__
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #define        DTRACE_PROBE(provider, name) {                                  \
        extern void __dtrace_##provider##___##name(void);               \
        __dtrace_##provider##___##name();                               \
@@ -51,6 +51,10 @@ extern "C" {
            (unsigned long)arg4, (unsigned long)arg5);                  \
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 #else /* __KERNEL__ */
 
 #define        DTRACE_PROBE(name)      {                                       \
@@ -118,35 +122,22 @@ extern "C" {
            (uintptr_t)(arg6), (uintptr_t)(arg7), (uintptr_t)(arg8));   \
 }
 
-/*
- * vmlinux dtrace_probe__ caller reloc info;
- * comes from vmlinux_info.S
- */
-typedef uint8_t        sdt_instr_t;
-
-extern unsigned long dtrace_sdt_nprobes __attribute__((weak));
-extern void *dtrace_sdt_probes __attribute__((weak));
-
-typedef struct dtrace_sdt_probeinfo {
-       unsigned long addr;
-       unsigned long name_len;
-       unsigned long func_len;
-       char name[0];
-} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t;
-
-void dtrace_sdt_init(void);
-void dtrace_sdt_register(struct module *);
-void dtrace_sdt_exit(void);
+typedef struct sdt_probedesc {
+       char                    *sdpd_name;     /* probe name */
+       char                    *sdpd_func;     /* probe function */
+       unsigned long           sdpd_offset;    /* offset of call in text */
+       struct sdt_probedesc    *sdpd_next;     /* next static probe */
+} sdt_probedesc_t;
 
 #endif /* __KERNEL__ */
 
-#else /* DTRACE not enabled: */
+#else /* ! CONFIG_DTRACE */
 
-#define        DTRACE_PROBE(name)      do { } while (0)
+#define        DTRACE_PROBE(name)                              do { } while (0)
 #define        DTRACE_PROBE1(name, type1, arg1)                DTRACE_PROBE(name)
 #define        DTRACE_PROBE2(name, type1, arg1, type2, arg2)   DTRACE_PROBE(name)
 #define        DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)      \
-                       DTRACE_PROBE(name)
+                                                       DTRACE_PROBE(name)
 #define        DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3,      \
                        type4, arg4)                    DTRACE_PROBE(name)
 #define        DTRACE_PROBE5(name, type1, arg1, type2, arg2, type3, arg3,      \
@@ -154,14 +145,14 @@ void dtrace_sdt_exit(void);
 #define        DTRACE_PROBE6(name, type1, arg1, type2, arg2, type3, arg3,      \
        type4, arg4, type5, arg5, type6, arg6)          DTRACE_PROBE(name)
 #define        DTRACE_PROBE7(name, type1, arg1, type2, arg2, type3, arg3,      \
-       type4, arg4, type5, arg5, type6, arg6, type7, arg7) DTRACE_PROBE(name)
+       type4, arg4, type5, arg5, type6, arg6, type7, arg7)             \
+                                                       DTRACE_PROBE(name)
 #define        DTRACE_PROBE8(name, type1, arg1, type2, arg2, type3, arg3,      \
        type4, arg4, type5, arg5, type6, arg6, type7, arg7, type8, arg8) \
-                       DTRACE_PROBE(name)
+                                                       DTRACE_PROBE(name)
 
 #endif /* CONFIG_DTRACE */
 
-
 #define        DTRACE_SCHED(name)                                              \
        DTRACE_PROBE(__sched_##name);
 
@@ -427,19 +418,4 @@ void dtrace_sdt_exit(void);
            type3, arg3, type4, arg4, type5, arg5, type6, arg6,         \
            type7, arg7, type8, arg8);
 
-extern const char *sdt_prefix;
-
-typedef struct sdt_probedesc {
-       char                    *sdpd_name;     /* probe name */
-       char                    *sdpd_func;     /* probe function */
-       unsigned long           sdpd_offset;    /* offset of call in text */
-       struct sdt_probedesc    *sdpd_next;     /* next static probe */
-} sdt_probedesc_t;
-
-/* extern void dtrace_register_builtins(void); */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SDT_H_ */
+#endif /* _LINUX_SDT_H_ */
index 1801451f077339e94618802e6255196f79c9e96c..e2c3f7bd44c4cbb236123a1d570cc4a29df20017 100644 (file)
@@ -6,7 +6,8 @@
 menuconfig DTRACE
        bool "DTrace (Dynamic Tracing) Support"
        default y
-       depends on X86_64 && !DEBUG_LOCK_ALLOC
+       depends on ARCH_SUPPORTS_DTRACE
+       depends on !DEBUG_LOCK_ALLOC
        select KALLSYMS
        select WAITFD
        select CTF if (!DT_DISABLE_CTF)
index 82ae56f41ec7e5faa5e11bdd637908c60ec4c2b1..15c51d4907f834b7ae7ec011fd0bd04e39ebd3a4 100644 (file)
@@ -4,7 +4,12 @@
 
 GCOV_PROFILE := y
 
+DT_CORE_ARCH_OBJS              = $(addprefix ../../arch/$(SRCARCH)/kernel/, \
+                                   dtrace_syscall.o dtrace_syscall_stubs.o \
+                                   dtrace_sdt.o dtrace_util.o)
+
 ifdef CONFIG_DT_CORE
 obj-y                          += cyclic.o dtrace_os.o dtrace_cpu.o \
-                                  dtrace_stubs_x86_64.o dtrace_sdt.o
+                                  dtrace_sdt_core.o \
+                                  $(DT_CORE_ARCH_OBJS)
 endif
index 7bc04e7ec91910bd3b8fbd9a46b26ee1baa0e76e..09a05c7d863024965c3c08c50629ee57cbdcd1ea 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/dtrace_cpu.h>
 #include <linux/module.h>
-#include <asm/percpu.h>
+#include <asm/dtrace_cpuinfo.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(cpu_core_t, dtrace_cpu_core);
 EXPORT_PER_CPU_SYMBOL(dtrace_cpu_core);
@@ -20,15 +20,15 @@ void dtrace_cpu_init(void)
        int     cpu;
 
        for_each_present_cpu(cpu) {
-               struct cpuinfo_x86      *c = &cpu_data(cpu);
+               cpuinfo_arch_t          *ci = &cpu_data(cpu);
                cpuinfo_t               *cpui = per_cpu_info(cpu);
                cpu_core_t              *cpuc = per_cpu_core(cpu);
 
                cpui->cpu_id = cpu;
                cpui->cpu_pset = 0;
-               cpui->cpu_chip = c->phys_proc_id;
+               cpui->cpu_chip = dtrace_cpuinfo_chip(ci);
                cpui->cpu_lgrp = 0;
-               cpui->cpu_info = c;
+               cpui->cpu_info = ci;
 
                cpuc->cpuc_dtrace_flags = 0;
                cpuc->cpuc_dcpc_intr_state = 0;
index 3404da0936427f2ff9eae0172da154b72d2f2103..8b1c915fe262e6ca7f1e08c48c8c5b41ff4cf193 100644 (file)
@@ -2,7 +2,7 @@
  * FILE:       dtrace_os.c
  * DESCRIPTION:        Dynamic Tracing: OS support functions - part of kernel core
  *
- * Copyright (C) 2010, 2011, 2012, 2013 Oracle Corporation
+ * Copyright (C) 2010-2014 Oracle Corporation
  */
 
 #include <linux/binfmts.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/stacktrace.h>
 #include <linux/vmalloc.h>
 #include <linux/kallsyms.h>
 #include <linux/workqueue.h>
 #include <linux/mm.h>
-#include <asm/insn.h>
-#include <asm/stacktrace.h>
-#include <asm/syscalls.h>
-
-#include <linux/uprobes.h>
 #include <asm/ptrace.h>
+#include "dtrace_sdt.h"
+
+#if defined(CONFIG_DT_FASTTRAP) || defined(CONFIG_DT_FASTTRAP_MODULE)
+# include <linux/uprobes.h>
+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */
 
 /*---------------------------------------------------------------------------*\
 (* OS SPECIFIC DTRACE SETUP                                                  *)
@@ -358,233 +359,48 @@ void dtrace_vtime_switch(struct task_struct *prev, struct task_struct *next)
        next->dtrace_start = now;
 }
 
-/*---------------------------------------------------------------------------*\
-(* STACK TRACES                                                              *)
-\*---------------------------------------------------------------------------*/
-static int dtrace_stacktrace_stack(void *data, char *name)
+/*
+ * Save the current stack trace in st->pcs, skipping the first st->depth
+ * frames.  st->depth is set to the number of entries; st->fps is zeroed.
+ */
+void dtrace_stacktrace(stacktrace_state_t *st)
 {
-       stacktrace_state_t      *st = (stacktrace_state_t *)data;
-
-       /*
-        * We do not skip anything for non-user stack analysis.
-        */
-       if (!(st->flags & STACKTRACE_USER))
-               return 0;
-
-       if (name != NULL && strlen(name) > 3) {
-               /*
-                * Sadly, the dump stack code calls us with both <EOE> and EOI.
-                * Consistency would be much nicer.
-                */
-               if ((name[0] == '<' && name[1] == 'E' && name[2] == 'O') ||
-                   (name[0] == 'E' && name[2] == 'O'))
-                       st->flags &= ~STACKTRACE_SKIP;
-       }
+       struct stack_trace      trace;
+       int                     i;
 
-       return 0;
-}
+       trace.nr_entries = 0;
+       trace.max_entries = st->limit ? st->limit : 512;
+       trace.entries = (typeof(trace.entries))st->pcs;
+       trace.skip = st->depth;
 
-static void dtrace_stacktrace_address(void *data, unsigned long addr,
-                                      int reliable)
-{
-       stacktrace_state_t      *st = (stacktrace_state_t *)data;
+       if (st->pcs == NULL)
+               trace.entries = vmalloc(trace.max_entries *
+                                       sizeof(trace.entries[0]));
 
-       if (st->flags & STACKTRACE_SKIP)
-               return;
+       if (trace.entries != NULL)      /* vmalloc() may have failed */
+               save_stack_trace(&trace);
 
-       if (reliable == 2) {
-               if (st->fps)
-                       st->fps[st->depth] = addr;
-       } else {
-               if (st->pcs != NULL) {
-                       if (st->depth < st->limit)
-                               st->pcs[st->depth++] = addr;
-               } else
-                       st->depth++;
-       }
+       /*
+        * save_stack_trace() on x86_64 appends a ULONG_MAX sentinel when the
+        * buffer has room for it.  Do not count it, but only if it is there.
+        */
+       st->depth = trace.nr_entries;
+       if (st->depth > 0 && trace.entries[st->depth - 1] == ULONG_MAX)
+               st->depth--;
 
-       if (st->depth >= dtrace_ustackdepth_max) {
-               DTRACE_CPUFLAG_SET(CPU_DTRACE_BADSTACK);
-               this_cpu_core->cpuc_dtrace_illval = st->depth;
+       if (st->pcs == NULL)
+               vfree(trace.entries);
 
-               return;
+       if (st->fps != NULL) {
+               for (i = 0; i < st->limit; i++)
+                       st->fps[i] = 0;
        }
 }
-
-static inline int valid_sp(struct thread_info *tinfo, void *p,
-                          unsigned int size, void *end)
-{
-       void    *t = tinfo;
-
-       if (end) {
-               if (p < end && p >= (end - THREAD_SIZE))
-                       return 1;
-               else
-                       return 0;
-       }
-
-       return p > t && p < t + THREAD_SIZE - size;
-}
-
-struct frame {
-       struct frame    *fr_savfp;
-       unsigned long   fr_savpc;
-} __attribute__((packed));
-
-static unsigned long dtrace_stacktrace_walk_stack(
-                                       struct thread_info *tinfo,
-                                       unsigned long *stack,
-                                       unsigned long bp,
-                                       const struct stacktrace_ops *ops,
-                                       void *data, unsigned long *end,
-                                       int *graph)
-{
-       struct frame    *fr = (struct frame *)bp;
-       unsigned long   *pcp = &(fr->fr_savpc);
-
-       while (valid_sp(tinfo, pcp, sizeof(*pcp), end)) {
-               unsigned long   addr = *pcp;
-
-               fr = fr->fr_savfp;
-               ops->address(data, (unsigned long)fr, 2);
-               ops->address(data, addr, 1);
-               pcp = &(fr->fr_savpc);
-       }
-
-       return (unsigned long)fr;
-}
-
-static const struct stacktrace_ops     dtrace_stacktrace_ops = {
-       .stack          = dtrace_stacktrace_stack,
-       .address        = dtrace_stacktrace_address,
-       .walk_stack     = print_context_stack_bp
-};
-
-static const struct stacktrace_ops     dtrace_fpstacktrace_ops = {
-       .stack          = dtrace_stacktrace_stack,
-       .address        = dtrace_stacktrace_address,
-       .walk_stack     = dtrace_stacktrace_walk_stack
-};
-
-void dtrace_stacktrace(stacktrace_state_t *st)
-{
-       unsigned long   bp;
-       unsigned long   stack;
-
-       bp = stack_frame(current, NULL);
-
-       /*
-        * Generate a stacktrace in the buffer embedded in st, identical to the
-        * stacktrace that would be printed to the console if dump_stack() were
-        * called here.
-        *
-        * dump_stack()
-        * -> show_trace(NULL, NULL, &stack, bp)
-        *      where unsigned long bp = stack_frame(current, NULL);
-        *            unsigned long stack;
-        * -> show_trace_log_lvl(NULL, NULL, &stack, bp, "")
-        *      where unsigned long bp = stack_frame(current, NULL);
-        *            unsigned long stack;
-        * -> dump_trace(NULL, NULL, &stack, bp, &print_trace_ops, "");
-        *      where unsigned long bp = stack_frame(current, NULL);
-        *            unsigned long stack;
-        */
-       dump_trace(NULL, NULL, &stack, bp,
-                  st->fps != NULL ? &dtrace_fpstacktrace_ops
-                                  : &dtrace_stacktrace_ops, st);
-}
 EXPORT_SYMBOL(dtrace_stacktrace);
 
 /*---------------------------------------------------------------------------*\
 (* INVALID OPCODE AND PAGE FAULT HANDLING                                    *)
 \*---------------------------------------------------------------------------*/
-typedef struct dtrace_invop_hdlr {
-       uint8_t                         (*dtih_func)(struct pt_regs *);
-       struct dtrace_invop_hdlr        *dtih_next;
-} dtrace_invop_hdlr_t;
-
-static dtrace_invop_hdlr_t     *dtrace_invop_hdlrs;
-
-#define INVOP_TRAP_INSTR       0xf0
-
-static int dtrace_die_notifier(struct notifier_block *nb, unsigned long val,
-                              void *args)
-{
-       struct die_args         *dargs = args;
-       struct insn             insn;
-
-       switch (val) {
-       case DIE_PAGE_FAULT: {
-               unsigned long   addr = read_cr2();
-
-               if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT))
-                       return NOTIFY_DONE;
-
-               DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
-               this_cpu_core->cpuc_dtrace_illval = addr;
-
-               kernel_insn_init(&insn, (void *)dargs->regs->ip);
-               insn_get_length(&insn);
-
-               dargs->regs->ip += insn.length;
-
-               return NOTIFY_OK | NOTIFY_STOP_MASK;
-       }
-       case DIE_GPF: {
-               kernel_insn_init(&insn, (void *)dargs->regs->ip);
-               insn_get_length(&insn);
-
-               /*
-                * It would seem that the invalid opcode generated by the LOCK
-                * prefix (0xF0) used for SDT probe points may get delivered as
-                * a general protection failure on Xen.  We need to ignore them
-                * as general protection failures...
-                */
-               if (insn.length != 5 || insn.prefixes.bytes[0] != 0xf0 ||
-                   insn.opcode.bytes[0] != 0x90) {
-                       if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT))
-                               return NOTIFY_DONE;
-
-                       DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
-
-                       dargs->regs->ip += insn.length;
-
-                       return NOTIFY_OK | NOTIFY_STOP_MASK;
-               }
-
-               /*
-                * ... and instead treat them as the SDT probe point traps that
-                * they are.
-                */
-               dargs->trapnr = 6;
-       }
-       case DIE_TRAP: {
-               dtrace_invop_hdlr_t     *hdlr;
-               int                     rval = 0;
-
-               if (dargs->trapnr != 6)
-                       return NOTIFY_DONE;
-
-               for (hdlr = dtrace_invop_hdlrs; hdlr != NULL;
-                    hdlr = hdlr->dtih_next) {
-                       if ((rval = hdlr->dtih_func(dargs->regs)) != 0)
-                               break;
-               }
-
-               if (rval != 0) {
-                       kernel_insn_init(&insn, (void *)dargs->regs->ip);
-                       insn_get_length(&insn);
-
-                       dargs->regs->ip += insn.length;
-
-                       return NOTIFY_OK | NOTIFY_STOP_MASK;
-               }
-       }
-       default:
-               return NOTIFY_DONE;
-       }
-}
-
 static struct notifier_block   dtrace_die = {
        .notifier_call = dtrace_die_notifier,
 };
@@ -617,352 +433,69 @@ EXPORT_SYMBOL(dtrace_disable);
  */
 int dtrace_handle_no_pf(struct pt_regs *regs)
 {
-       struct insn insn;
-
        DTRACE_CPUFLAG_SET(CPU_DTRACE_PF_TRAPPED);
 
-       kernel_insn_init(&insn, (void *)regs->ip);
-       insn_get_length(&insn);
-
-       regs->ip += insn.length;
+       dtrace_skip_instruction(regs);
 
        return 1;
 }
 
-int dtrace_invop_add(uint8_t (*func)(struct pt_regs *))
-{
-       dtrace_invop_hdlr_t     *hdlr;
-
-       hdlr = kmalloc(sizeof(dtrace_invop_hdlr_t), GFP_KERNEL);
-       if (hdlr == NULL) {
-               pr_warn("Failed to add invop handler: out of memory\n");
-               return -ENOMEM;
-       }
-
-       hdlr->dtih_func = func;
-       hdlr->dtih_next = dtrace_invop_hdlrs;
-       dtrace_invop_hdlrs = hdlr;
-
-       return 0;
-}
-EXPORT_SYMBOL(dtrace_invop_add);
-
-void dtrace_invop_remove(uint8_t (*func)(struct pt_regs *))
-{
-       dtrace_invop_hdlr_t     *hdlr = dtrace_invop_hdlrs, *prev = NULL;
-
-       for (;;) {
-               if (hdlr == NULL)
-                       return;
-
-               if (hdlr->dtih_func == func)
-                       break;
-
-               prev = hdlr;
-               hdlr = hdlr->dtih_next;
-       }
-
-       if (prev == NULL) {
-               dtrace_invop_hdlrs = hdlr->dtih_next;
-       } else
-               prev->dtih_next = hdlr->dtih_next;
-
-       kfree(hdlr);
-}
-EXPORT_SYMBOL(dtrace_invop_remove);
-
-void dtrace_invop_enable(uint8_t *addr)
-{
-       text_poke(addr, ((unsigned char []){INVOP_TRAP_INSTR}), 1);
-}
-EXPORT_SYMBOL(dtrace_invop_enable);
-
-void dtrace_invop_disable(uint8_t *addr, uint8_t opcode)
-{
-       text_poke(addr, ((unsigned char []){opcode}), 1);
-}
-EXPORT_SYMBOL(dtrace_invop_disable);
-
 /*---------------------------------------------------------------------------*\
-(* SYSTEM CALL TRACING SUPPORT                                               *)
+(* USER SPACE TRACING (FASTTRAP) SUPPORT                                     *)
 \*---------------------------------------------------------------------------*/
-void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
-                      uintptr_t, uintptr_t);
-
-void systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
-                  uintptr_t arg2, uintptr_t arg3, uintptr_t arg4,
-                  uintptr_t arg5)
-{
-}
-
-asmlinkage long systrace_syscall(uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_clone(uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_fork(uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_vfork(uintptr_t, uintptr_t,
-                                 uintptr_t, uintptr_t,
-                                 uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_iopl(uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t,
-                                uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_execve(uintptr_t, uintptr_t,
-                                  uintptr_t, uintptr_t,
-                                  uintptr_t, uintptr_t);
-asmlinkage long dtrace_stub_rt_sigreturn(uintptr_t, uintptr_t,
-                                        uintptr_t, uintptr_t,
-                                        uintptr_t, uintptr_t);
-
-static systrace_info_t systrace_info =
-               {
-                       &systrace_probe,
-                       systrace_stub,
-                       systrace_syscall,
-                       {
-                           [SCE_CLONE] dtrace_stub_clone,
-                           [SCE_FORK] dtrace_stub_fork,
-                           [SCE_VFORK] dtrace_stub_vfork,
-                           [SCE_IOPL] dtrace_stub_iopl,
-                           [SCE_EXECVE] dtrace_stub_execve,
-                           [SCE_RT_SIGRETURN] dtrace_stub_rt_sigreturn,
-                       },
-                       {
-#define __SYSCALL_64(nr, sym, compat)          [nr] { __stringify(sym), },
-#define __SYSCALL_COMMON(nr, sym, compat)      __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nt, sym, compat)
-#include <asm/syscalls_64.h>
-                       }
-               };
-
-
-long systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
-                     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
-{
-       long                    rc = 0;
-       unsigned long           sysnum;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       asm volatile("movq %%rax,%0" : "=m"(sysnum));
-
-       sc = &systrace_info.sysent[sysnum];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       if (sc->stsy_underlying != NULL)
-               rc = (*sc->stsy_underlying)(arg0, arg1, arg2, arg3, arg4,
-                                           arg5);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
-
-       return rc;
-}
-
-systrace_info_t *dtrace_syscalls_init() {
-       int                     i;
-       extern sys_call_ptr_t   sys_call_table[NR_syscalls];
-
-       for (i = 0; i < NR_syscalls; i++) {
-               systrace_info.sysent[i].stsy_tblent = &sys_call_table[i];
-               systrace_info.sysent[i].stsy_underlying =
-                                       (dt_sys_call_t)sys_call_table[i];
-       }
-
-       return &systrace_info;
-}
-EXPORT_SYMBOL(dtrace_syscalls_init);
-
-long dtrace_clone(unsigned long clone_flags, unsigned long newsp,
-                 int __user *parent_tidptr, int __user *child_tidptr,
-                 int tls_val)
-{
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       sc = &systrace_info.sysent[__NR_clone];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, clone_flags, newsp,
-                                 (uintptr_t)parent_tidptr,
-                                 (uintptr_t)child_tidptr, tls_val, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       rc = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
-
-       return rc;
-}
+void (*dtrace_helpers_cleanup)(struct task_struct *);
+EXPORT_SYMBOL(dtrace_helpers_cleanup);
+void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *);
+EXPORT_SYMBOL(dtrace_fasttrap_probes_cleanup);
+void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *);
+EXPORT_SYMBOL(dtrace_helpers_fork);
 
-long dtrace_fork(void)
+void dtrace_task_reinit(struct task_struct *tsk)
 {
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       sc = &systrace_info.sysent[__NR_fork];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       rc = do_fork(SIGCHLD, 0, 0, NULL, NULL);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+       tsk->predcache = 0;
+       tsk->dtrace_stop = 0;
+       tsk->dtrace_sig = 0;
 
-       return rc;
+       tsk->dtrace_helpers = NULL;
+       tsk->dtrace_probes = 0;
+       tsk->dtrace_tp_count = 0;
 }
 
-long dtrace_vfork(void)
+void dtrace_task_init(struct task_struct *tsk)
 {
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       sc = &systrace_info.sysent[__NR_vfork];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       rc = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+       dtrace_task_reinit(tsk);
 
-       return rc;
+       tsk->dtrace_vtime = ktime_set(0, 0);
+       tsk->dtrace_start = ktime_set(0, 0);
 }
 
-long dtrace_execve(const char __user *name,
-                  const char __user *const __user *argv,
-                  const char __user *const __user *envp)
+void dtrace_task_fork(struct task_struct *tsk, struct task_struct *child)
 {
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-       struct filename         *path;
-
-       sc = &systrace_info.sysent[__NR_execve];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)name, (uintptr_t)argv,
-                                 (uintptr_t)envp, 0, 0, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       path = getname(name);
-       rc = PTR_ERR(path);
-       if (IS_ERR(path))
-               goto out;
-       rc = do_execve(path->name, argv, envp);
-       putname(path);
-
-out:
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
+       if (likely(dtrace_helpers_fork == NULL))
+               return;
 
-       return rc;
+       if (tsk->dtrace_helpers != NULL)
+               (*dtrace_helpers_fork)(tsk, child);
 }
 
-long dtrace_iopl(unsigned int level, struct pt_regs *regs)
+void dtrace_task_cleanup(struct task_struct *tsk)
 {
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-       unsigned int            old = (regs->flags >> 12) & 3;
-       struct thread_struct    *t = &current->thread;
-
-       sc = &systrace_info.sysent[__NR_iopl];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)level, (uintptr_t)regs,
-                                 0, 0, 0, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
+       if (likely(dtrace_helpers_cleanup == NULL))
+               return;
 
-       if (level > 3) {
-               rc = -EINVAL;
-               goto out;
-       }
+       if (tsk->dtrace_helpers != NULL)
+               (*dtrace_helpers_cleanup)(tsk);
 
-       /* Trying to gain more privileges? */
-       if (level > old) {
-               if (!capable(CAP_SYS_RAWIO)) {
-                       rc = -EPERM;
-                       goto out;
-               }
+       if (tsk->dtrace_probes) {
+               if (dtrace_fasttrap_probes_cleanup != NULL)
+                       (*dtrace_fasttrap_probes_cleanup)(tsk);
        }
-
-       regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-       t->iopl = level << 12;
-       set_iopl_mask(t->iopl);
-
-out:
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
-
-       return rc;
 }
 
-long dtrace_rt_sigreturn(struct pt_regs *regs)
-{
-       long                    rc = 0;
-       dtrace_id_t             id;
-       dtrace_syscalls_t       *sc;
-
-       sc = &systrace_info.sysent[__NR_rt_sigreturn];
-
-       if ((id = sc->stsy_entry) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)regs, 0, 0, 0, 0, 0);
-
-       /*
-        * FIXME: Add stop functionality for DTrace.
-        */
-
-       rc = sys_rt_sigreturn(regs);
-
-       if ((id = sc->stsy_return) != DTRACE_IDNONE)
-               (*systrace_probe)(id, (uintptr_t)rc, (uintptr_t)rc,
-                                 (uintptr_t)((uint64_t)rc >> 32), 0, 0, 0);
-
-       return rc;
-}
+#if defined(CONFIG_DT_FASTTRAP) || defined(CONFIG_DT_FASTTRAP_MODULE)
+int (*dtrace_tracepoint_hit)(fasttrap_machtp_t *, struct pt_regs *);
+EXPORT_SYMBOL(dtrace_tracepoint_hit);
 
-/*---------------------------------------------------------------------------*\
-(* USER SPACE TRACING (FASTTRAP) SUPPORT                                     *)
-\*---------------------------------------------------------------------------*/
 struct task_struct *register_pid_provider(pid_t pid)
 {
        struct task_struct      *p;
@@ -1029,57 +562,6 @@ void unregister_pid_provider(pid_t pid)
 }
 EXPORT_SYMBOL(unregister_pid_provider);
 
-void (*dtrace_helpers_cleanup)(struct task_struct *);
-EXPORT_SYMBOL(dtrace_helpers_cleanup);
-void (*dtrace_fasttrap_probes_cleanup)(struct task_struct *);
-EXPORT_SYMBOL(dtrace_fasttrap_probes_cleanup);
-void (*dtrace_helpers_fork)(struct task_struct *, struct task_struct *);
-EXPORT_SYMBOL(dtrace_helpers_fork);
-int (*dtrace_tracepoint_hit)(fasttrap_machtp_t *, struct pt_regs *);
-EXPORT_SYMBOL(dtrace_tracepoint_hit);
-
-void dtrace_task_reinit(struct task_struct *tsk)
-{
-       tsk->predcache = 0;
-       tsk->dtrace_stop = 0;
-       tsk->dtrace_sig = 0;
-
-       tsk->dtrace_helpers = NULL;
-       tsk->dtrace_probes = 0;
-       tsk->dtrace_tp_count = 0;
-}
-
-void dtrace_task_init(struct task_struct *tsk)
-{
-       dtrace_task_reinit(tsk);
-
-       tsk->dtrace_vtime = ktime_set(0, 0);
-       tsk->dtrace_start = ktime_set(0, 0);
-}
-
-void dtrace_task_fork(struct task_struct *tsk, struct task_struct *child)
-{
-       if (likely(dtrace_helpers_fork == NULL))
-               return;
-
-       if (tsk->dtrace_helpers != NULL)
-               (*dtrace_helpers_fork)(tsk, child);
-}
-
-void dtrace_task_cleanup(struct task_struct *tsk)
-{
-       if (likely(dtrace_helpers_cleanup == NULL))
-               return;
-
-       if (tsk->dtrace_helpers != NULL)
-               (*dtrace_helpers_cleanup)(tsk);
-
-       if (tsk->dtrace_probes) {
-               if (dtrace_fasttrap_probes_cleanup != NULL)
-                       (*dtrace_fasttrap_probes_cleanup)(tsk);
-       }
-}
-
 static int handler(struct uprobe_consumer *self, struct pt_regs *regs)
 {
        fasttrap_machtp_t       *mtp = container_of(self, fasttrap_machtp_t,
@@ -1170,3 +652,4 @@ int dtrace_tracepoint_disable(pid_t pid, fasttrap_machtp_t *mtp)
        return 0;
 }
 EXPORT_SYMBOL(dtrace_tracepoint_disable);
+#endif /* CONFIG_DT_FASTTRAP || CONFIG_DT_FASTTRAP_MODULE */
diff --git a/kernel/dtrace/dtrace_sdt.c b/kernel/dtrace/dtrace_sdt.c
deleted file mode 100644 (file)
index 3b6d2b6..0000000
+++ /dev/null
@@ -1,253 +0,0 @@
-/* Copyright (C) 2010, 2011 Oracle Corporation */
-
-/* register static dtrace probe points */
-
-#include <linux/kernel.h>
-#include <linux/memory.h>
-#include <linux/module.h>
-#include <linux/dtrace_os.h>
-#include <linux/sdt.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <asm-generic/bitsperlong.h>
-#include <asm-generic/sections.h>
-#include <asm/alternative.h>
-#include <asm/nmi.h>
-#include <asm/nops.h>
-
-#define        SDT_TRAP_INSTR  0xf0
-#define        SDT_NOP_SIZE    5
-
-const char             *sdt_prefix = "__dtrace_probe_";
-uint8_t                        nops[SDT_NOP_SIZE];
-
-/* This code is based on apply_alternatives and text_poke_early.  It needs to
- * run before SMP is initialized in order to avoid SMP problems with patching
- * code that might be accessed on another CPU.
- */
-static void __init_or_module text_poke_batch(struct text_poke_param *reqs,
-                                            int cnt)
-{
-       int                     i;
-       unsigned long           flags;
-       struct text_poke_param  *tpp;
-
-       stop_nmi();
-       local_irq_save(flags);
-
-       for (i = 0; i < cnt; i++) {
-               tpp = &reqs[i];
-               memcpy(tpp->addr, tpp->opcode, tpp->len);
-       }
-
-       sync_core();
-       local_irq_restore(flags);
-       restart_nmi();
-}
-
-static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func,
-                        uintptr_t addr, struct text_poke_param *tpp)
-{
-       uint8_t         *instr;
-
-       if ((sdp->sdpd_name = kstrdup(name, GFP_KERNEL)) == NULL) {
-               kfree(sdp);
-               return 1;
-       }
-
-       if ((sdp->sdpd_func = kstrdup(func, GFP_KERNEL)) == NULL) {
-               kfree(sdp->sdpd_name);
-               kfree(sdp);
-               return 1;
-       }
-
-       /* adjust relocation address to beginning of call instruction */
-       instr = (uint8_t *)(addr - 1);
-
-       sdp->sdpd_offset = (uintptr_t)instr;
-
-       tpp->addr = instr;
-       tpp->opcode = nops;
-       tpp->len = SDT_NOP_SIZE;
-
-       return 0;
-}
-
-/*
- * Register the SDT probes for the core kernel, i.e. SDT probes that reside in
- * vmlinux.  For SDT probes in kernel modules, we use dtrace_mod_notifier().
- */
-void dtrace_sdt_register(struct module *mod)
-{
-       int                     i, cnt;
-       dtrace_sdt_probeinfo_t  *pi =
-                               (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes;
-       void                    *nextpi;
-       sdt_probedesc_t         *sdps;
-       struct text_poke_param  *reqs;
-
-       if (mod == NULL) {
-               pr_warning("%s: no module provided - nothing registered\n",
-                          __func__);
-               return;
-       }
-
-       /*
-        * Just in case we run into failures further on...
-        */
-       mod->sdt_probes = NULL;
-       mod->num_dtrace_probes = 0;
-
-       if (dtrace_sdt_nprobes == 0)
-               return;
-
-       /*
-        * Allocate the array of SDT probe descriptions to be registered in the
-        * vmlinux pseudo-module.
-        */
-       sdps = (sdt_probedesc_t *)vmalloc(dtrace_sdt_nprobes *
-                                         sizeof(sdt_probedesc_t));
-       if (sdps == NULL) {
-               pr_warning("%s: cannot allocate SDT probe array\n", __func__);
-               return;
-       }
-
-       /*
-        * Set up a batch of text_poke requests that will handle replacing all
-        * calls at SDT probe locations with the NOP sequence.  Allocate the
-        * requests array, and then fill it in.
-        */
-       reqs = (struct text_poke_param *)
-                       vmalloc(dtrace_sdt_nprobes *
-                               sizeof(struct text_poke_param));
-       if (reqs == NULL) {
-               pr_warning("%s: cannot allocate text_poke_param array\n",
-                          __func__);
-               vfree(sdps);
-               return;
-       }
-
-       for (i = cnt = 0; cnt < dtrace_sdt_nprobes; i++) {
-               char    *func = pi->name + pi->name_len + 1;
-
-               if (sdt_probe_set(&sdps[cnt], pi->name, func, pi->addr,
-                                 &reqs[cnt]))
-                       pr_warning("%s: failed to add SDT probe %s\n",
-                                  __func__, pi->name);
-               else
-                       cnt++;
-
-               nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t)
-                       + roundup(pi->name_len + 1 +
-                                 pi->func_len + 1, BITS_PER_LONG / 8);
-               pi = nextpi;
-       }
-
-       mod->sdt_probes = sdps;
-       mod->num_dtrace_probes = cnt;
-
-       text_poke_batch(reqs, cnt);
-
-       vfree(reqs);
-}
-
-static int __init nosdt(char *str)
-{
-        dtrace_sdt_nprobes = 0;
-
-        return 0;
-}
-
-early_param("nosdt", nosdt);
-
-static int dtrace_mod_notifier(struct notifier_block *nb, unsigned long val,
-                              void *args)
-{
-       struct module           *mod = args;
-       struct text_poke_param  *reqs, *req;
-       int                     idx, cnt;
-       sdt_probedesc_t         *sdp;
-
-       /*
-        * We only need to capture modules in the COMING state, we need a valid
-        * module structure as argument, and the module needs to actually have
-        * SDT probes.  If not, ignore...
-        */
-       if (val != MODULE_STATE_COMING)
-               return NOTIFY_DONE;
-       if (!mod)
-               return NOTIFY_DONE;
-       if (mod->num_dtrace_probes == 0 || mod->sdt_probes == NULL)
-               return NOTIFY_DONE;
-
-       /*
-        * Set up a batch of text_poke requests that will handle replacing all
-        * calls at SDT probe locations with the NOP sequence.  Allocate the
-        * requests array, and then fill it in.
-        */
-       reqs = (struct text_poke_param *)
-                       vmalloc(dtrace_sdt_nprobes *
-                               sizeof(struct text_poke_param));
-       if (reqs == NULL) {
-               pr_warning("%s: cannot allocate text_poke_param array (%s)\n",
-                          __func__, mod->name);
-               return NOTIFY_DONE;
-       }
-
-       for (idx = cnt = 0, req = reqs, sdp = mod->sdt_probes;
-            idx < mod->num_dtrace_probes; idx++, sdp++) {
-               /*
-                * Fix-up the offset to reflect the relocated address of the
-                * probe.  We subtract 1 to put us at the beginning of the call
-                * instruction.  We verify that the offset won't put us beyond
-                * the module core, just to be safe.
-                */
-               sdp->sdpd_offset += (uintptr_t)mod->module_core - 1;
-               if (!within_module_core(sdp->sdpd_offset, mod)) {
-                       pr_warning("%s: SDT probe outside module core %s\n",
-                                  __func__, mod->name);
-                       continue;
-               }
-
-               req->addr = (uint8_t *)sdp->sdpd_offset;
-               req->opcode = nops;
-               req->len = SDT_NOP_SIZE;
-
-               cnt++;
-               req++;
-       }
-
-       text_poke_batch(reqs, cnt);
-
-       vfree(reqs);
-
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block   dtrace_modfix = {
-       .notifier_call = dtrace_mod_notifier,
-};
-
-void dtrace_sdt_init(void)
-{
-       /*
-        * A little unusual, but potentially necessary.  While we could use a
-        * single NOP sequence of length SDT_NOP_SIZE, we need to consider the
-        * fact that when a SDT probe point is enabled, a single invalid opcode
-        * is written on the first byte of this NOP sequence.  By using a
-        * sequence of a 1-byte NOP, followed by a (SDT_NOP_SIZE - 1) byte NOP
-        * sequence, we play it pretty safe.
-        */
-       add_nops(nops, 1);
-       add_nops(nops + 1, SDT_NOP_SIZE - 1);
-
-       register_module_notifier(&dtrace_modfix);
-}
-
-#if defined(CONFIG_DT_DT_PERF) || defined(CONFIG_DT_DT_PERF_MODULE)
-void dtrace_sdt_perf(void)
-{
-       DTRACE_PROBE(measure);
-}
-EXPORT_SYMBOL(dtrace_sdt_perf);
-#endif
diff --git a/kernel/dtrace/dtrace_sdt.h b/kernel/dtrace/dtrace_sdt.h
new file mode 100644 (file)
index 0000000..2e12b69
--- /dev/null
@@ -0,0 +1,32 @@
+/* Copyright (C) 2011-2014 Oracle, Inc. */
+
+#ifndef _DTRACE_SDT_H_
+#define        _DTRACE_SDT_H_
+
+#include <asm/dtrace_sdt.h>
+
+/*
+ * Relocation information for SDT probe call sites in the core kernel,
+ * provided by .tmp_sdtinfo.S.
+ *
+ * Each record is variable length: the fixed fields below are followed by the
+ * NUL-terminated probe name and then the NUL-terminated function name, and
+ * the string area is padded to a multiple of the size of a long so that the
+ * next record starts right after it.
+ */
+typedef struct dtrace_sdt_probeinfo {
+       unsigned long           addr;           /* probe location */
+       unsigned long           name_len;       /* probe name length, w/o NUL */
+       unsigned long           func_len;       /* func name length, w/o NUL */
+       char                    name[0];        /* probe name, then func name */
+} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t;
+
+extern unsigned long dtrace_sdt_nprobes __attribute__((weak));
+extern void *dtrace_sdt_probes __attribute__((weak));
+
+extern void dtrace_sdt_init(void);
+extern void dtrace_sdt_register(struct module *);
+extern void dtrace_sdt_exit(void);
+
+/*
+ * Functions to be defined in arch/<arch>/kernel/dtrace_sdt.c
+ */
+extern void __init_or_module dtrace_sdt_nop_multi(sdt_instr_t **, int);
+extern void dtrace_sdt_init_arch(void);
+
+#endif /* _DTRACE_SDT_H_ */
diff --git a/kernel/dtrace/dtrace_sdt_core.c b/kernel/dtrace/dtrace_sdt_core.c
new file mode 100644 (file)
index 0000000..6a91734
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * FILE:        dtrace_sdt_core.c
+ * DESCRIPTION: Dynamic Tracing: SDT probe point registration
+ *
+ * Copyright (C) 2010-2014 Oracle Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/module.h>
+#include <linux/dtrace_os.h>
+#include <linux/sdt.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm-generic/bitsperlong.h>
+#include <asm-generic/sections.h>
+#include "dtrace_sdt.h"
+
+const char             *sdt_prefix = "__dtrace_probe_";
+
+/*
+ * Fill in one SDT probe description.
+ *
+ * Duplicates 'name' and 'func' into sdp->sdpd_name and sdp->sdpd_func,
+ * records 'addr' in sdp->sdpd_offset, and stores 'addr' (as an instruction
+ * pointer) in *paddr so the caller can collect the locations to patch.
+ *
+ * Returns 0 on success, or 1 if either string could not be duplicated.  On
+ * failure nothing allocated here is left behind and *paddr is not written.
+ *
+ * The descriptor itself is never freed here: the caller passes a slot inside
+ * its own vmalloc()ed array, which is not ours to release and is not a valid
+ * argument for kfree().
+ */
+static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func,
+                        uintptr_t addr, sdt_instr_t **paddr)
+{
+       sdp->sdpd_name = kstrdup(name, GFP_KERNEL);
+       if (sdp->sdpd_name == NULL)
+               return 1;
+
+       sdp->sdpd_func = kstrdup(func, GFP_KERNEL);
+       if (sdp->sdpd_func == NULL) {
+               /* Undo the first duplication; the slot may be reused. */
+               kfree(sdp->sdpd_name);
+               sdp->sdpd_name = NULL;
+               return 1;
+       }
+
+       sdp->sdpd_offset = addr;
+
+       *paddr = (sdt_instr_t *)addr;
+
+       return 0;
+}
+
+/*
+ * Register the SDT probes for the core kernel, i.e. SDT probes that reside in
+ * vmlinux.  For SDT probes in kernel modules, we use dtrace_mod_notifier().
+ *
+ * Walks the dtrace_sdt_nprobes variable-length records that start at
+ * dtrace_sdt_probes, fills in an sdt_probedesc_t for each one, and hands the
+ * collected probe addresses to dtrace_sdt_nop_multi().  When the walk
+ * completes, mp->sdt_probes and mp->num_dtrace_probes describe the probes
+ * that were set up successfully; if an early return is taken after the NULL
+ * check on mp, they are left as NULL and 0.
+ */
+void dtrace_sdt_register(struct module *mp)
+{
+       int                     i, cnt;
+       dtrace_sdt_probeinfo_t  *pi =
+                               (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes;
+       void                    *nextpi;
+       sdt_probedesc_t         *sdps;
+       sdt_instr_t             **addrs;
+
+       if (mp == NULL) {
+               pr_warning("%s: no module provided - nothing registered\n",
+                          __func__);
+               return;
+       }
+
+       /*
+        * Just in case we run into failures further on...
+        */
+       mp->sdt_probes = NULL;
+       mp->num_dtrace_probes = 0;
+
+       if (dtrace_sdt_nprobes == 0)
+               return;
+
+       /*
+        * Allocate the array of SDT probe descriptions to be registered in the
+        * vmlinux pseudo-module.
+        */
+       sdps = (sdt_probedesc_t *)vmalloc(dtrace_sdt_nprobes *
+                                         sizeof(sdt_probedesc_t));
+       if (sdps == NULL) {
+               pr_warning("%s: cannot allocate SDT probe array\n", __func__);
+               return;
+       }
+
+       /*
+        * Create a list of addresses (SDT probe locations) that need to be
+        * patched with a NOP instruction (or instruction sequence).
+        */
+       addrs = (sdt_instr_t **)vmalloc(dtrace_sdt_nprobes *
+                                       sizeof(sdt_instr_t *));
+       if (addrs == NULL) {
+               pr_warning("%s: cannot allocate SDT probe address list\n",
+                          __func__);
+               vfree(sdps);
+               return;
+       }
+
+       /*
+        * Visit every record in the table exactly once.  'i' counts records
+        * consumed, 'cnt' counts descriptors successfully filled in; the two
+        * differ as soon as sdt_probe_set() fails.  The walk is bounded by 'i'
+        * so that a failure cannot push 'pi' past the last record.
+        */
+       for (i = cnt = 0; i < dtrace_sdt_nprobes; i++) {
+               /* The function name follows the NUL-terminated probe name. */
+               char    *func = pi->name + pi->name_len + 1;
+
+               if (sdt_probe_set(&sdps[cnt], pi->name, func, pi->addr,
+                                 &addrs[cnt]))
+                       pr_warning("%s: failed to add SDT probe %s\n",
+                                  __func__, pi->name);
+               else
+                       cnt++;
+
+               /*
+                * Records are variable length: the fixed header, then both
+                * strings with their terminators, padded to a multiple of the
+                * size of a long.
+                */
+               nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t)
+                       + roundup(pi->name_len + 1 +
+                                 pi->func_len + 1, BITS_PER_LONG / 8);
+               pi = nextpi;
+       }
+
+       mp->sdt_probes = sdps;
+       mp->num_dtrace_probes = cnt;
+
+       dtrace_sdt_nop_multi(addrs, cnt);
+
+       /* The address list is only needed for the patching call above. */
+       vfree(addrs);
+}
+
+/*
+ * Handler for the "nosdt" early boot parameter.  Zeroing the probe count makes
+ * dtrace_sdt_register() return before it sets up or patches any core kernel
+ * SDT probe.  The parameter value (str) is ignored.
+ */
+static int __init nosdt(char *str)
+{
+        dtrace_sdt_nprobes = 0;
+
+        return 0;
+}
+
+early_param("nosdt", nosdt);
+
+/*
+ * Module notifier callback: prepare the SDT probes of a module that is being
+ * loaded.
+ *
+ * For a module in the COMING state that carries SDT probes, each probe offset
+ * is turned into an absolute address inside the module core, and the resulting
+ * addresses are passed to dtrace_sdt_nop_multi().  'nb' is unused, 'val' is
+ * the module state, and 'args' is the struct module being reported.
+ *
+ * Always returns NOTIFY_DONE, including when the address list cannot be
+ * allocated.
+ */
+static int dtrace_mod_notifier(struct notifier_block *nb, unsigned long val,
+                              void *args)
+{
+       struct module           *mp = args;
+       int                     i, cnt;
+       sdt_probedesc_t         *sdp;
+       sdt_instr_t             **addrs;
+
+       /*
+        * We only need to capture modules in the COMING state, we need a valid
+        * module structure as argument, and the module needs to actually have
+        * SDT probes.  If not, ignore...
+        */
+       if (val != MODULE_STATE_COMING)
+               return NOTIFY_DONE;
+       if (!mp)
+               return NOTIFY_DONE;
+       if (mp->num_dtrace_probes == 0 || mp->sdt_probes == NULL)
+               return NOTIFY_DONE;
+
+       /*
+        * Create a list of addresses (SDT probe locations) that need to be
+        * patched with a NOP instruction (or instruction sequence).
+        */
+       addrs = (sdt_instr_t **)vmalloc(mp->num_dtrace_probes *
+                                       sizeof(sdt_instr_t *));
+       if (addrs == NULL) {
+               pr_warning("%s: cannot allocate SDT probe address list (%s)\n",
+                          __func__, mp->name);
+               return NOTIFY_DONE;
+       }
+
+       for (i = cnt = 0, sdp = mp->sdt_probes; i < mp->num_dtrace_probes;
+            i++, sdp++) {
+               /*
+                * Fix-up the offset to reflect the relocated address of the
+                * probe by adding the base address of the module core.  We
+                * verify that the result still lies within the module core,
+                * just to be safe; probes that do not are skipped.
+                */
+               sdp->sdpd_offset += (uintptr_t)mp->module_core;
+               if (!within_module_core(sdp->sdpd_offset, mp)) {
+                       pr_warning("%s: SDT probe outside module core %s\n",
+                                  __func__, mp->name);
+                       continue;
+               }
+
+               /* 'cnt' only counts the probes that passed the check. */
+               addrs[cnt++] = (sdt_instr_t *)sdp->sdpd_offset;
+       }
+
+       dtrace_sdt_nop_multi(addrs, cnt);
+
+       vfree(addrs);
+
+       return NOTIFY_DONE;
+}
+
+/* Notifier block that routes module state changes to dtrace_mod_notifier(). */
+static struct notifier_block   dtrace_modfix = {
+       .notifier_call = dtrace_mod_notifier,
+};
+
+/*
+ * Initialize SDT support: run the architecture-specific setup first, then
+ * register the module notifier so that modules reported afterwards have their
+ * SDT probes processed by dtrace_mod_notifier().
+ */
+void dtrace_sdt_init(void)
+{
+       dtrace_sdt_init_arch();
+       register_module_notifier(&dtrace_modfix);
+}
+
+#if defined(CONFIG_DT_DT_PERF) || defined(CONFIG_DT_DT_PERF_MODULE)
+/*
+ * Exported function whose body is a single SDT probe site named "measure".
+ * Only built when dt_perf support is configured (built-in or as a module).
+ * NOTE(review): presumably called by the dt_perf provider to measure probe
+ * overhead - confirm against that module.
+ */
+void dtrace_sdt_perf(void)
+{
+       DTRACE_PROBE(measure);
+}
+EXPORT_SYMBOL(dtrace_sdt_perf);
+#endif