]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
dtrace: SDT implementation
authorKris Van Hees <kris.van.hees@oracle.com>
Mon, 5 Dec 2011 20:01:27 +0000 (15:01 -0500)
committerNick Alcock <nick.alcock@oracle.com>
Mon, 29 Jun 2015 21:40:22 +0000 (22:40 +0100)
This adds core kernel support for providing a list of static probe
points for the kernel pseudo-module, dtrace SDT meta-provider support, ...
Also adds a new script (dtrace_sdt.sh) to extract the locations of SDT probe
points in the core kernel.

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
fs/exec.c
include/linux/dtrace_os.h
include/linux/sdt.h
init/main.c
kernel/dtrace/dtrace_os.c
kernel/dtrace/sdt_register.c
kernel/exit.c
kernel/fork.c
kernel/signal.c
scripts/dtrace_sdt.sh [new file with mode: 0755]

index 1977c2a553aca711ba145d1670ea9a84fd45ea84..cc95013de1e624ffbc3f2df80d4ca3b652eddd1a 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/compat.h>
+#include <linux/sdt.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1522,6 +1523,7 @@ static int do_execveat_common(int fd, struct filename *filename,
        current->in_execve = 1;
 
        file = do_open_execat(fd, filename, flags);
+       DTRACE_PROC1(exec, char *, filename->name);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
                goto out_unmark;
@@ -1595,6 +1597,8 @@ static int do_execveat_common(int fd, struct filename *filename,
        putname(filename);
        if (displaced)
                put_files_struct(displaced);
+
+       DTRACE_PROC(exec__success);
        return retval;
 
 out:
@@ -1616,6 +1620,7 @@ out_files:
                reset_files_struct(displaced);
 out_ret:
        putname(filename);
+       DTRACE_PROC1(exec__failure, int, retval);
        return retval;
 }
 
index d8e146fbcb1409046fd1dc7e0a5e58abf488b247..d9436ec513a65b5f494b09b0268179e8647a0da1 100644 (file)
@@ -18,6 +18,9 @@ typedef uint32_t dtrace_id_t;
 #define SCE_RT_SIGRETURN       6
 #define SCE_nr_stubs           7
 
+extern void dtrace_invop_add(int (*func)(struct pt_regs *));
+extern void dtrace_invop_remove(int (*func)(struct pt_regs *));
+
 typedef void (*sys_call_ptr_t)(void);
 typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
                              uintptr_t, uintptr_t);
index bd7e7db46159bc0715ffe0e88fa563b09c506a85..8a58a51cb13efaec750c429721d8eb2426bdb893 100644 (file)
@@ -120,15 +120,21 @@ extern "C" {
  * vmlinux dtrace_probe__ caller reloc info;
  * comes from vmlinux_info.S
  */
-extern unsigned long dtrace_relocs_count __attribute__((weak));
-extern void *dtrace_relocs __attribute__((weak));
+typedef uint8_t        sdt_instr_t;
 
-struct reloc_info {
-       unsigned long probe_offset;
-       unsigned long section_base;
-       unsigned long probe_name_len;
-       char probe_name[0];
-} __aligned(sizeof(unsigned long));
+extern unsigned long dtrace_sdt_nprobes __attribute__((weak));
+extern void *dtrace_sdt_probes __attribute__((weak));
+
+extern void sdt_probe_enable(sdt_instr_t *);
+extern void sdt_probe_disable(sdt_instr_t *);
+
+/*
+ * One record of the SDT probe point table that starts at dtrace_sdt_probes.
+ *
+ * Records are variable length: the fixed header below is followed by two
+ * NUL-terminated strings, the probe name and then the function name, and the
+ * string area is padded up to a multiple of the size of unsigned long.
+ *
+ *   offset    - offset of the probe point relocation; combined with base
+ *               (and _text) to locate the instruction to patch
+ *   base      - base value of the section the relocation was found in
+ *   name_len  - length of the probe name, not counting the terminating NUL
+ *   func_len  - length of the function name, not counting the terminating NUL
+ *   name      - start of the string area; the function name begins at
+ *               name + name_len + 1
+ */
+typedef struct dtrace_sdt_probeinfo {
+       unsigned long offset;
+       unsigned long base;
+       unsigned long name_len;
+       unsigned long func_len;
+       char name[0];
+} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t;
 
 void dtrace_register_builtins(void);
 
@@ -430,7 +436,8 @@ void dtrace_register_builtins(void);
 extern const char *sdt_prefix;
 
 typedef struct sdt_probedesc {
-       char                    *sdpd_name;     /* name of this probe */
+       char                    *sdpd_name;     /* probe name */
+       char                    *sdpd_func;     /* probe function */
        unsigned long           sdpd_offset;    /* offset of call in text */
        struct sdt_probedesc    *sdpd_next;     /* next static probe */
 } sdt_probedesc_t;
index da860d2f58c8f40da138602fb3c377eb7013834b..84dade25d3ca423c0bd3aaa3aa7e27bd919dabe1 100644 (file)
@@ -675,7 +675,9 @@ asmlinkage __visible void __init start_kernel(void)
        ftrace_init();
 
 #if defined(CONFIG_DT_SDT) || defined(CONFIG_DT_SDT_MODULE)
+# if FIXME
        dtrace_register_builtins();
+# endif
 #endif
 
        /* Do the rest non-__init'ed, we're now alive */
index d8b5f088785c8f34d4e0c2b18e07243f535e0bc5..2851e1528337317161cb4fa7de07abd95d77bf38 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/cyclic.h>
 #include <linux/dtrace_os.h>
 #include <linux/hrtimer.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -41,9 +42,9 @@ ktime_t dtrace_getwalltime(void)
 }
 EXPORT_SYMBOL(dtrace_getwalltime);
 
-/*
- * Very basic implementation of cyclics, merely enough to support dtrace.
- */
+/*---------------------------------------------------------------------------*\
+(* CYCLICS                                                                   *)
+\*---------------------------------------------------------------------------*/
 typedef union cyclic   cyclic_t;
 union cyclic {
        struct {
@@ -121,6 +122,9 @@ void cyclic_remove(cyclic_id_t id)
 }
 EXPORT_SYMBOL(cyclic_remove);
 
+/*---------------------------------------------------------------------------*\
+(* STACK TRACES                                                              *)
+\*---------------------------------------------------------------------------*/
 static int dtrace_stacktrace_stack(void *data, char *name)
 {
        stacktrace_state_t      *st = (stacktrace_state_t *)data;
@@ -227,6 +231,99 @@ void dtrace_stacktrace(stacktrace_state_t *st)
 }
 EXPORT_SYMBOL(dtrace_stacktrace);
 
+/*---------------------------------------------------------------------------*\
+(* INVALID OPCODE HANDLING                                                   *)
+\*---------------------------------------------------------------------------*/
+/*
+ * Node of the singly linked list of registered invalid opcode handlers.
+ *
+ *   dtih_func - handler callback; it is passed the trap registers and a
+ *               non-zero return value means the trap was handled
+ *   dtih_next - next handler in the list, or NULL at the end
+ */
+typedef struct dtrace_invop_hdlr {
+       int                             (*dtih_func)(struct pt_regs *);
+       struct dtrace_invop_hdlr        *dtih_next;
+} dtrace_invop_hdlr_t;
+
+/* Head of the handler list; NULL while no handler is registered. */
+static dtrace_invop_hdlr_t     *dtrace_invop_hdlrs;
+
+/*
+ * Die notifier callback that offers trap 6 (invalid opcode) events to the
+ * registered DTrace invalid opcode handlers.
+ *
+ * Handlers are tried in list order.  As soon as one of them returns a
+ * non-zero value the trap is considered consumed: the saved instruction
+ * pointer is moved forward by one byte and the notifier chain is stopped.
+ * Any other event, or a trap that no handler claims, is passed on untouched
+ * with NOTIFY_DONE.
+ */
+static int dtrace_die_notifier(struct notifier_block *nb, unsigned long val,
+                              void *args)
+{
+       struct die_args         *dargs = args;
+       dtrace_invop_hdlr_t     *cur;
+
+       /* Only invalid opcode traps are of interest here. */
+       if (val != DIE_TRAP || dargs->trapnr != 6)
+               return NOTIFY_DONE;
+
+       printk(KERN_INFO "dtrace_die_notifier: TRAP %d, IP %lx\n", dargs->trapnr, dargs->regs->ip);
+
+       for (cur = dtrace_invop_hdlrs; cur != NULL; cur = cur->dtih_next) {
+               if (cur->dtih_func(dargs->regs) == 0)
+                       continue;
+
+               /* Handled: step over the one-byte trap instruction. */
+               dargs->regs->ip++;
+
+               printk(KERN_INFO "dtrace_die_notifier: TRAP %d, New IP %lx\n", dargs->trapnr, dargs->regs->ip);
+               return NOTIFY_OK | NOTIFY_STOP_MASK;
+       }
+
+       return NOTIFY_DONE;
+}
+
+/*
+ * Notifier block for dtrace_die_notifier().  It is registered with the die
+ * notifier chain when the first invalid opcode handler is added and
+ * unregistered again when the last one is removed.
+ */
+static struct notifier_block   dtrace_die = {
+       .notifier_call = dtrace_die_notifier,
+};
+
+/*
+ * Register func as an invalid opcode handler.
+ *
+ * The handler is pushed onto the front of the handler list.  When it is the
+ * first handler on the list, the die notifier is registered so that invalid
+ * opcode traps start being offered to the handlers.
+ *
+ * If the list node cannot be allocated, a warning is logged and the handler
+ * is not registered.
+ *
+ * NOTE(review): the list is updated without any locking here -- confirm that
+ * callers serialize add/remove against each other and against trap handling.
+ */
+void dtrace_invop_add(int (*func)(struct pt_regs *))
+{
+       dtrace_invop_hdlr_t     *hdlr;
+
+       hdlr = kmalloc(sizeof(*hdlr), GFP_KERNEL);
+       if (hdlr == NULL) {
+               printk(KERN_WARNING
+                       "%s: cannot allocate invop handler\n", __func__);
+               return;
+       }
+
+       hdlr->dtih_func = func;
+       hdlr->dtih_next = dtrace_invop_hdlrs;
+       dtrace_invop_hdlrs = hdlr;
+
+       /*
+        * If this is the first DTrace invalid opcode handler, register the
+        * die notifier with the kernel notifier core.
+        */
+       if (hdlr->dtih_next == NULL)
+               register_die_notifier(&dtrace_die);
+}
+EXPORT_SYMBOL(dtrace_invop_add);
+
+/*
+ * Unregister the invalid opcode handler func.
+ *
+ * The first list node whose callback equals func is unlinked and freed.
+ * When that leaves the list empty, the die notifier is unregistered as well.
+ *
+ * Asking to remove a handler that is not on the list is reported with
+ * pr_err() and otherwise ignored.
+ */
+void dtrace_invop_remove(int (*func)(struct pt_regs *))
+{
+       dtrace_invop_hdlr_t     *hdlr, *prev = NULL;
+
+       for (hdlr = dtrace_invop_hdlrs; hdlr != NULL; hdlr = hdlr->dtih_next) {
+               if (hdlr->dtih_func == func)
+                       break;
+
+               prev = hdlr;
+       }
+
+       /* Not found: bail out instead of dereferencing a NULL node. */
+       if (hdlr == NULL) {
+               pr_err("attempt to remove non-existant invop handler");
+               return;
+       }
+
+       if (prev == NULL) {
+               dtrace_invop_hdlrs = hdlr->dtih_next;
+
+               /*
+                * If there are no invalid opcode handlers left, unregister
+                * from the kernel notifier core.
+                */
+               if (dtrace_invop_hdlrs == NULL)
+                       unregister_die_notifier(&dtrace_die);
+       } else
+               prev->dtih_next = hdlr->dtih_next;
+
+       kfree(hdlr);
+}
+EXPORT_SYMBOL(dtrace_invop_remove);
+
+/*---------------------------------------------------------------------------*\
+(* SYSTEM CALL PROBING SUPPORT                                               *)
+\*---------------------------------------------------------------------------*/
 void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
                       uintptr_t, uintptr_t);
 
index aaaf13fd7a8f9cb50feb74f47b9dfe902504ddd8..152d5a5db3aafb1990fb22f9044ab667e8748dfa 100644 (file)
 #include <asm-generic/bitsperlong.h>
 #include <asm-generic/sections.h>
 #include <asm/alternative.h>
+#include <asm/nops.h>
 
-#define        SDT_NOP         0x90
+#define        SDT_TRAP_INSTR  0xf0
 #define        SDT_NOP_SIZE    5
 
 const char             *sdt_prefix = "__dtrace_probe_";
 
-static struct module   *kernmod; /* for kernel builtins; TBD: temporary ??? */
+struct module          *dtrace_kmod;
+EXPORT_SYMBOL(dtrace_kmod);
 
-static int sdt_reloc_resolve(struct module *mp, char *symname,
+/*
+ * Enable the SDT probe point at addr by overwriting its first byte with
+ * SDT_TRAP_INSTR using text_poke().
+ *
+ * NOTE(review): text_mutex is not taken here, whereas sdt_probe_resolve()
+ * takes it around its text_poke() call -- confirm that callers hold it.
+ */
+void sdt_probe_enable(sdt_instr_t *addr)
+{
+       text_poke(addr, ((unsigned char []){SDT_TRAP_INSTR}), 1);
+}
+EXPORT_SYMBOL(sdt_probe_enable);
+
+/*
+ * Disable the SDT probe point at addr by overwriting its first byte with the
+ * first byte of ideal_nops[1] using text_poke().
+ *
+ * NOTE(review): text_mutex is not taken here, whereas sdt_probe_resolve()
+ * takes it around its text_poke() call -- confirm that callers hold it.
+ */
+void sdt_probe_disable(sdt_instr_t *addr)
+{
+       text_poke((void *)addr, ideal_nops[1], 1);
+}
+EXPORT_SYMBOL(sdt_probe_disable);
+
+static int sdt_probe_resolve(struct module *mp, char *name, char *func,
                             uintptr_t offset, uintptr_t base, void *nops)
 {
        sdt_probedesc_t *sdp;
        uint8_t *instr;
 
-       /*
-        * The "statically defined tracing" (SDT) provider for DTrace uses
-        * a mechanism similar to TNF, but somewhat simpler.  (Surprise,
-        * surprise.)  The SDT mechanism works by replacing calls to the
-        * undefined routine __dtrace_probe_[name] with nop instructions.
-        * The relocations are logged, and SDT itself will later patch the
-        * running binary appropriately.
-        */
-       if (strncmp(symname, sdt_prefix, strlen(sdt_prefix)) != 0)
+       if ((sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL)) == NULL)
                return 1;
 
-       symname += strlen(sdt_prefix);
-
-       sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL);
-       if (!sdp)
+       if ((sdp->sdpd_name = kstrdup(name, GFP_KERNEL)) == NULL) {
+               kfree(sdp);
                return 1;
+       }
 
-       sdp->sdpd_name = kmalloc(strlen(symname) + 1, GFP_KERNEL);
-       if (!sdp->sdpd_name) {
+       if ((sdp->sdpd_func = kstrdup(func, GFP_KERNEL)) == NULL) {
+               kfree(sdp->sdpd_name);
                kfree(sdp);
                return 1;
        }
-       memcpy(sdp->sdpd_name, symname, strlen(symname) + 1);
-
-       /* FIXME:
-        * instr is still relative, not absolute; for some reason,
-        * vmlinux_info.S shows absolute addresses but it is not being
-        * rebuilt again when needed, so vmlinux_info.o still contains
-        * relative addresses.
-        * Hack this for now by adding _stext to instr, but this should
-        * not be necessary.
-        */
+
        /* convert relative instr to absolute */
        instr = (uint8_t *)((uintptr_t)_text + base + offset - 1);
 
@@ -68,8 +64,9 @@ static int sdt_reloc_resolve(struct module *mp, char *symname,
        mp->sdt_probes = sdp;
 
        DPRINTK("sdt_probes -> 0x%p\n", mp->sdt_probes);
-       DPRINTK("this probe: instr offset=0x%lx, next ptr=0x%p, probe_name=%s\n",
-               sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name);
+       DPRINTK("this: instr offset=0x%lx, next ptr=0x%p, name=%s, func=%s\n",
+               sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name,
+               sdp->sdpd_func);
 
        mutex_lock(&text_mutex);
        text_poke(instr, nops, SDT_NOP_SIZE);
@@ -81,44 +78,57 @@ static int sdt_reloc_resolve(struct module *mp, char *symname,
 
 void dtrace_register_builtins(void)
 {
-       unsigned long cnt;
-       struct reloc_info *ri = (struct reloc_info *)&dtrace_relocs;
-       void *nextri;
-       uint8_t nops[SDT_NOP_SIZE];
+       unsigned long           cnt;
+       dtrace_sdt_probeinfo_t  *pi =
+                               (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes;
+       void                    *nextpi;
+       uint8_t                 nops[SDT_NOP_SIZE];
 
-       add_nops(nops, SDT_NOP_SIZE);
+       /*
+        * A little unusual, but potentially necessary.  While we could use a
+        * single NOP sequence of length SDT_NOP_SIZE, we need to consider the
+        * fact that when a SDT probe point is enabled, a single invalid opcode
+        * is written on the first byte of this NOP sequence.  By using a
+        * sequence of a 1-byte NOP, followed by a (SDT_NOP_SIZE - 1) byte NOP
+        * sequence, we play it pretty safe.
+        */
+       add_nops(nops, 1);
+       add_nops(nops + 1, SDT_NOP_SIZE - 1);
 
-       kernmod = kzalloc(sizeof(struct module), GFP_KERNEL);
-       if (!kernmod) {
+       dtrace_kmod = kzalloc(sizeof(struct module), GFP_KERNEL);
+       if (!dtrace_kmod) {
                printk(KERN_WARNING
-                       "%s: cannot allocate kernel builtin module memory\n",
+                       "%s: cannot allocate kernel pseudo-module\n",
                        __func__);
                return;
        }
-       kernmod->state = MODULE_STATE_LIVE;
-       strlcpy(kernmod->name, "kernel_builtins", MODULE_NAME_LEN);
+       dtrace_kmod->state = MODULE_STATE_LIVE;
+       strlcpy(dtrace_kmod->name, "vmlinux", MODULE_NAME_LEN);
 
        DPRINTK("%lu SDT relocation entries beg. @0x%p\n",
-               dtrace_relocs_count, &dtrace_relocs);
+               dtrace_sdt_nprobes, &dtrace_sdt_probes);
 
-       if (dtrace_relocs_count == 0)
+       if (dtrace_sdt_nprobes == 0)
                return;
 
-       for (cnt = 0; cnt < dtrace_relocs_count; cnt++) {
-               DPRINTK("SDT relocs [%lu]: "
-                       "probe_offset=0x%lx, section_base=0x%lx, "
-                       "name_len=0x%lx, probe_name=%s\n",
-                       cnt, ri->probe_offset, ri->section_base,
-                       ri->probe_name_len, ri->probe_name);
-               if (sdt_reloc_resolve(kernmod, ri->probe_name,
-                                     ri->probe_offset, ri->section_base,
-                                     nops))
+       for (cnt = 0; cnt < dtrace_sdt_nprobes; cnt++) {
+               char    *func = pi->name + pi->name_len + 1;
+
+               DPRINTK("SDT probe point [%lu]: "
+                       "offset=0x%lx, base=0x%lx, name_len=0x%lx, "
+                       "func_len=0x%lx, name=%s, func=%s\n",
+                       cnt, pi->offset, pi->base, pi->name_len,
+                            pi->func_len, pi->name, func);
+               if (sdt_probe_resolve(dtrace_kmod, pi->name, func,
+                                     pi->offset, pi->base, nops))
                        printk(KERN_WARNING "%s: cannot resolve %s\n",
-                               __func__, ri->probe_name);
+                               __func__, pi->name);
 
-               nextri = (void *)ri + sizeof(struct reloc_info)
-                       + roundup(ri->probe_name_len + 1, BITS_PER_LONG / 8);
-               ri = nextri;
-               DPRINTK("SDT relocs: next entry at 0x%p\n", ri);
+               nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t)
+                       + roundup(pi->name_len + 1 +
+                                 pi->func_len + 1, BITS_PER_LONG / 8);
+               pi = nextpi;
+               DPRINTK("SDT relocs: next entry at 0x%p\n", pi);
        }
 }
+EXPORT_SYMBOL(dtrace_register_builtins);
index 22fcc05dec4022fa54f2507cb13eddf2137abacc..8915dea8385baa056a6986cb6587ceaa7981afbb 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/oom.h>
 #include <linux/writeback.h>
 #include <linux/shm.h>
+#include <linux/sdt.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -730,6 +731,8 @@ void do_exit(long code)
        tsk->exit_code = code;
        taskstats_exit(tsk, group_dead);
 
+       DTRACE_PROC1(exit, int, code);
+
        exit_mm(tsk);
 
        if (group_dead)
index 03c1eaaa6ef56f56a670488eaf572eb8c6f58d4e..ea5630530b1e5c6b597db2de3e7a25349fe5c9a8 100644 (file)
@@ -75,6 +75,7 @@
 #include <linux/aio.h>
 #include <linux/compiler.h>
 #include <linux/sysctl.h>
+#include <linux/sdt.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1738,6 +1739,7 @@ long do_fork(unsigned long clone_flags,
                }
 
                put_pid(pid);
+               DTRACE_PROC1(create, struct task_struct *, p);
        } else {
                nr = PTR_ERR(p);
        }
index 5d366e7b1de2554b76e95fec0b76428706f611b8..7186267e64867f084852cf7a89c5d68a4a756bb9 100644 (file)
@@ -1121,6 +1121,7 @@ out_set:
        sigaddset(&pending->signal, sig);
        complete_signal(sig, t, group);
 ret:
+       DTRACE_PROC2(signal__send, struct task_struct *, t, int, sig);
        trace_signal_generate(sig, info, t, group, result);
        return ret;
 }
@@ -2342,11 +2343,6 @@ relock:
                        if (print_fatal_signals)
                                print_fatal_signal(ksig->info.si_signo);
                        proc_coredump_connector(current);
-                       DTRACE_PROBE4(get_signal_to_deliver,
-                               int, ksig->info.si_signo,
-                               struct pt_regs *, regs,
-                               char *, current->comm,
-                               int, task_pid_nr(current));
                        /*
                         * If it was able to dump core, this kills all
                         * other threads in the group and synchronizes with
@@ -2366,6 +2362,12 @@ relock:
        }
        spin_unlock_irq(&sighand->siglock);
 
+       if (signr != 0) {
+               DTRACE_PROC3(signal__handle, int, signr, siginfo_t *,
+                            ksig->ka.sa.sa_handler != SIG_DFL ? NULL :
+                            &ksig->info, void (*)(void),
+                            ksig->ka.sa.sa_handler);
+       }
        ksig->sig = signr;
        return ksig->sig > 0;
 }
diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh
new file mode 100755 (executable)
index 0000000..b7a27de
--- /dev/null
@@ -0,0 +1,85 @@
+#!/bin/sh
+
+# Extract the SDT probe points from an object file and write, on standard
+# output, an assembler source file that defines two symbols:
+#
+#   dtrace_sdt_probes   - table with one record per probe point (relocation
+#                         offset, section base, probe name length, function
+#                         name length, probe name, function name)
+#   dtrace_sdt_nprobes  - number of records in that table
+#
+# Usage: dtrace_sdt.sh <object-file>
+
+# The objdump headings matched below and the sort order both depend on the
+# locale.  LC_ALL takes precedence over LANG, and a plain assignment is not
+# guaranteed to reach child processes, so set and export both.
+LANG=C
+LC_ALL=C
+export LANG LC_ALL
+
+fn="$1"
+
+if [ -z "$fn" ]; then
+    echo "Usage: $0 <object-file>" >&2
+    exit 1
+fi
+
+# Stage 1 turns the objdump listing into "F" (function symbol) and "R" (probe
+# relocation) lines keyed by section and address.  Stage 2 sorts them so that
+# every relocation follows the function symbols that precede it in the same
+# section.  Stage 3 emits the assembler source.
+objdump -htr "$fn" | \
+    awk '/^Sections:/ {
+            # Section headers come as two lines per section; remember the
+            # sixth column of the first line for sections flagged CODE.
+            getline;
+            getline;
+            while ($0 !~ /SYMBOL/) {
+                sect = $2;
+                addr = $6;
+
+                getline;
+                if (/CODE/)
+                    sectbase[sect] = addr;
+
+                getline;
+            }
+            next;
+        }
+
+        # Function symbol: section, address, name.
+        / F / {
+            printf "%16s %s F %s\n", $4, $1, $6;
+            next;
+        }
+
+        # Start of a relocation list: pick the section name out of [...].
+        /^RELOC/ {
+            sub(/^[^\[]+\[/, "");
+            sub(/].*$/, "");
+            sect = $1;
+            next;
+        }
+
+        # Probe relocation: drop the 15 character __dtrace_probe_ prefix and
+        # any trailing addend from the symbol name.
+        /__dtrace_probe_/ {
+            $3 = substr($3, 16);
+            sub(/-.*$/, "", $3);
+
+            printf "%16s %s R %s %s\n", sect, $1, $3, sectbase[sect];
+            next;
+        }' | \
+    sort | \
+    awk 'BEGIN {
+            # Start at zero so that an object file without any probe points
+            # still yields a valid count operand in the END block.
+            probec = 0;
+
+            print "#include <asm/types.h>";
+            print "#if BITS_PER_LONG == 64";
+            print "# define PTR .quad";
+            print "# define ALGN .align 8";
+            print "#else";
+            print "# define PTR .long";
+            print "# define ALGN .align 4";
+            print "#endif";
+
+            print "\t.section .rodata, \042a\042";
+            print "";
+
+            print ".globl dtrace_sdt_probes";
+            print "\tALGN";
+            print "dtrace_sdt_probes:";
+        }
+
+        # Remember the most recent function symbol seen.
+        / F / {
+            fun = $4;
+            next;
+        }
+
+        # Emit one table record for this probe relocation.
+        / R / {
+            print "\tPTR\t0x" $2;
+            print "\tPTR\t0x" $5;
+            print "\tPTR\t" length($4);
+            print "\tPTR\t" length(fun);
+            print "\t.asciz\t\042" $4 "\042";
+            print "\t.asciz\t\042" fun "\042";
+            print "\tALGN";
+
+            probec++;
+        }
+
+        END {
+            print "";
+            print ".globl dtrace_sdt_nprobes";
+            print "\tALGN";
+            print "dtrace_sdt_nprobes:";
+            print "\tPTR\t" probec;
+        }'