From: Kris Van Hees Date: Mon, 5 Dec 2011 20:01:27 +0000 (-0500) Subject: dtrace: SDT implementation X-Git-Tag: v4.1.12-92~313^2~160 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=11530faf40b829d0276d563500ca1812029580cf;p=users%2Fjedix%2Flinux-maple.git dtrace: SDT implementation This adds core kernel support for providing a list of static probe points for the kernel pseudo-module, dtrace SDT meta-provider support, ... Also a new script (dtrace_sdt.sh) to extract locations of SDT probe points in the core kernel. Signed-off-by: Kris Van Hees --- diff --git a/fs/exec.c b/fs/exec.c index 1977c2a553aca..cc95013de1e62 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -1522,6 +1523,7 @@ static int do_execveat_common(int fd, struct filename *filename, current->in_execve = 1; file = do_open_execat(fd, filename, flags); + DTRACE_PROC1(exec, char *, filename->name); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1595,6 +1597,8 @@ static int do_execveat_common(int fd, struct filename *filename, putname(filename); if (displaced) put_files_struct(displaced); + + DTRACE_PROC(exec__success); return retval; out: @@ -1616,6 +1620,7 @@ out_files: reset_files_struct(displaced); out_ret: putname(filename); + DTRACE_PROC1(exec__failure, int, retval); return retval; } diff --git a/include/linux/dtrace_os.h b/include/linux/dtrace_os.h index d8e146fbcb140..d9436ec513a65 100644 --- a/include/linux/dtrace_os.h +++ b/include/linux/dtrace_os.h @@ -18,6 +18,9 @@ typedef uint32_t dtrace_id_t; #define SCE_RT_SIGRETURN 6 #define SCE_nr_stubs 7 +extern void dtrace_invop_add(int (*func)(struct pt_regs *)); +extern void dtrace_invop_remove(int (*func)(struct pt_regs *)); + typedef void (*sys_call_ptr_t)(void); typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); diff --git a/include/linux/sdt.h b/include/linux/sdt.h index bd7e7db46159b..8a58a51cb13ef 100644 
--- a/include/linux/sdt.h +++ b/include/linux/sdt.h @@ -120,15 +120,21 @@ extern "C" { * vmlinux dtrace_probe__ caller reloc info; * comes from vmlinux_info.S */ -extern unsigned long dtrace_relocs_count __attribute__((weak)); -extern void *dtrace_relocs __attribute__((weak)); +typedef uint8_t sdt_instr_t; -struct reloc_info { - unsigned long probe_offset; - unsigned long section_base; - unsigned long probe_name_len; - char probe_name[0]; -} __aligned(sizeof(unsigned long)); +extern unsigned long dtrace_sdt_nprobes __attribute__((weak)); +extern void *dtrace_sdt_probes __attribute__((weak)); + +extern void sdt_probe_enable(sdt_instr_t *); +extern void sdt_probe_disable(sdt_instr_t *); + +typedef struct dtrace_sdt_probeinfo { + unsigned long offset; + unsigned long base; + unsigned long name_len; + unsigned long func_len; + char name[0]; +} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t; void dtrace_register_builtins(void); @@ -430,7 +436,8 @@ void dtrace_register_builtins(void); extern const char *sdt_prefix; typedef struct sdt_probedesc { - char *sdpd_name; /* name of this probe */ + char *sdpd_name; /* probe name */ + char *sdpd_func; /* probe function */ unsigned long sdpd_offset; /* offset of call in text */ struct sdt_probedesc *sdpd_next; /* next static probe */ } sdt_probedesc_t; diff --git a/init/main.c b/init/main.c index da860d2f58c8f..84dade25d3ca4 100644 --- a/init/main.c +++ b/init/main.c @@ -675,7 +675,9 @@ asmlinkage __visible void __init start_kernel(void) ftrace_init(); #if defined(CONFIG_DT_SDT) || defined(CONFIG_DT_SDT_MODULE) +# if FIXME dtrace_register_builtins(); +# endif #endif /* Do the rest non-__init'ed, we're now alive */ diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c index d8b5f088785c8..2851e15283373 100644 --- a/kernel/dtrace/dtrace_os.c +++ b/kernel/dtrace/dtrace_os.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -41,9 +42,9 @@ ktime_t dtrace_getwalltime(void) } 
EXPORT_SYMBOL(dtrace_getwalltime);
 
-/*
- * Very basic implementation of cyclics, merely enough to support dtrace.
- */
+/*---------------------------------------------------------------------------*\
+(* CYCLICS *)
+\*---------------------------------------------------------------------------*/
 typedef union cyclic cyclic_t;
 union cyclic {
 	struct {
@@ -121,6 +122,9 @@ void cyclic_remove(cyclic_id_t id)
 }
 EXPORT_SYMBOL(cyclic_remove);
 
+/*---------------------------------------------------------------------------*\
+(* STACK TRACES *)
+\*---------------------------------------------------------------------------*/
 static int dtrace_stacktrace_stack(void *data, char *name)
 {
 	stacktrace_state_t *st = (stacktrace_state_t *)data;
@@ -227,6 +231,127 @@ void dtrace_stacktrace(stacktrace_state_t *st)
 }
 EXPORT_SYMBOL(dtrace_stacktrace);
 
+/*---------------------------------------------------------------------------*\
+(* INVALID OPCODE HANDLING *)
+\*---------------------------------------------------------------------------*/
+/*
+ * One registered invalid-opcode handler; entries form a singly linked list
+ * headed by dtrace_invop_hdlrs, newest first.
+ */
+typedef struct dtrace_invop_hdlr {
+	int (*dtih_func)(struct pt_regs *);
+	struct dtrace_invop_hdlr *dtih_next;
+} dtrace_invop_hdlr_t;
+
+static dtrace_invop_hdlr_t *dtrace_invop_hdlrs;
+
+/*
+ * Die-chain callback.  Events other than DIE_TRAP with trap number 6 are
+ * ignored.  Otherwise each registered handler is called in list order until
+ * one returns non-zero; in that case the saved instruction pointer is moved
+ * forward by one byte and the rest of the notifier chain is skipped.
+ *
+ * NOTE(review): the handler list is walked here without any locking; this
+ * presumably relies on callers serialising add/remove -- confirm.
+ */
+static int dtrace_die_notifier(struct notifier_block *nb, unsigned long val,
+			       void *args)
+{
+	struct die_args *dargs = args;
+	dtrace_invop_hdlr_t *hdlr;
+	int rval = 0;
+
+	if (val != DIE_TRAP || dargs->trapnr != 6)
+		return NOTIFY_DONE;
+
+printk(KERN_INFO "dtrace_die_notifier: TRAP %d, IP %lx\n", dargs->trapnr, dargs->regs->ip);
+
+	for (hdlr = dtrace_invop_hdlrs; hdlr != NULL; hdlr = hdlr->dtih_next) {
+		if ((rval = hdlr->dtih_func(dargs->regs)) != 0)
+			break;
+	}
+
+	if (rval != 0) {
+		dargs->regs->ip++;
+
+printk(KERN_INFO "dtrace_die_notifier: TRAP %d, New IP %lx\n", dargs->trapnr, dargs->regs->ip);
+		return NOTIFY_OK | NOTIFY_STOP_MASK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dtrace_die = {
+	.notifier_call = dtrace_die_notifier,
+};
+
+/*
+ * Register func as an invalid-opcode handler by pushing it on the front of
+ * the handler list.  If the list entry cannot be allocated the handler is
+ * not registered.
+ */
+void dtrace_invop_add(int (*func)(struct pt_regs *))
+{
+	dtrace_invop_hdlr_t *hdlr;
+
+	hdlr = kmalloc(sizeof(dtrace_invop_hdlr_t), GFP_KERNEL);
+	if (hdlr == NULL)
+		return;
+
+	hdlr->dtih_func = func;
+	hdlr->dtih_next = dtrace_invop_hdlrs;
+	dtrace_invop_hdlrs = hdlr;
+
+	/*
+	 * If this is the first DTrace invalid opcode handling, register the
+	 * die notifier with the kernel notifier core.
+	 */
+	if (hdlr->dtih_next == NULL)
+		register_die_notifier(&dtrace_die);
+}
+EXPORT_SYMBOL(dtrace_invop_add);
+
+/*
+ * Unregister the first list entry whose handler is func and free it.  An
+ * attempt to remove a handler that is not on the list is logged and
+ * otherwise ignored.
+ */
+void dtrace_invop_remove(int (*func)(struct pt_regs *))
+{
+	dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlrs, *prev = NULL;
+
+	for (;;) {
+		if (hdlr == NULL) {
+			pr_err("attempt to remove non-existant invop handler\n");
+			return;
+		}
+
+		if (hdlr->dtih_func == func)
+			break;
+
+		prev = hdlr;
+		hdlr = hdlr->dtih_next;
+	}
+
+	if (prev == NULL) {
+		dtrace_invop_hdlrs = hdlr->dtih_next;
+
+		/*
+		 * If there are no invalid opcode handlers left, unregister
+		 * from the kernel notifier core.
+		 */
+		if (dtrace_invop_hdlrs == NULL)
+			unregister_die_notifier(&dtrace_die);
+	} else
+		prev->dtih_next = hdlr->dtih_next;
+
+	kfree(hdlr);
+}
+EXPORT_SYMBOL(dtrace_invop_remove);
+
+/*---------------------------------------------------------------------------*\
+(* SYSTEM CALL PROBING SUPPORT *)
+\*---------------------------------------------------------------------------*/
 void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
 		       uintptr_t, uintptr_t);
 
diff --git a/kernel/dtrace/sdt_register.c b/kernel/dtrace/sdt_register.c
index aaaf13fd7a8f9..152d5a5db3aaf 100644
--- a/kernel/dtrace/sdt_register.c
+++ b/kernel/dtrace/sdt_register.c
@@ -13,52 +13,48 @@
 #include
 #include
 #include
+#include
 
-#define SDT_NOP 0x90
+#define SDT_TRAP_INSTR 0xf0
 #define SDT_NOP_SIZE 5
 
 const char *sdt_prefix = "__dtrace_probe_";
 
-static struct module *kernmod; /* for kernel builtins; TBD: temporary ???
*/ +struct module *dtrace_kmod; +EXPORT_SYMBOL(dtrace_kmod); -static int sdt_reloc_resolve(struct module *mp, char *symname, +void sdt_probe_enable(sdt_instr_t *addr) +{ + text_poke(addr, ((unsigned char []){SDT_TRAP_INSTR}), 1); +} +EXPORT_SYMBOL(sdt_probe_enable); + +void sdt_probe_disable(sdt_instr_t *addr) +{ + text_poke((void *)addr, ideal_nops[1], 1); +} +EXPORT_SYMBOL(sdt_probe_disable); + +static int sdt_probe_resolve(struct module *mp, char *name, char *func, uintptr_t offset, uintptr_t base, void *nops) { sdt_probedesc_t *sdp; uint8_t *instr; - /* - * The "statically defined tracing" (SDT) provider for DTrace uses - * a mechanism similar to TNF, but somewhat simpler. (Surprise, - * surprise.) The SDT mechanism works by replacing calls to the - * undefined routine __dtrace_probe_[name] with nop instructions. - * The relocations are logged, and SDT itself will later patch the - * running binary appropriately. - */ - if (strncmp(symname, sdt_prefix, strlen(sdt_prefix)) != 0) + if ((sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL)) == NULL) return 1; - symname += strlen(sdt_prefix); - - sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL); - if (!sdp) + if ((sdp->sdpd_name = kstrdup(name, GFP_KERNEL)) == NULL) { + kfree(sdp); return 1; + } - sdp->sdpd_name = kmalloc(strlen(symname) + 1, GFP_KERNEL); - if (!sdp->sdpd_name) { + if ((sdp->sdpd_func = kstrdup(func, GFP_KERNEL)) == NULL) { + kfree(sdp->sdpd_name); kfree(sdp); return 1; } - memcpy(sdp->sdpd_name, symname, strlen(symname) + 1); - - /* FIXME: - * instr is still relative, not absolute; for some reason, - * vmlinux_info.S shows absolute addresses but it is not being - * rebuilt again when needed, so vmlinux_info.o still contains - * relative addresses. - * Hack this for now by adding _stext to instr, but this should - * not be necessary. 
- */ + /* convert relative instr to absolute */ instr = (uint8_t *)((uintptr_t)_text + base + offset - 1); @@ -68,8 +64,9 @@ static int sdt_reloc_resolve(struct module *mp, char *symname, mp->sdt_probes = sdp; DPRINTK("sdt_probes -> 0x%p\n", mp->sdt_probes); - DPRINTK("this probe: instr offset=0x%lx, next ptr=0x%p, probe_name=%s\n", - sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name); + DPRINTK("this: instr offset=0x%lx, next ptr=0x%p, name=%s, func=%s\n", + sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name, + sdp->sdpd_func); mutex_lock(&text_mutex); text_poke(instr, nops, SDT_NOP_SIZE); @@ -81,44 +78,57 @@ static int sdt_reloc_resolve(struct module *mp, char *symname, void dtrace_register_builtins(void) { - unsigned long cnt; - struct reloc_info *ri = (struct reloc_info *)&dtrace_relocs; - void *nextri; - uint8_t nops[SDT_NOP_SIZE]; + unsigned long cnt; + dtrace_sdt_probeinfo_t *pi = + (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes; + void *nextpi; + uint8_t nops[SDT_NOP_SIZE]; - add_nops(nops, SDT_NOP_SIZE); + /* + * A little unusual, but potentially necessary. While we could use a + * single NOP sequence of length SDT_NOP_SIZE, we need to consider the + * fact that when a SDT probe point is enabled, a single invalid opcode + * is written on the first byte of this NOP sequence. By using a + * sequence of a 1-byte NOP, followed by a (SDT_NOP_SIZE - 1) byte NOP + * sequence, we play it pretty safe. 
+ */ + add_nops(nops, 1); + add_nops(nops + 1, SDT_NOP_SIZE - 1); - kernmod = kzalloc(sizeof(struct module), GFP_KERNEL); - if (!kernmod) { + dtrace_kmod = kzalloc(sizeof(struct module), GFP_KERNEL); + if (!dtrace_kmod) { printk(KERN_WARNING - "%s: cannot allocate kernel builtin module memory\n", + "%s: cannot allocate kernel pseudo-module\n", __func__); return; } - kernmod->state = MODULE_STATE_LIVE; - strlcpy(kernmod->name, "kernel_builtins", MODULE_NAME_LEN); + dtrace_kmod->state = MODULE_STATE_LIVE; + strlcpy(dtrace_kmod->name, "vmlinux", MODULE_NAME_LEN); DPRINTK("%lu SDT relocation entries beg. @0x%p\n", - dtrace_relocs_count, &dtrace_relocs); + dtrace_sdt_nprobes, &dtrace_sdt_probes); - if (dtrace_relocs_count == 0) + if (dtrace_sdt_nprobes == 0) return; - for (cnt = 0; cnt < dtrace_relocs_count; cnt++) { - DPRINTK("SDT relocs [%lu]: " - "probe_offset=0x%lx, section_base=0x%lx, " - "name_len=0x%lx, probe_name=%s\n", - cnt, ri->probe_offset, ri->section_base, - ri->probe_name_len, ri->probe_name); - if (sdt_reloc_resolve(kernmod, ri->probe_name, - ri->probe_offset, ri->section_base, - nops)) + for (cnt = 0; cnt < dtrace_sdt_nprobes; cnt++) { + char *func = pi->name + pi->name_len + 1; + + DPRINTK("SDT probe point [%lu]: " + "offset=0x%lx, base=0x%lx, name_len=0x%lx, " + "func_len=0x%lx, name=%s, func=%s\n", + cnt, pi->offset, pi->base, pi->name_len, + pi->func_len, pi->name, func); + if (sdt_probe_resolve(dtrace_kmod, pi->name, func, + pi->offset, pi->base, nops)) printk(KERN_WARNING "%s: cannot resolve %s\n", - __func__, ri->probe_name); + __func__, pi->name); - nextri = (void *)ri + sizeof(struct reloc_info) - + roundup(ri->probe_name_len + 1, BITS_PER_LONG / 8); - ri = nextri; - DPRINTK("SDT relocs: next entry at 0x%p\n", ri); + nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t) + + roundup(pi->name_len + 1 + + pi->func_len + 1, BITS_PER_LONG / 8); + pi = nextpi; + DPRINTK("SDT relocs: next entry at 0x%p\n", pi); } } 
+EXPORT_SYMBOL(dtrace_register_builtins); diff --git a/kernel/exit.c b/kernel/exit.c index 22fcc05dec402..8915dea8385ba 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -730,6 +731,8 @@ void do_exit(long code) tsk->exit_code = code; taskstats_exit(tsk, group_dead); + DTRACE_PROC1(exit, int, code); + exit_mm(tsk); if (group_dead) diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef56..ea5630530b1e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -1738,6 +1739,7 @@ long do_fork(unsigned long clone_flags, } put_pid(pid); + DTRACE_PROC1(create, struct task_struct *, p); } else { nr = PTR_ERR(p); } diff --git a/kernel/signal.c b/kernel/signal.c index 5d366e7b1de25..7186267e64867 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1121,6 +1121,7 @@ out_set: sigaddset(&pending->signal, sig); complete_signal(sig, t, group); ret: + DTRACE_PROC2(signal__send, struct task_struct *, t, int, sig); trace_signal_generate(sig, info, t, group, result); return ret; } @@ -2342,11 +2343,6 @@ relock: if (print_fatal_signals) print_fatal_signal(ksig->info.si_signo); proc_coredump_connector(current); - DTRACE_PROBE4(get_signal_to_deliver, - int, ksig->info.si_signo, - struct pt_regs *, regs, - char *, current->comm, - int, task_pid_nr(current)); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with @@ -2366,6 +2362,12 @@ relock: } spin_unlock_irq(&sighand->siglock); + if (signr != 0) { + DTRACE_PROC3(signal__handle, int, signr, siginfo_t *, + ksig->ka.sa.sa_handler != SIG_DFL ? 
NULL :
+			     &ksig->info, void (*)(void),
+			     ksig->ka.sa.sa_handler);
+	}
 	ksig->sig = signr;
 	return ksig->sig > 0;
 }
diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh
new file mode 100755
index 0000000000000..b7a27de890435
--- /dev/null
+++ b/scripts/dtrace_sdt.sh
@@ -0,0 +1,107 @@
+#!/bin/sh
+#
+# Usage: dtrace_sdt.sh <object-file>
+#
+# Runs objdump -htr on the object file named by $1 and writes assembler source
+# to stdout that defines dtrace_sdt_probes (one record per __dtrace_probe_*
+# relocation: two values taken from the objdump output, the lengths of the
+# probe and function names, then both names as NUL-terminated strings) and
+# dtrace_sdt_nprobes (the number of records).
+
+# Force the C locale for the whole pipeline.  The second awk stage relies on
+# sort placing each function symbol line ahead of the relocation lines that
+# belong to it.  LC_ALL is exported because it overrides LANG and every other
+# LC_* variable, and an unexported assignment is not seen by child processes.
+LC_ALL=C
+export LC_ALL
+
+fn="$1"
+
+objdump -htr "$fn" | \
+    awk '/^Sections:/ {
+	     # Section table: record field 6 of the header line of every
+	     # section whose flags line mentions CODE.
+	     getline;
+	     getline;
+	     while ($0 !~ /SYMBOL/) {
+		 sect = $2;
+		 addr = $6;
+
+		 getline;
+		 if (/CODE/)
+		     sectbase[sect] = addr;
+
+		 getline;
+	     }
+	     next;
+	 }
+
+	 # Function symbols: emit "section value F name".
+	 / F / {
+	     printf "%16s %s F %s\n", $4, $1, $6;
+	     next;
+	 }
+
+	 # Start of a relocation table: pick the section name out of [...].
+	 /^RELOC/ {
+	     sub(/^[^\[]+\[/, "");
+	     sub(/].*$/, "");
+	     sect = $1;
+	     next;
+	 }
+
+	 # Probe relocations: strip the __dtrace_probe_ prefix and any -addend
+	 # suffix, then emit "section offset R probe field6-of-section".
+	 /__dtrace_probe_/ {
+	     $3 = substr($3, 16);
+	     sub(/-.*$/, "", $3);
+
+	     printf "%16s %s R %s %s\n", sect, $1, $3, sectbase[sect];
+	     next;
+	 }' | \
+    sort | \
+    awk 'BEGIN {
+	     print "#include ";
+	     print "#if BITS_PER_LONG == 64";
+	     print "# define PTR .quad";
+	     print "# define ALGN .align 8";
+	     print "#else";
+	     print "# define PTR .long";
+	     print "# define ALGN .align 4";
+	     print "#endif";
+
+	     print "\t.section .rodata, \042a\042";
+	     print "";
+
+	     print ".globl dtrace_sdt_probes";
+	     print "\tALGN";
+	     print "dtrace_sdt_probes:";
+	 }
+
+	 # Remember the most recent function name for the records that follow.
+	 / F / {
+	     fun = $4;
+	     next;
+	 }
+
+	 / R / {
+	     print "\tPTR\t0x" $2;
+	     print "\tPTR\t0x" $5;
+	     print "\tPTR\t" length($4);
+	     print "\tPTR\t" length(fun);
+	     print "\t.asciz\t\042" $4 "\042";
+	     print "\t.asciz\t\042" fun "\042";
+	     print "\tALGN";
+
+	     probec++;
+	 }
+
+	 END {
+	     print "";
+	     print ".globl dtrace_sdt_nprobes";
+	     print "\tALGN";
+	     print "dtrace_sdt_nprobes:";
+	     # probec is unset when no probe was seen; adding 0 makes awk
+	     # print 0 instead of an empty operand.
+	     print "\tPTR\t" (probec + 0);
+	 }'