#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/sdt.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
current->in_execve = 1;
file = do_open_execat(fd, filename, flags);
+ DTRACE_PROC1(exec, char *, filename->name);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
putname(filename);
if (displaced)
put_files_struct(displaced);
+
+ DTRACE_PROC(exec__success);
return retval;
out:
reset_files_struct(displaced);
out_ret:
putname(filename);
+ DTRACE_PROC1(exec__failure, int, retval);
return retval;
}
#define SCE_RT_SIGRETURN 6
#define SCE_nr_stubs 7
+extern void dtrace_invop_add(int (*func)(struct pt_regs *));
+extern void dtrace_invop_remove(int (*func)(struct pt_regs *));
+
typedef void (*sys_call_ptr_t)(void);
typedef long (*dt_sys_call_t)(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t);
* vmlinux dtrace_probe__ caller reloc info;
* comes from vmlinux_info.S
*/
-extern unsigned long dtrace_relocs_count __attribute__((weak));
-extern void *dtrace_relocs __attribute__((weak));
+typedef uint8_t sdt_instr_t;
-struct reloc_info {
- unsigned long probe_offset;
- unsigned long section_base;
- unsigned long probe_name_len;
- char probe_name[0];
-} __aligned(sizeof(unsigned long));
+extern unsigned long dtrace_sdt_nprobes __attribute__((weak));
+extern void *dtrace_sdt_probes __attribute__((weak));
+
+extern void sdt_probe_enable(sdt_instr_t *);
+extern void sdt_probe_disable(sdt_instr_t *);
+
+/*
+ * One record of the dtrace_sdt_probes table.  Records are variable-length:
+ * the fixed header below is followed by the NUL-terminated probe name and
+ * then the NUL-terminated function name; dtrace_register_builtins() steps to
+ * the next record by rounding the two strings up to a multiple of
+ * BITS_PER_LONG / 8 bytes.
+ */
+typedef struct dtrace_sdt_probeinfo {
+ unsigned long offset;	/* probe offset; combined with base in sdt_probe_resolve() */
+ unsigned long base;	/* base value the offset is relative to */
+ unsigned long name_len;	/* length of name, not counting its NUL */
+ unsigned long func_len;	/* length of the function name, not counting its NUL */
+ char name[0];	/* probe name, then NUL, then function name */
+} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t;
void dtrace_register_builtins(void);
extern const char *sdt_prefix;
typedef struct sdt_probedesc {
- char *sdpd_name; /* name of this probe */
+ char *sdpd_name; /* probe name */
+ char *sdpd_func; /* probe function */
unsigned long sdpd_offset; /* offset of call in text */
struct sdt_probedesc *sdpd_next; /* next static probe */
} sdt_probedesc_t;
ftrace_init();
#if defined(CONFIG_DT_SDT) || defined(CONFIG_DT_SDT_MODULE)
+# if FIXME
dtrace_register_builtins();
+# endif
#endif
/* Do the rest non-__init'ed, we're now alive */
#include <linux/cyclic.h>
#include <linux/dtrace_os.h>
#include <linux/hrtimer.h>
+#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
}
EXPORT_SYMBOL(dtrace_getwalltime);
-/*
- * Very basic implementation of cyclics, merely enough to support dtrace.
- */
+/*---------------------------------------------------------------------------*\
+(* CYCLICS *)
+\*---------------------------------------------------------------------------*/
typedef union cyclic cyclic_t;
union cyclic {
struct {
}
EXPORT_SYMBOL(cyclic_remove);
+/*---------------------------------------------------------------------------*\
+(* STACK TRACES *)
+\*---------------------------------------------------------------------------*/
static int dtrace_stacktrace_stack(void *data, char *name)
{
stacktrace_state_t *st = (stacktrace_state_t *)data;
}
EXPORT_SYMBOL(dtrace_stacktrace);
+/*---------------------------------------------------------------------------*\
+(* INVALID OPCODE HANDLING *)
+\*---------------------------------------------------------------------------*/
+/*
+ * Node in the singly-linked list of registered invalid-opcode handlers.
+ */
+typedef struct dtrace_invop_hdlr {
+ int (*dtih_func)(struct pt_regs *);	/* handler; non-zero return claims the trap */
+ struct dtrace_invop_hdlr *dtih_next;	/* next handler, NULL at end of list */
+} dtrace_invop_hdlr_t;
+
+/* Head of the handler list; dtrace_invop_add() pushes new nodes at the front. */
+static dtrace_invop_hdlr_t *dtrace_invop_hdlrs;
+
+/*
+ * Die-notifier callback that offers invalid-opcode traps to DTrace.
+ *
+ * @nb:   notifier block this callback was registered with (unused)
+ * @val:  die event type; only DIE_TRAP is of interest
+ * @args: struct die_args describing the trap
+ *
+ * Events other than DIE_TRAP with trap number 6 (the x86 invalid-opcode
+ * vector) are passed on untouched.  Otherwise every registered invop handler
+ * is called with the trapping registers until one returns non-zero.
+ *
+ * Returns NOTIFY_STOP when a handler claimed the trap, after advancing the
+ * saved instruction pointer by one byte (presumably to step over the one-byte
+ * trap instruction of an enabled probe -- confirm against the SDT provider).
+ * Returns NOTIFY_DONE otherwise so the remaining notifiers still run.
+ */
+static int dtrace_die_notifier(struct notifier_block *nb, unsigned long val,
+			       void *args)
+{
+	struct die_args		*dargs = args;
+	dtrace_invop_hdlr_t	*hdlr;
+	int			rval = 0;
+
+	if (val != DIE_TRAP || dargs->trapnr != 6)
+		return NOTIFY_DONE;
+
+	/* Diagnostics only: this runs in trap context, so keep it off by default. */
+	pr_debug("dtrace_die_notifier: TRAP %d, IP %lx\n",
+		 dargs->trapnr, dargs->regs->ip);
+
+	for (hdlr = dtrace_invop_hdlrs; hdlr != NULL; hdlr = hdlr->dtih_next) {
+		rval = hdlr->dtih_func(dargs->regs);
+		if (rval != 0)
+			break;
+	}
+
+	if (rval == 0)
+		return NOTIFY_DONE;
+
+	dargs->regs->ip++;
+
+	pr_debug("dtrace_die_notifier: TRAP %d, New IP %lx\n",
+		 dargs->trapnr, dargs->regs->ip);
+
+	/* NOTIFY_STOP is NOTIFY_OK | NOTIFY_STOP_MASK. */
+	return NOTIFY_STOP;
+}
+
+/*
+ * Notifier block for dtrace_die_notifier(); registered by dtrace_invop_add()
+ * and unregistered by dtrace_invop_remove().
+ */
+static struct notifier_block dtrace_die = {
+ .notifier_call = dtrace_die_notifier,
+};
+
+/*
+ * Register an invalid-opcode handler.
+ *
+ * @func: callback invoked from dtrace_die_notifier() with the trapping
+ *        registers; it returns non-zero to claim the trap.
+ *
+ * The handler is pushed at the front of the handler list.  If the node cannot
+ * be allocated the handler is NOT registered; a warning is logged because the
+ * void return type gives the caller no other indication.
+ *
+ * NOTE(review): no locking of the handler list is visible here -- callers
+ * presumably serialise add/remove themselves; confirm.
+ */
+void dtrace_invop_add(int (*func)(struct pt_regs *))
+{
+	dtrace_invop_hdlr_t	*hdlr;
+
+	hdlr = kmalloc(sizeof(*hdlr), GFP_KERNEL);
+	if (hdlr == NULL) {
+		pr_warn("%s: cannot allocate invop handler\n", __func__);
+		return;
+	}
+
+	hdlr->dtih_func = func;
+	hdlr->dtih_next = dtrace_invop_hdlrs;
+	dtrace_invop_hdlrs = hdlr;
+
+	/*
+	 * If this is the first DTrace invalid opcode handler, register the
+	 * die notifier with the kernel notifier core.
+	 */
+	if (hdlr->dtih_next == NULL)
+		register_die_notifier(&dtrace_die);
+}
+EXPORT_SYMBOL(dtrace_invop_add);
+
+/*
+ * Unregister an invalid-opcode handler previously added with
+ * dtrace_invop_add().
+ *
+ * @func: the callback to remove; the first list node whose callback matches
+ *        is unlinked and freed.
+ *
+ * If no node matches, an error is logged and the list is left untouched.
+ * When the last handler is removed the die notifier is unregistered as well.
+ */
+void dtrace_invop_remove(int (*func)(struct pt_regs *))
+{
+	dtrace_invop_hdlr_t	*hdlr = dtrace_invop_hdlrs;
+	dtrace_invop_hdlr_t	*prev = NULL;
+
+	while (hdlr != NULL && hdlr->dtih_func != func) {
+		prev = hdlr;
+		hdlr = hdlr->dtih_next;
+	}
+
+	if (hdlr == NULL) {
+		/* Nothing to unlink; bail out rather than dereference NULL. */
+		pr_err("attempt to remove non-existent invop handler\n");
+		return;
+	}
+
+	if (prev == NULL) {
+		dtrace_invop_hdlrs = hdlr->dtih_next;
+
+		/*
+		 * If there are no invalid opcode handlers left, unregister
+		 * from the kernel notifier core.
+		 */
+		if (dtrace_invop_hdlrs == NULL)
+			unregister_die_notifier(&dtrace_die);
+	} else {
+		prev->dtih_next = hdlr->dtih_next;
+	}
+
+	kfree(hdlr);
+}
+EXPORT_SYMBOL(dtrace_invop_remove);
+
+/*---------------------------------------------------------------------------*\
+(* SYSTEM CALL PROBING SUPPORT *)
+\*---------------------------------------------------------------------------*/
void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t);
#include <asm-generic/bitsperlong.h>
#include <asm-generic/sections.h>
#include <asm/alternative.h>
+#include <asm/nops.h>
-#define SDT_NOP 0x90
+#define SDT_TRAP_INSTR 0xf0
#define SDT_NOP_SIZE 5
const char *sdt_prefix = "__dtrace_probe_";
-static struct module *kernmod; /* for kernel builtins; TBD: temporary ??? */
+struct module *dtrace_kmod;
+EXPORT_SYMBOL(dtrace_kmod);
-static int sdt_reloc_resolve(struct module *mp, char *symname,
+/*
+ * Enable the SDT probe at @addr by overwriting its first byte with
+ * SDT_TRAP_INSTR.
+ *
+ * NOTE(review): text_poke() is called here without taking text_mutex --
+ * confirm what serialisation callers are expected to provide.
+ */
+void sdt_probe_enable(sdt_instr_t *addr)
+{
+	const unsigned char	trap = SDT_TRAP_INSTR;
+
+	text_poke(addr, &trap, sizeof(trap));
+}
+EXPORT_SYMBOL(sdt_probe_enable);
+
+/*
+ * Disable the SDT probe at @addr by overwriting its first byte with the
+ * first byte of ideal_nops[1] (presumably the CPU's preferred one-byte NOP --
+ * confirm against asm/nops.h).
+ *
+ * NOTE(review): text_poke() is called here without taking text_mutex --
+ * confirm what serialisation callers are expected to provide.
+ */
+void sdt_probe_disable(sdt_instr_t *addr)
+{
+	const unsigned char	*nop1 = ideal_nops[1];
+
+	text_poke(addr, nop1, 1);
+}
+EXPORT_SYMBOL(sdt_probe_disable);
+
+static int sdt_probe_resolve(struct module *mp, char *name, char *func,
uintptr_t offset, uintptr_t base, void *nops)
{
sdt_probedesc_t *sdp;
uint8_t *instr;
- /*
- * The "statically defined tracing" (SDT) provider for DTrace uses
- * a mechanism similar to TNF, but somewhat simpler. (Surprise,
- * surprise.) The SDT mechanism works by replacing calls to the
- * undefined routine __dtrace_probe_[name] with nop instructions.
- * The relocations are logged, and SDT itself will later patch the
- * running binary appropriately.
- */
- if (strncmp(symname, sdt_prefix, strlen(sdt_prefix)) != 0)
+ if ((sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL)) == NULL)
return 1;
- symname += strlen(sdt_prefix);
-
- sdp = kmalloc(sizeof(sdt_probedesc_t), GFP_KERNEL);
- if (!sdp)
+ if ((sdp->sdpd_name = kstrdup(name, GFP_KERNEL)) == NULL) {
+ kfree(sdp);
return 1;
+ }
- sdp->sdpd_name = kmalloc(strlen(symname) + 1, GFP_KERNEL);
- if (!sdp->sdpd_name) {
+ if ((sdp->sdpd_func = kstrdup(func, GFP_KERNEL)) == NULL) {
+ kfree(sdp->sdpd_name);
kfree(sdp);
return 1;
}
- memcpy(sdp->sdpd_name, symname, strlen(symname) + 1);
-
- /* FIXME:
- * instr is still relative, not absolute; for some reason,
- * vmlinux_info.S shows absolute addresses but it is not being
- * rebuilt again when needed, so vmlinux_info.o still contains
- * relative addresses.
- * Hack this for now by adding _stext to instr, but this should
- * not be necessary.
- */
+
/* convert relative instr to absolute */
instr = (uint8_t *)((uintptr_t)_text + base + offset - 1);
mp->sdt_probes = sdp;
DPRINTK("sdt_probes -> 0x%p\n", mp->sdt_probes);
- DPRINTK("this probe: instr offset=0x%lx, next ptr=0x%p, probe_name=%s\n",
- sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name);
+ DPRINTK("this: instr offset=0x%lx, next ptr=0x%p, name=%s, func=%s\n",
+ sdp->sdpd_offset, sdp->sdpd_next, sdp->sdpd_name,
+ sdp->sdpd_func);
mutex_lock(&text_mutex);
text_poke(instr, nops, SDT_NOP_SIZE);
+/*
+ * Register the SDT probe points recorded in the dtrace_sdt_probes table.
+ *
+ * Allocates the "vmlinux" pseudo-module (dtrace_kmod) and then walks the
+ * dtrace_sdt_nprobes variable-length records that start at dtrace_sdt_probes,
+ * passing each one to sdt_probe_resolve() together with the NOP sequence to
+ * write at the probe location.  A record that fails to resolve is reported
+ * with a warning and the walk continues with the next record.
+ */
void dtrace_register_builtins(void)
{
- unsigned long cnt;
- struct reloc_info *ri = (struct reloc_info *)&dtrace_relocs;
- void *nextri;
- uint8_t nops[SDT_NOP_SIZE];
+ unsigned long cnt;
+ dtrace_sdt_probeinfo_t *pi =
+ (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes;
+ void *nextpi;
+ uint8_t nops[SDT_NOP_SIZE];
- add_nops(nops, SDT_NOP_SIZE);
+ /*
+ * A little unusual, but potentially necessary. While we could use a
+ * single NOP sequence of length SDT_NOP_SIZE, we need to consider the
+ * fact that when a SDT probe point is enabled, a single invalid opcode
+ * is written on the first byte of this NOP sequence. By using a
+ * sequence of a 1-byte NOP, followed by a (SDT_NOP_SIZE - 1) byte NOP
+ * sequence, we play it pretty safe.
+ */
+ add_nops(nops, 1);
+ add_nops(nops + 1, SDT_NOP_SIZE - 1);
- kernmod = kzalloc(sizeof(struct module), GFP_KERNEL);
- if (!kernmod) {
+ dtrace_kmod = kzalloc(sizeof(struct module), GFP_KERNEL);
+ if (!dtrace_kmod) {
printk(KERN_WARNING
- "%s: cannot allocate kernel builtin module memory\n",
+ "%s: cannot allocate kernel pseudo-module\n",
__func__);
return;
}
- kernmod->state = MODULE_STATE_LIVE;
- strlcpy(kernmod->name, "kernel_builtins", MODULE_NAME_LEN);
+ dtrace_kmod->state = MODULE_STATE_LIVE;
+ strlcpy(dtrace_kmod->name, "vmlinux", MODULE_NAME_LEN);
DPRINTK("%lu SDT relocation entries beg. @0x%p\n",
- dtrace_relocs_count, &dtrace_relocs);
+ dtrace_sdt_nprobes, &dtrace_sdt_probes);
- if (dtrace_relocs_count == 0)
+ if (dtrace_sdt_nprobes == 0)
return;
- for (cnt = 0; cnt < dtrace_relocs_count; cnt++) {
- DPRINTK("SDT relocs [%lu]: "
- "probe_offset=0x%lx, section_base=0x%lx, "
- "name_len=0x%lx, probe_name=%s\n",
- cnt, ri->probe_offset, ri->section_base,
- ri->probe_name_len, ri->probe_name);
- if (sdt_reloc_resolve(kernmod, ri->probe_name,
- ri->probe_offset, ri->section_base,
- nops))
+ for (cnt = 0; cnt < dtrace_sdt_nprobes; cnt++) {
+ /* The function name is stored right after the probe name's NUL. */
+ char *func = pi->name + pi->name_len + 1;
+
+ DPRINTK("SDT probe point [%lu]: "
+ "offset=0x%lx, base=0x%lx, name_len=0x%lx, "
+ "func_len=0x%lx, name=%s, func=%s\n",
+ cnt, pi->offset, pi->base, pi->name_len,
+ pi->func_len, pi->name, func);
+ if (sdt_probe_resolve(dtrace_kmod, pi->name, func,
+ pi->offset, pi->base, nops))
printk(KERN_WARNING "%s: cannot resolve %s\n",
- __func__, ri->probe_name);
+ __func__, pi->name);
- nextri = (void *)ri + sizeof(struct reloc_info)
- + roundup(ri->probe_name_len + 1, BITS_PER_LONG / 8);
- ri = nextri;
- DPRINTK("SDT relocs: next entry at 0x%p\n", ri);
+ /* Skip the header plus both NUL-terminated strings, padded to a long. */
+ nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t)
+ + roundup(pi->name_len + 1 +
+ pi->func_len + 1, BITS_PER_LONG / 8);
+ pi = nextpi;
+ DPRINTK("SDT relocs: next entry at 0x%p\n", pi);
}
}
+EXPORT_SYMBOL(dtrace_register_builtins);
#include <linux/oom.h>
#include <linux/writeback.h>
#include <linux/shm.h>
+#include <linux/sdt.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
+ DTRACE_PROC1(exit, int, code);
+
exit_mm(tsk);
if (group_dead)
#include <linux/aio.h>
#include <linux/compiler.h>
#include <linux/sysctl.h>
+#include <linux/sdt.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
}
put_pid(pid);
+ DTRACE_PROC1(create, struct task_struct *, p);
} else {
nr = PTR_ERR(p);
}
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
ret:
+ DTRACE_PROC2(signal__send, struct task_struct *, t, int, sig);
trace_signal_generate(sig, info, t, group, result);
return ret;
}
if (print_fatal_signals)
print_fatal_signal(ksig->info.si_signo);
proc_coredump_connector(current);
- DTRACE_PROBE4(get_signal_to_deliver,
- int, ksig->info.si_signo,
- struct pt_regs *, regs,
- char *, current->comm,
- int, task_pid_nr(current));
/*
* If it was able to dump core, this kills all
* other threads in the group and synchronizes with
}
spin_unlock_irq(&sighand->siglock);
+ if (signr != 0) {
+ DTRACE_PROC3(signal__handle, int, signr, siginfo_t *,
+ ksig->ka.sa.sa_handler != SIG_DFL ? NULL :
+ &ksig->info, void (*)(void),
+ ksig->ka.sa.sa_handler);
+ }
ksig->sig = signr;
return ksig->sig > 0;
}
--- /dev/null
+#!/bin/sh
+#
+# Generate the assembler source for the SDT probe table.
+#
+# Usage: <this-script> <object-file>
+#
+# Runs objdump over the object file, matches every relocation against a
+# __dtrace_probe_<name> symbol with the most recent function symbol that
+# sorts before it, and writes an assembler file to stdout that defines:
+#
+#   dtrace_sdt_probes   one variable-length record per probe relocation
+#   dtrace_sdt_nprobes  the number of records
+#
+
+# Force the C locale for the whole pipeline.  The second awk pass depends on
+# the order produced by sort(1), and LC_ALL takes precedence over LANG, so
+# both are set and exported.
+LC_ALL=C
+LANG=C
+export LC_ALL LANG
+
+if [ $# -lt 1 ]; then
+	echo "usage: $0 <object-file>" >&2
+	exit 1
+fi
+
+fn="$1"
+
+# Pass 1: emit one line per function symbol ("F") and one line per probe
+# relocation ("R"), each keyed by section name and address so that sort(1)
+# interleaves them.
+objdump -htr "$fn" | \
+	awk '/^Sections:/ {
+		# Skip the column header, then read the section table two
+		# lines at a time (fields line, then flags line) until the
+		# symbol table starts.
+		getline;
+		getline;
+		while ($0 !~ /SYMBOL/) {
+			sect = $2;
+			addr = $6;
+
+			# Only sections flagged CODE get a recorded base.
+			getline;
+			if (/CODE/)
+				sectbase[sect] = addr;
+
+			getline;
+		}
+		next;
+	     }
+
+	     # Function symbol: section, address, "F", name.
+	     / F / {
+		printf "%16s %s F %s\n", $4, $1, $6;
+		next;
+	     }
+
+	     # Start of a relocation list: remember which section it is for.
+	     /^RELOC/ {
+		sub(/^[^\[]+\[/, "");
+		sub(/].*$/, "");
+		sect = $1;
+		next;
+	     }
+
+	     # Probe relocation: strip the 15-character __dtrace_probe_
+	     # prefix and any -addend suffix from the symbol name.
+	     /__dtrace_probe_/ {
+		$3 = substr($3, 16);
+		sub(/-.*$/, "", $3);
+
+		printf "%16s %s R %s %s\n", sect, $1, $3, sectbase[sect];
+		next;
+	     }' | \
+	sort | \
+	awk 'BEGIN {
+		# Start the count at a numeric zero so that an object with
+		# no probes still yields a valid "PTR 0" below.
+		probec = 0;
+
+		print "#include <asm/types.h>";
+		print "#if BITS_PER_LONG == 64";
+		print "# define PTR .quad";
+		print "# define ALGN .align 8";
+		print "#else";
+		print "# define PTR .long";
+		print "# define ALGN .align 4";
+		print "#endif";
+
+		print "\t.section .rodata, \042a\042";
+		print "";
+
+		print ".globl dtrace_sdt_probes";
+		print "\tALGN";
+		print "dtrace_sdt_probes:";
+	     }
+
+	     # Remember the latest function seen; following R lines use it.
+	     / F / {
+		fun = $4;
+		next;
+	     }
+
+	     # One record: offset, base, name length, function length,
+	     # name string, function string, then padding.
+	     / R / {
+		print "\tPTR\t0x" $2;
+		print "\tPTR\t0x" $5;
+		print "\tPTR\t" length($4);
+		print "\tPTR\t" length(fun);
+		print "\t.asciz\t\042" $4 "\042";
+		print "\t.asciz\t\042" fun "\042";
+		print "\tALGN";
+
+		probec++;
+	     }
+
+	     END {
+		print "";
+		print ".globl dtrace_sdt_nprobes";
+		print "\tALGN";
+		print "dtrace_sdt_nprobes:";
+		print "\tPTR\t" probec;
+	     }'