From ec8c03da4f00339c460381fe2851e21d9ef85260 Mon Sep 17 00:00:00 2001
From: Nick Alcock
Date: Wed, 14 Sep 2016 02:16:46 +0100
Subject: [PATCH] dtrace: import the sdt type information into per-sdt_probedesc state

Now that we have type and probe names in ELF sections, we have to do
something with them (they are initdata, so in the kernel, if not in
modules, they will be thrown away after initialization).

We marshal each arg string in turn, without parsing it, into a new field
in the sdt_probedesc_t, the per-probe structure used to communicate
information about SDT probes to the SDT kernel module. Before now, this
has been a simple task at runtime: the array is constructed by
dtrace_sdt.sh and just needs to be reformulated (for the core kernel) or
dropped unchanged into place (for modules). But this extra field is
derived from C macro expansion and cannot be determined by dtrace_sdt.sh,
so the script leaves it 0 and we fill it in at boot / module load time.

The fillout process is a little baroque to avoid slowing down the boot
too much, since we have to associate one array with another and want to
avoid linear scans. We build a hashtable mapping from probe name to args
string (i.e. from _dtrace_sdt_names to _dtrace_sdt_args entry), in the
process verifying that if the probe appears multiple times, all its arg
strings are the same. We can then easily use this hash table to point the
extra argument types field in the sdt_probedesc_t array at the args
string too.

One inefficiency remains: there can be *lots* of duplicate args strings,
and currently we make no attempt to deduplicate them. (We discard the
core kernel's array of probe names, but the array of arg strings
obviously must be preserved. We could deduplicate it, but do not.) This
is probably wasting no more than a few kilobytes at present, so a
deduplicator is not worth writing, but as the number of probes
(particularly perf probes) increases, one may become worth writing.
The new sdpd_args field into which the type info string is dropped is
protected from the ABI checker: the sdt_probedesc_t is internal to the
kernel and the DTrace module, and the usual ABI guarantees do not apply
to it.

Signed-off-by: Nick Alcock
Acked-by: Kris Van Hees
Orabug: 24661801
---
 include/linux/dtrace_sdt.h      |   4 +-
 include/linux/sdt.h             |   3 +
 kernel/dtrace/dtrace_sdt_core.c | 153 +++++++++++++++++++++++++++++++-
 kernel/module.c                 |  10 ++-
 scripts/dtrace_sdt.sh           |   2 +-
 5 files changed, 167 insertions(+), 5 deletions(-)

diff --git a/include/linux/dtrace_sdt.h b/include/linux/dtrace_sdt.h
index 90ca2f5c74fe9..0155b4216929d 100644
--- a/include/linux/dtrace_sdt.h
+++ b/include/linux/dtrace_sdt.h
@@ -22,7 +22,9 @@ extern void *dtrace_sdt_probes __attribute__((weak));
 
 extern void dtrace_sdt_init(void);
 extern void dtrace_sdt_register(struct module *);
-extern void dtrace_sdt_register_module(struct module *);
+extern void dtrace_sdt_register_module(struct module *,
+				       void *sdt_names_addr, size_t,
+				       void *sdt_args_addr, size_t);
 extern void dtrace_sdt_exit(void);
 
 /*
diff --git a/include/linux/sdt.h b/include/linux/sdt.h
index 42ae116b21c68..ae194e4a466b8 100644
--- a/include/linux/sdt.h
+++ b/include/linux/sdt.h
@@ -308,6 +308,9 @@ extern "C" {
 typedef struct sdt_probedesc {
 	char			*sdpd_name;	/* probe name */
 	char			*sdpd_func;	/* probe function */
+#ifndef __GENKSYMS__
+	char			*sdpd_args;	/* arg types string, or NULL */
+#endif
 	unsigned long		sdpd_offset;	/* offset of call in text */
 	struct sdt_probedesc	*sdpd_next;	/* next static probe */
 } sdt_probedesc_t;
diff --git a/kernel/dtrace/dtrace_sdt_core.c b/kernel/dtrace/dtrace_sdt_core.c
index 6b3d57f7901e6..992bb03001d78 100644
--- a/kernel/dtrace/dtrace_sdt_core.c
+++ b/kernel/dtrace/dtrace_sdt_core.c
@@ -2,7 +2,7 @@
  * FILE:	dtrace_sdt_core.c
  * DESCRIPTION:	Dynamic Tracing: SDT probe point registration
  *
- * Copyright (C) 2010-2014 Oracle Corporation
+ * Copyright (C) 2010-2016 Oracle Corporation
  */
 
 #include
@@ -10,6 +10,7 @@
 #include
#include
 #include
+#include
 #include
 #include
 #include
@@ -19,6 +20,14 @@
 
 const char		*sdt_prefix = "__dtrace_probe_";
 
+/*
+ * Markers of core-kernel sdt_args and sdt_names sections.
+ */
+extern const char __start_dtrace_sdt_args[];
+extern const char __stop_dtrace_sdt_args[];
+extern const char __start_dtrace_sdt_names[];
+extern const char __stop_dtrace_sdt_names[];
+
 static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func,
 			 uintptr_t addr, asm_instr_t **paddr,\
 			 sdt_probedesc_t *prv)
@@ -34,6 +43,7 @@ static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func,
 		return 1;
 	}
 
+	sdp->sdpd_args = NULL;
 	sdp->sdpd_offset = addr;
 	sdp->sdpd_next = NULL;
 
@@ -46,6 +56,115 @@ static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func,
 	return 0;
 }
 
+/*
+ * Point each probe's sdpd_args (NULL until now) at its arg types string.  The
+ * names and args sections are parallel runs of NUL-terminated strings: the Nth
+ * name belongs to the Nth arg string.  The memory pointed to by args_start must
+ * live at least as long as that pointed to by sdpd; names are only read here.
+ */
+void dtrace_sdt_stash_args(const char *module_name,
+			   sdt_probedesc_t *sdpd, size_t nprobes,
+			   const char *names_start, size_t names_len,
+			   const char *args_start, size_t args_len)
+{
+	struct probe_name_hashent_t {
+		const char *pnhe_name;
+		const char *pnhe_args;
+	} *args_by_name;
+	size_t i;
+	const char *namep, *argp;
+	size_t hashsize;
+
+	/*
+	 * We need to find the probes (and there may be many) in the sdpd
+	 * corresponding to the probe with that name in the argtype section.
+	 *
+	 * Build a hashtable mapping from probe name -> args string, ignoring
+	 * duplicate probe names except to check (in debugging mode) that they
+	 * have the same args string as the first.  Then cycle over the sdpd
+	 * looking up each probe in turn and pointing to the same place.
+	 *
+	 * There cannot be more distinct names than nprobes (and are probably
+	 * fewer), so the table never fills and linear probing terminates.
+	 */
+
+	hashsize = nprobes * 4; /* arbitrary expansion factor */
+	args_by_name = vzalloc(hashsize * sizeof(*args_by_name));
+	if (args_by_name == NULL) {
+		pr_warning("%s: cannot allocate hash for sdt args population\n",
+			   __func__);
+		return;
+	}
+
+	namep = names_start;
+	argp = args_start;
+	while ((namep < names_start + names_len) &&
+	       (argp < args_start + args_len)) {
+		/* Hash the name, then probe for it or for a free slot. */
+		size_t l = strlen(namep);
+		u32 h = jhash(namep, l, 0);
+		h = h % hashsize;
+
+		while (args_by_name[h].pnhe_name != NULL &&
+		       strcmp(args_by_name[h].pnhe_name, namep) != 0) {
+			h++;
+			h %= hashsize;
+		}
+
+		if (args_by_name[h].pnhe_name == NULL) {
+			args_by_name[h].pnhe_name = namep;
+			args_by_name[h].pnhe_args = argp;
+		}
+#if defined(CONFIG_DT_DEBUG)
+		else if (strcmp(args_by_name[h].pnhe_args, argp) != 0)
+			printk(KERN_WARNING "%s: multiple "
+			       "distinct arg strings for probe "
+			       "%s found: %s versus %s\n",
+			       module_name, namep,
+			       args_by_name[h].pnhe_args,
+			       argp);
+#endif
+		namep += l + 1;
+		argp += strlen(argp) + 1;
+	}
+
+#if defined(CONFIG_DT_DEBUG)
+	if ((namep < names_start + names_len) || (argp < args_start + args_len))
+		printk(KERN_WARNING "%s: Not all SDT names or args consumed: %td "
+		       "bytes of names and %td of args left over. Some arg types "
+		       "will be mis-assigned.\n", module_name,
+		       (names_start + names_len) - namep,
+		       (args_start + args_len) - argp);
+#endif
+
+	for (i = 0; i < nprobes; i++) {
+		size_t l = strlen(sdpd[i].sdpd_name);
+		u32 h = jhash(sdpd[i].sdpd_name, l, 0);
+		h = h % hashsize;
+
+		while (args_by_name[h].pnhe_name != NULL &&
+		       strcmp(sdpd[i].sdpd_name,
+			      args_by_name[h].pnhe_name) != 0) {
+			h++;
+			h %= hashsize;
+		}
+
+		if (args_by_name[h].pnhe_name == NULL) {
+			/*
+			 * No arg string. Peculiar: report in debugging mode.
+			 */
+#if defined(CONFIG_DT_DEBUG)
+			printk(KERN_WARNING "%s: probe %s has no arg string.\n",
+			       module_name, sdpd[i].sdpd_name);
+#endif
+			continue;
+		}
+
+		sdpd[i].sdpd_args = (char *)args_by_name[h].pnhe_args;
+	}
+	vfree(args_by_name);
+}
+
 /*
  * Register the SDT probes for the core kernel, i.e. SDT probes that reside in
  * vmlinux. For SDT probes in kernel modules, we use dtrace_mod_notifier().
@@ -58,6 +177,8 @@ void dtrace_sdt_register(struct module *mp)
 	void			*nextpi;
 	sdt_probedesc_t		*sdps;
 	asm_instr_t		**addrs;
+	void			*args;
+	size_t			args_len;
 
 	if (mp == NULL) {
 		pr_warning("%s: no module provided - nothing registered\n",
@@ -120,6 +241,28 @@ void dtrace_sdt_register(struct module *mp)
 
 	dtrace_sdt_nop_multi(addrs, cnt);
 
+	/*
+	 * Allocate space for the array of arg types, and copy it in from the
+	 * (discardable) kernel section. We will need to keep it. (The
+	 * identically-ordered array of probe names is not needed after
+	 * initialization.)
+	 */
+	args_len = __stop_dtrace_sdt_args - __start_dtrace_sdt_args;
+	args = vmalloc(args_len);
+	if (args == NULL) {
+		pr_warning("%s: cannot allocate table of SDT arg types\n",
+			   __func__);
+		goto end;
+	}
+
+	memcpy(args, __start_dtrace_sdt_args, args_len);
+
+	dtrace_sdt_stash_args("vmlinux", sdps, cnt,
+			      __start_dtrace_sdt_names,
+			      (__stop_dtrace_sdt_names - __start_dtrace_sdt_names),
+			      args, args_len);
+
+end:
 	vfree(addrs);
 }
 
@@ -132,7 +275,9 @@ static int __init nosdt(char *str)
 
 early_param("nosdt", nosdt);
 
-void dtrace_sdt_register_module(struct module *mp)
+void dtrace_sdt_register_module(struct module *mp,
+				void *sdt_names_addr, size_t sdt_names_len,
+				void *sdt_args_addr, size_t sdt_args_len)
 {
 	int			i, cnt;
 	sdt_probedesc_t		*sdp;
@@ -159,6 +304,10 @@ void dtrace_sdt_register_module(struct module *mp)
 
 	dtrace_sdt_nop_multi(addrs, cnt);
 
+	dtrace_sdt_stash_args(mp->name, mp->sdt_probes, mp->sdt_probec,
+			      sdt_names_addr, sdt_names_len,
+			      sdt_args_addr, sdt_args_len);
+
 	vfree(addrs);
 }
 
diff --git a/kernel/module.c
b/kernel/module.c index f696197b5174d..37e681eb7a43d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3203,7 +3203,15 @@ static int complete_formation(struct module *mod, struct load_info *info) int err; #ifdef CONFIG_DTRACE - dtrace_sdt_register_module(mod); + void *sdt_args, *sdt_names; + unsigned int sdt_args_len, sdt_names_len; + + sdt_names = section_objs(info, "_dtrace_sdt_names", 1, + &sdt_names_len); + sdt_args = section_objs(info, "_dtrace_sdt_args", 1, + &sdt_args_len); + dtrace_sdt_register_module(mod, sdt_names, sdt_names_len, + sdt_args, sdt_args_len); #endif mutex_lock(&module_mutex); diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh index bcff44aa0feb9..188bc48c34be5 100755 --- a/scripts/dtrace_sdt.sh +++ b/scripts/dtrace_sdt.sh @@ -208,7 +208,7 @@ if [ "$tok" = "kmod" ]; then addr = subl(addr, 1); protom[alias] = 1; - probev[probec] = sprintf(" {\042%s\042, \042%s\042 /* %s */, (uintptr_t)%s+0x%s },", $4, fname, $1, alias, addr); + probev[probec] = sprintf(" {\042%s\042, \042%s\042 /* %s */, 0 /* sdt_args string */, (uintptr_t)%s+0x%s },", $4, fname, $1, alias, addr); probec++; next; -- 2.50.1