From 560ecfb75f5666af865825dc5b1bf6fbbfdea155 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 23 Nov 2016 17:50:09 +0000 Subject: [PATCH] dtrace: is-enabled probes for SDT "Is-enabled probes" are a conditional, long supported in userspace probing, which lets you avoid doing expensive data-collection operations needed only by DTrace probes unless those probes are active. e.g. (an example using the core DTRACE_PROBE / DTRACE_PROBE_ENABLED macros, rather than the DTRACE_providername macros used in practice, because no such macros have been added to the kernel yet): if (DTRACE_PROBE_ENABLED(__io_wait__start)) { /* stuff done only when io:::wait-start is enabled */ } As with normal SDT probes, the DTRACE_PROBE_ENABLED() macro compiles to a stub function call (named like __dtrace_isenabled_*()) which is replaced at bootup/module load time with an architecture-dependent instruction sequence analogous to a function that always returns false, though no function call is generated. At probe enabling time, this is replaced with a trap into dtrace just like normal dtrace probes, incurring a performance hit, but only when the probe is active. The probe name used in the various ELF sections that track SDT probes begins with a ? character to help the module distinguish is-enabled probes from normal probes: this is internal to the DTrace implementation and is otherwise invisible. (Thanks to Kris Van Hees for initial work on this.) 
Signed-off-by: Nick Alcock Acked-by: Kris Van Hees Orabug: 25143173 --- arch/sparc/include/asm/dtrace_sdt_arch.h | 9 +++++ arch/sparc/kernel/dtrace_sdt.c | 4 ++- arch/x86/include/asm/dtrace_arch.h | 4 ++- arch/x86/include/asm/dtrace_sdt_arch.h | 9 +++++ arch/x86/kernel/dtrace_sdt.c | 27 ++++++++++---- include/linux/dtrace_sdt.h | 2 +- include/linux/sdt.h | 6 ++++ kernel/dtrace/dtrace_sdt_core.c | 46 ++++++++++++++++++------ scripts/dtrace_sdt.sh | 23 +++++++++--- scripts/mod/modpost.c | 3 +- 10 files changed, 109 insertions(+), 24 deletions(-) create mode 100644 arch/sparc/include/asm/dtrace_sdt_arch.h create mode 100644 arch/x86/include/asm/dtrace_sdt_arch.h diff --git a/arch/sparc/include/asm/dtrace_sdt_arch.h b/arch/sparc/include/asm/dtrace_sdt_arch.h new file mode 100644 index 0000000000000..00267cda504f8 --- /dev/null +++ b/arch/sparc/include/asm/dtrace_sdt_arch.h @@ -0,0 +1,9 @@ +/* Copyright (C) 2016 Oracle, Inc. */ + +#ifndef _SPARC_DTRACE_SDT_ARCH_H +#define _SPARC_DTRACE_SDT_ARCH_H + +#define __DTRACE_SDT_ISENABLED_PROTO long +#define __DTRACE_SDT_ISENABLED_ARGS 0 + +#endif /* _SPARC_DTRACE_SDT_ARCH_H */ diff --git a/arch/sparc/kernel/dtrace_sdt.c b/arch/sparc/kernel/dtrace_sdt.c index 1ffcbfb71dea9..a9293f207dda3 100644 --- a/arch/sparc/kernel/dtrace_sdt.c +++ b/arch/sparc/kernel/dtrace_sdt.c @@ -15,7 +15,9 @@ #include #include -void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt) +void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, + int * __always_unused is_enabled, + int cnt) { int i; asm_instr_t *addr; diff --git a/arch/x86/include/asm/dtrace_arch.h b/arch/x86/include/asm/dtrace_arch.h index 12165fae33c3f..70317781e0cbc 100644 --- a/arch/x86/include/asm/dtrace_arch.h +++ b/arch/x86/include/asm/dtrace_arch.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2014 Oracle, Inc. */ +/* Copyright (C) 2013-2016 Oracle, Inc. 
*/ #ifndef _X86_DTRACE_ARCH_H #define _X86_DTRACE_ARCH_H @@ -9,4 +9,6 @@ typedef uint8_t asm_instr_t; #define DTRACE_PDATA_EXTRA 0 #define DTRACE_PDATA_MAXSIZE (DTRACE_PDATA_SIZE + DTRACE_PDATA_EXTRA) +#define ASM_CALL_SIZE 5 + #endif /* _X86_DTRACE_ARCH_H */ diff --git a/arch/x86/include/asm/dtrace_sdt_arch.h b/arch/x86/include/asm/dtrace_sdt_arch.h new file mode 100644 index 0000000000000..692ab72783690 --- /dev/null +++ b/arch/x86/include/asm/dtrace_sdt_arch.h @@ -0,0 +1,9 @@ +/* Copyright (C) 2016 Oracle, Inc. */ + +#ifndef _X86_DTRACE_SDT_ARCH_H +#define _X86_DTRACE_SDT_ARCH_H + +#define __DTRACE_SDT_ISENABLED_PROTO void +#define __DTRACE_SDT_ISENABLED_ARGS + +#endif /* _X86_DTRACE_SDT_ARCH_H */ diff --git a/arch/x86/kernel/dtrace_sdt.c b/arch/x86/kernel/dtrace_sdt.c index 73768b1b91b24..f9bc1a89aba65 100644 --- a/arch/x86/kernel/dtrace_sdt.c +++ b/arch/x86/kernel/dtrace_sdt.c @@ -2,7 +2,7 @@ * FILE: dtrace_sdt.c * DESCRIPTION: Dynamic Tracing: SDT registration code (arch-specific) * - * Copyright (C) 2010-2014 Oracle Corporation + * Copyright (C) 2010-2016 Oracle Corporation */ #include @@ -16,15 +16,19 @@ #include #include -#define SDT_NOP_SIZE 5 +static uint8_t nops[ASM_CALL_SIZE]; +static uint8_t movs[ASM_CALL_SIZE]; -uint8_t nops[SDT_NOP_SIZE]; +#define DT_OP_REX_RAX 0x48 +#define DT_OP_XOR_EAX_0 0x33 +#define DT_OP_XOR_EAX_1 0xc0 /* This code is based on apply_alternatives and text_poke_early. It needs to * run before SMP is initialized in order to avoid SMP problems with patching * code that might be accessed on another CPU. 
*/ -void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt) +void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, + int *is_enabled, int cnt) { int i; asm_instr_t *addr; @@ -35,7 +39,10 @@ void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt) for (i = 0; i < cnt; i++) { addr = addrs[i]; - memcpy(addr, nops, sizeof(nops)); + if (likely(!is_enabled[i])) + memcpy(addr, nops, sizeof(nops)); + else + memcpy(addr, movs, sizeof(movs)); } sync_core(); @@ -54,5 +61,13 @@ void dtrace_sdt_init_arch(void) * sequence, we play it pretty safe. */ add_nops(nops, 1); - add_nops(nops + 1, SDT_NOP_SIZE - 1); + add_nops(nops + 1, ASM_CALL_SIZE - 1); + + /* + * Is-enabled probe points contain an "xor %rax, %rax" when disabled. + */ + movs[0] = DT_OP_REX_RAX; + movs[1] = DT_OP_XOR_EAX_0; + movs[2] = DT_OP_XOR_EAX_1; + add_nops(movs + 3, ASM_CALL_SIZE - 3); } diff --git a/include/linux/dtrace_sdt.h b/include/linux/dtrace_sdt.h index 0155b4216929d..57931122ee6a8 100644 --- a/include/linux/dtrace_sdt.h +++ b/include/linux/dtrace_sdt.h @@ -30,7 +30,7 @@ extern void dtrace_sdt_exit(void); /* * Functions to be defined in arch//kernel/dtrace_sdt.c */ -extern void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **, int); +extern void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **, int *, int); extern void dtrace_sdt_init_arch(void); #endif /* _DTRACE_SDT_H_ */ diff --git a/include/linux/sdt.h b/include/linux/sdt.h index 90a6ec003dcf2..51b9405d69e36 100644 --- a/include/linux/sdt.h +++ b/include/linux/sdt.h @@ -7,6 +7,7 @@ #ifdef CONFIG_DTRACE +#include #include #define DTRACE_PROBE(name, ...) 
{ \ @@ -22,6 +23,11 @@ ".popsection\n"); \ } +#define DTRACE_PROBE_ENABLED(name) unlikely(({ \ + extern int __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_PROTO); \ + __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_ARGS); \ +})) + #ifdef CONFIG_DT_SDT_PERF #define __DTRACE_UINTPTR_CAST_EACH(x) ({ \ diff --git a/kernel/dtrace/dtrace_sdt_core.c b/kernel/dtrace/dtrace_sdt_core.c index 992bb03001d78..a9eb5274d10da 100644 --- a/kernel/dtrace/dtrace_sdt_core.c +++ b/kernel/dtrace/dtrace_sdt_core.c @@ -142,6 +142,12 @@ void dtrace_sdt_stash_args(const char *module_name, u32 h = jhash(sdpd[i].sdpd_name, l, 0); h = h % hashsize; + /* + * Is-enabled probes have no arg string. + */ + if (sdpd[i].sdpd_name[0] == '?') + continue; + while (args_by_name[h].pnhe_name != NULL && strcmp(sdpd[i].sdpd_name, args_by_name[h].pnhe_name) != 0) { @@ -177,6 +183,7 @@ void dtrace_sdt_register(struct module *mp) void *nextpi; sdt_probedesc_t *sdps; asm_instr_t **addrs; + int *is_enabled; void *args; size_t args_len; @@ -208,27 +215,38 @@ void dtrace_sdt_register(struct module *mp) /* * Create a list of addresses (SDT probe locations) that need to be - * patched with a NOP instruction (or instruction sequence). + * patched with a NOP instruction (or instruction sequence), and another + * array indicating whether each probe needs patching with an + * arch-dependent false return instead. 
*/ addrs = (asm_instr_t **)vmalloc(dtrace_sdt_nprobes * sizeof(asm_instr_t *)); - if (addrs == NULL) { - pr_warning("%s: cannot allocate SDT probe address list\n", - __func__); + is_enabled = (int *)vmalloc(dtrace_sdt_nprobes * sizeof(int)); + if ((addrs == NULL) || (is_enabled == NULL)) { + pr_warning("%s: cannot allocate SDT probe address/is-enabled " + "lists\n", __func__); vfree(sdps); + vfree(addrs); + vfree(is_enabled); return; } for (i = cnt = 0; cnt < dtrace_sdt_nprobes; i++) { char *func = pi->name + pi->name_len + 1; + is_enabled[cnt] = (pi->name[0] == '?'); + if (sdt_probe_set(&sdps[cnt], pi->name, func, pi->addr, &addrs[cnt], cnt > 0 ? &sdps[cnt - 1] : NULL)) pr_warning("%s: failed to add SDT probe %s\n", __func__, pi->name); - else + else { + if (is_enabled[cnt]) + printk(KERN_INFO "is-enabled probe at %p\n", addrs[cnt]); + cnt++; + } nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t) + roundup(pi->name_len + 1 + @@ -239,7 +257,7 @@ void dtrace_sdt_register(struct module *mp) mp->sdt_probes = sdps; mp->sdt_probec = cnt; - dtrace_sdt_nop_multi(addrs, cnt); + dtrace_sdt_nop_multi(addrs, is_enabled, cnt); /* * Allocate space for the array of arg types, and copy it in from the @@ -264,6 +282,7 @@ void dtrace_sdt_register(struct module *mp) end: vfree(addrs); + vfree(is_enabled); } static int __init nosdt(char *str) @@ -282,6 +301,7 @@ void dtrace_sdt_register_module(struct module *mp, int i, cnt; sdt_probedesc_t *sdp; asm_instr_t **addrs; + int *is_enabled; if (mp->sdt_probec == 0 || mp->sdt_probes == NULL) return; @@ -292,23 +312,29 @@ void dtrace_sdt_register_module(struct module *mp, */ addrs = (asm_instr_t **)vmalloc(mp->sdt_probec * sizeof(asm_instr_t *)); - if (addrs == NULL) { + is_enabled = (int *)vmalloc(mp->sdt_probec * sizeof(int)); + if ((addrs == NULL) || (is_enabled == NULL)) { pr_warning("%s: cannot allocate SDT probe address list (%s)\n", __func__, mp->name); + vfree(addrs); + vfree(is_enabled); return; } for (i = cnt = 0, sdp = 
mp->sdt_probes; i < mp->sdt_probec; - i++, sdp++) - addrs[cnt++] = (asm_instr_t *)sdp->sdpd_offset; + i++, sdp++) { + addrs[cnt] = (asm_instr_t *)sdp->sdpd_offset; + is_enabled[cnt++] = (sdp->sdpd_name[0] == '?'); + } - dtrace_sdt_nop_multi(addrs, cnt); + dtrace_sdt_nop_multi(addrs, is_enabled, cnt); dtrace_sdt_stash_args(mp->name, mp->sdt_probes, mp->sdt_probec, sdt_names_addr, sdt_names_len, sdt_args_addr, sdt_args_len); vfree(addrs); + vfree(is_enabled); } void dtrace_sdt_init(void) diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh index 188bc48c34be5..da314b1740232 100755 --- a/scripts/dtrace_sdt.sh +++ b/scripts/dtrace_sdt.sh @@ -39,8 +39,7 @@ if [ -z "$ofn" ]; then fi if [ "$opr" = "sdtstub" ]; then - ${NM} -u $* | \ - grep __dtrace_probe_ | sort | uniq | \ + ${NM} -u $* | grep -E '__dtrace_(probe|isenabled)_' | sort | uniq | \ ${AWK} -v arch=${ARCH} \ '{ printf("\t.globl %s\n\t.type %s,@function\n%s:\n", @@ -73,7 +72,8 @@ if [ "$tok" = "kmod" ]; then # Output all function symbols in the symbol table of the object file. # Subsequently, output all relocation records for DTrace SDT probes. The - # probes are identified by their __dtrace_probe_ prefix. + # probes are identified by either a __dtrace_probe_ or __dtrace_isenabled_ + # prefix. # # We sort the output primarily based on the section, using the value (or # offset) as secondary sort criterion The overall result is that the @@ -114,6 +114,13 @@ if [ "$tok" = "kmod" ]; then next; } + sect && /__dtrace_isenabled_/ { + $3 = substr($3, 20); + sub(/[\-+].*$/, "", $3); + print sect " " $1 " R ?" $3; + next; + } + /file format/ { next; } @@ -244,7 +251,8 @@ else # Finally, each relocation record from the .text section that relates to # SDT probes are written to the output stream with its address, a token # identifying it as a relocation, and its name. Probes are identified in - # the relocation records as symbols with __dtrace_probe_ as prefix. 
+ # the relocation records as symbols with either a __dtrace_probe_ or + # __dtrace_isenabled_ prefix. # # We sort the output based on the address, which guarantees that the output # will be a list of functions, and each function record will be followed @@ -307,6 +315,13 @@ else next; } + in_reloc && /__dtrace_isenabled_/ { + $3 = substr($3, 20); + sub(/[\-+].*$/, "", $3); + print addl(base, $1) " R ?" $3; + next; + } + / F / { if ($6 == ".hidden") print $1 " G " $7; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 8ac053304ad3d..03d798228a458 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2159,7 +2159,8 @@ static int add_versions(struct buffer *b, struct module *mod) exp = find_symbol(s->name); if (!exp || exp->module == mod) { if (have_vmlinux && !s->weak && - !strstarts(s->name, "__dtrace_probe_")) { + !strstarts(s->name, "__dtrace_probe_") && + !strstarts(s->name, "__dtrace_isenabled_")) { if (warn_unresolved) { warn("\"%s\" [%s.ko] undefined!\n", s->name, mod->name); -- 2.50.1