www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
dtrace: is-enabled probes for SDT
author Nick Alcock <nick.alcock@oracle.com>
Wed, 23 Nov 2016 17:50:09 +0000 (17:50 +0000)
committer Kris Van Hees <kris.van.hees@oracle.com>
Fri, 23 Dec 2016 22:33:01 +0000 (17:33 -0500)
"Is-enabled probes" are a conditional, long supported in userspace
probing, which lets you avoid doing expensive data-collection operations
needed only by DTrace probes unless those probes are active.

e.g. (an example using the core DTRACE_PROBE / DTRACE_PROBE_ENABLED macros,
rather than the DTRACE_providername macros used in practice, because
no such macros have been added to the kernel yet):

if (DTRACE_PROBE_ENABLED(__io_wait__start)) {
    /* stuff done only when io:::wait-start is enabled */
}

As with normal SDT probes, the DTRACE_PROBE_ENABLED() macro compiles to a
stub function call (named like __dtrace_isenabled_*()) which is replaced
at bootup/module load time with an architecture-dependent instruction
sequence analogous to a function that always returns false, though no
function call is generated.  At probe enabling time, this is replaced
with a trap into dtrace just like normal dtrace probes, incurring a
performance hit, but only when the probe is active.

The probe name used in the various ELF sections that track SDT
probes begins with a ? character to help the module distinguish
is-enabled probes from normal probes: this is internal to the DTrace
implementation and is otherwise invisible.

(Thanks to Kris Van Hees for initial work on this.)

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Acked-by: Kris Van Hees <kris.van.hees@oracle.com>
Orabug: 25143173

arch/sparc/include/asm/dtrace_sdt_arch.h [new file with mode: 0644]
arch/sparc/kernel/dtrace_sdt.c
arch/x86/include/asm/dtrace_arch.h
arch/x86/include/asm/dtrace_sdt_arch.h [new file with mode: 0644]
arch/x86/kernel/dtrace_sdt.c
include/linux/dtrace_sdt.h
include/linux/sdt.h
kernel/dtrace/dtrace_sdt_core.c
scripts/dtrace_sdt.sh
scripts/mod/modpost.c

diff --git a/arch/sparc/include/asm/dtrace_sdt_arch.h b/arch/sparc/include/asm/dtrace_sdt_arch.h
new file mode 100644 (file)
index 0000000..00267cd
--- /dev/null
@@ -0,0 +1,9 @@
+/* Copyright (C) 2016 Oracle, Inc. */
+
+#ifndef _SPARC_DTRACE_SDT_ARCH_H
+#define _SPARC_DTRACE_SDT_ARCH_H
+
+#define __DTRACE_SDT_ISENABLED_PROTO long
+#define __DTRACE_SDT_ISENABLED_ARGS 0
+
+#endif /* _SPARC_DTRACE_SDT_ARCH_H */
index 1ffcbfb71dea9bc23d7abe14d2bee43f6e54e8ae..a9293f207dda35fb76534f9cb69b1c225df6a120 100644 (file)
@@ -15,7 +15,9 @@
 #include <asm/cacheflush.h>
 #include <asm/dtrace_arch.h>
 
-void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt)
+void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs,
+                                          int * __always_unused is_enabled,
+                                          int cnt)
 {
        int             i;
        asm_instr_t     *addr;
index 12165fae33c3f1ff123009923e0c09ea471f3cc6..70317781e0cbca549bd9142105860e611367f132 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2014 Oracle, Inc. */
+/* Copyright (C) 2013-2016 Oracle, Inc. */
 
 #ifndef _X86_DTRACE_ARCH_H
 #define _X86_DTRACE_ARCH_H
@@ -9,4 +9,6 @@ typedef uint8_t         asm_instr_t;
 #define DTRACE_PDATA_EXTRA      0
 #define DTRACE_PDATA_MAXSIZE    (DTRACE_PDATA_SIZE + DTRACE_PDATA_EXTRA)
 
+#define ASM_CALL_SIZE          5
+
 #endif /* _X86_DTRACE_ARCH_H */
diff --git a/arch/x86/include/asm/dtrace_sdt_arch.h b/arch/x86/include/asm/dtrace_sdt_arch.h
new file mode 100644 (file)
index 0000000..692ab72
--- /dev/null
@@ -0,0 +1,9 @@
+/* Copyright (C) 2016 Oracle, Inc. */
+
+#ifndef _X86_DTRACE_SDT_ARCH_H
+#define _X86_DTRACE_SDT_ARCH_H
+
+#define __DTRACE_SDT_ISENABLED_PROTO void
+#define __DTRACE_SDT_ISENABLED_ARGS
+
+#endif /* _X86_DTRACE_SDT_ARCH_H */
index 73768b1b91b2447b4e76b04c8d40543fe70a6c3b..f9bc1a89aba651dff08ca509d1bd9bfc731dc34d 100644 (file)
@@ -2,7 +2,7 @@
  * FILE:        dtrace_sdt.c
  * DESCRIPTION: Dynamic Tracing: SDT registration code (arch-specific)
  *
- * Copyright (C) 2010-2014 Oracle Corporation
+ * Copyright (C) 2010-2016 Oracle Corporation
  */
 
 #include <linux/kernel.h>
 #include <asm/nops.h>
 #include <asm/dtrace_arch.h>
 
-#define        SDT_NOP_SIZE    5
+static uint8_t nops[ASM_CALL_SIZE];
+static uint8_t movs[ASM_CALL_SIZE];
 
-uint8_t                        nops[SDT_NOP_SIZE];
+#define DT_OP_REX_RAX           0x48
+#define DT_OP_XOR_EAX_0         0x33
+#define DT_OP_XOR_EAX_1         0xc0
 
 /* This code is based on apply_alternatives and text_poke_early.  It needs to
  * run before SMP is initialized in order to avoid SMP problems with patching
  * code that might be accessed on another CPU.
  */
-void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt)
+void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs,
+                                          int *is_enabled, int cnt)
 {
        int                     i;
        asm_instr_t             *addr;
@@ -35,7 +39,10 @@ void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **addrs, int cnt)
 
        for (i = 0; i < cnt; i++) {
                addr = addrs[i];
-               memcpy(addr, nops, sizeof(nops));
+               if (likely(!is_enabled[i]))
+                       memcpy(addr, nops, sizeof(nops));
+               else
+                       memcpy(addr, movs, sizeof(movs));
        }
 
        sync_core();
@@ -54,5 +61,13 @@ void dtrace_sdt_init_arch(void)
         * sequence, we play it pretty safe.
         */
        add_nops(nops, 1);
-       add_nops(nops + 1, SDT_NOP_SIZE - 1);
+       add_nops(nops + 1, ASM_CALL_SIZE - 1);
+
+       /*
+        * Is-enabled probe points contain an "xor %rax, %rax" when disabled.
+        */
+       movs[0] = DT_OP_REX_RAX;
+       movs[1] = DT_OP_XOR_EAX_0;
+       movs[2] = DT_OP_XOR_EAX_1;
+       add_nops(movs + 3, ASM_CALL_SIZE - 3);
 }
index 0155b4216929d7b4ab1c06b18817ae6532b9f57b..57931122ee6a885f5d93db65e19abf4000444957 100644 (file)
@@ -30,7 +30,7 @@ extern void dtrace_sdt_exit(void);
 /*
  * Functions to be defined in arch/<arch>/kernel/dtrace_sdt.c
  */
-extern void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **, int);
+extern void __init_or_module dtrace_sdt_nop_multi(asm_instr_t **, int *, int);
 extern void dtrace_sdt_init_arch(void);
 
 #endif /* _DTRACE_SDT_H_ */
index 90a6ec003dcf2c02b3b546c8daaab87477c7a4b7..51b9405d69e3609a65dabeade04aaaad0b14f525 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_DTRACE
 
+#include <asm/dtrace_sdt_arch.h>
 #include <linux/stringify.h>
 
 #define        DTRACE_PROBE(name, ...) {                               \
                     ".popsection\n");                                  \
 }
 
+#define        DTRACE_PROBE_ENABLED(name)      unlikely(({                     \
+       extern int __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_PROTO); \
+       __dtrace_isenabled_##name(__DTRACE_SDT_ISENABLED_ARGS);         \
+}))
+
 #ifdef CONFIG_DT_SDT_PERF
 
 #define __DTRACE_UINTPTR_CAST_EACH(x) ({                               \
index 992bb03001d783a9d54c2f7bd6e3732d41061658..a9eb5274d10da9579fab643abe8b026d9960f529 100644 (file)
@@ -142,6 +142,12 @@ void dtrace_sdt_stash_args(const char *module_name,
                u32 h = jhash(sdpd[i].sdpd_name, l, 0);
                h = h % hashsize;
 
+               /*
+                * Is-enabled probes have no arg string.
+                */
+               if (sdpd[i].sdpd_name[0] == '?')
+                       continue;
+
                while (args_by_name[h].pnhe_name != NULL &&
                       strcmp(sdpd[i].sdpd_name,
                              args_by_name[h].pnhe_name) != 0) {
@@ -177,6 +183,7 @@ void dtrace_sdt_register(struct module *mp)
        void                    *nextpi;
        sdt_probedesc_t         *sdps;
        asm_instr_t             **addrs;
+       int                     *is_enabled;
        void                    *args;
        size_t                  args_len;
 
@@ -208,27 +215,38 @@ void dtrace_sdt_register(struct module *mp)
 
        /*
         * Create a list of addresses (SDT probe locations) that need to be
-        * patched with a NOP instruction (or instruction sequence).
+        * patched with a NOP instruction (or instruction sequence), and another
+        * array indicating whether each probe needs patching with an
+        * arch-dependent false return instead.
         */
        addrs = (asm_instr_t **)vmalloc(dtrace_sdt_nprobes *
                                        sizeof(asm_instr_t *));
-       if (addrs == NULL) {
-               pr_warning("%s: cannot allocate SDT probe address list\n",
-                          __func__);
+       is_enabled = (int *)vmalloc(dtrace_sdt_nprobes * sizeof(int));
+       if ((addrs == NULL) || (is_enabled == NULL)) {
+               pr_warning("%s: cannot allocate SDT probe address/is-enabled "
+                          "lists\n", __func__);
                vfree(sdps);
+               vfree(addrs);
+               vfree(is_enabled);
                return;
        }
 
        for (i = cnt = 0; cnt < dtrace_sdt_nprobes; i++) {
                char    *func = pi->name + pi->name_len + 1;
 
+               is_enabled[cnt] = (pi->name[0] == '?');
+
                if (sdt_probe_set(&sdps[cnt], pi->name, func, pi->addr,
                                  &addrs[cnt],
                                  cnt > 0 ? &sdps[cnt - 1] : NULL))
                        pr_warning("%s: failed to add SDT probe %s\n",
                                   __func__, pi->name);
-               else
+               else {
+                       if (is_enabled[cnt])
+                               printk(KERN_INFO "is-enabled probe at %p\n", addrs[cnt]);
+
                        cnt++;
+               }
 
                nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t)
                        + roundup(pi->name_len + 1 +
@@ -239,7 +257,7 @@ void dtrace_sdt_register(struct module *mp)
        mp->sdt_probes = sdps;
        mp->sdt_probec = cnt;
 
-       dtrace_sdt_nop_multi(addrs, cnt);
+       dtrace_sdt_nop_multi(addrs, is_enabled, cnt);
 
        /*
         * Allocate space for the array of arg types, and copy it in from the
@@ -264,6 +282,7 @@ void dtrace_sdt_register(struct module *mp)
 
 end:
        vfree(addrs);
+       vfree(is_enabled);
 }
 
 static int __init nosdt(char *str)
@@ -282,6 +301,7 @@ void dtrace_sdt_register_module(struct module *mp,
        int                     i, cnt;
        sdt_probedesc_t         *sdp;
        asm_instr_t             **addrs;
+       int                     *is_enabled;
 
        if (mp->sdt_probec == 0 || mp->sdt_probes == NULL)
                return;
@@ -292,23 +312,29 @@ void dtrace_sdt_register_module(struct module *mp,
         */
        addrs = (asm_instr_t **)vmalloc(mp->sdt_probec *
                                        sizeof(asm_instr_t *));
-       if (addrs == NULL) {
+       is_enabled = (int *)vmalloc(mp->sdt_probec * sizeof(int));
+       if ((addrs == NULL) || (is_enabled == NULL)) {
                pr_warning("%s: cannot allocate SDT probe address list (%s)\n",
                           __func__, mp->name);
+               vfree(addrs);
+               vfree(is_enabled);
                return;
        }
 
        for (i = cnt = 0, sdp = mp->sdt_probes; i < mp->sdt_probec;
-            i++, sdp++)
-               addrs[cnt++] = (asm_instr_t *)sdp->sdpd_offset;
+            i++, sdp++) {
+               addrs[cnt] = (asm_instr_t *)sdp->sdpd_offset;
+               is_enabled[cnt++] = (sdp->sdpd_name[0] == '?');
+       }
 
-       dtrace_sdt_nop_multi(addrs, cnt);
+       dtrace_sdt_nop_multi(addrs, is_enabled, cnt);
 
        dtrace_sdt_stash_args(mp->name, mp->sdt_probes, mp->sdt_probec,
                              sdt_names_addr, sdt_names_len,
                              sdt_args_addr, sdt_args_len);
 
        vfree(addrs);
+       vfree(is_enabled);
 }
 
 void dtrace_sdt_init(void)
index 188bc48c34be539ed6571df1c5efe2f2d450ac2e..da314b1740232055911cf8ab3ab479688ffbbbed 100755 (executable)
@@ -39,8 +39,7 @@ if [ -z "$ofn" ]; then
 fi
 
 if [ "$opr" = "sdtstub" ]; then
-    ${NM} -u $* | \
-       grep __dtrace_probe_ | sort | uniq | \
+    ${NM} -u $* | grep -E '__dtrace_(probe|isenabled)_' | sort | uniq | \
        ${AWK} -v arch=${ARCH} \
               '{
                    printf("\t.globl %s\n\t.type %s,@function\n%s:\n",
@@ -73,7 +72,8 @@ if [ "$tok" = "kmod" ]; then
 
     # Output all function symbols in the symbol table of the object file.
     # Subsequently, output all relocation records for DTrace SDT probes.  The
-    # probes are identified by their __dtrace_probe_ prefix.
+    # probes are identified by either a __dtrace_probe_ or __dtrace_isenabled_
+    # prefix.
     #
     # We sort the output primarily based on the section, using the value (or
     # offset) as secondary sort criterion.  The overall result is that the
@@ -114,6 +114,13 @@ if [ "$tok" = "kmod" ]; then
             next;
         }
 
+        sect && /__dtrace_isenabled_/ {
+            $3 = substr($3, 20);
+            sub(/[\-+].*$/, "", $3);
+            print sect " " $1 " R ?" $3;
+            next;
+        }
+
         /file format/ {
             next;
         }
@@ -244,7 +251,8 @@ else
     # Finally, each relocation record from the .text section that relates to
     # SDT probes are written to the output stream with its address, a token
     # identifying it as a relocation, and its name.  Probes are identified in
-    # the relocation records as symbols with __dtrace_probe_ as prefix.
+    # the relocation records as symbols with either a __dtrace_probe_ or
+    # __dtrace_isenabled_ prefix.
     #
     # We sort the output based on the address, which guarantees that the output
     # will be a list of functions, and each function record will be followed 
@@ -307,6 +315,13 @@ else
             next;
         }
 
+        in_reloc && /__dtrace_isenabled_/ {
+            $3 = substr($3, 20);
+            sub(/[\-+].*$/, "", $3);
+            print addl(base, $1) " R ?" $3;
+            next;
+        }
+
         / F / {
             if ($6 == ".hidden")
                 print $1 " G " $7;
index 8ac053304ad3dc89971d83f956934c06cf5f707d..03d798228a458c1e236277b8b3e8d2e48ea093d9 100644 (file)
@@ -2159,7 +2159,8 @@ static int add_versions(struct buffer *b, struct module *mod)
                exp = find_symbol(s->name);
                if (!exp || exp->module == mod) {
                        if (have_vmlinux && !s->weak &&
-                           !strstarts(s->name, "__dtrace_probe_")) {
+                           !strstarts(s->name, "__dtrace_probe_") &&
+                           !strstarts(s->name, "__dtrace_isenabled_")) {
                                if (warn_unresolved) {
                                        warn("\"%s\" [%s.ko] undefined!\n",
                                             s->name, mod->name);