]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
dtrace: add support for probes in sections other than .text
authorKris Van Hees <kris.van.hees@oracle.com>
Sat, 18 Jun 2016 14:19:03 +0000 (10:19 -0400)
committerNick Alcock <nick.alcock@oracle.com>
Mon, 4 Jul 2016 16:14:28 +0000 (17:14 +0100)
This commit adds support for SDT probes in sections other than the
regular .text section, both for the kernel and kernel modules.

The core of the problem is that probe locations (for modules and
for the kernel proper) were stored as an offset relative to the start
of the code section they occur in.  Processing them at load time (or
boot time, for the kernel) resulted in incorrect addresses for any
probes in sections other than .text because the offset was getting
applied to the wrong base address.

I.e. a probe at offset 0x1d0 in section .text.unlikely would be
resolved as
BASE(.text) + 0x1d0
rather than
BASE(.text.unlikely) + 0x1d0

The end result was that (using x86_64 as example) we would be writing
a 5-byte NOP sequence in a location that was really not expecting that,
with very odd and entirely unpredictable results.

Solving this required two distinct mechanisms because modules are
linked in a way where the distinct code sections are retained.  Only
at module load time are they loaded consecutively into a single block
of memory that starts at module_core; worse yet, probes can be
duplicated by the compiler into multiple such sections.  This means
that a simple offset will not do; the only thing we can rely on is the
function name (since symbol names are guaranteed unique, and the
compiler synthesizes new ones not valid in C when it clones and
specializes functions), and its start address.

The question is how to identify the functions.  We can't use the symbol
table index, because the symbol table is rewritten by strip(1) as part
of RPM generation: and we can't directly refer to the names given to
cloned functions because they are not valid in C: even if valid, they
might well be local to the translation unit in any case.  So we run the
.o file through a new tool, scripts/kmodsdt, which identifies all
functions containing DTrace probes by hunting for relocations to the
__dtrace_probe_* probe calls and, if the containing functions are local,
introduces an alias to each named __dta_<function>_<symindex> (the
function name and index are necessary because these functions are local,
so there could be several of them with the same name in different
translation units: the symindex makes the aliases unique).  The function
name is rewritten to ensure it is valid in C by translating . into _.

The address in the sdtinfo then references each function symbol (or
alias) and simply adds the offset from that function symbol to the probe
to it in perfectly normal C code, and the linker then deals with all the
problems involving keeping the addresses valid across stripping, module
loading, etc.

When the module is loaded, the kernel resolves all relocations.  This
means that the sdtinfo data that is compiled into the module will have
the exact address of all probe locations by the time we need it.

The kernel is a different story altogether...

The kernel performs a final link into an executable object, merging
all non-init code sections into a single .text.  This means that for
the kernel, we need to calculate the absolute address of each probe
location based on the knowledge that section merging occurs.

We make use of the multi-step linking process that the kernel uses to
ensure stability of the kallsyms data.  Three steps take place in the
sdtinfo generation process (though they will appear as a single step,
because they are a single pipeline):

  - a list of code sections is extracted from vmlinux.o, and for each
    section we take note of the function with the lowest offset in
    the section

  - temporary kernel image .tmp_vmlinux1 is used to determine the
    absolute load address of each merged section by searching for the
    first function of each respective section, and subtracting its
    offset from its address.

  - using the list of absolute base addresses for each section, the
    list of probes (associated with the function they occur in, and
    the section the function belongs in) gets its absolute addresses
    for the probe locations by adding the probe offset (relative to
    its encapsulating section) to the section base address.

(Note: if two distinct sections in the kernel happen to have functions
       at the lowest offset with identical names, this process cannot
       distinguish between those two sections.  That is a limitation
       of the heuristic but is extremely unlikely.  If it were to
       occur, a fix would require using a more complex comparison to
       determine whether a function in the final kernel image is the
       start of one section or another.)

Orabug: 23344927
Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Acked-by: Nick Alcock <nick.alcock@oracle.com>
kernel/dtrace/dtrace_sdt_core.c
scripts/.gitignore
scripts/Makefile
scripts/dtrace_sdt.sh
scripts/kmodsdt.c [new file with mode: 0644]

index 0a93aac1e032420eb5aef15c56e9ad272ad275db..6b3d57f7901e6a3487f15a92f89099ac69c51faf 100644 (file)
@@ -154,22 +154,8 @@ void dtrace_sdt_register_module(struct module *mp)
        }
 
        for (i = cnt = 0, sdp = mp->sdt_probes; i < mp->sdt_probec;
-            i++, sdp++) {
-               /*
-                * Fix-up the offset to reflect the relocated address of the
-                * probe.  We subtract 1 to put us at the beginning of the call
-                * instruction.  We verify that the offset won't put us beyond
-                * the module core, just to be safe.
-                */
-               sdp->sdpd_offset += (uintptr_t)mp->module_core;
-               if (!within_module_core(sdp->sdpd_offset, mp)) {
-                       pr_warning("%s: SDT probe outside module core %s\n",
-                                  __func__, mp->name);
-                       continue;
-               }
-
+            i++, sdp++)
                addrs[cnt++] = (asm_instr_t *)sdp->sdpd_offset;
-       }
 
        dtrace_sdt_nop_multi(addrs, cnt);
 
index 5ecfe93f2028712afa413dba6f67c72e1ce0930e..dfd7ab4af20931ce2a20b8e92911b52835c80e07 100644 (file)
@@ -10,3 +10,4 @@ recordmcount
 docproc
 sortextable
 asn1_compiler
+kmodsdt
index a9f02e69f6dd5cc954952a834ae4950d28f5a883..ab907e9284bd0cb774ddca8b4f211ecdd7e5e59e 100644 (file)
@@ -7,6 +7,7 @@
 # conmakehash:   Create chartable
 # conmakehash:  Create arrays for initializing the kernel console tables
 # docproc:       Used in Documentation/DocBook
+# kmodsdt:       Post-process module .o files for SDT probes
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
@@ -16,6 +17,7 @@ hostprogs-$(CONFIG_VT)           += conmakehash
 hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
 hostprogs-$(CONFIG_ASN1)        += asn1_compiler
+hostprogs-$(CONFIG_DTRACE)      += kmodsdt
 
 HOSTCFLAGS_sortextable.o = -I$(srctree)/tools/include
 HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include
@@ -32,6 +34,11 @@ HOSTCFLAGS_kallsyms.o := $(shell pkg-config --cflags glib-2.0) -I$(srctree)/scri
 HOSTLOADLIBES_kallsyms := $(shell pkg-config --libs glib-2.0) -ldw
 endif
 
+ifeq ($(CONFIG_DTRACE),y)
+HOSTCFLAGS_kmodsdt.o := -I$(srctree)/include/generated
+HOSTLOADLIBES_kmodsdt := -lelf
+endif
+
 # The following hostprogs-y programs are only build on demand
 hostprogs-y += unifdef docproc
 
index 528121243651d0fa0f1fa2ef3905d671ac6360d7..8215ad632fd12fccd739735d46ccf61ad99cbd62 100755 (executable)
@@ -2,6 +2,20 @@
 
 LANG=C
 
+#
+# Syntax:
+#      dtrace_sdt.sh sdtstub <S-file> <o-file>+
+#              This is used to generate DTrace SDT probe stubs based on one
+#              or more object file(s).  The stubs are written to <S-file>.
+#      dtrace_sdt.sh sdtinfo <c-file> <o-file> kmod
+#              This is used to generate DTrace SDT probe definitions for a
+#              kmod .o file.  The output is written to <c-file>.
+#      dtrace_sdt.sh sdtinfo <S-file> <o-file> <l-file>
+#              This is used to generate DTrace SDT probe definitions for a
+#              kernel object file <o-file> and kernel image file <l-file>.
+#              The output is written to <S-file>.
+#
+
 opr="$1"
 shift
 if [ -z "$opr" ]; then
@@ -28,24 +42,23 @@ if [ "$opr" = "sdtstub" ]; then
     ${NM} -u $* | \
        grep __dtrace_probe_ | sort | uniq | \
        ${AWK} -v arch=${ARCH} \
-               '{
-                    printf("\t.globl %s\n\t.type %s,@function\n%s:\n",
-                           $2, $2, $2);
-                    count++;
-                }
-
-                END {
-                    if (count) {
-                        if (arch == "x86" || arch == "x86_64") {
-                            print "\tret";
-                        } else if (arch == "sparc" || arch == "sparc64") {
-                            print "\tretl";
-                            print "\tnop";
-                        }
-                    } else
-                        exit(1);
-                }' > $tfn
-    exit 0
+              '{
+                   printf("\t.globl %s\n\t.type %s,@function\n%s:\n",
+                          $2, $2, $2);
+                   count++;
+               }
+
+               END {
+                   if (count) {
+                       if (arch == "x86" || arch == "x86_64") {
+                           print "\tret";
+                       } else if (arch == "sparc" || arch == "sparc64") {
+                           print "\tretl";
+                           print "\tnop";
+                       }
+                   }
+               }' > $tfn
+    exit $?
 fi
 
 if [ "$opr" != "sdtinfo" ]; then
@@ -54,113 +67,168 @@ if [ "$opr" != "sdtinfo" ]; then
 fi
 
 (
-    objdump -htr "$ofn" | \
-       awk -v lfn="${lfn}" \
-           '/^Sections:/ {
-                getline;
-                getline;
-                while ($0 !~ /SYMBOL/) {
-                    sect = $2;
-                    addr = $6;
-
-                    getline;
-                    if (/CODE/)
-                        sectbase[sect] = addr;
-
-                    getline;
+    # Only include the first two objdump output runs for the actual kernel.
+    # We do not need them for kernel modules.
+    if [ "x${lfn}" != "x" -a "x${lfn}" != "xkmod" ]; then
+       # Output all functions listed in the symbol table.  Output lines will
+       # all resemble the following:
+       #       <value> <scope> F <section> <size> <name>
+       # Therefore, output lines will contain 6 tokens (see STAGE 1 below).
+       #
+       ${OBJDUMP} -t ${ofn} | \
+           grep ' F '
+
+       # Output all functions listed in the symbol table of the linked kernel
+       # image, i.e. with resolved addresses.  We only output the section
+       # name, value, and symbol name for these functions.  Therefore, output
+       # lines will contain 3 tokens (see STAGE 2 below).
+       #
+       # Note that we output one extra special symbol (__init_begin).  This
+       # one is used to signal the boundary between the init-section code
+       # that gets discarded after system boot, and the general code section
+       # that is used as kernel runtime.  Probes in the init-section will be
+       # ignored (for now).
+       #
+       ${OBJDUMP} -t ${lfn} | \
+           awk '/ F / {
+                    print $4 " " $1 " " $6;
+                    next;
                 }
 
-                sect = 0;
-                next;
-            }
-
-            $3 == "F" {
-                printf "%16s %s F %s\n", $4, $1, $6;
+                $NF == "__init_begin" {
+                    print ". " $1 " " $NF;
+                }' | sort -k1,2
+    else
+       scripts/kmodsdt ${ofn}
+    fi
+
+    # Output all function symbols in the symbol table of the object file.
+    # Subsequently, output all relocation records for DTrace SDT probes.  The
+    # probes are identified by their __dtrace_probe_ prefix.
+    #
+    # We sort the output primarily based on the section, using the value (or
+    # offset) as secondary sort criterion.  The overall result is that the
+    # output will be structured as a list of functions, and for any functions
+    # that contain DTrace SDT probes, relocation records will follow the
+    # function entry they are associated with.
+    #
+    # Relocations are reported by objdump per section, with a header line
+    # documenting the specific section being reported:
+    #  RELOCATION RECORDS FOR [<section>]:
+    # This is followed by a column header line, and a list of relocations.
+    # The relocations are listed with 3 tokens per line:
+    #  <offset> <type> <value>
+    #
+    # Three different types can show up in the output (all with 4 tokens):
+    #    <section> <offset> F <value>
+    #        Function within a section at a specific offset.
+    #        (See STAGE 3a below.)
+    #    <section> <offset> G <value>
+    #        Global alias for a local function within a section at a specific
+    #        offset.  A function can only have one alias, and there cannot be
+    #        an alias without its respective function.
+    #        (See STAGE 3a below.)
+    #    <section> <offset> R <value>
+    #        Relocation within a section at a specific offset.
+    #        (See STAGE 3b below.)
+    #
+    ${OBJDUMP} -tr ${ofn} | \
+    awk '/^RELOC/ {
+            sect = substr($4, 2, length($4) - 3);
+            next;
+        }
 
-                if (!lfn || lfn == "kmod")
-                    printf "%s t %s\n", $1, $6;
+        sect && /__dtrace_probe_/ {
+            $3 = substr($3, 16);
+            sub(/[\-+].*$/, "", $3);
+            print sect " " $1 " R " $3;
+            next;
+        }
 
-                next;
-            }
+        /file format/ {
+            next;
+        }
 
-            /^RELOC/ {
-                sub(/^[^\[]+\[/, "");
-                sub(/].*$/, "");
-                sect = $1;
-                next;
+        / F / {
+            if ($6 == ".hidden")
+                print $4 " " $1 " G " $7;
+            else
+                print $4 " " $1 " F " $6;
+        }
+        NF > 3 && kvh {
+            if (/^[0-9a-f]/) {
+                sidx++;
+                if ($3 == "F") {
+                    if ($6 == ".hidden")
+                        $6 = $7;
+                }
             }
-
-            /__dtrace_probe_/ && sect && sect !~ /debug/ {
-                $3 = substr($3, 16);
-                sub(/-.*$/, "", $3);
-                printf "%16s %s R %s %s\n", sect, $1, $3, sectbase[sect];
-                next;
-            }' | \
-       sort
-    [ "x${lfn}" != "x" -a "x${lfn}" != "xkmod" ] && nm ${lfn}
+            next;
+        }' | \
+    sort -k1,2
 ) | \
     awk -v lfn="${lfn}" \
        -v arch=${ARCH} \
        'function addl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) {
-            tmp = $0;
-            if (length(v0) > 8 || length(v1) > 8) {
-                d = length(v0);
-                v0h = strtonum("0x"substr(v0, 1, d - 8));
-                v0l = strtonum("0x"substr(v0, d - 8 + 1));
-                d = length(v1);
-                v1h = strtonum("0x"substr(v1, 1, d - 8));
-                v1l = strtonum("0x"substr(v1, d - 8 + 1));
-
-                v0h += v1h;
-                v0l += v1l;
-
-                d = sprintf("%x", v0l);
-                if (length(d) > 8)
-                    v0h++;
-
-                d = sprintf("%x%x", v0h, v0l);
-            } else {
-                v0 = strtonum("0x"v0);
-                v1 = strtonum("0x"v1);
-                d = sprintf("%x", v0 + v1);
-            }
-            $0 = tmp;
-
-            return d;
-        }
-
-        function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) {
-            tmp = $0;
-            if (length(v0) > 8) {
-                d = length(v0);
-                v0h = strtonum("0x"substr(v0, 1, d - 8));
-                v0l = strtonum("0x"substr(v0, d - 8 + 1));
-                d = length(v1);
-                v1h = strtonum("0x"substr(v1, 1, d - 8));
-                v1l = strtonum("0x"substr(v1, d - 8 + 1));
-
-                if (v0l > v1l) {
-                    if (v0h >= v1h) {
-                        d = sprintf("%x%x", v0h - v1h, v0l - v1l);
-                    } else {
-                        printf "#error Invalid addresses: %x vs %x", v0, v1 \
-                                                               > /dev/stderr;
-                        errc++;
-                    }
-                } else {
-                    printf "#error Invalid addresses: %x vs %x", v0, v1 \
-                                                               > /dev/stderr;
-                    errc++;
-                }
-            } else {
-                v0 = strtonum("0x"v0);
-                v1 = strtonum("0x"v1);
-                d = sprintf("%x", v0 - v1);
-            }
-            $0 = tmp;
-
-            return d;
-        }
+             tmp = $0;
+             if (length(v0) > 8 || length(v1) > 8) {
+                 d = length(v0);
+                 v0h = strtonum("0x"substr(v0, 1, d - 8));
+                 v0l = strtonum("0x"substr(v0, d - 8 + 1));
+                 d = length(v1);
+                 v1h = strtonum("0x"substr(v1, 1, d - 8));
+                 v1l = strtonum("0x"substr(v1, d - 8 + 1));
+
+                 v0h += v1h;
+                 v0l += v1l;
+
+                 d = sprintf("%x", v0l);
+                 if (length(d) > 8)
+                     v0h++;
+
+                 d = sprintf("%x%x", v0h, v0l);
+             } else {
+                 v0 = strtonum("0x"v0);
+                 v1 = strtonum("0x"v1);
+                 d = sprintf("%x", v0 + v1);
+             }
+             $0 = tmp;
+
+             return d;
+         }
+
+         function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) {
+             tmp = $0;
+             if (length(v0) > 8) {
+                 d = length(v0);
+                 v0h = strtonum("0x"substr(v0, 1, d - 8));
+                 v0l = strtonum("0x"substr(v0, d - 8 + 1));
+                 d = length(v1);
+                 v1h = strtonum("0x"substr(v1, 1, d - 8));
+                 v1l = strtonum("0x"substr(v1, d - 8 + 1));
+
+                 if (v0l > v1l) {
+                     if (v0h >= v1h) {
+                         d = sprintf("%x%x", v0h - v1h, v0l - v1l);
+                     } else {
+                         printf "#error Invalid addresses: %x vs %x", v0, v1 \
+                                                                >"/dev/stderr";
+                         errc++;
+                     }
+                 } else {
+                     printf "#error Invalid addresses: %x vs %x", v0, v1 \
+                                                                >"/dev/stderr";
+                     errc++;
+                 }
+             } else {
+                 v0 = strtonum("0x"v0);
+                 v1 = strtonum("0x"v1);
+                 d = sprintf("%x", v0 - v1);
+             }
+             $0 = tmp;
+
+             return d;
+         }
 
         BEGIN {
             if (lfn != "kmod") {
@@ -179,75 +247,150 @@ fi
                 print ".globl dtrace_sdt_probes";
                 print "\tALGN";
                 print "dtrace_sdt_probes:";
-            } else {
+            } else
                 print "#include <linux/sdt.h>";
-            }
 
             probec = 0;
         }
 
-        $2 ~ /^[tT]$/ {
-            fun = $3;
-
-            if (fun in probes) {
-                baseaddr = $1;
-                sub(/^0+/, "", baseaddr);
-
-                $0 = probes[fun];
+        #
+        # [STAGE 1] Kernel only:
+        # Process a symbol table definition for a function in the object
+        # file ($ofn).  As we pass through the symbol table, we record the
+        # function with the lowest offset within each section.
+        #
+        NF == 6 {
+            if ($4 in sectaddr) {
+                if ($1 < sectaddr[$4]) {
+                    sectaddr[$4] = $1;
+                    sectfunc[$4] = $6;
+                }
+            } else {
+                secttodo[$4] = 1;
+                sectaddr[$4] = $1;
+                sectfunc[$4] = $6;
+            }
 
-                for (i = 1; i <= NF; i++) {
-                    prb = $i;
-                    pn = fun ":" prb;
+            next;
+        }
 
-                    for (j = 0; j < pidcnt[pn]; j++) {
-                        pid = pn"-"j;
-                        ad = addl(baseaddr, poffst[pid]);
+        #
+        # [STAGE 2] Kernel only:
+        # Process a symbol table definition for a function in the final link
+        # target ($lfn).  As we pass through the symbol table, we update the
+        # section data with the final load address using the known function
+        # with lowest offset within the section.
+        #
+        NF == 3 {              # Symbol def in $lfn (final addresses)
+            for (s in secttodo) {
+                if (sectfunc[s] == $3) {
+                    if (init_begin && $2 > init_begin) {
+                        sectname[s] = "";
+                        next;
+                    }
 
-                        if (arch == "x86" || arch == "x86_64")
-                            ad = subl(ad, 1);
+                    sectname[s] = $1;
 
-                        if (lfn != "kmod") {
-                            printf "\tPTR\t0x%s\n", ad;
-                            printf "\tPTR\t%d\n", length(prb);
-                            printf "\tPTR\t%d\n", length(fun);
-                            printf "\t.asciz\t\042%s\042\n", prb;
-                            printf "\t.asciz\t\042%s\042\n", fun;
-                            print "\tALGN";
-                        } else {
-                            if (probec == 0)
-                                print "static sdt_probedesc_t\t_sdt_probes[] = {";
+                    # If the first function in the section is not at offset 0,
+                    # subtracting the offset from the function address  yields
+                    # the address of the start of the section.
+                    if (sectaddr[s] !~ /^0+$/)
+                        sectaddr[s] = subl($2, sectaddr[s]);
+                    else
+                        sectaddr[s] = $2;
 
-                            printf "  {\042%s\042, \042%s\042, 0x%s },\n", \
-                                   prb, fun, ad;
-                        }
+                    delete secttodo[s];
 
-                       probec++;
-                    }
+                    next;
                 }
             }
 
+            if ($3 == "__init_begin") {
+                print "\t/* Sections above " $2 " are skipped. */";
+                init_begin = $2;
+            }
+
             next;
         }
 
-        $3 == "F" {
-            fun = $4;
-            addr = $2;
+        #
+        # [STAGE 3a] Kernel and kernel modules:
+        # Process a symbol table definition for a function in the object
+        # file ($ofn).  As we pass through the symbol table, we record the
+        # function name, address, and symbol table index or alias.  This
+        # information is needed for any potential DTrace probes that may exist
+        # in the function.  They will be listed in relocation records
+        # subsequent to this function definition (and are processed in the
+        # next action block).
+        #
+        NF == 4 && $3 == "F" {
+            fname = $4;
+            sub(/\..*$/, "", fname);
+            alias = $4;
+            faddr = $2;
+            sub(/^0+/, "", faddr);
 
-            sub(/^0+/, "", addr);
+            next;
+        }
+
+        NF == 4 && $3 == "G" {
+            alias = $4;
 
             next;
         }
 
-        $3 == "R" {
+        #
+        # [STAGE 3b] Kernel and kernel modules:
+        # Process a relocation record associated with the preceding function.
+        #
+        # For kernel:
+        # Convert the section offset into an absolute address based on the
+        # section load address.
+        #
+        # For kernel modules:
+        # Convert the section offset into an offset in the function where the
+        # DTrace probe is located, i.e. an offset from the start of the
+        # function.  This will be resolved in an absolute address at runtime
+        # when the module is loaded.
+        #
+        NF == 4 && $3 == "R" {
             sub(/^0+/, "", $2);
-            pn = fun":"$4;
 
-            if (!pidcnt[pn])
-                probes[fun] = $4 " " probes[fun];
+            if (lfn != "kmod") {
+                if ($1 in sectaddr) {
+                    if (!sectname[$1]) {
+                        printf "WARNING: Probe %s in [%s] %s() ignored - " \
+                               "init-section.\n", $4, $1, fname \
+                                                               >"/dev/stderr";
+                        next;
+                    }
 
-            pid = pn"-"int(pidcnt[pn]);
-            pidcnt[pn]++;
-            poffst[pid] = subl($2, addr);
+                    addr = addl(sectaddr[$1], $2);
+                    printf "\t/* [%s base] %s + %s = [%s] %s */\n", \
+                           $1, sectaddr[$1], $2, sectname[$1], addr \
+                } else
+                    addr = $2;
+
+                if (arch == "x86" || arch == "x86_64")
+                    addr = subl(addr, 1);
+
+                printf "\tPTR\t0x%s\n", addr;
+                printf "\tPTR\t%d\n", length($4);
+                printf "\tPTR\t%d\n", length(fname);
+                printf "\t.asciz\t\042%s\042\n", $4;
+                printf "\t.asciz\t\042%s\042\n", fname;
+                print "\tALGN";
+            } else {
+                addr = subl($2, faddr);
+
+                if (arch == "x86" || arch == "x86_64")
+                    addr = subl(addr, 1);
+
+                protom[alias] = 1;
+                probev[probec] = sprintf("  {\042%s\042,  \042%s\042 /* %s */, (uintptr_t)%s+0x%s },", $4, fname, $1, alias, addr);
+            }
+
+            probec++;
 
             next;
         }
@@ -260,18 +403,20 @@ fi
                 print "dtrace_sdt_nprobes:";
                 printf "\tPTR\t%d\n", probec;
             } else {
-                if (probec > 0)
-                    print "};";
-                else
+                if (probec > 0) {
+                    for (alias in protom)
+                        printf "extern void %s(void);\n", alias;
+                    print "\nstatic sdt_probedesc_t\t_sdt_probes[] = {";
+                    for (i = 0; i < probec; i++)
+                        print probev[i];
+                    print "};\n";
+                } else
                     print "#define _sdt_probes\tNULL";
 
                 print "#define _sdt_probec\t" probec;
             }
 
-            if (errc > 0) {
-                print errc " errors generating SDT probe data." > /dev/stderr;
-                exit 1;
-            }
+            exit(errc == 0 ? 0 : 1);
         }' > $tfn
 
-exit 0
+exit $?
diff --git a/scripts/kmodsdt.c b/scripts/kmodsdt.c
new file mode 100644 (file)
index 0000000..f2364c3
--- /dev/null
@@ -0,0 +1,397 @@
+/*
+ * Copyright 2016 Oracle, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <autoconf.h>
+
+#define        ELF_TARGET_ALL
+#include <elf.h>
+#include <gelf.h>
+
+#include <sys/types.h>
+
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+
+typedef struct symtbl {       /* one strtab/symtab buffer pair, kept on a singly linked list */
+       struct symtbl *next;   /* next pair on the list (NULL terminates it) */
+       void *strtab;          /* heap buffer holding a string table */
+       void *symtab;          /* heap buffer holding a symbol table */
+} symtbl_t;
+
+/*
+ * Find the function symbol in section 'shn' whose [st_value, st_value +
+ * st_size) range contains 'addr', scanning at most 'nsym' entries of the
+ * symbol table data 'data_sym'.
+ *
+ * The first matching STB_GLOBAL symbol wins and is returned immediately.
+ * Otherwise the last matching symbol of any other binding is used.
+ *
+ * Returns the symbol table index of the match with '*sym' filled in, or -1
+ * when nothing matches (in which case '*sym' holds whatever entry was read
+ * last).  The scan also stops at the first entry gelf_getsym() cannot read.
+ */
+static int
+dt_elf_symtab_lookup(Elf_Data *data_sym, int nsym, uintptr_t addr, uint32_t shn,
+    GElf_Sym *sym)
+{
+       GElf_Sym fallback;
+       int fallback_idx = -1;
+       int idx;
+
+       for (idx = 0; idx < nsym; idx++) {
+               if (gelf_getsym(data_sym, idx, sym) == NULL)
+                       break;
+
+               /* Skip anything that is not a function covering addr. */
+               if (GELF_ST_TYPE(sym->st_info) != STT_FUNC ||
+                   shn != sym->st_shndx ||
+                   sym->st_value > addr ||
+                   addr >= sym->st_value + sym->st_size)
+                       continue;
+
+               if (GELF_ST_BIND(sym->st_info) == STB_GLOBAL)
+                       return idx;
+
+               /* Remember the non-global candidate; a later one replaces it. */
+               fallback = *sym;
+               fallback_idx = idx;
+       }
+
+       if (fallback_idx < 0)
+               return -1;
+
+       *sym = fallback;
+       return fallback_idx;
+}
+
+static int
+process_obj(const char *obj)
+{
+       static const char dt_ppref[] = "__dtrace_probe_";
+       static const char dt_spref[] = "__dta_";
+       int fd, i, sidx, mod = 0;
+       Elf *elf = NULL;
+       GElf_Ehdr ehdr;
+       Elf_Scn *scn_rel, *scn_sym, *scn_str;
+       Elf_Data *data_rel, *data_sym, *data_str;
+       GElf_Shdr shdr_rel, shdr_sym, shdr_str;
+       GElf_Sym rsym, fsym, dsym;
+       GElf_Rela rela;
+       char *p, *r, *f, *a;
+       uint32_t eclass, emachine1, emachine2;
+       size_t symsize, nsym, nstr, isym, istr, osym, len;
+       symtbl_t *pair, *bufs = NULL;
+       char **alttab;
+       const char *elferrstr = "no error";
+
+       if ((fd = open(obj, O_RDWR)) == -1) {
+               fprintf(stderr, "failed to open %s: %s\n", obj,
+                       strerror(errno));
+               return 1;
+       }
+
+       if (elf_version(EV_CURRENT) == EV_NONE) {
+               fprintf(stderr, "ELF library version too old\n");
+               return 1;
+       }
+
+       if ((elf = elf_begin(fd, ELF_C_RDWR, NULL)) == NULL) {
+               fprintf(stderr, "failed to process %s: %s\n", obj,
+                       elf_errmsg(elf_errno()));
+               return 1;
+       }
+
+       switch (elf_kind(elf)) {
+       case ELF_K_ELF:
+               break;
+       case ELF_K_AR:
+               fprintf(stderr, "archives are not permitted; %s\n", obj);
+               return 1;
+       default:
+               fprintf(stderr, "invalid file type: %s\n", obj);
+               return 1;
+       }
+
+       if (gelf_getehdr(elf, &ehdr) == NULL) {
+               fprintf(stderr, "corrupt file: %s\n", obj);
+               return 1;
+       }
+
+#ifdef CONFIG_64BIT
+       eclass = ELFCLASS64;
+# if defined(__sparc)
+       emachine1 = emachine2 = EM_SPARCV9;
+# elif defined(__i386) || defined(__amd64)
+       emachine1 = emachine2 = EM_X86_64;
+# endif
+       symsize = sizeof(Elf64_Sym);
+#else
+       eclass = ELFCLASS32;
+# if defined(__sparc)
+       emachine1 = EM_SPARC;
+       emachine2 = EM_SPARC32PLUS;
+# elif defined(__i386) || defined(__amd64)
+       emachine1 = emachine2 = EM_386;
+# endif
+       symsize = sizeof(Elf32_Sym);
+#endif
+
+       if (ehdr.e_ident[EI_CLASS] != eclass) {
+               fprintf(stderr, "incorrect ELF class for %s: %d "
+                       "(expected %d)\n", obj, ehdr.e_ident[EI_CLASS],
+                       eclass);
+               return 1;
+       }
+       if (ehdr.e_machine != emachine1 && ehdr.e_machine != emachine2) {
+               fprintf(stderr, "incorrect ELF machine type for %s: %d "
+                       "(expected %d or %d)\n",
+                       obj, ehdr.e_machine, emachine1, emachine2);
+               return 1;
+       }
+
+       scn_rel = NULL;
+       while ((scn_rel = elf_nextscn(elf, scn_rel)) != NULL) {
+               if (gelf_getshdr(scn_rel, &shdr_rel) == NULL) {
+                       elferrstr = "failed to get section header";
+                       goto elf_err;
+               }
+
+               /*
+                * Skip any non-relocation sections.
+                */
+               if (shdr_rel.sh_type != SHT_RELA && shdr_rel.sh_type != SHT_REL)
+                       continue;
+
+               if ((data_rel = elf_getdata(scn_rel, NULL)) == NULL) {
+                       elferrstr = "failed to get relocation data";
+                       goto elf_err;
+               }
+
+               /*
+                * Grab the section, section header and section data for the
+                * symbol table that this relocation section references.
+                */
+               if ((scn_sym = elf_getscn(elf, shdr_rel.sh_link)) == NULL ||
+                   gelf_getshdr(scn_sym, &shdr_sym) == NULL ||
+                   (data_sym = elf_getdata(scn_sym, NULL)) == NULL) {
+                       elferrstr = "failed to get symbol table";
+                       goto elf_err;
+               }
+
+               /*
+                * Ditto for that symbol table's string table.
+                */
+               if ((scn_str = elf_getscn(elf, shdr_sym.sh_link)) == NULL ||
+                   gelf_getshdr(scn_str, &shdr_str) == NULL ||
+                   (data_str = elf_getdata(scn_str, NULL)) == NULL) {
+                       elferrstr = "failed to get string table";
+                       goto elf_err;
+               }
+
+               /*
+                * We're looking for relocations to symbols matching this form:
+                *
+                *   __dtrace_probe_<probe>
+                *
+                * If the function containing the probe is locally scoped
+                * (static), we create an alias.  The alias, a new symbol,
+                * will be global (so that it can be referenced from sdtinfo
+                * entries) and hidden (so that it is converted to a local
+                * symbol at link time). Such aliases have this form:
+                *
+                *   __dta_<function>_<symindex>
+                *
+                * The <symindex> is appended to ensure that aliases are unique
+                * because they are referenced in global scope.  Two local
+                * functions with identical names need to be distrinct at the
+                * level of the aliases.
+                *
+                * We take a first pass through all the relocations to
+                * populate our string table and count the number of extra
+                * symbols we'll require.  Note that the <function> is
+                * sanitized to ensure that it is a valid C identifier, i.e.
+                * any periods in the name are converted to underscores.
+                */
+               isym = osym = data_sym->d_size / symsize;
+               istr = data_str->d_size;
+
+               /*
+                * Allocate the alias table to be the exact same size as the
+                * symtab.  If an alias is required for a specific symbol, its
+                * corresponding entry in this alias table will contain the
+                * alias name.  Otherwise, the entry will be NULL.
+                */
+               alttab = (char **)calloc(isym, sizeof(char *));
+
+               nsym = 0;
+               nstr = 0;
+
+               for (i = 0; i < shdr_rel.sh_size / shdr_rel.sh_entsize; i++) {
+                       if (shdr_rel.sh_type == SHT_RELA) {
+                               if (gelf_getrela(data_rel, i, &rela) == NULL)
+                                       continue;
+                       } else {
+                               GElf_Rel rel;
+                               if (gelf_getrel(data_rel, i, &rel) == NULL)
+                                       continue;
+                               rela.r_offset = rel.r_offset;
+                               rela.r_info = rel.r_info;
+                               rela.r_addend = 0;
+                       }
+
+                       if (gelf_getsym(data_sym, GELF_R_SYM(rela.r_info),
+                                       &rsym) == NULL) {
+                               elferrstr = "relocation symbol not found";
+                               goto elf_err;
+                       }
+
+                       assert(rsym.st_name < data_str->d_size);
+
+                       r = (char *)data_str->d_buf + rsym.st_name;
+                       if (strncmp(r, dt_ppref, sizeof(dt_ppref) - 1) != 0)
+                               continue;
+
+                       sidx = dt_elf_symtab_lookup(data_sym, isym,
+                                                   rela.r_offset,
+                                                   shdr_rel.sh_info, &fsym);
+                       if (sidx < 0) {
+                               fprintf(stderr, "relocation %x not in "
+                                       "function\n", i);
+                               goto err;
+                       }
+
+                       assert(fsym.st_name < data_str->d_size);
+                       assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
+
+                       if (GELF_ST_BIND(fsym.st_info) != STB_LOCAL)
+                               continue;
+
+                       f = (char *)data_str->d_buf + fsym.st_name;
+
+                       if (alttab[sidx] != NULL)
+                               continue;
+
+                       len = snprintf(NULL, 0, "%s%s_%d", dt_spref, f, sidx)
+                             + 1;
+                       a = malloc(len);
+                       assert(a != NULL);
+                       nstr += snprintf(a, len, "%s%s_%d", dt_spref, f, sidx)
+                                + 1;
+                       for (p = a; *p != '\0'; p++) {
+                               if (*p == '.')
+                                       *p = '_';
+                       }
+                       alttab[sidx] = a;
+                       nsym++;
+               }
+
+               if (!nsym) {
+                       free(alttab);
+                       continue;
+               }
+
+               if ((pair = malloc(sizeof(symtbl_t))) == NULL) {
+                       fprintf(stderr, "failed to alloc new symtbl\n");
+                       goto err;
+               }
+               if ((pair->strtab = malloc(data_str->d_size + nstr)) == NULL) {
+                       fprintf(stderr, "failed to alloc new symtbl->strtab\n");
+                       free(pair);
+                       goto err;
+               }
+               if ((pair->symtab =
+                    malloc(data_sym->d_size + nsym * symsize)) == NULL) {
+                       fprintf(stderr, "failed to alloc new symtbl->symtab\n");
+                       free(pair->strtab);
+                       free(pair);
+                       goto err;
+               }
+
+               pair->next = bufs;
+               bufs = pair;
+
+               memcpy(pair->strtab, data_str->d_buf, data_str->d_size);
+               data_str->d_buf = pair->strtab;
+               data_str->d_size += nstr;
+               elf_flagdata(data_str, ELF_C_SET, ELF_F_DIRTY);
+               shdr_str.sh_size += nstr;
+               gelf_update_shdr(scn_str, &shdr_str);
+
+               memcpy(pair->symtab, data_sym->d_buf, data_sym->d_size);
+               data_sym->d_buf = pair->symtab;
+               data_sym->d_size += nsym * symsize;
+               elf_flagdata(data_sym, ELF_C_SET, ELF_F_DIRTY);
+               shdr_sym.sh_size += nsym * symsize;
+               gelf_update_shdr(scn_sym, &shdr_sym);
+
+               nsym += isym;
+
+               /*
+                * Now that the tables have been allocated, add the aliases as
+                * described above.  Since we already know the symtab index of
+                * the symbol that the alias refers to, we can simply run down
+                * the alttab and add alias for any non-NULL entries.
+                */
+               for (i = 1; i < osym; i++) {
+                       if (alttab[i] == NULL)
+                               continue;
+
+                       if (gelf_getsym(data_sym, i, &fsym) == NULL) {
+                               fprintf(stderr, "failed to get symbol %d: %s\n",
+                                       i, elf_errmsg(elf_errno()));
+                               goto err;
+                       }
+
+                       assert(GELF_ST_TYPE(fsym.st_info) == STT_FUNC);
+                       assert(GELF_ST_BIND(fsym.st_info) == STB_LOCAL);
+                       /*
+                        * Add the alias as a new symbol to the symtab.
+                        */
+                       dsym = fsym;
+                       dsym.st_name = istr;
+                       dsym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_FUNC);
+                       dsym.st_other = ELF64_ST_VISIBILITY(STV_HIDDEN);
+
+                       len = strlen(alttab[i]) + 1;
+                       assert(istr + len <= data_str->d_size);
+                       a = (char *)data_str->d_buf + istr;
+                       memcpy(a, alttab[i], len);
+
+                       gelf_update_sym(data_sym, isym, &dsym);
+                       istr += len;
+                       isym++;
+
+                       assert(isym <= nsym);
+
+                       mod = 1;
+
+                       free(alttab[i]);
+               }
+
+               free(alttab);
+       }
+
+       if (mod && elf_update(elf, ELF_C_WRITE) == -1) {
+               elferrstr = "Failed to update ELF object";
+               goto elf_err;
+       }
+
+       elf_end(elf);
+       close(fd);
+
+       while ((pair = bufs) != NULL) {
+               bufs = pair->next;
+               free(pair->strtab);
+               free(pair->symtab);
+               free(pair);
+       }
+
+       return 0;
+
+elf_err:
+       fprintf(stderr, "%s: %s\n", elferrstr, elf_errmsg(elf_errno()));
+err:
+       fprintf(stderr, "an error was encountered while processing %s\n", obj);
+       return 1;
+}
+
+/*
+ * Run process_obj() on each command line argument in order.  The first
+ * object that fails ends the run with exit status 1; otherwise the exit
+ * status is 0 (including when no arguments are given).
+ */
+int
+main(int argc, char *argv[])
+{
+       int argi = 1;
+
+       while (argi < argc) {
+               if (process_obj(argv[argi]) != 0)
+                       exit(1);
+               argi++;
+       }
+
+       exit(0);
+}