From: Kris Van Hees Date: Sun, 13 Jan 2019 03:10:01 +0000 (-0500) Subject: dtrace: support kernels built with RANDOMIZE_BASE X-Git-Tag: v4.1.12-124.31.3~282 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=a08c810b9965a5a76f4c600e8ed73a714a59173e;p=users%2Fjedix%2Flinux-maple.git dtrace: support kernels built with RANDOMIZE_BASE SDT probe addresses were being generated as absolute addresses which breaks when the kernel may get relocated to a place other than the default load address. The solution is to generate the probe locations as an offset relative to the _stext symbol in the .tmp_sdtinfo.S source file (generated at build time), so that the actual addresses are processed as relocations when the kernel boots. This fix also optimizes the SDT info data (function and probe names) by using de-duplication since especially with perf probes) many of those strings are non-unique. Orabug: 29204005 Signed-off-by: Kris Van Hees Reviewed-by: Nick Alcock Tested-by: John Haxby Signed-off-by: Brian Maly --- diff --git a/include/linux/dtrace_sdt.h b/include/linux/dtrace_sdt.h index 57931122ee6a..48d264a8e6cc 100644 --- a/include/linux/dtrace_sdt.h +++ b/include/linux/dtrace_sdt.h @@ -6,20 +6,6 @@ #include #include -/* - * SDT probe called relocation information for the core kernel, provided by - * .tmp_sdtinfo.S. 
- */ -typedef struct dtrace_sdt_probeinfo { - unsigned long addr; - unsigned long name_len; - unsigned long func_len; - char name[0]; -} __aligned(sizeof(unsigned long)) dtrace_sdt_probeinfo_t; - -extern unsigned long dtrace_sdt_nprobes __attribute__((weak)); -extern void *dtrace_sdt_probes __attribute__((weak)); - extern void dtrace_sdt_init(void); extern void dtrace_sdt_register(struct module *); extern void dtrace_sdt_register_module(struct module *, diff --git a/kernel/dtrace/dtrace_sdt_core.c b/kernel/dtrace/dtrace_sdt_core.c index 28fbab2cd655..aad934ed5571 100644 --- a/kernel/dtrace/dtrace_sdt_core.c +++ b/kernel/dtrace/dtrace_sdt_core.c @@ -29,6 +29,14 @@ #include const char *sdt_prefix = "__dtrace_probe_"; +int dtrace_nosdt; + +/* + * Compiled-in SDT probe data. + */ +extern const unsigned long dtrace_sdt_probes[]; +extern const char dtrace_sdt_strings[]; +extern const unsigned long dtrace_sdt_nprobes; /* * Markers of core-kernel sdt_args and sdt_names sections. @@ -38,8 +46,8 @@ extern const char __stop_dtrace_sdt_args[]; extern const char __start_dtrace_sdt_names[]; extern const char __stop_dtrace_sdt_names[]; -static int sdt_probe_set(sdt_probedesc_t *sdp, char *name, char *func, - uintptr_t addr, asm_instr_t **paddr,\ +static int sdt_probe_set(sdt_probedesc_t *sdp, const char *name, + const char *func, uintptr_t addr, asm_instr_t **paddr, sdt_probedesc_t *prv) { if ((sdp->sdpd_name = kstrdup(name, GFP_KERNEL)) == NULL) { @@ -101,8 +109,8 @@ void dtrace_sdt_stash_args(const char *module_name, hashsize = nprobes * 4; /* arbitrary expansion factor */ args_by_name = vzalloc(hashsize * sizeof (struct probe_name_hashent_t)); if (args_by_name == NULL) { - pr_warning("%s: cannot allocate hash for sdt args population\n", - __func__); + pr_warn("%s: cannot allocate hash for sdt args population\n", + __func__); return; } @@ -188,9 +196,6 @@ void dtrace_sdt_stash_args(const char *module_name, void dtrace_sdt_register(struct module *mp) { int i, cnt; - 
dtrace_sdt_probeinfo_t *pi = - (dtrace_sdt_probeinfo_t *)&dtrace_sdt_probes; - void *nextpi; sdt_probedesc_t *sdps; asm_instr_t **addrs; int *is_enabled; @@ -198,8 +203,8 @@ void dtrace_sdt_register(struct module *mp) size_t args_len; if (mp == NULL) { - pr_warning("%s: no module provided - nothing registered\n", - __func__); + pr_warn("%s: no module provided - nothing registered\n", + __func__); return; } @@ -209,7 +214,7 @@ void dtrace_sdt_register(struct module *mp) mp->sdt_probes = NULL; mp->sdt_probec = 0; - if (dtrace_sdt_nprobes == 0) + if (dtrace_sdt_nprobes == 0 || dtrace_nosdt) return; /* @@ -219,7 +224,7 @@ void dtrace_sdt_register(struct module *mp) sdps = (sdt_probedesc_t *)vmalloc(dtrace_sdt_nprobes * sizeof(sdt_probedesc_t)); if (sdps == NULL) { - pr_warning("%s: cannot allocate SDT probe array\n", __func__); + pr_warn("%s: cannot allocate SDT probe array\n", __func__); return; } @@ -233,8 +238,8 @@ void dtrace_sdt_register(struct module *mp) sizeof(asm_instr_t *)); is_enabled = (int *)vmalloc(dtrace_sdt_nprobes * sizeof(int)); if ((addrs == NULL) || (is_enabled == NULL)) { - pr_warning("%s: cannot allocate SDT probe address/is-enabled " - "lists\n", __func__); + pr_warn("%s: cannot allocate SDT probe address/is-enabled " + "lists\n", __func__); vfree(sdps); vfree(addrs); vfree(is_enabled); @@ -242,22 +247,24 @@ void dtrace_sdt_register(struct module *mp) } for (i = cnt = 0; cnt < dtrace_sdt_nprobes; i++) { - char *func = pi->name + pi->name_len + 1; + uintptr_t addr, poff, foff; + const char *fname; + const char *pname; + + addr = dtrace_sdt_probes[i * 3]; /* address */ + poff = dtrace_sdt_probes[i * 3 + 1]; /* probe name offset */ + foff = dtrace_sdt_probes[i * 3 + 2]; /* func name offset */ + pname = &dtrace_sdt_strings[poff]; + fname = &dtrace_sdt_strings[foff]; - is_enabled[cnt] = (pi->name[0] == '?'); + is_enabled[cnt] = (pname[0] == '?'); - if (sdt_probe_set(&sdps[cnt], pi->name, func, pi->addr, - &addrs[cnt], + if (sdt_probe_set(&sdps[cnt], 
pname, fname, addr, &addrs[cnt], cnt > 0 ? &sdps[cnt - 1] : NULL)) - pr_warning("%s: failed to add SDT probe %s\n", - __func__, pi->name); + pr_warn("%s: failed to add SDT probe %s for %s\n", + __func__, pname, fname); else cnt++; - - nextpi = (void *)pi + sizeof(dtrace_sdt_probeinfo_t) - + roundup(pi->name_len + 1 + - pi->func_len + 1, BITS_PER_LONG / 8); - pi = nextpi; } mp->sdt_probes = sdps; @@ -274,7 +281,7 @@ void dtrace_sdt_register(struct module *mp) args_len = __stop_dtrace_sdt_args - __start_dtrace_sdt_args; args = vmalloc(args_len); if (args == NULL) { - pr_warning("%s: cannot allocate table of SDT arg types\n", + pr_warn("%s: cannot allocate table of SDT arg types\n", __func__); goto end; } @@ -293,9 +300,9 @@ end: static int __init nosdt(char *str) { - dtrace_sdt_nprobes = 0; + dtrace_nosdt = 1; - return 0; + return 0; } early_param("nosdt", nosdt); @@ -320,8 +327,8 @@ void dtrace_sdt_register_module(struct module *mp, sizeof(asm_instr_t *)); is_enabled = (int *)vmalloc(mp->sdt_probec * sizeof(int)); if ((addrs == NULL) || (is_enabled == NULL)) { - pr_warning("%s: cannot allocate SDT probe address list (%s)\n", - __func__, mp->name); + pr_warn("%s: cannot allocate SDT probe address list (%s)\n", + __func__, mp->name); vfree(addrs); vfree(is_enabled); return; diff --git a/scripts/dtrace_sdt.sh b/scripts/dtrace_sdt.sh index ddf9559f918a..6ace6b44eda9 100755 --- a/scripts/dtrace_sdt.sh +++ b/scripts/dtrace_sdt.sh @@ -105,6 +105,9 @@ if [ "$tok" = "kmod" ]; then ${OBJDUMP} -tr ${ofn} | \ awk '/^RELOC/ { sect = substr($4, 2, length($4) - 3); + if (sect ~ /^\.(exit|init|meminit)\.text/) + sect = 0; + next; } @@ -127,6 +130,9 @@ if [ "$tok" = "kmod" ]; then } / F / { + if ($4 ~ /^\.(exit|init|meminit)\.text/) + next; + if ($6 == ".hidden") print $4 " " $1 " G " $7; else @@ -144,18 +150,24 @@ if [ "$tok" = "kmod" ]; then v1h = strtonum("0x"substr(v1, 1, d - 8)); v1l = strtonum("0x"substr(v1, d - 8 + 1)); - if (v0l > v1l) { + if (v0l >= v1l) { if (v0h >= v1h) { 
d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); } else { - printf "#error Invalid addresses: %x vs %x", v0, v1 \ - >"/dev/stderr"; + printf "#error Invalid addresses: %s - %s\n", v0, v1 \ + >"/dev/stderr"; errc++; } } else { - printf "#error Invalid addresses: %x vs %x", v0, v1 \ - >"/dev/stderr"; - errc++; + if (v0h > v1h) { + v0h--; + v0l += 4294967296; + d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); + } else { + printf "#error Invalid addresses: %s - %s\n", v0, v1 \ + >"/dev/stderr"; + errc++; + } } } else { v0 = strtonum("0x"v0); @@ -238,42 +250,78 @@ if [ "$tok" = "kmod" ]; then exit(errc == 0 ? 0 : 1); }' > $tfn else - # For a linked kernel (with relocation data), the scope of DTrace SDT - # probe discovery can be limited to just the .text section. + # For a linked kernel (with relocation data), the scope of the DTrace SDT + # probe discovery can be limited to CODE sections that are not included in + # the init or exit code sections. # - # First the section record is retrieved in order to determine the base - # address for symbols in the .text section. + # First the sections records are parsed to order to determine the base + # address for each relevant section. # - # Subsequently, all function symbols that are located in the .text sectio - # are read from the symbol table of the linked kernel object. Each symbol - # is reported in the output stream with its address, a token identifying it - # as a function (or alias), and its name. + # Subsequently, all function symbols that are located in the sections we + # care about are read from the symbol table of the linked kernel object. + # Each symbol is reported in the output stream with its section name, + # address, a token identifying it as a function (or alias), and its name. # - # Finally, each relocation record from the .text section that relates to - # SDT probes are written to the output stream with its address, a token - # identifying it as a relocation, and its name. 
Probes are identified in - # the relocation records as symbols with either a __dtrace_probe_ or - # __dtrace_isenabled_ prefix. + # Finally, each relocation record from relevant sections that relates to + # SDT probes is written to the output stream with its section name, + # address, a token identifying it as a relocation, and its name. Probes + # are identified in the relocation records as symbols with either a + # __dtrace_probe_ or __dtrace_isenabled_ prefix. # - # We sort the output based on the address, which guarantees that the output - # will be a list of functions, and each function record will be followed - # immediately by any DTrace SDT probe records that are used in that - # function. + # We sort the output based on the section name and address, ensuring that + # the output will be a list of functions, and each function record will be + # followed immediately by any DTrace SDT probe records that are used in + # that function. # - # Three different record types can show up in the output (3 tokens each): - #
<address> F <name> + # Three different record types can show up in the output (4 tokens each): + #
<section> <address> F <name> # Named function at a specific address. - #
<address> G <name> + #
<section> <address> G <name> # Global alias for a local function at a specific offset. A # function can only have one alias, and there cannot be an alias # without its respective function. - #
<address> R <name> + #
<section> <address> R <name> # Relocation within a section at a specific address # - ${OBJDUMP} -htrj .text ${ofn} | \ + ${OBJDUMP} -htr ${ofn} | \ awk 'function addl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { + tmp = $0; + if (length(v0) > 8 || length(v1) > 8) { + d = length(v0); + v0h = strtonum("0x"substr(v0, 1, d - 8)); + v0l = strtonum("0x"substr(v0, d - 8 + 1)); + d = length(v1); + v1h = strtonum("0x"substr(v1, 1, d - 8)); + v1l = strtonum("0x"substr(v1, d - 8 + 1)); + + v0l += v1l; + v0h += v1h; + d = sprintf("%x", v0l); + if (length(d) > 8) { + v0h++; + v0l -= 4294967296; + } + d = sprintf("%x", v0h); + if (length(d) <= 8) { + d = sprintf("%08x%08x", v0h, v0l); + } else { + printf "#error Invalid addresses: %s + %s\n", v0, v1 \ + >"/dev/stderr"; + errc++; + } + } else { + v0 = strtonum("0x"v0); + v1 = strtonum("0x"v1); + d = sprintf("%016x", v0 + v1); + } + $0 = tmp; + + return d; + } + + function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { tmp = $0; - if (length(v0) > 8 || length(v1) > 8) { + if (length(v0) > 8) { d = length(v0); v0h = strtonum("0x"substr(v0, 1, d - 8)); v0l = strtonum("0x"substr(v0, d - 8 + 1)); @@ -281,55 +329,84 @@ else v1h = strtonum("0x"substr(v1, 1, d - 8)); v1l = strtonum("0x"substr(v1, d - 8 + 1)); - v0h += v1h; - v0l += v1l; - - d = sprintf("%x", v0l); - if (length(d) > 8) - v0h++; - - d = sprintf("%08x%08x", v0h, v0l); + if (v0l >= v1l) { + if (v0h >= v1h) { + d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); + } else { + printf "#error Invalid addresses: %s - %s\n", v0, v1 \ + >"/dev/stderr"; + errc++; + } + } else { + if (v0h > v1h) { + v0h--; + v0l += 4294967296; + d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); + } else { + printf "#error Invalid addresses: %s - %s\n", v0, v1 \ + >"/dev/stderr"; + errc++; + } + } } else { v0 = strtonum("0x"v0); v1 = strtonum("0x"v1); - d = sprintf("%016x", v0 + v1); + d = sprintf("%016x", v0 - v1); } $0 = tmp; return d; } - NF == 7 && $2 == ".text" { - base = $4; + NF == 7 && $2 !~ /^\.(exit|init|meminit)\.text/ { + snam = $2; +
addr = $4; + + getline; + if (/CODE/) + base[snam] = addr; + + next; + } + + NF == 5 && $2 == "g" && $NF == "_stext" { + print ". " $1 " B _stext"; next; } /^RELOC/ { - in_reloc = $4 == "[.text]:"; + snam = substr($4, 2, length($4) - 3); + if (snam in base) + in_reloc = 1; + else + in_reloc = 0; next; } in_reloc && /__dtrace_probe_/ { $3 = substr($3, 16); sub(/[\-+].*$/, "", $3); - print addl(base, $1) " R " $3; + print snam " " addl(base[snam], $1) " R " $3; next; } in_reloc && /__dtrace_isenabled_/ { $3 = substr($3, 20); sub(/[\-+].*$/, "", $3); - print addl(base, $1) " R ?" $3; + print snam " " addl(base[snam], $1) " R ?" $3; next; } / F / { + if (!($4 in base)) + next; + if ($6 == ".hidden") - print $1 " G " $7; + print $4 " " $1 " G " $7; else - print $1 " F " $6; + print $4 " " $1 " F " $6; }' | \ - sort -k1,2 | \ + sort -k2 | \ awk -v arch=${ARCH} \ 'function subl(v0, v1, v0h, v0l, v1h, v1l, d, tmp) { tmp = $0; @@ -341,7 +418,7 @@ else v1h = strtonum("0x"substr(v1, 1, d - 8)); v1l = strtonum("0x"substr(v1, d - 8 + 1)); - if (v0l > v1l) { + if (v0l >= v1l) { if (v0h >= v1h) { d = sprintf("%08x%08x", v0h - v1h, v0l - v1l); } else { @@ -364,6 +441,19 @@ else return d; } + function map_string(str, off) { + if (str in strmap) + off = strmap[str]; + else { + off = strsz; + strmap[str] = strsz; + strv[strc++] = str; + strsz += length(str) + 1; + } + + return off; + } + BEGIN { print "#include "; print "#if BITS_PER_LONG == 64"; @@ -382,27 +472,48 @@ else print "dtrace_sdt_probes:"; probec = 0; + stroff = 0; + strc = 0; + } + + + # + # Record the _stext address so probe locations can be expressed + # relative to that address. + # + NF == 4 && $1 == "." && $4 == "_stext" { + stext = $2; + next; } # # Process a symbol table definition for a function in the .text - # section of the kernel image. As we pass through the symbol table, - # we record the function name and address. + # section of the kernel image. 
We record the function name and + # the address, and pre-populate the alias name with the function + # name. + # + # We also compare the address of the current symbol to the last + # recorded address, and if they are the same, we do not increment + # the function count. # - NF == 3 && $2 == "F" { - fname = $3; + NF == 4 && $3 == "F" { + faddr = $2; + fname = $4; sub(/\..*$/, "", fname); - alias = $3; + alias = $4; - if ($1 != prev) + if ($2 != prev) funcc++; - prev = $1; + prev = $2; next; } - NF == 3 && $2 == "G" { - alias = $3; + # + # When we encounter an alias symbol, we record the name. + # + NF == 4 && $3 == "G" { + alias = $4; next; } @@ -410,23 +521,31 @@ else # # Process a relocation record associated with the preceding function. # - # Since all addresses are already resolved earlier, we can simply - # generate the SDT probe information record. + # The address was resolved earlier, so we can simply generate the + # numeric information for the SDT probe information record. The + # text information (probe name and function name) are stored. This + # allows us to weed out duplicates, and it is necessary because the + # data blob with all the strings will be written to output later. # - NF == 3 && $2 == "R" { - sub(/^0+/, "", $1); + NF == 4 && $3 == "R" { + sub(/^0+/, "", $2); - addr = $1; + addr = subl($2, stext); + # + # On x86, relocations point to the 2nd byte of a call instruction + # so we need to adjust the address. 
+ # if (arch == "x86" || arch == "x86_64") addr = subl(addr, 1); - printf "\tPTR\t0x%s\n", addr; - printf "\tPTR\t%d\n", length($3); - printf "\tPTR\t%d\n", length(fname); - printf "\t.asciz\t\042%s\042\n", $3; - printf "\t.asciz\t\042%s\042\n", fname; - print "\tALGN"; + print "/*"; + print " * " $1 " " faddr " F " fname; + print " * " $0; + print " */"; + printf "\tPTR\t_stext + 0x%s\n", addr; + printf "\tPTR\t%d\n", map_string($4); + printf "\tPTR\t%d\n", map_string(fname); probec++; @@ -434,6 +553,15 @@ else } END { + print ""; + print ".globl dtrace_sdt_strings"; + print "\tALGN"; + print "dtrace_sdt_strings:"; + + + for (i = 0; i < strc; i++) + printf "\t.asciz\t\042%s\042\n", strv[i]; + print ""; print ".globl dtrace_sdt_nprobes"; print ".globl dtrace_fbt_nfuncs";