From: Nick Alcock Date: Thu, 9 Mar 2017 14:31:33 +0000 (+0000) Subject: ctf: speed up the dwarf2ctf duplicate detector some more X-Git-Tag: v4.1.12-102.0.20170529_2200~57^2~4 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=234a12ad39af415cc93ba792c047c5763f146a59;p=users%2Fjedix%2Flinux-maple.git ctf: speed up the dwarf2ctf duplicate detector some more The duplicate detector's alias detection pass is still doing unnecessary work. When a non-opaque structure is marked shared, it is sometimes necessary to do another deduplication scan, because the marking may have marked types used within the structure as shared, which may require yet more types (e.g. opaque uses of that type in other modules) to be marked shared as well. So while we know this pass can only affect structures/unions/enums that have names, and their interior types, it would seem that we must keep scanning them to see if they need deduplication until none remain. However, there is one exception: if a non-opaque type and its corresponding opaque type are both already marked shared, or if we have just processed them and marked them accordingly, we know that we will never need to re-mark those particular types again, since they can't be more shared than they already are: so we can remove them from consideration in future passes. Because we are only opening DWARF files in this pass as needed now, this hugely cuts down the number of files we process in subsequent passes: we still see the same number of passes, but passes after the first (which marks tens of thousands of opaque types as shared) only open a few files, mark a few hundred types, and flash past in under a second. In my tests, the alias fixup pass now takes under 10s, which can be more or less ignored: all passes other than initialization and writeout are much more expensive. (Before this series, it took over a minute on the fastest machine I have access to, and over three minutes on SPARC.) 
Orabug: 25815306 Signed-off-by: Nick Alcock Reviewed-by: tomas.jedlicka@oracle.com --- diff --git a/scripts/dwarf2ctf/dwarf2ctf.c b/scripts/dwarf2ctf/dwarf2ctf.c index 2332501c317a6..ec710b3a77fc7 100644 --- a/scripts/dwarf2ctf/dwarf2ctf.c +++ b/scripts/dwarf2ctf/dwarf2ctf.c @@ -345,7 +345,7 @@ struct detect_duplicates_state { const char *file_name; const char *module_name; GHashTable *structs_seen; - GSList *named_structs; + GList *named_structs; const char *dwfl_file_name; Dwarf *dwarf; Dwfl *dwfl; @@ -397,8 +397,10 @@ static void mark_seen_contained(Dwarf_Die *die, const char *module_name); * Determine if some type (whose ultimate base type is an non-opaque structure, * alias, or enum) has an opaque equivalent which is shared, and mark it and * all its bases as shared too if so. + * + * A list_filter() filter function. */ -static void detect_duplicates_alias_fixup(void *id_file_data, void *data); +static int detect_duplicates_alias_fixup(void *id_file_data, void *data); /* * Mark a basic type shared by name and intern it in all relevant hashes. (Used @@ -716,6 +718,13 @@ static char *xstrdup(const char *s) __attribute__((__nonnull__, __warn_unused_result__, __malloc__)); +/* + * Filter a GList, calling a predicate on it and removing all elements for which + * the predicate returns true. + */ +typedef int (*filter_pred_fun) (void *element, void *data); +static GList *list_filter(GList *list, filter_pred_fun fun, void *data); + /* * Figure out the (pathless, suffixless) module name for a given module file (.o * or .ko), and return it in a new dynamically allocated string. 
@@ -1823,14 +1832,14 @@ static void scan_duplicates(void) dw_ctf_trace("Duplicate detection: alias fixup pass.\n"); state.repeat_detection = 0; - - g_slist_foreach(state.named_structs, detect_duplicates_alias_fixup, - &state); + state.named_structs = list_filter(state.named_structs, + detect_duplicates_alias_fixup, + &state); } while (state.repeat_detection); out: detect_duplicates_dwarf_free(&state); dw_ctf_trace("Duplicate detection: complete.\n"); - g_slist_free_full(state.named_structs, free_duplicates_id_file); + g_list_free_full(state.named_structs, free_duplicates_id_file); } /* @@ -2002,7 +2011,7 @@ static void detect_duplicates_will_rescan(Dwarf_Die *die, const char *id, id_file->file_name = xstrdup(state->file_name); id_file->id = xstrdup(id); id_file->dieoff = dwarf_dieoffset(die); - state->named_structs = g_slist_prepend(state->named_structs, id_file); + state->named_structs = g_list_prepend(state->named_structs, id_file); } /* @@ -2230,14 +2239,19 @@ static void is_named_struct_union_enum(Dwarf_Die *die, const char *unused, * (This is why it must run over non-opaque structures: given a non-opaque * structure, its opaque alias is easy to compute, but the converse is not * true.) + * + * As a list_filter() filter function, returns nonzero if this structure will + * not need to be checked again (because both its opaque and transparent + * variants are shared). 
*/ -static void detect_duplicates_alias_fixup(void *id_file_data, void *data) +static int detect_duplicates_alias_fixup(void *id_file_data, void *data) { struct detect_duplicates_id_file *id_file = id_file_data; struct detect_duplicates_state *state = data; int transparent_shared = 0; int opaque_shared = 0; + int made_shared = 0; char *opaque_id; const char *line_num; @@ -2317,6 +2331,7 @@ static void detect_duplicates_alias_fixup(void *id_file_data, void *data) exit(1); } mark_shared(&die, NULL, state); + made_shared = 1; } /* @@ -2334,9 +2349,12 @@ static void detect_duplicates_alias_fixup(void *id_file_data, void *data) dw_ctf_trace("Marking %s as duplicate\n", opaque_id); g_hash_table_replace(id_to_module, xstrdup(opaque_id), xstrdup("shared_ctf")); + made_shared = 1; } free(opaque_id); + + return made_shared || (opaque_shared && transparent_shared); } /* @@ -3978,6 +3996,23 @@ static char *str_appendn(char *s, ...) return s; } +/* + * Filter a GList, calling a predicate on it and removing all elements for which + * the predicate returns true. + */ +static GList *list_filter(GList *list, filter_pred_fun fun, void *data) +{ + GList *cur = list; + while (cur) { + GList *next = cur->next; + if (fun(cur->data, data)) + list = g_list_delete_link(list, cur); + cur = next; + } + + return list; +} + /* * Figure out the (pathless, suffixless) module name for a given module file (.o * or .ko), and return it in a new dynamically allocated string.