From 871fa90358152a60dd0118c9b829fecaba3a6b54 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 12 Feb 2019 16:01:13 -0500 Subject: [PATCH] maple_tree: Performance enhancements. Start using slot 0 on splits. Fix missing breaks on set of slots. Calc node type only once during certain loops Signed-off-by: Liam R. Howlett --- lib/maple_tree.c | 143 +++++++++++++-------- lib/test_maple_tree.c | 2 + tools/testing/radix-tree/ma_xa_benchmark.c | 7 +- 3 files changed, 93 insertions(+), 59 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index e97617ce6dd0..39deb426693e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -278,15 +278,13 @@ static inline void ma_set_slot(struct ma_state *ms, int slot) ma_set_alloc_req(ms, slot); } -static inline unsigned long ma_get_pivot(const struct maple_node *mn, - unsigned char slot) +static inline unsigned long _ma_get_pivot(const struct maple_node *mn, + unsigned char slot, enum maple_type type) { - enum maple_type type = mt_node_type(mn); - switch (type) { - default: - case maple_dense: - return 0; + case maple_range_64: + case maple_leaf_64: + return mt_to_node(mn)->mr64.pivot[slot]; case maple_sparse_6: return mt_to_node(mn)->ms6.pivot; case maple_sparse_9: @@ -305,11 +303,16 @@ static inline unsigned long ma_get_pivot(const struct maple_node *mn, case maple_range_32: case maple_leaf_32: return mt_to_node(mn)->mr32.pivot[slot]; - case maple_range_64: - case maple_leaf_64: - return mt_to_node(mn)->mr64.pivot[slot]; + case maple_dense: + default: + return 0; } } +static inline unsigned long ma_get_pivot(const struct maple_node *mn, + unsigned char slot) +{ + return _ma_get_pivot(mn, slot, mt_node_type(mn)); +} static inline void ma_set_pivot(struct maple_node *mn, unsigned char slot, unsigned long val) { @@ -317,8 +320,12 @@ static inline void ma_set_pivot(struct maple_node *mn, unsigned char slot, switch (type) { default: + case maple_range_64: + case maple_leaf_64: + (&mt_to_node(mn)->mr64)->pivot[slot] = val; + break; case maple_dense: - return; + break; case maple_sparse_6: mt_to_node(mn)->ms6.pivot = val; break; @@ -345,10 +352,6 @@ static inline void ma_set_pivot(struct maple_node *mn, unsigned char slot, case maple_leaf_32: mt_to_node(mn)->mr32.pivot[slot] = val; break; - case maple_range_64: - case maple_leaf_64: - (&mt_to_node(mn)->mr64)->pivot[slot] = val; - break; } return; } @@ -357,12 +360,13 @@ static inline void ma_cp_pivot(struct maple_node *dst, { ma_set_pivot(dst, dloc, ma_get_pivot(src, sloc)); } -static inline void __rcu *ma_get_rcu_slot(const struct maple_node *mn, - unsigned char slot) +static inline void __rcu *_ma_get_rcu_slot(const struct maple_node *mn, + unsigned char slot, enum maple_type type) { - enum maple_type type = mt_node_type(mn); - switch (type) { + case maple_range_64: + case maple_leaf_64: + return rcu_dereference(mt_to_node(mn)->mr64.slot[slot]); default: case maple_dense: return rcu_dereference(mt_to_node(mn)->slot[slot]); @@ -384,11 +388,13 @@ static inline void __rcu *ma_get_rcu_slot(const struct maple_node *mn, case maple_range_32: case maple_leaf_32: return rcu_dereference(mt_to_node(mn)->mr32.slot[slot]); - case maple_range_64: - case maple_leaf_64: - return rcu_dereference(mt_to_node(mn)->mr64.slot[slot]); } } +static inline void __rcu *ma_get_rcu_slot(const struct maple_node *mn, + unsigned char slot) +{ + return _ma_get_rcu_slot(mn, slot, mt_node_type(mn)); +} static inline void ma_set_rcu_slot(const struct maple_node *mn, unsigned char slot, void *val) { @@ -398,27 +404,37 @@ static inline void ma_set_rcu_slot(const struct maple_node *mn, default: case maple_dense: RCU_INIT_POINTER(mt_to_node(mn)->slot[slot], val); + break; case maple_sparse_6: RCU_INIT_POINTER(mt_to_node(mn)->ms6.slot[slot], val); + break; case maple_sparse_9: RCU_INIT_POINTER(mt_to_node(mn)->ms9.slot[slot], val); + break; case maple_sparse_16: RCU_INIT_POINTER(mt_to_node(mn)->ms16.slot[slot], val); + break; case maple_sparse_21: RCU_INIT_POINTER(mt_to_node(mn)->ms21.slot[slot], val); + break; case maple_sparse_32: RCU_INIT_POINTER(mt_to_node(mn)->ms32.slot[slot], val); + break; case maple_sparse_64: RCU_INIT_POINTER(mt_to_node(mn)->ms64.slot[slot], val); + break; case maple_range_16: case maple_leaf_16: RCU_INIT_POINTER(mt_to_node(mn)->mr16.slot[slot], val); + break; case maple_range_32: case maple_leaf_32: RCU_INIT_POINTER(mt_to_node(mn)->mr32.slot[slot], val); + break; case maple_range_64: case maple_leaf_64: RCU_INIT_POINTER(mt_to_node(mn)->mr64.slot[slot], val); + break; } } static inline void ma_cp_rcu_slot(struct maple_node *dst, @@ -432,33 +448,44 @@ static inline void ma_update_rcu_slot(const struct maple_node *mn, enum maple_type type = mt_node_type(mn); switch (type) { + case maple_range_64: + case maple_leaf_64: + rcu_assign_pointer(mt_to_node(mn)->mr64.slot[slot], val); + break; default: case maple_dense: rcu_assign_pointer(mt_to_node(mn)->slot[slot], val); + break; case maple_sparse_6: rcu_assign_pointer(mt_to_node(mn)->ms6.slot[slot], val); + break; case maple_sparse_9: rcu_assign_pointer(mt_to_node(mn)->ms9.slot[slot], val); + break; case maple_sparse_16: rcu_assign_pointer(mt_to_node(mn)->ms16.slot[slot], val); + break; case maple_sparse_21: rcu_assign_pointer(mt_to_node(mn)->ms21.slot[slot], val); + break; case maple_sparse_32: rcu_assign_pointer(mt_to_node(mn)->ms32.slot[slot], val); + break; case maple_sparse_64: rcu_assign_pointer(mt_to_node(mn)->ms64.slot[slot], val); + break; case maple_range_16: case maple_leaf_16: rcu_assign_pointer(mt_to_node(mn)->mr16.slot[slot], val); + break; case maple_range_32: case maple_leaf_32: rcu_assign_pointer(mt_to_node(mn)->mr32.slot[slot], val); - case maple_range_64: - case maple_leaf_64: - rcu_assign_pointer(mt_to_node(mn)->mr64.slot[slot], val); + break; } } -static void mas_update_limits(struct ma_state *ms, unsigned char slot) +static void mas_update_limits(struct ma_state *ms, unsigned char slot, + enum maple_type type) { if (slot >= MAPLE_NODE_SLOTS) return; @@ -467,15 +494,15 @@ static void mas_update_limits(struct ma_state *ms, unsigned char slot) return; if (ma_is_root(ms->node)) { - ms->max = mt_node_max(ms->node); + ms->max = mt_max[type]; ms->min = 0; } if (slot > 0) - ms->min = ma_get_pivot(ms->node, slot - 1); + ms->min = _ma_get_pivot(ms->node, slot - 1, type); - if (slot < mt_slot_count(ms->node) - 1) - ms->max = ma_get_pivot(ms->node, slot); + if (slot < mt_slots[type] - 1) + ms->max = _ma_get_pivot(ms->node, slot, type); } static inline void ma_encoded_parent(struct ma_state *mas) { @@ -492,7 +519,7 @@ static inline void ma_encoded_parent(struct ma_state *mas) slot = mt_parent_slot(parent); mas->node = mt_mk_node(gparent, mt_parent_enum(parent)); ma_set_slot(mas, slot); - mas_update_limits(mas, slot); + mas_update_limits(mas, slot, mt_parent_enum(parent)); mas->node = ma_get_rcu_slot(gparent, slot); return; } @@ -626,20 +653,21 @@ static void *mas_start(struct ma_state *mas) return entry; } -unsigned char ma_data_end(const struct maple_node *mn) +unsigned char ma_data_end(const struct maple_node *mn, + const enum maple_type type) { unsigned char data_end = 0; unsigned long last; for (data_end = 0; data_end < mt_slot_count(mn) - 1; data_end++) { - last = ma_get_pivot(mn, data_end); + last = _ma_get_pivot(mn, data_end, type); if (last == 0 && data_end > 0) return data_end - 1; - if (last == mt_node_max(mn)) + if (last == mt_max[type]) return data_end - 1; } - if (ma_get_rcu_slot(mn, data_end) == NULL) + if (_ma_get_rcu_slot(mn, data_end, type) == NULL) data_end--; return data_end; @@ -714,14 +742,15 @@ static void ma_adopt_children(struct maple_node *parent) struct maple_node *child; unsigned char slot; - unsigned char slot_cnt = mt_slot_count(parent); + enum maple_type type = mt_node_type(parent); + unsigned char slot_cnt = mt_slots[type]; for (slot = 0; slot < slot_cnt; slot++) { if (slot != 0 && slot < slot_cnt - 1 && - ma_get_pivot(parent, slot) == 0) + _ma_get_pivot(parent, slot, type) == 0) break; - child = ma_get_rcu_slot(parent, slot); + child = _ma_get_rcu_slot(parent, slot, type); if (child) mt_set_parent(child, parent, slot); } @@ -817,7 +846,8 @@ static int ma_split(struct ma_state *mas, unsigned char slot) p_slot = mt_parent_slot(mas->node); ma_encoded_parent(mas); old_parent = mas->node; - p_end = ma_data_end(mas->node); + ptype = mt_node_type(mas->node); + p_end = ma_data_end(mas->node, ptype); if (p_end >= slot_cnt - 1) { /* Must split the parent */ split = ma_split(mas, p_slot); @@ -830,7 +860,7 @@ static int ma_split(struct ma_state *mas, unsigned char slot) ma_set_slot(mas, p_slot); } ptype = mt_node_type(mas->node); - mas_update_limits(mas, p_slot); + mas_update_limits(mas, p_slot, ptype); mas->node = ma_get_rcu_slot(old_parent, p_slot); } @@ -881,7 +911,7 @@ static int ma_split(struct ma_state *mas, unsigned char slot) if (split < slot_cnt - 1) pivot = ma_get_pivot(mas->node, split); else - pivot = ma_get_pivot(left, split -1); + pivot = ma_get_pivot(left, split - 1); ma_link(left, new_parent, p_slot, pivot, ctype); @@ -918,7 +948,7 @@ static int ma_split(struct ma_state *mas, unsigned char slot) static int _ma_insert(struct ma_state *mas, void *entry, unsigned char slot) { struct maple_node *p_mn; - int o_end = ma_data_end(mas->node); // Old end + int o_end = ma_data_end(mas->node, mt_node_type(mas->node)); // Old end int n_end = o_end; // New end unsigned long max = mas->max; unsigned long min = mas->min; @@ -952,7 +982,7 @@ static int _ma_insert(struct ma_state *mas, void *entry, unsigned char slot) slot -= split; n_end -= split; - o_end = ma_data_end(mas->node); // Old end is not so old now. + o_end -= split; } /* Save the node in case we are not appending. */ p_mn = mas->node; @@ -1091,15 +1121,16 @@ done: return ret; } -static bool mas_search_slots(struct ma_state *ms, unsigned long val) +static bool mas_search_slots(struct ma_state *ms, unsigned long val, + enum maple_type type) { - int i = 0; + int i; bool ret = false; - unsigned char slot_cnt = mt_slot_count(ms->node); + unsigned char slot_cnt = mt_slots[type]; unsigned long pivot = 0; for (i = 0; i < slot_cnt - 1; i++) { - pivot = ma_get_pivot(ms->node, i); + pivot = _ma_get_pivot(ms->node, i, type); if (i != 0 && pivot == 0) { ma_set_slot(ms, MAPLE_NODE_SLOTS); return ret; @@ -1114,31 +1145,33 @@ static bool mas_search_slots(struct ma_state *ms, unsigned long val) i++; } - if (ma_get_rcu_slot(ms->node, i)) + if (_ma_get_rcu_slot(ms->node, i, type)) ret = true; ma_set_slot(ms, i); return ret; } -bool mas_traverse(struct ma_state *mas) +bool mas_traverse(struct ma_state *mas, enum maple_type type) { unsigned char slot = ma_get_slot(mas); - mas_update_limits(mas, slot); - if (mt_is_leaf(mas->node)) + mas_update_limits(mas, slot, type); + if (type < maple_range_16) return false; - mas->node = ma_get_rcu_slot(mas->node, slot); + mas->node = _ma_get_rcu_slot(mas->node, slot, type); return true; } bool _mas_walk(struct ma_state *mas) { mas->node = mas_start(mas); + enum maple_type type; do { - if (!mas_search_slots(mas, mas->index)) - return mt_is_leaf(mas->node); - } while (mas_traverse(mas)); + type = mt_node_type(mas->node); + if (!mas_search_slots(mas, mas->index, type)) + return type < maple_range_16; + } while (mas_traverse(mas, type)); return true; } diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 8416286d191b..948e13c30fff 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -399,7 +399,9 @@ static int maple_tree_seed(void) check_nomem(&tree); check_seq(&tree, 16, false); + printk("start\n"); check_seq(&tree, 1000, true); + printk("end\n"); check_new_node(&tree); check_lower_bound_split(&tree); diff --git a/tools/testing/radix-tree/ma_xa_benchmark.c b/tools/testing/radix-tree/ma_xa_benchmark.c index e7247818c6e1..e37f61db79a6 100644 --- a/tools/testing/radix-tree/ma_xa_benchmark.c +++ b/tools/testing/radix-tree/ma_xa_benchmark.c @@ -29,15 +29,14 @@ int __weak main(void) clock_t start, end; double xa_t, mt_t; unsigned long xa_m, mt_m; - void *entry; - unsigned long i, max = 100000; + void *entry = &main; + unsigned long i, max = 200000; struct rusage sru, eru; /* xarray first */ radix_tree_init(); DEFINE_XARRAY(xa); - entry = &xa; getrusage(RUSAGE_SELF, &sru); for (i = 0; i <= max; i++) { @@ -82,11 +81,11 @@ int __weak main(void) mt_m = mt_get_alloc_size(); printk("mt %lu inserts: %fs using %luK in %d allocations\n", max, mt_t, mt_m/1024, nr_allocated); +// mt_dump(&mt); mtree_destroy(&mt); printk(" Delta : %f (%f%% of xa time) %ldK\n", xa_t - mt_t, (mt_t - xa_t)/xa_t * 100, (signed long)(xa_m - mt_m)/1024); rcu_barrier(); - printk("Done\n"); return 0; } -- 2.50.1