From: Liam R. Howlett Date: Tue, 27 Oct 2020 16:52:49 +0000 (-0400) Subject: maple_tree: Implement optimized fork splitting. X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=e22f8a9da9f7218d6302d230566d0a9f9438c864;p=users%2Fjedix%2Flinux-maple.git maple_tree: Implement optimized fork splitting. When using mas_for_each with pre-allocations, split so that there are more slots available to the left. If this results in an insufficient node to the left, then rebalance on mas_destroy(). Signed-off-by: Liam R. Howlett --- diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 926150c1d5c5d..5b42889692377 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -205,6 +205,7 @@ struct ma_state { struct maple_alloc *alloc; /* Allocated nodes for this operation */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; + unsigned char mas_flags; }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index a0e66b15ff07b..5ee0a6d96ef4d 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -19,6 +19,10 @@ #define MA_ROOT_PARENT 1 +/* Maple state flags */ +#define MA_STATE_ADVANCED_OP 1 +#define MA_STATE_LAST_SLOT 2 + #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) #define ma_mnode_ptr(x) ((struct maple_node *)(x)) #define ma_enode_ptr(x) ((struct maple_enode *)(x)) @@ -735,6 +739,7 @@ static inline void mas_dup_state(struct ma_state *dst, struct ma_state *src) dst->max = src->max; dst->min = src->min; dst->offset = src->offset; + dst->mas_flags = src->mas_flags; } /* @@ -1422,7 +1427,8 @@ static inline int mab_no_null_split(struct maple_big_node *b_node, * @mid_split: The second split, if required. 0 otherwise. * Returns: The first split location. */ -static inline int mab_calc_split(struct maple_big_node *b_node, +static inline int mab_calc_split(struct ma_state *mas, + struct maple_big_node *b_node, unsigned char *mid_split) { int split = b_node->b_end / 2; // Assume equal split. @@ -1432,14 +1438,21 @@ static inline int mab_calc_split(struct maple_big_node *b_node, split = b_node->b_end / 3; *mid_split = split * 2; } else { + unsigned char min = mt_min_slots[b_node->type] - 1; + *mid_split = 0; + if ((mas->mas_flags & MA_STATE_LAST_SLOT) && + ma_is_leaf(b_node->type)) { + min = 2; + split = mt_slots[b_node->type] - min; + } /* Avoid having a range less than the slot count unless it * causes one node to be deficient. * NOTE: mt_min_slots is 1 based, b_end and split are zero. */ while (((b_node->pivot[split] - b_node->min) < slot_count - 1) && (split < slot_count - 1) && - (b_node->b_end - split > mt_min_slots[b_node->type] - 1)) + (b_node->b_end - split > min)) split++; } @@ -1581,6 +1594,16 @@ static inline void mas_descend_adopt(struct ma_state *mas) } } +static inline void mas_advanced_may_rebalance(struct ma_state *mas) +{ + if (!(mas->mas_flags & MA_STATE_ADVANCED_OP)) + return; + + if (mte_is_root(mas->node)) + return; + + mas->mas_flags |= MA_STATE_LAST_SLOT; +} /* * mas_store_b_node() - Store an @entry into the b_node while also copying the * data from a maple encoded node. 
@@ -1624,6 +1647,9 @@ static inline unsigned char mas_store_b_node(struct ma_state *mas, // Handle new range ending before old range ends piv = _mas_safe_pivot(mas, pivots, slot, b_node->type); if (piv > mas->last) { + if (piv == ULONG_MAX) + mas_advanced_may_rebalance(mas); + b_node->slot[++b_end] = contents; if (!contents) b_node->gap[b_end] = piv - mas->last + 1; @@ -1958,7 +1984,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas, if (b_node->b_end < slot_count) { split = b_node->b_end; } else { - split = mab_calc_split(b_node, mid_split); + split = mab_calc_split(mas, b_node, mid_split); *right = mas_new_ma_node(mas, b_node); } @@ -2380,14 +2406,105 @@ static inline int mas_rebalance(struct ma_state *mas, static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char mas_end) { - struct maple_big_node b_node; + enum maple_type mt = mte_node_type(mas->node); + struct maple_node reuse, *newnode, *parent, *new_left; + struct maple_enode *eparent; + unsigned char offset, tmp, split = mt_slots[mt] / 2; + void **l_slots, **slots; + unsigned long *l_pivs, *pivs, gap; - /* Slow path. */ - memset(&b_node, 0, sizeof(struct maple_big_node)); - b_node.type = mte_node_type(mas->node); - mas_mab_cp(mas, 0, mas_end, &b_node, 0); - b_node.b_end = mas_end; - mas_rebalance(mas, &b_node); + MA_STATE(l_mas, mas->tree, mas->index, mas->last); + + mas_dup_state(&l_mas, mas); + mas_prev_sibling(&l_mas); + + // set up node. + if (mt_in_rcu(mas->tree)) { + mas_node_count(mas, 3); // both left and right as well as parent. + if (mas_is_err(mas)) // FIXME + return; + + newnode = mas_pop_node(mas); + } else { + memset(&reuse, 0, sizeof(struct maple_node)); + newnode = &reuse; + } + + newnode->parent = mas_mn(mas)->parent; + pivs = ma_pivots(newnode, mt); + + + + slots = ma_slots(newnode, mt); + pivs = ma_pivots(newnode, mt); + l_slots = ma_slots(mas_mn(&l_mas), mt); + l_pivs = ma_pivots(mas_mn(&l_mas), mt); + if (!l_slots[split]) + split++; + tmp = mas_data_end(&l_mas) - split; + + memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); + memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); + pivs[tmp] = l_mas.max; + memcpy(slots + tmp, ma_slots(mas_mn(mas), mt), sizeof(void *) * mas_end); + memcpy(pivs + tmp, ma_pivots(mas_mn(mas), mt), sizeof(unsigned long) * mas_end); + + l_mas.max = l_pivs[split]; + mas->min = l_mas.max + 1; + eparent = mt_mk_node(mte_parent(l_mas.node), + mas_parent_enum(&l_mas, l_mas.node)); + if (!mt_in_rcu(mas->tree)) { + + memcpy(mas_mn(mas), newnode, sizeof(struct maple_node)); + mte_set_pivot(eparent, mte_parent_slot(l_mas.node), + l_pivs[split]); + // Remove data from l_pivs. + tmp = split + 1; + memset(l_pivs + tmp, 0, + sizeof(unsigned long) * (mt_pivots[mt] - tmp)); + memset(l_slots + tmp, 0, + sizeof(void *) * (mt_slots[mt] - tmp)); + + goto done; + } + + // RCU requires replacing both l_mas, mas, and parent. + // replace mas + mas->node = mt_mk_node(newnode, mt); + + // replace l_mas + new_left = mas_pop_node(mas); + new_left->parent = mas_mn(&l_mas)->parent; + mt = mte_node_type(l_mas.node); + slots = ma_slots(new_left, mt); + pivs = ma_pivots(new_left, mt); + memcpy(slots, l_slots, sizeof(void *) * split); + memcpy(pivs, l_pivs, sizeof(unsigned long) * split); + l_mas.node = mt_mk_node(new_left, mt); + + + // replace parent. 
+ offset = mte_parent_slot(mas->node); + mt = mas_parent_enum(&l_mas, l_mas.node); + parent = mas_pop_node(mas); + slots = ma_slots(parent, mt); + pivs = ma_pivots(parent, mt); + memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node)); + slots[offset] = mas->node; + slots[offset - 1] = l_mas.node; + pivs[offset - 1] = l_mas.max; + eparent = mt_mk_node(parent, mt); +done: + gap = mas_leaf_max_gap(mas); + mte_set_gap(eparent, mte_parent_slot(mas->node), gap); + gap = mas_leaf_max_gap(&l_mas); + mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); + mas_ascend(mas); + + if (mt_in_rcu(mas->tree)) + mas_replace(mas, false); + + mas_update_gap(mas); } static inline bool _mas_split_final_node(struct maple_subtree_state *mast, @@ -2601,7 +2718,7 @@ static inline int mas_split(struct ma_state *mas, if (mas_push_right(mas, height, &mast)) break; - split = mab_calc_split(mast.bn, &mid_split); + split = mab_calc_split(mas, mast.bn, &mid_split); mast_split_data(&mast, mas, split); // Usually correct, mab_mas_cp in the above call overwrites r->max. mast.r->max = mas->max; @@ -3035,6 +3152,9 @@ static inline bool mas_node_store(struct ma_state *mas, void *entry, if (mas->last == max) { // don't copy this offset offset_end++; } else if (mas->last < max) { // new range ends in this range. + if (max == ULONG_MAX) + mas_advanced_may_rebalance(mas); + new_end++; offset_end = offset; } else if (mas->last == mas->max) { // runs right to the end of the node. @@ -3056,8 +3176,10 @@ static inline bool mas_node_store(struct ma_state *mas, void *entry, if (new_end >= mt_slots[mt]) // Not enough room return false; - if (new_end <= mt_min_slots[mt]) // not enough data. + if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[mt]) && + !(mas->mas_flags & MA_STATE_ADVANCED_OP)) {// not enough data. return false; + } // set up node. if (mt_in_rcu(mas->tree)) { @@ -3118,6 +3240,7 @@ done: return true; } + static inline bool mas_slot_store(struct ma_state *mas, void *entry, unsigned long min, unsigned long max, unsigned char end, void *content, @@ -3139,25 +3262,21 @@ static inline bool mas_slot_store(struct ma_state *mas, void *entry, lmax = mas_logical_pivot(mas, pivots, offset + 1, mt); if (max > mas->last) // going to split a single entry. - return mas_node_store(mas, entry, min, max, end, content, - offset); + goto try_node_store; if (lmax < mas->last) // going to overwrite too many slots. - return mas_node_store(mas, entry, min, max, end, content, - offset); + goto try_node_store; if (min == mas->index) { if (lmax <= mas->last) // overwriting two or more ranges with one. - return mas_node_store(mas, entry, min, max, end, - content, offset); + goto try_node_store; slots[offset] = entry; pivots[offset] = mas->last; goto done; } else if (min < mas->index) { // split start if (lmax != mas->last) // Doesn't end on the next range end. 
- return mas_node_store(mas, entry, min, max, end, - content, offset); + goto try_node_store; if (offset + 1 < mt_pivots[mt]) pivots[offset + 1] = mas->last; @@ -3173,6 +3292,9 @@ static inline bool mas_slot_store(struct ma_state *mas, void *entry, done: mas_update_gap(mas); return true; + +try_node_store: + return mas_node_store(mas, entry, min, max, end, content, offset); } static inline void *_mas_store(struct ma_state *mas, void *entry, bool overwrite) @@ -4742,6 +4864,7 @@ EXPORT_SYMBOL(mtree_destroy); */ void *mas_store(struct ma_state *mas, void *entry) { + mas->mas_flags |= MA_STATE_ADVANCED_OP; if (mas->index <= mas->last) return _mas_store(mas, entry, true); @@ -4806,15 +4929,16 @@ void mas_destroy(struct ma_state *mas) // it is possible that the number inserted is less than the expected // number. To fix an invalid final node, a check is performed here to // rebalance the previous node with the final node. - if ((mas->max == ULONG_MAX) && !mas_is_err(mas) && !mas_is_start(mas) && - mas_searchable(mas) && !mte_is_root(mas->node)) { - unsigned char end = mas_data_end(mas); + if (mas->mas_flags & MA_STATE_LAST_SLOT) { + unsigned char end; + unsigned long range_min, range_max; - if (end < mt_min_slot_count(mas->node)) { - printk("destroy rebalance %p\n", mas->node); - mt_dump(mas->tree); + __mas_walk(mas, &range_min, &range_max); + end = mas_data_end(mas) + 1; + if (end < mt_min_slot_count(mas->node) - 1) { mas_destroy_rebalance(mas, end); } + mas->mas_flags &= ~MA_STATE_LAST_SLOT; } while (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) { diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index bc5414bdb9ec1..b1c14da1a17d5 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -9,6 +9,7 @@ #include #define MTREE_ALLOC_MAX 0x2000000000000Ul +#define CONFIG_DEBUG_MAPLE_TREE static int mtree_insert_index(struct maple_tree *mt, unsigned long index, gfp_t gfp) { @@ -265,7 +266,7 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 1); // Check the node is only one node. mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas)); + MT_BUG_ON(mt, mas_allocated(&mas) != 0); MT_BUG_ON(mt, mn == NULL); MT_BUG_ON(mt, mn->slot[0] != NULL); MT_BUG_ON(mt, mn->slot[1] != NULL); @@ -360,7 +361,7 @@ static noinline void check_new_node(struct maple_tree *mt) i = 1; smn = mas.alloc; while(i < total) { - for (j = 0; j < MAPLE_NODE_SLOTS - 1; j++) { + for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) { i++; MT_BUG_ON(mt, !smn->slot[j]); if (i == total) @@ -409,40 +410,38 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 0); - mas_node_count(&mas, MAPLE_NODE_SLOTS); // Request + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); // Request MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - printk("alloc node total %u\n", mas.alloc->total); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_NODE_SLOTS); - printk("alloc node count %u\n", mas.alloc->node_count); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_NODE_SLOTS - 2); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); mn = mas_pop_node(&mas); // get the next node. 
- printk("alloc node count %u\n", mas.alloc->node_count); MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_NODE_SLOTS - 3); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_NODE_SLOTS - 1); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2); mas_push_node(&mas, (struct maple_enode *)mn); - printk("alloc node count %u\n", mas.alloc->node_count); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_NODE_SLOTS - 2); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_NODE_SLOTS); - - printk("Add one on the end\n"); - printk("alloc node count %u\n", mas.alloc->node_count); - printk("alloc node total %u\n", mas.alloc->total); - printk("Request %u\n", MAPLE_NODE_SLOTS + 1); - mas_node_count(&mas, MAPLE_NODE_SLOTS + 1); // Request - printk("alloc req %u\n", mas_alloc_req(&mas)); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + + // Check the limit of pop/push/pop + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); // Request MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); MT_BUG_ON(mt, mas_alloc_req(&mas)); - printk("alloc node count %u\n", mas.alloc->node_count); - printk("alloc node total %u\n", mas.alloc->total); + MT_BUG_ON(mt, mas.alloc->node_count); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); + mn = mas_pop_node(&mas); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + mas_push_node(&mas, (struct maple_enode *)mn); + MT_BUG_ON(mt, mas.alloc->node_count); + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); mn = mas_pop_node(&mas); ma_free_rcu(mn); - for (i = 1; i <= MAPLE_NODE_SLOTS; i++) { + for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) { mn = mas_pop_node(&mas); ma_free_rcu(mn); } @@ -35294,7 +35293,7 @@ static void check_dfs_preorder(struct maple_tree *mt) mas_store(&mas, xa_mk_value(count)); MT_BUG_ON(mt, mas_is_err(&mas)); } - mas_empty_alloc(&mas); + mas_destroy(&mas); rcu_barrier(); //pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n", // max, mt_get_alloc_size()/1024, nr_allocated, @@ -35349,7 +35348,7 @@ static noinline void check_forking(struct maple_tree *mt) newmas.last = mas.last; mas_store(&newmas, val); } - mas_empty_alloc(&newmas); + mas_destroy(&newmas); mt_validate(&newmt); mas_reset(&mas); mas_reset(&newmas); @@ -35393,7 +35392,7 @@ static noinline void bench_forking(struct maple_tree *mt) newmas.last = mas.last; mas_store(&newmas, val); } - mas_empty_alloc(&newmas); + mas_destroy(&newmas); mt_validate(&newmt); mt_set_non_kernel(0); mtree_destroy(&newmt); @@ -35409,6 +35408,7 @@ static int maple_tree_seed(void) void *ptr = &set; pr_info("\nTEST STARTING\n\n"); + #if 0 mtree_init(&tree, MAPLE_ALLOC_RANGE); bench_node_store(&tree);