From 10b8acfacff6d62cae96657ea064f3b22e2b333d Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Mon, 22 Sep 2025 15:37:12 -0400
Subject: [PATCH] add split and push to rebalance

Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
 lib/maple_tree.c | 231 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 222 insertions(+), 9 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index b30567e56b81..3792ba2782ff 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -3089,17 +3089,154 @@ static inline void cp_data_calc(struct maple_copy *cp,
 	cp->data += r_wr_mas->mas->end - r_wr_mas->offset_end;
 }
 
+static bool data_fits(struct ma_state *sib, struct ma_state *mas,
+		struct maple_copy *cp)
+{
+	unsigned char space;
+	unsigned char new_data;
+	unsigned char end;
+	enum maple_type type;
+	void __rcu **slots;
+
+	type = mte_node_type(mas->node);
+	space = 2 * mt_slots[type];
+	end = sib->end;
+
+	new_data = end + 1 + cp->data;
+	if (new_data > space)
+		return false;
+
+	/*
+	 * This is off by one by design. The extra space is left to reduce
+	 * jitter in operations that add then remove two entries.
+	 *
+	 * end is an index while new space and data are both sizes. Adding one
+	 * to end to convert the index to a size means that the below
+	 * calculation should be <=, but we want to keep an extra space in nodes
+	 * to reduce jitter.
+	 */
+	if (new_data < space - 1)
+		return true;
+
+	/* Leave some room to reduce jitter. */
+	if (!ma_is_leaf(type))
+		return false;
+
+	/*
+	 * new_data == space, an exact fit can be foiled by a NULL at the split
+	 * location.
+	 */
+	if (sib->max < mas->min) {
+		/*
+		 * Pushing to left, there must be room so the last slot is not
+		 * occupied.
+		 *
+		 * The new data is causing the two nodes to equate to exactly 32
+		 * in size, so the left must either have 1 or 2 slots empty.
+		 */
+		new_data = mt_slots[type] - end - 2;
+		if (mas->offset == new_data ||
+		    mas->offset + cp->end == new_data) {
+			new_data -= mas->offset;
+			slots = cp->slot;
+		} else {
+			slots = ma_slots(mas_mn(mas), type);
+		}
+	} else {
+		unsigned char split;
+		/*
+		 * Pushing to right, there must be an overflow of data into sib,
+		 * which is almost full itself.
+		 *
+		 * The new data can add 1 or 2 entries in this case.
+		 * The split must fall either in the new data or the end of the
+		 * existing mas node.
+		 */
+		/* size - size to index (-1) */
+		split = (new_data - 1) / 2;
+
+		if (split >= mas->offset &&
+		    split <= mas->offset + cp->end + 1) {
+			new_data = split - mas->offset;
+			slots = cp->slot;
+		} else {
+			if (mas->offset < split) {
+				/*
+				 * The last slot in the previous node is the one
+				 * before the overflow.
+				 * The total is 32 here, so we know that it will
+				 * be 16/16 split.
+				 */
+
+				//new_data = mas->end - (split - end);
+				new_data = mas->end - (cp->data - 1 - split);
+			}
+			slots = ma_slots(mas_mn(mas), type);
+		}
+	}
+
+	if (slots[new_data])
+		return true;
+
+	return false;
+}
+
+static void push_data_sib(struct maple_copy *cp, struct ma_state *mas,
+		struct ma_state *sib)
+{
+	if (mte_is_root(mas->node))
+		goto no_push;
+
+
+	*sib = *mas;
+	mas_ascend(sib);
+	if (sib->offset) {
+		sib->offset--;
+		mas_descend(sib);
+		sib->end = mas_data_end(sib);
+		if (data_fits(sib, mas, cp)) {
+			/* Push left */
+			return;
+		}
+
+		mas_ascend(sib);
+		sib->offset++;
+	}
+
+	sib->end = mas_data_end(sib);
+	if (sib->offset >= sib->end)
+		goto no_push;
+
+	sib->offset++;
+	mas_descend(sib);
+	sib->end = mas_data_end(sib);
+	if (data_fits(sib, mas, cp)) {
+		/* Push right */
+		return;
+	}
+
+no_push:
+	sib->end = 0;
+}
+
 static inline void rebalance_data_calc(struct maple_copy *cp,
 		struct ma_wr_state *wr_mas, struct ma_state *sib)
 {
 	cp_data_calc(cp, wr_mas, wr_mas);
+	sib->end = 0;
+	if (cp->data >= mt_slots[wr_mas->type]) {
+		push_data_sib(cp, wr_mas->mas, sib);
+		if (sib->end) {
+			cp->data += sib->end + 1;
+			return;
+		}
+	}
+
 	if (((wr_mas->mas->min != 0) || (wr_mas->mas->max != ULONG_MAX)) &&
 	    (cp->data <= mt_min_slots[wr_mas->type])) {
 		rebalance_sib(wr_mas->mas, sib);
 		cp->data += sib->end + 1;
-	} else {
-		sib->end = 0;
 	}
 }
 /*
@@ -3135,7 +3272,7 @@ void multi_dst_setup(struct maple_copy *cp, struct ma_state *mas,
 
 	cp->split = (cp->data - 1) / 2;
 	cp->d_count = 2;
-	if (cp->data - 1 < mt_slots[mt] * 2) {
+	if (cp->data <= mt_slots[mt] * 2) {
 		unsigned char off;
 		unsigned char s;
 
@@ -3153,11 +3290,10 @@ void multi_dst_setup(struct maple_copy *cp, struct ma_state *mas,
 			unsigned char s_off;
 
 			s_off = cp->src[s].end - cp->src[s].start;
-			if (s)
-				off--;
 
 			if (s_off >= off)
 				break;
+			s_off++;
 			off -= s_off;
 		}
 		off += cp->src[s].start;
@@ -3455,16 +3591,17 @@ static inline bool rebalance_ascend(struct maple_copy *cp,
 {
 	struct ma_state *mas;
 	unsigned long min;
-	struct ma_state *l, *r;
+	struct ma_state *r;
 
 	mas = wr_mas->mas;
-	if (sib->min > mas->max) { /* Move right succeeded */
+	if (!sib->end) {
+		min = mas->min;
+		r = mas;
+	} else if (sib->min > mas->max) { /* Move right succeeded */
 		min = mas->min;
-		l = mas;
 		r = sib;
 	} else {
 		min = sib->min;
-		l = sib;
 		r = mas;
 	}
 
@@ -4495,6 +4632,64 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas,
 	return;
 }
 
+static bool split_ascend(struct maple_copy *cp,
+		struct ma_wr_state *wr_mas, struct ma_state *sib)
+{
+	struct ma_state *mas;
+	unsigned long min, max;
+
+	mas = wr_mas->mas;
+	min = mas->min; /* push right, or normal split */
+	max = mas->max;
+	if (sib->end) {
+		if (sib->max < mas->min) {
+			min = sib->min; /* push left */
+		} else {
+			max = sib->max; /* push right */
+		}
+	}
+
+	cp_dst_to_slots(cp, min, max, mas);
+	if (!cp->min && cp->max == ULONG_MAX) {
+		rebalance_new_root(cp, mas);
+		return false;
+	}
+
+	if (cp->d_count == 1 && !sib->end) {
+		cp->dst[0].node->parent = ma_parent_ptr(mas_mn(mas)->parent);
+		return false;
+	}
+
+	cp->height++;
+	mas_ascend(mas);
+	wr_mas_setup(wr_mas, mas);
+	wr_mas->offset_end = mas->offset;
+	if (sib->end) {
+		if (sib->min == min)
+			wr_mas->mas->offset--;
+		else
+			wr_mas->offset_end++;
+	}
+
+	return true;
+}
+
+static void split_data_calc(struct maple_copy *cp,
+		struct ma_wr_state *wr_mas, struct ma_state *sib)
+{
+	cp_data_calc(cp, wr_mas, wr_mas);
+	if (cp->data <= mt_slots[wr_mas->type]) {
+		MAS_BUG_ON(wr_mas->mas, cp->height == 1);
+		sib->end = 0;
+		return;
+	}
+
+	push_data_sib(cp, wr_mas->mas, sib);
+	if (sib->end) {
+		cp->data += sib->end + 1;
+	}
+}
+
 /*
  * mas_wr_split() - Expand one node into two
  * @wr_mas: The write maple state
@@ -4509,6 +4704,24 @@ static noinline_for_kasan void mas_wr_split(struct ma_wr_state *wr_mas)
 
 	trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
 
+	//mt_dump(wr_mas->mas->tree, mt_dump_hex);
+	mas = wr_mas->mas;
+	cp_leaf_init(&cp, mas, wr_mas, wr_mas);
+	do {
+		split_data_calc(&cp, wr_mas, &sib);
+		multi_src_setup(&cp, wr_mas, wr_mas, &sib);
+		multi_dst_setup(&cp, mas, wr_mas->type);
+		cp_data_write(&cp, mas);
+	} while (split_ascend(&cp, wr_mas, &sib));
+
+	old_enode = mas->node;
+	mas->node = cp.slot[0];
+	mas_wmb_replace(mas, old_enode, cp.height);
+	mtree_range_walk(mas);
+
+	//mt_dump(wr_mas->mas->tree, mt_dump_hex);
+	mt_validate(wr_mas->mas->tree);
+
 #else
 	struct maple_big_node b_node;
 
-- 
2.51.0