From: Liam R. Howlett Date: Mon, 4 Nov 2024 15:32:03 +0000 (-0500) Subject: maple_tree: Create new mas_wr_split() X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=0cd9bc7c3e8c40bdcea1f7c78be3ee88b9e2ad71;p=users%2Fjedix%2Flinux-maple.git maple_tree: Create new mas_wr_split() Stop using the large struct big_node and use logic with two allocated nodes. Signed-off-by: Liam R. Howlett --- diff --git a/lib/maple_tree.c b/lib/maple_tree.c index d0ae808f3a149..ff3f04a2bce1a 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -486,6 +486,43 @@ enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) return 0; } +/* + * mte_set_parent() - Set the parent node and encode the slot + * @enode: The encoded maple node. + * @parent: The encoded maple node that is the parent of @enode. + * @slot: The slot that @enode resides in @parent. + * + * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the + * parent type. + */ +static inline +void mte_set_parent(struct maple_enode *enode, + const struct maple_enode *parent, unsigned char slot) +{ + unsigned long val = (unsigned long)parent; + unsigned long shift; + unsigned long type; + enum maple_type p_type = mte_node_type(parent); + + switch (p_type) { + case maple_range_64: + case maple_arange_64: + shift = MAPLE_PARENT_SLOT_SHIFT; + type = MAPLE_PARENT_RANGE64; + break; + default: + case maple_dense: + case maple_leaf_64: + shift = type = 0; + break; + } + + //printk("\t\t\tset %p -> parent %p\n", enode, parent); + val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ + val |= (slot << shift) | type; + mte_to_node(enode)->parent = ma_parent_ptr(val); +} + /* * mas_set_parent() - Set the parent node and encode the slot * @mas: The maple state @@ -1678,6 +1715,7 @@ static inline void mas_update_gap(struct ma_state *mas) if (mte_is_root(mas->node)) return; + //printk("Updating gap for %p\n", mas->node); max_gap = mas_max_gap(mas); pslot = mte_parent_slot(mas->node); @@ -1777,7 +1815,9 @@ static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) end = ma_data_end(node, mt, pivots, mas->max); for (offset = mas->offset; offset <= end; offset++) { entry = mas_slot_locked(mas, slots, offset); + //printk("check %p[%u/%u] %p\n", mas->node, end, offset, entry); if (mte_parent(entry) == node) { + //printk(" found entry at %u\n", offset); *child = *mas; mas->offset = offset + 1; child->offset = offset; @@ -1900,18 +1940,7 @@ static inline int mab_calc_split(struct ma_state *mas, split = b_end / 3; *mid_split = split * 2; } else { - slot_min = mt_min_slots[bn->type]; - *mid_split = 0; - /* - * Avoid having a range less than the slot count unless it - * causes one node to be deficient. - * NOTE: mt_min_slots is 1 based, b_end and split are zero. 
- */ - while ((split < slot_count - 1) && - ((bn->pivot[split] - min) < slot_count - 1) && - (b_end - split > slot_min)) - split++; } /* Avoid ending a node on a NULL entry */ @@ -2594,6 +2623,7 @@ static inline void mas_topiary_replace(struct ma_state *mas, tmp[i] = tmp_next[i]; } + //printk("Collect discarded\n"); /* Collect the old nodes that need to be discarded */ if (mte_is_leaf(old_enode)) return mas_free(mas, old_enode); @@ -2971,6 +3001,7 @@ static inline void mas_rebalance(struct ma_state *mas, MA_STATE(l_mas, mas->tree, mas->index, mas->last); MA_STATE(r_mas, mas->tree, mas->index, mas->last); + //printk("Rebalance\n"); trace_ma_op(__func__, mas); /* @@ -3125,263 +3156,772 @@ done: mas_update_gap(mas); } +struct ma_node_part { + unsigned char size; + unsigned char pos; + unsigned char dst_max_off; + unsigned long pivots[3]; + void *slots[3]; + unsigned long gaps[2]; + bool unfinished; +}; + +struct ma_node_state { + struct maple_node *node; + struct maple_enode *enode; + unsigned long min, max; + unsigned long max_gap; + void __rcu **slots; + unsigned long *pivots; + unsigned long *gaps; + unsigned char offset; /* Current operating offset */ + unsigned char insert; + enum maple_type type; +}; + +static inline +void mns_node_part_leaf_init(struct ma_node_part *ma_part, + struct ma_wr_state *wr_mas) +{ + ma_part->pos = 0; + ma_part->size = 0; + //printk("%s: %lx - %lx store %lx - %lx\n", __func__, + // wr_mas->r_min, wr_mas->r_max, + // wr_mas->mas->index, wr_mas->mas->last); + if (wr_mas->r_min < wr_mas->mas->index) { + ma_part->pivots[0] = wr_mas->mas->index - 1; + ma_part->slots[0] = wr_mas->content; + ma_part->size++; + } + + ma_part->pivots[ma_part->size] = wr_mas->mas->last; + ma_part->slots[ma_part->size] = wr_mas->entry; + ma_part->size++; + + if (wr_mas->r_max > wr_mas->mas->last) { + ma_part->pivots[ma_part->size] = wr_mas->r_max; + ma_part->slots[ma_part->size] = wr_mas->content; + ma_part->size++; + } + + ma_part->unfinished = false; + ma_part->dst_max_off = 255; +} + +static inline +void mns_node_part_init(struct ma_node_part *ma_part, + struct ma_node_state *left, struct ma_node_state *right) +{ + ma_part->slots[0] = left->enode; + ma_part->pivots[0] = left->max; + ma_part->gaps[0] = left->max_gap; + + ma_part->slots[1] = right->enode; + ma_part->pivots[1] = right->max; + ma_part->gaps[1] = right->max_gap; + + ma_part->pos = 0; + ma_part->size = 2; + ma_part->unfinished = false; + ma_part->dst_max_off = 255; +} + +static inline +void mns_insert_part(struct ma_node_part *part, + struct ma_node_state *dst) +{ + //printk("insert pos %u/%u %u/%u\n", part->pos, part->size, + // dst->offset, part->dst_max_off); + + while (dst->offset < mt_slots[dst->type]) { + //printk("Store part %u into %u %p\n", part->pos, dst->offset, part->slots[part->pos]); + dst->slots[dst->offset] = part->slots[part->pos]; + if (dst->gaps) + dst->gaps[dst->offset] = part->gaps[part->pos]; + + if (!ma_is_leaf(dst->type)) + mte_set_parent(part->slots[part->pos], + dst->enode, dst->offset); + + if (dst->offset < mt_pivots[dst->type]) + dst->pivots[dst->offset] = part->pivots[part->pos]; + //printk ("offset %lx\n", part->pivots[part->pos]); + + dst->offset++; + dst->max = part->pivots[part->pos]; + //printk("Offset is %u, use max for pivot\n", dst->offset); + part->pos++; + //printk("dst offset is %u\n", dst->offset); + if (part->pos >= part->size) { + //printk("pos >= size\n"); + part->unfinished = false; + return; /* Nothing to do */ + } + + if (dst->offset > part->dst_max_off) { + //printk("push 
part to next node\n"); + /* push to next node */ + part->unfinished = true; + return; + } + //printk("dst offset is %u max is %u\n", dst->offset, part->dst_max_off); + + } + + //printk("OUT OF ROOM??\n"); + /* Out of room.. */ + //WARN_ON_ONCE(1); + part->unfinished = true; +} + +static inline +void _mns_node_init(struct ma_node_state *mns, struct maple_node *node, + enum maple_type type) +{ + mns->node = node; + mns->type = type; + mns->max_gap = 0; + mns->offset = 0; + mns->slots = ma_slots(node, type); + mns->pivots = ma_pivots(node, type); + mns->gaps = ma_gaps(node, type); + mns->alloc = false; +} + +static inline +void mns_node_init(struct ma_node_state *mns, struct maple_node *node, + enum maple_type type) +{ + _mns_node_init(mns, node, type); + mns->enode = mt_mk_node(node, type); +} + +static inline +void mns_mas_init(struct ma_node_state *mns, struct ma_state *mas) +{ + struct maple_node *node = mte_to_node(mas->node); + enum maple_type type = mte_node_type(mas->node); + + _mns_node_init(mns, node, type); + mns->enode = mas->node; + mns->insert = mas->offset; +} + /* - * mas_split_final_node() - Split the final node in a subtree operation. - * @mast: the maple subtree state - * @mas: The maple state - * @height: The height of the tree in case it's a new root. + * @src: The maple node state of the source + * @dst: The maple node state of the destination + * @len: The number of offsets to copy + * */ -static inline void mas_split_final_node(struct maple_subtree_state *mast, - struct ma_state *mas, int height) +static inline void mns_cp(struct ma_node_state *src, struct ma_node_state *dst, + unsigned char len) { - struct maple_enode *ancestor; + unsigned long max; + size_t size; + + //printk("Cp %p %u-%u\n", dst->node, dst->offset, dst->offset + len - 1); + //printk("src %p %u-%u\n", src->node, src->offset, src->offset + len - 1); + size = len * sizeof(void *); + //printk("Copy %lu (%lu)\n", size, len); + memcpy(dst->slots + dst->offset, src->slots + src->offset, size); + + size = len * sizeof(unsigned long); + if (src->gaps) + memcpy(dst->gaps + dst->offset, src->gaps + src->offset, size); + + BUG_ON(src->offset + len > mt_slots[src->type]); + if (src->offset + len > mt_pivots[src->type]) { + size = mt_pivots[src->type] - src->offset; + max = src->max; + //printk("Avoid overflow, use max %lx\n", max); + } else { + size = len; + max = src->pivots[src->offset + len - 1]; + //printk("use max %lx\n", max); + } - if (mte_is_root(mas->node)) { - if (mt_is_alloc(mas->tree)) - mast->bn->type = maple_arange_64; - else - mast->bn->type = maple_range_64; - mas->depth = height; + if (dst->offset + len > mt_pivots[dst->type]) { + size = mt_pivots[dst->type] - dst->offset; + //printk("Avoid overflow, SET max %lx\n", max); + } else { + //printk("Set piv %u to %lx\n", dst->offset + len - 1, max); + dst->pivots[dst->offset + len - 1] = max; } - /* - * Only a single node is used here, could be root. - * The Big_node data should just fit in a single node. 
- */ - ancestor = mas_new_ma_node(mas, mast->bn); - mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset); - mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset); - mte_to_node(ancestor)->parent = mas_mn(mas)->parent; - mast->l->node = ancestor; - mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true); - mas->offset = mast->bn->b_end - 1; + size *= sizeof(unsigned long); + memcpy(dst->pivots + dst->offset, src->pivots + src->offset, size); + dst->max = max; + dst->offset += len; + src->offset += len; } /* - * mast_fill_bnode() - Copy data into the big node in the subtree state - * @mast: The maple subtree state - * @mas: the maple state - * @skip: The number of entries to skip for new nodes insertion. + * + * Zero any area that needs to be zeroed and set the metadata. + * metadata needs the largest gap for non-leaves. */ -static inline void mast_fill_bnode(struct maple_subtree_state *mast, - struct ma_state *mas, - unsigned char skip) +static inline void mns_finalise(struct ma_node_state *p) { - bool cp = true; - unsigned char split; + unsigned long max_gap; + unsigned char len; - memset(mast->bn, 0, sizeof(struct maple_big_node)); + //printk("%s: offset is %u range %lx - %lx\n", __func__, + // p->offset, p->min, p->max); + len = mt_slots[p->type] - p->offset; - if (mte_is_root(mas->node)) { - cp = false; + //printk("len is %u %u - %u\n", len, mt_slots[p->type], p->offset); + + if (len) { + //printk("zero slots %u to %u\n", p->offset, len + p->offset - 1); + memset(p->slots + p->offset, 0, len * sizeof(void *)); + + if (p->pivots && len > 1) + memset(p->pivots + p->offset, 0, + (len - 1) * sizeof(unsigned long)); + } + + //printk("check %p %u gaps\n", p->node, p->type); + max_gap = 0; + if (ma_is_leaf(p->type)) { + unsigned char offset; + unsigned char i; + unsigned long gap, pstart; + + if (!p->alloc) + goto finalise_leaf; + //printk("check gaps for %p\n", p->node); + i = 0; + offset = p->offset - 2; + /* + * Check the end pivot which can only exist at the left most + * node + */ + //printk("max is %lx last slot %u\n", p->max, offset + 2); + //printk("last slot is %p\n", p->slots[offset + 1]); + if (unlikely(p->max == ULONG_MAX) && + !p->slots[offset + 1]) { + //printk("last slot\n"); + max_gap = ULONG_MAX - p->pivots[offset]; + //printk("set max gap to %lu\n", max_gap); + if (max_gap > p->pivots[offset] - p->min) + goto finalise_leaf; + } + + /* Special case the first slot before the loop */ + if (likely(!p->slots[0])) { + //printk("slot 0 is %p\n", p->slots[0]); + //printk("first slot check (%lu - %lu + 1\n", p->pivots[0], p->min); + gap = p->pivots[0] - p->min + 1; + if (gap > max_gap) + max_gap = gap; + //printk("gap is now %lu\n", max_gap); + i = 2; + } else { + i = 1; + } + + + for (; i <= offset; i++) { + /* data == no gap. */ + if (likely(p->slots[i])) + continue; + + //printk("empty slot at %u\n", i); + pstart = p->pivots[i - 1]; + gap = p->pivots[i] - pstart; + //printk("gap is %lu vs %lu\n", gap, max_gap); + if (gap > max_gap) + max_gap = gap; + + /* There cannot be two gaps in a row. 
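+			 * The following slot must contain data, so it is skipped.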
*/ + i++; + } +finalise_leaf: + p->max_gap = max_gap; + if (p->offset <= mt_pivots[p->type]) { + //printk("%s: set meta %u\n", __func__, p->offset - 1); + ma_set_meta(p->node, p->type, 0, p->offset - 1); + } } else { - mas_ascend(mas); - mas->offset = mte_parent_slot(mas->node); + unsigned long gap_off = 0; + //printk("gaps is %p\n", p->gaps); + if (p->gaps) { + unsigned char offset = p->offset - 1; + + //printk("go through offset %u to 0\n", offset); + memset(p->gaps + p->offset, 0, + len * sizeof(unsigned long)); + do { + if (p->gaps[offset] > max_gap) { + gap_off = offset; + max_gap = p->gaps[offset]; + } + } while (offset--); + + p->max_gap = max_gap; + //printk("max gap is %lx\n", max_gap); + //printk("%s: set meta %u\n", __func__, p->offset - 1); + ma_set_meta(p->node, p->type, gap_off, p->offset - 1); + } else if (p->offset <= mt_pivots[p->type]) { + //printk("%s: set meta %u\n", __func__, p->offset - 1); + ma_set_meta(p->node, p->type, 0, p->offset - 1); + } + } +} + +static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas = wr_mas->mas; + unsigned char new_end = mas->end + 2; + + new_end -= wr_mas->offset_end - mas->offset; + if (wr_mas->r_min == mas->index) + new_end--; + + if (wr_mas->end_piv == mas->last) + new_end--; + + return new_end; +} + +static inline void mas_wr_converged(struct ma_node_state *src, + struct ma_node_state *dst, struct ma_node_part *ma_part, + struct ma_state *mas, unsigned int skip) +{ + mns_node_init(dst, mas_pop_node(mas), src->type); + + if (mas->offset) + mns_cp(src, dst, mas->offset); + + mns_insert_part(ma_part, dst); + src->offset += skip; + + if (src->offset <= mas->end) + mns_cp(src, dst, mas->end - src->offset + 1); + + dst->node->parent = src->node->parent; + mns_finalise(dst); + mas_set_height(mas); +} + +static void mas_wr_split_no_null(struct ma_node_state *src, + struct ma_node_state *left, struct ma_node_state *right, + unsigned char total, struct ma_node_part *ma_part) +{ + if (!ma_is_leaf(src->type)) + return; + + if (!left->slots[left->offset - 1]) { + unsigned char min; + unsigned char end; + + end = total - left->offset; + min = mt_min_slots[right->type]; + if ((end - 1 > min) && + (left->offset < mt_slots[left->type])) { + if (ma_part->unfinished || + src->insert == src->offset) { + ma_part->dst_max_off = src->offset; + mns_insert_part(ma_part, left); + } else { + mns_cp(src, left, 1); + } + } else { + left->offset--; + right->offset++; + right->slots[0] = NULL; + if (left->offset < mt_pivots[left->type]) { + right->pivots[0] = left->pivots[left->offset]; + left->pivots[left->offset] = 0; + } else { + right->pivots[0] = left->max; + } + left->max = left->pivots[left->offset - 1]; + } } - if (cp && mast->l->offset) - mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0); + right->min = left->max + 1; +} + +static inline void mns_in_left(struct ma_node_state *src, + struct ma_node_state *left, struct ma_node_state *right, + struct ma_state *mas, unsigned char split, + unsigned char new_end, struct ma_node_part *ma_part) +{ + ma_part->dst_max_off = split; + if (mas->offset) + mns_cp(src, left, mas->offset); + + mns_insert_part(ma_part, left); + src->offset++; + if (left->offset <= split) + mns_cp(src, left, split - left->offset + 1); - split = mast->bn->b_end; - mab_set_b_end(mast->bn, mast->l, mast->l->node); - mast->r->offset = mast->bn->b_end; - mab_set_b_end(mast->bn, mast->r, mast->r->node); - if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max) - cp = false; + mas_wr_split_no_null(src, 
left, right, new_end, ma_part); + if (ma_part->unfinished) + mns_insert_part(ma_part, right); + + right->min = left->max + 1; + mns_cp(src, right, mas->end - src->offset + 1); +} +static inline void mns_in_right(struct ma_node_state *src, + struct ma_node_state *left, struct ma_node_state *right, + struct ma_state *mas, unsigned char split, + unsigned char new_end, struct ma_node_part *ma_part) +{ + unsigned char cp; + + cp = mas->offset - split - 1; + mns_cp(src, left, split + 1); + mas_wr_split_no_null(src, left, right, new_end, ma_part); + right->min = left->max + 1; if (cp) - mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1, - mast->bn, mast->bn->b_end); + mns_cp(src, right, cp); - mast->bn->b_end--; - mast->bn->type = mte_node_type(mas->node); + mns_insert_part(ma_part, right); + src->offset++; + if (src->offset <= mas->end) + mns_cp(src, right, mas->end - src->offset + 1); } /* - * mast_split_data() - Split the data in the subtree state big node into regular - * nodes. - * @mast: The maple subtree state - * @mas: The maple state - * @split: The location to split the big node + * mas_wr_rebalance_calc() - Try to calculate a rebalance that will work + * @data_size: The total data to be written + * @mt: The maple node types splitting the data + * + * Returns: 0 on failure, the split location otherwise. */ -static inline void mast_split_data(struct maple_subtree_state *mast, - struct ma_state *mas, unsigned char split) +static inline +unsigned char mas_wr_rebalance_calc(unsigned char data_size, + enum maple_type mt) { - unsigned char p_slot; + unsigned char space, split; + unsigned char node_size, node_min; - mab_mas_cp(mast->bn, 0, split, mast->l, true); - mte_set_pivot(mast->r->node, 0, mast->r->max); - mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false); - mast->l->offset = mte_parent_slot(mas->node); - mast->l->max = mast->bn->pivot[split]; - mast->r->min = mast->l->max + 1; - if (mte_is_leaf(mas->node)) - return; + node_min = mt_min_slots[mt]; + node_size = mt_slots[mt]; + + space = node_size * 2 - 2; + /* Greedy rebalance */ + if (space <= data_size) + return 0; + + split = node_size - 2; + if (data_size - split >= node_size) + return 0; + + if (data_size - split <= node_min) + split = (data_size + 2) / 2; - p_slot = mast->orig_l->offset; - mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node, - &p_slot, split); - mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node, - &p_slot, split); + return split; +} + +static inline +void mas_wr_ascend_init(struct ma_state *mas, + struct ma_node_state *ns) +{ + mas_ascend(mas); + mns_mas_init(ns, mas); + ns->min = mas->min; + ns->max = mas->max; } /* - * mas_push_data() - Instead of splitting a node, it is beneficial to push the - * data to the right or left node if there is room. - * @mas: The maple state - * @height: The current height of the maple state - * @mast: The maple subtree state - * @left: Push left or not. - * - * Keeping the height of the tree low means faster lookups. + * mas_wr_try_rebalance() - Try to rebalance two nodes, this may not work out. + * @src: The source node state + * @new_end: The size of the src after the insert + * @left: The new left child + * @right: The new right child + * @ma_part: The node part that will be inserted * - * Return: True if pushed, false otherwise. + * Returns: True on rebalance, false otherwise. 
*/ -static inline bool mas_push_data(struct ma_state *mas, int height, - struct maple_subtree_state *mast, bool left) +static bool mas_wr_try_rebalance(struct ma_state *mas, + struct ma_node_state *src, unsigned char new_end, + struct ma_node_state *left, struct ma_node_state *right, + struct ma_node_part *ma_part) { - unsigned char slot_total = mast->bn->b_end; - unsigned char end, space, split; + struct ma_state tmp_mas; + struct ma_node_state src2, parent, new_parent; + struct ma_node_state *l_src, *r_src; + unsigned char l_end, r_end, mas_off; + unsigned char split, max; + unsigned char p_end, p_off; + bool left_store = false; + + /* + * It is currently not known if the rebalance can work, so this is to + * try and determine if a rebalance operation will succeed + */ - MA_STATE(tmp_mas, mas->tree, mas->index, mas->last); tmp_mas = *mas; - tmp_mas.depth = mast->l->depth; + mas_wr_ascend_init(&tmp_mas, &parent); + p_off = tmp_mas.offset; + p_end = ma_data_end(parent.node, parent.type, parent.pivots, + parent.max); + //printk("parent %p has end %u %p\n", parent.node, p_end, parent.slots[p_end]); + max = mt_slots[src->type] - 1; + if (ma_is_leaf(src->type)) + max--; + + if (!p_off) + goto try_right; + + tmp_mas.offset--; + mas_descend(&tmp_mas); + mns_mas_init(&src2, &tmp_mas); + src2.max = tmp_mas.max; + src2.min = tmp_mas.min; + src2.insert = 255; + l_end = ma_data_end(src2.node, src2.type, src2.pivots, + src2.max); + split = mas_wr_rebalance_calc(l_end + new_end, src2.type); + if (split) { + p_off--; + l_src = &src2; + r_src = src; + r_end = mas->end; + } else { + if (p_end <= p_off) + return false; - if (left && !mas_prev_sibling(&tmp_mas)) - return false; - else if (!left && !mas_next_sibling(&tmp_mas)) - return false; + mas_ascend(&tmp_mas); +try_right: + tmp_mas.offset = p_off + 1; + mas_descend(&tmp_mas); + mns_mas_init(&src2, &tmp_mas); + src2.min = tmp_mas.min; + src2.max = tmp_mas.max; + src2.insert = 255; + r_end = ma_data_end(src2.node, src2.type, + src2.pivots, src2.max); + l_end = mas->end; + split = mas_wr_rebalance_calc(r_end + new_end, src2.type); + if (!split) + return false; - end = mas_data_end(&tmp_mas); - slot_total += end; - space = 2 * mt_slot_count(mas->node) - 2; - /* -2 instead of -1 to ensure there isn't a triple split */ - if (ma_is_leaf(mast->bn->type)) - space--; + split = r_end + new_end - split; + l_src = src; + r_src = &src2; + left_store = true; + } - if (mas->max == ULONG_MAX) - space--; + /* + * At this point, the rebalance operation will succeed. + */ - if (slot_total >= space) - return false; + left->min = l_src->min; + mas_off = mas->offset; + /* + * l_src, ma_part, and r_src will be split between the new left and + * right nodes. Depending on where the split and the store offset + * (mas_off) falls within the data will determine where the new data + * will end up in the new nodes (left and right). + * + * This is further complicated by the insert potentially spanning the + * nodes and the left node ending on a NULL. If left does end in null, + * then the data is shifted forward one (if possible), or back one. + * Shifting back means copying the data to the right node. Shifting + * forward is complicated by a potential insert splitting the nodes, + * which means the new data going to the left will have to come from the + * ma_part. This is all taken care of in mas_wr_split_no_null(). 
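+	 *
+	 * Four cases are handled below: the store targets either l_src or
+	 * r_src, and in each case the new data may end up in the left or the
+	 * right destination node.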
+ */ + if (left_store) { /* Store is targeting l_src */ + if (mas_off <= split) { /* Store will end up in left */ + if (mas_off) + mns_cp(l_src, left, mas_off); + + ma_part->dst_max_off = split; + mns_insert_part(ma_part, left); + l_src->offset++; + + if (left->offset <= split) + mns_cp(l_src, left, split - left->offset + 1); + + mas_wr_split_no_null(l_src, left, right, + r_end + new_end + 1, ma_part); + right->min = left->max + 1; + if (ma_part->unfinished) + mns_insert_part(ma_part, right); + + if (l_end >= l_src->offset) + mns_cp(l_src, right, l_end - l_src->offset + 1); + + } else { /* Store will end up in right */ + mns_cp(l_src, left, split + 1); + mas_wr_split_no_null(l_src, left, right, + r_end + new_end + 1, ma_part); + right->min = left->max + 1; + mns_cp(l_src, right, mas_off - l_src->offset); + l_src->offset++; + mns_insert_part(ma_part, right); + if (l_end >= l_src->offset) + mns_cp(l_src, right, l_end - l_src->offset + 1); + } - /* Get the data; Fill mast->bn */ - mast->bn->b_end++; - if (left) { - mab_shift_right(mast->bn, end + 1); - mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0); - mast->bn->b_end = slot_total + 1; - } else { - mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end); + mns_cp(r_src, right, r_end + 1); + } else { /* Store is targeting r_src */ + if (split <= l_end) { /* Store will end up in right */ + mns_cp(l_src, left, split + 1); + mas_wr_split_no_null(l_src, left, right, + l_end + new_end + 1, ma_part); + + mns_cp(l_src, right, l_end - l_src->offset + 1); + right->min = left->max + 1; + mns_cp(r_src, right, mas_off); + mns_insert_part(ma_part, right); + r_src->offset++; + if (r_src->offset <= r_end) + mns_cp(r_src, right, r_end - r_src->offset + 1); + + } else { /* Store will end up in left */ + unsigned char r_split; + + r_split = split - l_end - 1; + mns_cp(l_src, left, l_end + 1); + if (mas_off <= r_split) { + if (mas_off) + mns_cp(r_src, left, mas_off); + ma_part->dst_max_off = split; + mns_insert_part(ma_part, left); + r_src->offset++; + if (r_src->offset < r_split) + mns_cp(r_src, left, r_split - r_src->offset); + + mas_wr_split_no_null(r_src, left, right, + l_end + new_end + 1, ma_part); + + if (ma_part->unfinished) + mns_insert_part(ma_part, right); + + right->min = left->max + 1; + } else { + mns_cp(r_src, left, r_split + 1); + mas_wr_split_no_null(r_src, left, right, + l_end + new_end + 1, ma_part); + right->min = left->max + 1; + if (mas_off > r_src->offset) + mns_cp(r_src, right, mas_off - r_src->offset); + mns_insert_part(ma_part, right); + r_src->offset++; + } + + if (r_src->offset <= r_end) + mns_cp(r_src, right, r_end - r_src->offset + 1); + } } - /* Configure mast for splitting of mast->bn */ - split = mt_slots[mast->bn->type] - 2; - if (left) { - /* Switch mas to prev node */ - *mas = tmp_mas; - /* Start using mast->l for the left side. */ - tmp_mas.node = mast->l->node; - *mast->l = tmp_mas; - } else { - tmp_mas.node = mast->r->node; - *mast->r = tmp_mas; - split = slot_total - split; - } - split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]); - /* Update parent slot for split calculation. 
*/ - if (left) - mast->orig_l->offset += end + 1; - - mast_split_data(mast, mas, split); - mast_fill_bnode(mast, mas, 2); - mas_split_final_node(mast, mas, height + 1); + mns_finalise(left); + mns_finalise(right); + mas_ascend(mas); + mas->end = p_end; + mas->offset = p_off; + mns_node_part_init(ma_part, left, right); + mas_wr_converged(&parent, &new_parent, ma_part, mas, /* skip = */ 2); + src->enode = parent.enode; + mas->node = new_parent.enode; return true; } /* - * mas_split() - Split data that is too big for one node into two. - * @mas: The maple state - * @b_node: The maple big node + * There is not enough room to contain the store in one node. */ -static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) +static void mas_wr_split(struct ma_wr_state *wr_mas) { - struct maple_subtree_state mast; - int height = 0; - unsigned char mid_split, split = 0; - struct maple_enode *old; - - /* - * Splitting is handled differently from any other B-tree; the Maple - * Tree splits upwards. Splitting up means that the split operation - * occurs when the walk of the tree hits the leaves and not on the way - * down. The reason for splitting up is that it is impossible to know - * how much space will be needed until the leaf is (or leaves are) - * reached. Since overwriting data is allowed and a range could - * overwrite more than one range or result in changing one entry into 3 - * entries, it is impossible to know if a split is required until the - * data is examined. - * - * Splitting is a balancing act between keeping allocations to a minimum - * and avoiding a 'jitter' event where a tree is expanded to make room - * for an entry followed by a contraction when the entry is removed. To - * accomplish the balance, there are empty slots remaining in both left - * and right nodes after a split. - */ - MA_STATE(l_mas, mas->tree, mas->index, mas->last); - MA_STATE(r_mas, mas->tree, mas->index, mas->last); - MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); - MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); + struct ma_state *mas = wr_mas->mas; + struct ma_node_state src, parent, left, right; + struct ma_node_part ma_part; + int height; + unsigned char split, total; trace_ma_op(__func__, mas); - mas->depth = mas_mt_height(mas); - mast.l = &l_mas; - mast.r = &r_mas; - mast.orig_l = &prev_l_mas; - mast.orig_r = &prev_r_mas; - mast.bn = b_node; + //mt_dump(mas->tree, mt_dump_hex); + height = mas_mt_height(mas); + /* FIXME: Save this? */ + total = mas_wr_new_end(wr_mas); + split = (total + 1) / 2; + mas->depth = height; + mns_node_part_leaf_init(&ma_part, wr_mas); + + /* First split the leaves */ + mns_node_init(&left, mas_pop_node(mas), wr_mas->type); + mns_node_init(&right, mas_pop_node(mas), wr_mas->type); + mns_mas_init(&src, mas); + src.max = mas->max; + src.min = mas->min; - while (height++ <= mas->depth) { - if (mt_slots[b_node->type] > b_node->b_end) { - mas_split_final_node(&mast, mas, height); - break; - } + if (mt_is_alloc(mas->tree)) + right.alloc = left.alloc = true; - l_mas = r_mas = *mas; - l_mas.node = mas_new_ma_node(mas, b_node); - r_mas.node = mas_new_ma_node(mas, b_node); - /* - * Another way that 'jitter' is avoided is to terminate a split up early if the - * left or right node has space to spare. This is referred to as "pushing left" - * or "pushing right" and is similar to the B* tree, except the nodes left or - * right can rarely be reused due to RCU, but the ripple upwards is halted which - * is a significant savings. - */ - /* Try to push left. 
*/ - if (mas_push_data(mas, height, &mast, true)) - break; - /* Try to push right. */ - if (mas_push_data(mas, height, &mast, false)) - break; + if (height > 1 && + mas_wr_try_rebalance(mas, &src, total, &left, &right, &ma_part)) + goto rebalanced; - split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min); - mast_split_data(&mast, mas, split); - /* - * Usually correct, mab_mas_cp in the above call overwrites - * r->max. - */ - mast.r->max = mas->max; - mast_fill_bnode(&mast, mas, 1); - prev_l_mas = *mast.l; - prev_r_mas = *mast.r; + left.min = mas->min; + right.max = mas->max; + if (split >= mas->offset) + mns_in_left(&src, &left, &right, mas, split, total, &ma_part); + else + mns_in_right(&src, &left, &right, mas, split, total, &ma_part); + + mns_finalise(&left); + mns_finalise(&right); + mns_node_part_init(&ma_part, &left, &right); + + if (height == 1) { + if (mt_is_alloc(mas->tree)) + src.type = maple_arange_64; + else + src.type = maple_range_64; + + goto new_root; + } + + //printk("%d height is %d\n", __LINE__, height); + while (--height) { + mas_wr_ascend_init(mas, &src); + mas->end = ma_data_end(src.node, src.type, src.pivots, + src.max); + total = mas->end + 1; + if (mas->end + 1 < mt_slots[src.type]) + goto converged; + + //printk("\tConsume %p type %u\n", src.node, src.type); + mns_node_init(&left, mas_pop_node(mas), src.type); + mns_node_init(&right, mas_pop_node(mas), src.type); + if ((height > 1) && + (mas_wr_try_rebalance(mas, &src, mas->end + 1, &left, + &right, &ma_part))) + goto rebalanced; + + left.min = src.min; + right.max = src.max; + split = (total + 1) / 2; + if (split >= mas->offset) + mns_in_left(&src, &left, &right, mas, split, total, &ma_part); + else + mns_in_right(&src, &left, &right, mas, split, total, &ma_part); + + mns_finalise(&left); + mns_finalise(&right); + mns_node_part_init(&ma_part, &left, &right); } - /* Set the original node as dead */ - old = mas->node; - mas->node = l_mas.node; - mas_wmb_replace(mas, old); +new_root: + /* Converged on new root */ + mas->depth++; + mas->offset = 0; + mas->end = 0; + mas_set_height(mas); +converged: + mas_wr_converged(&src, &parent, &ma_part, mas, /* skip = */ 1); + mas->node = parent.enode; +rebalanced: + mas_wmb_replace(mas, src.enode); mtree_range_walk(mas); + //mt_dump(wr_mas->mas->tree, mt_dump_hex); return; } @@ -3397,10 +3937,7 @@ static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas, WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store); - if (type == wr_rebalance) - return mas_rebalance(wr_mas->mas, b_node); - - return mas_split(wr_mas->mas, b_node); + return mas_rebalance(wr_mas->mas, b_node); } /* @@ -3982,21 +4519,6 @@ static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas) wr_mas->end_piv = wr_mas->mas->max; } -static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas) -{ - struct ma_state *mas = wr_mas->mas; - unsigned char new_end = mas->end + 2; - - new_end -= wr_mas->offset_end - mas->offset; - if (wr_mas->r_min == mas->index) - new_end--; - - if (wr_mas->end_piv == mas->last) - new_end--; - - return new_end; -} - /* * mas_wr_append: Attempt to append * @wr_mas: the maple write state @@ -4101,6 +4623,8 @@ static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas) mas_wr_spanning_store(wr_mas); break; case wr_split_store: + mas_wr_split(wr_mas); + break; case wr_rebalance: mas_wr_bnode(wr_mas); break; @@ -4711,6 +5235,7 @@ again: goto retry; } + BUG_ON(mas_is_overflow(mas)); if (WARN_ON_ONCE(mas_is_overflow(mas))) return NULL; @@ -7181,6 
+7706,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n", node, last, max, i); } + BUG_ON(1); } first = last + 1; } @@ -7345,8 +7871,10 @@ static void mas_validate_gaps(struct ma_state *mas) } } - if (gap > max_gap) + if (gap > max_gap) { max_gap = gap; + //printk("Use %p[%u] for max gap %lx\n",mas->node, i, gap); + } p_start = p_end + 1; if (p_end >= mas->max) @@ -7595,6 +8123,10 @@ void mt_validate(struct maple_tree *mt) while (!mas_is_overflow(&mas)) { MAS_WARN_ON(&mas, mte_dead_node(mas.node)); end = mas_data_end(&mas); + if (end < mt_min_slot_count(mas.node) && + (mas.max != ULONG_MAX)) + pr_err("Invalid size %u of " PTR_FMT "\n", + end, mas_mn(&mas)); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && (mas.max != ULONG_MAX))) { pr_err("Invalid size %u of " PTR_FMT "\n", diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 704cb1093ae8f..396ff1176eedb 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -253,6 +253,8 @@ static noinline void __init check_rev_seq(struct maple_tree *mt, pr_info(" %s test of 0-%lu %luK in %d active (%d total)\n", __func__, max, mt_get_alloc_size()/1024, mt_nr_allocated(), mt_nr_tallocated()); + BUG_ON(mt_nr_allocated() > 76); + BUG_ON(mt_nr_tallocated() > 885); } #endif } @@ -267,6 +269,7 @@ static noinline void __init check_seq(struct maple_tree *mt, unsigned long max, mt_zero_nr_tallocated(); for (i = 0; i <= max; i++) { MT_BUG_ON(mt, mtree_insert_index(mt, i, GFP_KERNEL)); + mt_validate(mt); for (j = 0; j <= i; j++) check_index_load(mt, j); @@ -988,7 +991,7 @@ static noinline void __init check_alloc_range(struct maple_tree *mt) static noinline void __init check_ranges(struct maple_tree *mt) { - int i, val, val2; + unsigned long i, val, val2; static const unsigned long r[] = { 10, 15, 20, 25, @@ -1155,11 +1158,17 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i*5; val2 = (i+1)*5; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } + mt_validate(mt); check_store_range(mt, 2422, 2422, xa_mk_value(2422), 0); + mt_validate(mt); check_store_range(mt, 2424, 2424, xa_mk_value(2424), 0); + mt_validate(mt); check_store_range(mt, 2425, 2425, xa_mk_value(2), 0); + mt_validate(mt); check_store_range(mt, 2460, 2470, NULL, 0); + mt_validate(mt); check_store_range(mt, 2435, 2460, xa_mk_value(2435), 0); check_store_range(mt, 2461, 2470, xa_mk_value(2461), 0); mt_set_non_kernel(0); @@ -1174,6 +1183,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i * 5 + 1; val2 = val + 4; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } /* Append to the last range without touching any boundaries */ @@ -1181,6 +1191,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = val2 + 5; val2 = val + 4; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } /* Append to the end of last range */ @@ -1189,6 +1200,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val += 5; MT_BUG_ON(mt, mtree_test_store_range(mt, val, ULONG_MAX, xa_mk_value(val)) != 0); + mt_validate(mt); } /* Overwriting the range and over a part of the next range */ @@ -1196,6 +1208,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i * 5 + 1; val2 = val + 5; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } /* Overwriting a part of the range and over the next range */ @@ -1203,6 +1216,7 @@ static 
noinline void __init check_ranges(struct maple_tree *mt) val2 = i * 5; val = val2 - 5; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } /* @@ -1213,6 +1227,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i * 5 - 5; val2 = i * 5 + 1; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); } /* @@ -1224,6 +1239,7 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i * 5 - 5; val2 = i * 5 + 1; check_store_range(mt, val, val2, xa_mk_value(val), 0); + //mt_dump(mt, mt_dump_hex); } MT_BUG_ON(mt, !mt_height(mt)); @@ -1285,14 +1301,22 @@ static noinline void __init check_ranges(struct maple_tree *mt) MT_BUG_ON(mt, mt_height(mt) >= 4); } /* Cause a 3 child split all the way up the tree. */ - for (i = 5; i < 215; i += 10) + for (i = 5; i < 215; i += 10) { check_store_range(mt, 11450 + i, 11450 + i + 1, NULL, 0); - for (i = 5; i < 65; i += 10) + mt_validate(mt); + } + for (i = 5; i < 65; i += 10) { check_store_range(mt, 11770 + i, 11770 + i + 1, NULL, 0); + mt_validate(mt); + MT_BUG_ON(mt, mt_height(mt) >= 4); + } MT_BUG_ON(mt, mt_height(mt) >= 4); - for (i = 5; i < 45; i += 10) + for (i = 5; i < 45; i += 10) { check_store_range(mt, 11700 + i, 11700 + i + 1, NULL, 0); + mt_validate(mt); + } + if (!MAPLE_32BIT) MT_BUG_ON(mt, mt_height(mt) < 4); mtree_destroy(mt); @@ -1303,18 +1327,34 @@ static noinline void __init check_ranges(struct maple_tree *mt) val = i*10; val2 = (i+1)*10; check_store_range(mt, val, val2, xa_mk_value(val), 0); + mt_validate(mt); MT_BUG_ON(mt, mt_height(mt) >= 4); } + + val = 7660; /* Fill parents and leaves before split. */ - for (i = 5; i < 455; i += 10) - check_store_range(mt, 7800 + i, 7800 + i + 1, NULL, 0); + for (i = 5; i < 490; i += 5) { + val += 5; + check_store_range(mt, val, val + 1, NULL, 0); + mt_validate(mt); + MT_BUG_ON(mt, mt_height(mt) >= 4); + } - for (i = 1; i < 16; i++) - check_store_range(mt, 8185 + i, 8185 + i + 1, - xa_mk_value(8185+i), 0); - MT_BUG_ON(mt, mt_height(mt) >= 4); + + val = 9460; + /* Fill parents and leaves before split. */ + for (i = 1; i < 10; i++) { + val++; + check_store_range(mt, val, val + 1, xa_mk_value(val), 0); + mt_validate(mt); + MT_BUG_ON(mt, mt_height(mt) >= 4); + } + check_store_range(mt, 8001, 8001, xa_mk_value(8001), 0); + check_store_range(mt, 8002, 8002, xa_mk_value(8002), 0); + check_store_range(mt, 8081, 8081, xa_mk_value(8081), 0); + check_store_range(mt, 8082, 8082, xa_mk_value(8082), 0); /* triple split across multiple levels. 
*/ - check_store_range(mt, 8184, 8184, xa_mk_value(8184), 0); + check_store_range(mt, 8099, 8100, xa_mk_value(8100), 0); if (!MAPLE_32BIT) MT_BUG_ON(mt, mt_height(mt) != 4); } @@ -1961,6 +2001,7 @@ static noinline void __init check_forking(void) for (i = 0; i <= nr_entries; i++) { mas_set_range(&mas, i*10, i*10 + 5); mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + mt_validate(&mt); } down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING); @@ -1971,8 +2012,10 @@ static noinline void __init check_forking(void) } mas_set(&newmas, 0); - mas_for_each(&newmas, val, ULONG_MAX) + mas_for_each(&newmas, val, ULONG_MAX) { mas_store(&newmas, val); + mt_validate(&newmt); + } mas_destroy(&newmas); mas_destroy(&mas); @@ -1989,9 +2032,11 @@ static noinline void __init check_iteration(struct maple_tree *mt) void *val; MA_STATE(mas, mt, 0, 0); - for (i = 0; i <= nr_entries; i++) + for (i = 0; i <= nr_entries; i++) { mtree_store_range(mt, i * 10, i * 10 + 9, xa_mk_value(i), GFP_KERNEL); + mt_validate(mt); + } mt_set_non_kernel(99999); @@ -2025,8 +2070,10 @@ static noinline void __init check_iteration(struct maple_tree *mt) } else { i++; } + mt_validate(mt); } val = mas_find(&mas, ULONG_MAX); + mt_validate(mt); MT_BUG_ON(mt, val != xa_mk_value(i)); mas_set(&mas, 0); @@ -2978,6 +3025,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) mas_store_gfp(&mas, (void *)size, GFP_KERNEL); mas_unlock(&mas); mas_reset(&mas); + mt_validate(mt); } } @@ -2990,6 +3038,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) /* Fill a depth 3 node to the maximum */ for (unsigned long i = 629440511; i <= 629440800; i += 6) mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL); + mt_validate(mt); /* Make space in the second-last depth 4 node */ mtree_erase(mt, 631668735); /* Make space in the last depth 4 node */ diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index bc30050227fda..b2732810cdea8 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35450,7 +35450,7 @@ static void check_dfs_preorder(struct maple_tree *mt) count++; mas_dfs_preorder(&mas); } while (!mas_is_none(&mas)); - /*printk("count %lu\n", count); */ + //printk("count %lu vs %lu\n", count, e); MT_BUG_ON(mt, count != e); mtree_destroy(mt); @@ -36332,7 +36332,6 @@ static inline int check_vma_modification(struct maple_tree *mt) __mas_set_range(&mas, 0x7ffde4ca2000, 0x7ffffffff000 - 1); mas_preallocate(&mas, NULL, GFP_KERNEL); mas_store_prealloc(&mas, NULL); - mt_dump(mt, mt_dump_hex); mas_destroy(&mas); mtree_unlock(mt); @@ -36539,12 +36538,12 @@ static void regression_tests(void) void maple_tree_tests(void) { + maple_tree_seed(); + maple_tree_harvest(); #if !defined(BENCH) regression_tests(); farmer_tests(); #endif - maple_tree_seed(); - maple_tree_harvest(); } int __weak main(void)
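
For reference, the leaf store handled by mns_node_part_leaf_init() collapses
the overlap between the existing range and the new write into at most three
slots.  The following is a minimal, standalone sketch of that construction
(simplified, illustrative types and names only; it is not the kernel code):

#include <stdio.h>

struct part {
	int size;
	unsigned long pivots[3];
	const char *slots[3];
};

/* Build the 1-3 slot part for a store of [index, last] over [r_min, r_max]. */
static void leaf_part_init(struct part *p, unsigned long r_min,
		unsigned long r_max, const char *content,
		unsigned long index, unsigned long last, const char *entry)
{
	p->size = 0;
	if (r_min < index) {		/* left remainder of the old range */
		p->pivots[p->size] = index - 1;
		p->slots[p->size++] = content;
	}
	p->pivots[p->size] = last;	/* the newly stored entry */
	p->slots[p->size++] = entry;
	if (r_max > last) {		/* right remainder of the old range */
		p->pivots[p->size] = r_max;
		p->slots[p->size++] = content;
	}
}

int main(void)
{
	struct part p;
	int i;

	/* Existing range 10-29 holds "old"; store "new" over 15-19. */
	leaf_part_init(&p, 10, 29, "old", 15, 19, "new");
	for (i = 0; i < p.size; i++)
		printf("slot %d: %s ends at %lu\n", i, p.slots[i], p.pivots[i]);
	return 0;
}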