From: Liam R. Howlett Date: Sat, 10 May 2025 04:40:02 +0000 (-0400) Subject: spanning store new plan X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=a608a7e04243166824c981f241d2d203f21fda51;p=users%2Fjedix%2Flinux-maple.git spanning store new plan Signed-off-by: Liam R. Howlett --- diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 7b89feda40cc..32c29bbd941f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -3056,7 +3056,7 @@ done: struct ma_node_part { unsigned char size; - unsigned char pos; + unsigned char offset; unsigned long pivots[3]; void *slots[3]; unsigned long gaps[2]; @@ -3077,6 +3077,7 @@ struct ma_node_info { unsigned char end; unsigned char insert_off; unsigned char offset; /* Operating position */ + bool new; }; struct ma_node_state { @@ -3101,7 +3102,7 @@ static __always_inline void mns_node_part_leaf_init(struct ma_node_part *part, struct ma_wr_state *wr_mas, struct ma_node_info *src) { - part->pos = 0; + part->offset = 0; part->size = 0; if (wr_mas->r_min < wr_mas->mas->index) { part->pivots[0] = wr_mas->mas->index - 1; @@ -3131,7 +3132,7 @@ void mns_node_part_span_leaf_init(struct ma_node_part *part, struct ma_wr_state *wr_l, struct ma_wr_state *wr_r, struct ma_node_info *src) { - part->pos = 0; + part->offset = 0; part->size = 0; if (wr_l->r_min < wr_l->mas->index) { part->pivots[0] = wr_l->mas->index - 1; @@ -3170,11 +3171,32 @@ void mni_node_part_init(struct ma_node_part *part, part->size = 1; } - part->pos = 0; + part->offset = 0; part->skip = 1; part->leaf = false; } +static inline +void init_mni_from_part(struct ma_node_info *info, struct ma_node_part *part, + unsigned long min, bool is_alloc) +{ + info->node = NULL; + info->offset = 0; + info->new = true; + info->end = part->size - 1; + info->max = part->pivots[info->end]; + info->pivots = part->pivots; + info->slots = part->slots; + if (is_alloc && ma_is_leaf(info->type)) { + for(int i = 0; i < part->size; i++) { + if (!part->slots[i]) + part->gaps[i] = part->pivots[i] - min; + min = part->pivots[i]; + } + } + info->gaps = part->gaps; +} + static inline void _mni_node_init(struct ma_node_info *mni, struct maple_node *node, enum maple_type type) @@ -3185,6 +3207,7 @@ void _mni_node_init(struct ma_node_info *mni, struct maple_node *node, mni->slots = ma_slots(node, type); mni->pivots = ma_pivots(node, type); mni->gaps = ma_gaps(node, type); + mni->new = false; } static inline @@ -3230,10 +3253,10 @@ void mni_mas_init(struct ma_node_info *mni, struct ma_state *mas) mni->type = mte_node_type(mas->node); _mni_node_init(mni, mni->node, mni->type); mni->enode = mas->node; - mni->end = mas->end; mni->max = mas->max; mni->min = mas->min; mni_set_end(mni); + mni->insert_off = mas->offset; } static inline @@ -3246,6 +3269,23 @@ void mns_mni_init(struct ma_node_state *mns, struct ma_node_info *dst, mns->use_part = false; } +static inline +void mni_descend(struct ma_node_info *mni) +{ + unsigned char off = mni->insert_off; + + if (off) + mni->min = mni->pivots[off - 1] + 1; + + if (off < mt_pivots[mni->type]) + mni->max = mni->pivots[off]; + + mni->enode = mni->slots[off]; + mni->node = mte_to_node(mni->enode); + mni->type = mte_node_type(mni->enode); + _mni_node_init(mni, mni->node, mni->type); + mni_set_end(mni); +} static inline bool mns_ends_in_null(struct ma_node_state *ns) { @@ -3315,12 +3355,13 @@ struct split_data { unsigned char offset; /* Offset into destination data (entire set) */ unsigned char space; /* The space left in the current destination node */ unsigned char split; /* Proposed split of data */ + unsigned char mid; /* Proposed middle split of data */ unsigned char insert; /* Insert location of destination */ unsigned char insert_end;/* Insert end location of destination */ unsigned char new_end; /* Total data */ unsigned char src_ins_end; /* Offset into source data where the write ends */ unsigned char len; /* Number of ma_node_states in the states array */ - struct ma_node_state states[5]; + struct ma_node_state states[6]; bool is_alloc; bool left_store; }; @@ -3478,6 +3519,7 @@ void mns_assemble(struct ma_node_state *states, unsigned char len) ns->dst->min = max + 1; } last_dst = ns->dst; + printk("dst: %p\n", ns->dst->node); d_slots = ns->dst->slots + ns->dst->offset; d_piv = ns->dst->pivots + ns->dst->offset; @@ -3502,6 +3544,16 @@ void mns_assemble(struct ma_node_state *states, unsigned char len) } } else { s_slots = ns->info->slots + ns->start; + if (ns->info->new) { + printk("NEW %u: %u-%u\n", i, ns->start, size); + for (int j = 0; j > size; j++) { + struct maple_enode *child; + + child = ma_enode_ptr(ns->info->slots[j]); + mte_set_parent(child, ns->dst->enode, + j + ns->dst->offset); + } + } s_piv = ns->info->pivots + ns->start; s_gap = ns->info->gaps; if (ns->start + size > mt_pivots[ns->info->type]) { @@ -3510,8 +3562,10 @@ void mns_assemble(struct ma_node_state *states, unsigned char len) } else { max = s_piv[size - 1]; } + printk("->max is %lu\n", max); } + printk("%p <= %p + %u\n", d_slots, s_slots, size); memcpy(d_slots, s_slots, size * sizeof(void __rcu *)); if (ns->dst->gaps) { d_gap = ns->dst->gaps + ns->dst->offset; @@ -3523,6 +3577,7 @@ void mns_assemble(struct ma_node_state *states, unsigned char len) piv_overflow = 1; } else if (piv_overflow) { /* Source overflow */ *(d_piv + size - 1) = max; + printk("overflow max is %lu\n", max); } ns->dst->offset += size; @@ -3530,6 +3585,7 @@ void mns_assemble(struct ma_node_state *states, unsigned char len) size -= piv_overflow; memcpy(d_piv, s_piv, size * sizeof(unsigned long)); ns->dst->max = max; + printk("set max is %lu\n", max); } } @@ -3539,16 +3595,19 @@ static inline void mas_wr_converged(struct ma_node_info *src, { unsigned char off = 0; + printk("src insert off is %u\n", src->insert_off); sd->len = 0; mni_node_init(dst, mas_pop_node(mas), src->type); if (src->insert_off) { mns_mni_init(&sd->states[sd->len], dst, 0, src->insert_off); + printk("state %d: %p[0] + %u\n", sd->len, src->node, src->insert_off); sd->states[sd->len].info = src; sd->len++; off = src->insert_off; } mns_mni_init(&sd->states[sd->len], dst, 0, part->size); + printk("state %d: part[0] + %u\n", sd->len, part->size); sd->states[sd->len].part = part; sd->states[sd->len].use_part = true; sd->len++; @@ -3558,6 +3617,7 @@ static inline void mas_wr_converged(struct ma_node_info *src, unsigned char size; size = src->end - off + 1; + printk("state %d: %p[%u] + %u\n", sd->len, src->node, off, size); mns_mni_init(&sd->states[sd->len], dst, off, size); sd->states[sd->len].info = src; sd->len++; @@ -3587,15 +3647,15 @@ static void split_state_setup(struct ma_node_info *src, state = &sd->states[sd->len]; size = sd->space; if (sd->offset >= sd->insert && sd->offset <= sd->insert_end) { - max_copy = part->size - part->pos; + max_copy = part->size - part->offset; if (max_copy < size) size = max_copy; - printk("\tstore part: %u %u %u\n", sd->len, part->pos, size); + printk("\tstore part: %u %u %u\n", sd->len, part->offset, size); state->part = part; - mns_mni_init(state, dst, part->pos, size); + mns_mni_init(state, dst, part->offset, size); state->use_part = true; - part->pos += size; + part->offset += size; src->offset = sd->src_ins_end + 1; printk("src offset is now %u\n", src->offset); } else { @@ -3682,7 +3742,7 @@ static void split_data_by_state(struct ma_node_info *src, sd->offset--; sd->space++; if (state->use_part) { - part->pos--; + part->offset--; } else { src->offset--; } @@ -4827,16 +4887,320 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas, return; } +static int mas_prev_node(struct ma_state *mas, unsigned long min); +static int mas_next_node(struct ma_state *mas, struct maple_node *node, + unsigned long max); + + + +/* + * Append from src->offset of size len or src->end, whichever is smaller + * + * Return true if len is appended, false otherwise. + */ +static void spanning_append(struct split_data *sd, struct ma_node_info *src, + struct ma_node_info *dst, unsigned char size) +{ + struct ma_node_state *state; + + state = &sd->states[sd->len]; + printk("size %u > %u - %u + 1\n", size, src->end, src->offset); + if (size > src->end - src->offset + 1) + size = src->end - src->offset + 1; + + printk("size %u > %u - %u + 1\n", size, mt_slots[dst->type], dst->offset); + if (size > mt_slots[dst->type] - dst->offset) + size = mt_slots[dst->type] - dst->offset; + + printk("state %u: %u <= %p[%u] + %u\n", sd->len, sd->offset, src->node, src->offset, size); + state->info = src; + printk("\tdst %p\n", dst->node); + mns_mni_init(state, dst, src->offset, size); + src->offset += size; + sd->offset += size; + dst->offset += size; + sd->len++; +} + +static void dst_finalise(struct ma_node_info *dst, unsigned char count, + struct split_data *sd, struct ma_node_part *part, + unsigned char skip) +{ + + for (int i = 0; i <= count; i++) { + printk("finalise dst %u\n", i); + mni_finalise(&dst[i], sd); + part->pivots[i] = dst[i].max; + part->slots[i] = dst[i].enode; + part->gaps[i] = dst[i].max_gap; + } + part->size = count + 1; + part->offset = 0; + part->leaf = false; + part->skip = skip; +} + +static void mas_wr_spanning_store(struct ma_wr_state *wr_mas) +{ + struct ma_state *mas; + MA_STATE(r_mas, NULL, 0, 0); + MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); + struct ma_node_info left, right, other, p; + struct ma_node_info *src[4]; + struct ma_node_info dst[3]; + struct ma_node_info parent, r_parent, new_parent; + unsigned char s, d, max_d, max_s, height = 0; + unsigned char limits[3]; + struct split_data sd; + struct ma_node_part part; + unsigned long min; + + mas = wr_mas->mas; + trace_ma_op(__func__, mas); + printk("\n\n\n\t\t\t\t%s\n", __func__); + mt_dump(mas->tree, mt_dump_dec); + printk("Storing %lu - %lu -> %p\n", mas->index, mas->last, wr_mas->entry); + + /* FIXME: Can this happen? probably not? */ + if (unlikely(!mas->index && mas->last == ULONG_MAX)) + return mas_new_root(mas, wr_mas->entry); + + r_mas = *mas; + printk("r_wr_mas\n"); + r_mas.index = r_mas.last; + mas_wr_walk_index(&r_wr_mas); + r_mas.index = r_mas.min; + r_mas.offset = 0; + mas_wr_end_piv(&r_wr_mas); + + /* Set up left side. */ + mas_wr_walk_index(wr_mas); + printk("\nwr_mas %lu\n", mas->min); + + + printk("At %p and %p\n", mas_mn(mas), mas_mn(&r_mas)); + printk("r_mas: end piv is %lu max %lu\n", r_wr_mas.end_piv, r_mas.max); + + wr_mas->end_piv = r_wr_mas.end_piv; + mni_mas_init(&left, mas); + mni_mas_init(&right, &r_mas); + right.insert_off = r_wr_mas.offset_end; + sd.len = 0; + sd.offset = 0; + sd.left_store = true; + sd.insert = mas->offset; + sd.src_ins_end = wr_mas->offset_end + mas->end + 1; + wr_mas->offset_end = r_wr_mas.offset_end + mas->end + 1; + printk("src is %p slots is %p\n", right.node, right.slots); + mns_node_part_span_leaf_init(&part, wr_mas, &r_wr_mas, &right); + do { + height++; + d = s = 0; + /* + * + * Set up sources (up to 4) + * Move mas or r_mas, if necessary + * Set up destinations (up to 3) + * Set up limits for destinations (up to 3) + * This will generate up to 6 copy segments! + * + * Iterate sd.offset until sd.new_end + * store from source up to limit or source max + * store part when sd.offset is in the correct limit + * Validate it doesn't end in NULL when switching destinations + * correct if it does + * finalise what is necessary + * Continue until parents meet with sufficient data + * + */ + /* Set up sources (up to 3) + part (always) */ + sd.new_end = r_mas.end + mas->end - part.skip + 1 + part.size; + printk("new end is %u\n", sd.new_end); + mas_wr_ascend_init(&r_mas, &r_parent); + mas_wr_ascend_init(mas, &parent); + if (sd.new_end < mt_min_slots[left.type] && + !ma_is_root(left.node) && + (left.min || right.max != ULONG_MAX)) { + /* Take in more nodes */ + printk("!!! %d min slots not met\n", __LINE__); + if (r_parent.insert_off < r_parent.end) { + src[2] = &other; + printk("%d: set src %u\n", __LINE__, 2); + mas_next_node(&r_mas, r_parent.node, ULONG_MAX); + other = r_parent; + other.insert_off++; + r_parent.insert_off++; + printk("Looks like there is a right sibling\n"); + } else if (parent.insert_off) { + printk("%d: set src %u\n", __LINE__, s); + src[0] = &other; + s++; + mas_prev_node(mas, 0); + other = parent; + other.insert_off--; + parent.insert_off--; + printk("Looks like there is a left sibling\n"); + } else if (r_parent.max > ULONG_MAX) { + printk("%d: set src %u\n", __LINE__, 2); + src[2] = &other; + mas_next_node(&r_mas, r_parent.node, ULONG_MAX); + mni_mas_init(&other, &r_mas); + r_parent.insert_off++; + printk("Looks like there is a right cousin\n"); + BUG_ON(1); + } else { + BUG_ON(!parent.min); + printk("%d: set src %u\n", __LINE__, s); + src[0] = &other; + s++; + mas_prev_node(mas, 0); + mni_mas_init(&other, &r_mas); + parent.offset--; + printk("Looks like there is a left cousin\n"); + BUG_ON(1); + } + + mni_descend(&other); + sd.new_end += other.end + 1; + printk("new end is now %u\n", sd.new_end); + } + + /* A source by any other name.. */ + if (left.insert_off) { + printk("%d: set src %u\n", __LINE__, s); + src[s] = &left; + left.end = left.insert_off - 1; + min = left.pivots[left.end]; + s++; + } else { + printk("no insert offset\n"); + min = left.min; + } + + p.type = left.type; + init_mni_from_part(&p, &part, min, sd.is_alloc); + printk("%d: set src %u NEW\n", __LINE__, s); + src[s] = &p; + + if (right.insert_off < right.end) { + s++; + printk("%d: set src %u\n", __LINE__, s); + src[s] = &right; + right.offset = right.insert_off + 1; + } + + printk("Start right at %u end is %u\n", right.offset, right.end); + + mni_node_init(&dst[d], mas_pop_node(mas), left.type); + printk("new_end %u\n", sd.new_end); + if (sd.new_end > 2 * mt_slots[left.type]) { + sd.split = (sd.new_end + 1) / 3; + sd.mid = sd.split * 2; + d++; + mni_node_init(&dst[d], mas_pop_node(mas), left.type); + d++; + mni_node_init(&dst[d], mas_pop_node(mas), left.type); + } else if (sd.new_end > mt_slots[left.type]) { + sd.split = (sd.new_end + 1) / 2; + sd.mid = sd.new_end; + d++; + mni_node_init(&dst[d], mas_pop_node(mas), left.type); + } else { + sd.split = sd.new_end; + sd.mid = sd.new_end; + } + printk("limits %u %u %u\n", sd.split, sd.mid, sd.new_end); + limits[0] = sd.split; + limits[1] = sd.mid; + limits[2] = sd.new_end; + + /* + * Now, loop through, advancing limits each time. + */ + printk("min is %lu\n", src[0]->min); + dst[0].min = src[0]->min; + dst[d].max = src[s]->max; + printk("max dst is %u\n", d); + max_d = d; + max_s = s; + s = d = 0; + printk("START\n"); + do { + printk("limits is %u/%u\n", limits[d], sd.offset); + printk("pass dst %p\n", &dst[d]); + spanning_append(&sd, src[s], &dst[d], limits[d] + 1 - sd.offset); + if (sd.offset > limits[d]) { + /* Check NULL.. */ + if (ma_is_leaf(src[s]->type) + && mns_ends_in_null(&sd.states[sd.len - 1])) { + sd.states[sd.len - 1].size--; + limits[d]--; + } + dst[d].max = src[s]->pivots[src[s]->offset - 1]; + dst[d].offset = 0; + printk("set dst %u max %lu\n", d, dst[d].max); + d++; + printk("dst ++\n"); + } + + if (src[s]->offset > src[s]->end) { + src[s]->offset = 0; + s++; + } + } while (sd.offset < sd.new_end); + + dst[max_d].offset = 0; + printk("\n\n"); + printk("parent is %p d is %u\n", parent.node, d); + mns_assemble(sd.states, sd.len); + if (ma_is_root(parent.node)) { + printk("\n\nNew root\n"); + if (!dst[0].min && dst[0].max == ULONG_MAX) { + mas->depth = height; + left.enode = mas->node; + dst[0].node->parent = parent.node->parent; + mas->node = dst[0].enode; + mas_set_height(mas); + mni_finalise(&dst[0], &sd); + goto new_root; + } + } + + dst_finalise(dst, max_d, &sd, &part, max_s); + sd.len = 0; + sd.offset = 0; + } while ((parent.node != r_parent.node) && + sd.new_end >= mt_slots[parent.type]); + + printk("\n\nDone\n"); + part.skip = r_parent.insert_off - parent.insert_off + 1; + printk("Skip %u\n", part.skip); + printk("parent end is %u\n", parent.end); + mas_wr_converged(&parent, &new_parent, &part, mas, &sd); + left.enode = parent.enode; + mas->node = new_parent.enode; + printk("replace %p with %p\n", left.enode, mas->node); +new_root: + mas_wmb_replace(mas, left.enode); + mtree_range_walk(mas); + mt_dump(mas->tree, mt_dump_dec); + mt_validate(mas->tree); +} +#if 0 static void mas_wr_spanning_store(struct ma_wr_state *wr_mas) { struct ma_state *mas; MA_STATE(r_mas, NULL, 0, 0); MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry); - struct ma_node_info src, r_src, left, right, middle; + struct ma_node_info src, r_src, other_src; + struct ma_node_info *sources[3]; + struct ma_node_info *destinations[3]; + struct ma_node_info left, right, middle; struct ma_node_info parent, r_parent, new_parent; struct ma_node_part part; struct split_data sd; - unsigned char height; + unsigned char height, s, d; + bool append, prepend; mas = wr_mas->mas; trace_ma_op(__func__, mas); @@ -4877,6 +5241,24 @@ static void mas_wr_spanning_store(struct ma_wr_state *wr_mas) printk("src is %p slots is %p\n", r_src.node, r_src.slots); mns_node_part_span_leaf_init(&part, wr_mas, &r_wr_mas, &r_src); do { + /* + * + * Set up sources (up to 4) + * Move mas or r_mas, if necessary + * Set up destinations (up to 3) + * Set up limits for destinations (up to 3) + * This will generate up to 6 copy segments! + * + * Iterate sd.offset until sd.new_end + * store from source up to limit or source max + * store part when sd.offset is in the correct limit + * Validate it doesn't end in NULL when switching destinations + * correct if it does + * finalise what is necessary + * Continue until parents meet with sufficient data + * + */ + struct ma_node_state *state; for (int i = 0; i < part.size; i++) @@ -4889,25 +5271,67 @@ static void mas_wr_spanning_store(struct ma_wr_state *wr_mas) r_src.offset = r_wr_mas.offset_end + 1; printk("new end is %u\n", sd.new_end); - mni_node_init(&left, mas_pop_node(mas), src.type); left.min = src.min; mas_wr_ascend_init(mas, &parent); + printk("parent is %p[%u]\n", parent.node, parent.insert_off); mas->end = parent.end; mas_wr_ascend_init(&r_mas, &r_parent); r_mas.end = r_parent.end; - if (sd.new_end < mt_min_slots[src.type]) { + if (sd.new_end < mt_min_slots[src.type] && + !ma_is_root(src.node)) { /* Take in more nodes */ printk("!!! %d min slots not met\n", __LINE__); - if (r_parent.insert_off < r_parent.end) + if (r_parent.insert_off < r_parent.end) { + prepend = true; + mas_next_node(&r_mas, r_parent.node, ULONG_MAX); + other_src = parent; + other_src.insert_off++; printk("Looks like there is a right sibling\n"); - else if (parent.insert_off) { + } else if (parent.insert_off) { + append = true; + mas_prev_node(mas, 0); + other_src = parent; + other_src.insert_off--; printk("Looks like there is a left sibling\n"); - } else if (r_parent.max > ULONG_MAX) + } else if (r_parent.max > ULONG_MAX) { + prepend = true; + mas_next_node(&r_mas, r_parent.node, ULONG_MAX); + mni_mas_init(&other_src, &r_mas); printk("Looks like there is a right cousin\n"); - else if (parent.min) + BUG_ON(1); + } else { + append = true; + BUG_ON(!parent.min); + mas_prev_node(mas, 0); + mni_mas_init(&other_src, &r_mas); printk("Looks like there is a left cousin\n"); + BUG_ON(1); + } + + mni_descend(&other_src); + sd.new_end += other_src.end + 1; + if (sd.new_end < mt_slots[src.type]) + sd.split = sd.new_end; + sd.split = (sd.new_end + 1) /2; + printk("new end is now %u\n", sd.new_end); + } + } + + s = 0; + if (append) { + source[s++] = &other_src; + append = false; } + source[s++] = &src; + source[s++] = &r_src; + if (prepend) { + source[s++] = &other_src; + prepend= false; + } + + d = 0; + destinations[d++] = &left; if (sd.new_end < mt_slots[src.type]) { /* Single node at this level */ printk("%d\n", __LINE__); @@ -4937,16 +5361,26 @@ static void mas_wr_spanning_store(struct ma_wr_state *wr_mas) } } else if (sd.new_end < 2 * mt_slots[src.type]) { printk("%d\n", __LINE__); + sd.split = (sd.new_end + 1) / 2; + printk("split at %u\n", sd.split); mni_node_init(&right, mas_pop_node(mas), r_src.type); - middle.node = NULL; - right.max = r_src.max; + destinations[d++] = &right; } else { printk("%d\n", __LINE__); mni_node_init(&right, mas_pop_node(mas), r_src.type); mni_node_init(&middle, mas_pop_node(mas), src.type); + destinations[d++] = &right; + destinations[d++] = &middle; right.max = r_src.max; } + destination[0].min = src[0].min; + /* Uhh */ + do { + + } while(sd.new_end <= sd.offset); + + sd.len = 0; sd.offset = 0; } while ((parent.node != r_parent.node) && @@ -4963,6 +5397,7 @@ new_root: mt_dump(mas->tree, mt_dump_dec); mt_validate(mas->tree); } +#endif /* * mas_wr_store_entry() - Internal call to store a value