return 0;
}
+/*
+ * mte_set_parent() - Set the parent node and encode the slot
+ * @enode: The encoded maple node.
+ * @parent: The encoded maple node that is the parent of @enode.
+ * @slot: The slot that @enode resides in @parent.
+ *
+ * The slot number is encoded in enode->parent bits 3-6 or 2-6, depending on
+ * the parent type.
+ */
+static inline
+void mte_set_parent(struct maple_enode *enode,
+ const struct maple_enode *parent, unsigned char slot)
+{
+ unsigned long val = (unsigned long)parent;
+ unsigned long shift;
+ unsigned long type;
+ enum maple_type p_type = mte_node_type(parent);
+
+ switch (p_type) {
+ case maple_range_64:
+ case maple_arange_64:
+ shift = MAPLE_PARENT_SLOT_SHIFT;
+ type = MAPLE_PARENT_RANGE64;
+ break;
+ default:
+ case maple_dense:
+ case maple_leaf_64:
+ shift = type = 0;
+ break;
+ }
+
+ //printk("\t\t\tset %p -> parent %p\n", enode, parent);
+ val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
+ val |= (slot << shift) | type;
+ mte_to_node(enode)->parent = ma_parent_ptr(val);
+}
+
/*
* mas_set_parent() - Set the parent node and encode the slot
* @mas: The maple state
if (mte_is_root(mas->node))
return;
+ //printk("Updating gap for %p\n", mas->node);
max_gap = mas_max_gap(mas);
pslot = mte_parent_slot(mas->node);
end = ma_data_end(node, mt, pivots, mas->max);
for (offset = mas->offset; offset <= end; offset++) {
entry = mas_slot_locked(mas, slots, offset);
+ //printk("check %p[%u/%u] %p\n", mas->node, end, offset, entry);
if (mte_parent(entry) == node) {
+ //printk(" found entry at %u\n", offset);
*child = *mas;
mas->offset = offset + 1;
child->offset = offset;
tmp[i] = tmp_next[i];
}
+ //printk("Collect discarded\n");
/* Collect the old nodes that need to be discarded */
if (mte_is_leaf(old_enode))
return mas_free(mas, old_enode);
MA_STATE(l_mas, mas->tree, mas->index, mas->last);
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
+ //printk("Rebalance\n");
trace_ma_op(__func__, mas);
/*
mas_update_gap(mas);
}
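+/*
+ * struct ma_node_part - New data to be written during a node split.
+ *
+ * Holds up to three new entries for a leaf store, or the two new children
+ * when updating a parent.  @dst_max_off limits how far into the destination
+ * the part may be written before spilling into the next node, and
+ * @unfinished is set when entries remain to be inserted.
+ */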
+struct ma_node_part {
+ unsigned char size;
+ unsigned char pos;
+ unsigned char dst_max_off;
+ unsigned long pivots[3];
+ void *slots[3];
+ unsigned long gaps[2];
+ bool unfinished;
+};
+
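+/*
+ * struct ma_node_state - The working state of a single maple node.
+ *
+ * Caches the decoded slot, pivot, and gap pointers along with the node
+ * range (@min/@max), the largest gap, and the current operating @offset.
+ */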
+struct ma_node_state {
+ struct maple_node *node;
+ struct maple_enode *enode;
+ unsigned long min, max;
+ unsigned long max_gap;
+ void __rcu **slots;
+ unsigned long *pivots;
+ unsigned long *gaps;
+ unsigned char offset; /* Current operating offset */
+ unsigned char insert; /* Insert offset */
+ bool alloc; /* Allocation tree - track gaps in leaves */
+ enum maple_type type;
+};
+
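+/*
+ * mns_node_part_leaf_init() - Initialise a node part for a leaf store
+ * @ma_part: The maple node part
+ * @wr_mas: The maple write state
+ *
+ * Build up to three entries: the remainder of the existing range before the
+ * store, the new entry itself, and the remainder of the existing range after
+ * the store.
+ */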
+static inline
+void mns_node_part_leaf_init(struct ma_node_part *ma_part,
+ struct ma_wr_state *wr_mas)
+{
+ ma_part->pos = 0;
+ ma_part->size = 0;
+ //printk("%s: %lx - %lx store %lx - %lx\n", __func__,
+ // wr_mas->r_min, wr_mas->r_max,
+ // wr_mas->mas->index, wr_mas->mas->last);
+ if (wr_mas->r_min < wr_mas->mas->index) {
+ ma_part->pivots[0] = wr_mas->mas->index - 1;
+ ma_part->slots[0] = wr_mas->content;
+ ma_part->size++;
+ }
+
+ ma_part->pivots[ma_part->size] = wr_mas->mas->last;
+ ma_part->slots[ma_part->size] = wr_mas->entry;
+ ma_part->size++;
+
+ if (wr_mas->r_max > wr_mas->mas->last) {
+ ma_part->pivots[ma_part->size] = wr_mas->r_max;
+ ma_part->slots[ma_part->size] = wr_mas->content;
+ ma_part->size++;
+ }
+
+ ma_part->unfinished = false;
+ ma_part->dst_max_off = 255; /* No destination offset limit */
+}
+
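+/*
+ * mns_node_part_init() - Initialise a node part from two new children
+ * @ma_part: The maple node part
+ * @left: The new left child
+ * @right: The new right child
+ *
+ * The two entries are later inserted into the parent in place of the node
+ * (or nodes) that were split or rebalanced.
+ */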
+static inline
+void mns_node_part_init(struct ma_node_part *ma_part,
+ struct ma_node_state *left, struct ma_node_state *right)
+{
+ ma_part->slots[0] = left->enode;
+ ma_part->pivots[0] = left->max;
+ ma_part->gaps[0] = left->max_gap;
+
+ ma_part->slots[1] = right->enode;
+ ma_part->pivots[1] = right->max;
+ ma_part->gaps[1] = right->max_gap;
+
+ ma_part->pos = 0;
+ ma_part->size = 2;
+ ma_part->unfinished = false;
+ ma_part->dst_max_off = 255; /* No destination offset limit */
+}
+
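+/*
+ * mns_insert_part() - Insert the pending node part into a destination node
+ * @part: The maple node part
+ * @dst: The destination node state
+ *
+ * Copy entries from @part into @dst starting at dst->offset, setting the
+ * child parent pointers for non-leaf nodes.  Stop when @part is exhausted,
+ * when dst->offset passes part->dst_max_off, or when @dst runs out of slots;
+ * part->unfinished is set if entries remain for the next node.
+ */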
+static inline
+void mns_insert_part(struct ma_node_part *part,
+ struct ma_node_state *dst)
+{
+ //printk("insert pos %u/%u %u/%u\n", part->pos, part->size,
+ // dst->offset, part->dst_max_off);
+
+ while (dst->offset < mt_slots[dst->type]) {
+ //printk("Store part %u into %u %p\n", part->pos, dst->offset, part->slots[part->pos]);
+ dst->slots[dst->offset] = part->slots[part->pos];
+ if (dst->gaps)
+ dst->gaps[dst->offset] = part->gaps[part->pos];
+
+ if (!ma_is_leaf(dst->type))
+ mte_set_parent(part->slots[part->pos],
+ dst->enode, dst->offset);
+
+ if (dst->offset < mt_pivots[dst->type])
+ dst->pivots[dst->offset] = part->pivots[part->pos];
+ //printk ("offset %lx\n", part->pivots[part->pos]);
+
+ dst->offset++;
+ dst->max = part->pivots[part->pos];
+ //printk("Offset is %u, use max for pivot\n", dst->offset);
+ part->pos++;
+ //printk("dst offset is %u\n", dst->offset);
+ if (part->pos >= part->size) {
+ //printk("pos >= size\n");
+ part->unfinished = false;
+ return; /* Part fully inserted */
+ }
+
+ if (dst->offset > part->dst_max_off) {
+ //printk("push part to next node\n");
+ /* push to next node */
+ part->unfinished = true;
+ return;
+ }
+ //printk("dst offset is %u max is %u\n", dst->offset, part->dst_max_off);
+
+ }
+
+ //printk("OUT OF ROOM??\n");
+ /* Out of room.. */
+ //WARN_ON_ONCE(1);
+ part->unfinished = true;
+}
+
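+/*
+ * _mns_node_init() - Initialise a node state from a maple node
+ * @mns: The maple node state
+ * @node: The maple node
+ * @type: The maple node type
+ */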
+static inline
+void _mns_node_init(struct ma_node_state *mns, struct maple_node *node,
+ enum maple_type type)
+{
+ mns->node = node;
+ mns->type = type;
+ mns->max_gap = 0;
+ mns->offset = 0;
+ mns->slots = ma_slots(node, type);
+ mns->pivots = ma_pivots(node, type);
+ mns->gaps = ma_gaps(node, type);
+ mns->alloc = false;
+}
+
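+/*
+ * mns_node_init() - Initialise a node state and encode the node
+ * @mns: The maple node state
+ * @node: The maple node
+ * @type: The maple node type
+ */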
+static inline
+void mns_node_init(struct ma_node_state *mns, struct maple_node *node,
+ enum maple_type type)
+{
+ _mns_node_init(mns, node, type);
+ mns->enode = mt_mk_node(node, type);
+}
+
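+/*
+ * mns_mas_init() - Initialise a node state from the maple state
+ * @mns: The maple node state
+ * @mas: The maple state
+ *
+ * The insert offset is taken from mas->offset; the range (min/max) is left
+ * for the caller to set.
+ */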
+static inline
+void mns_mas_init(struct ma_node_state *mns, struct ma_state *mas)
+{
+ struct maple_node *node = mte_to_node(mas->node);
+ enum maple_type type = mte_node_type(mas->node);
+
+ _mns_node_init(mns, node, type);
+ mns->enode = mas->node;
+ mns->insert = mas->offset;
+}
+
/*
- * mas_split_final_node() - Split the final node in a subtree operation.
- * @mast: the maple subtree state
- * @mas: The maple state
- * @height: The height of the tree in case it's a new root.
+ * mns_cp() - Copy data from one node state to another
+ * @src: The maple node state of the source
+ * @dst: The maple node state of the destination
+ * @len: The number of offsets to copy
+ *
+ * Copy @len slots, pivots, and gaps (when present) from @src to @dst and
+ * advance both offsets.
*/
-static inline void mas_split_final_node(struct maple_subtree_state *mast,
- struct ma_state *mas, int height)
+static inline void mns_cp(struct ma_node_state *src, struct ma_node_state *dst,
+ unsigned char len)
{
- struct maple_enode *ancestor;
+ unsigned long max;
+ size_t size;
+
+ //printk("Cp %p %u-%u\n", dst->node, dst->offset, dst->offset + len - 1);
+ //printk("src %p %u-%u\n", src->node, src->offset, src->offset + len - 1);
+ size = len * sizeof(void *);
+ //printk("Copy %lu (%lu)\n", size, len);
+ memcpy(dst->slots + dst->offset, src->slots + src->offset, size);
+
+ size = len * sizeof(unsigned long);
+ if (src->gaps)
+ memcpy(dst->gaps + dst->offset, src->gaps + src->offset, size);
+
+ BUG_ON(src->offset + len > mt_slots[src->type]);
+ if (src->offset + len > mt_pivots[src->type]) {
+ size = mt_pivots[src->type] - src->offset;
+ max = src->max;
+ //printk("Avoid overflow, use max %lx\n", max);
+ } else {
+ size = len;
+ max = src->pivots[src->offset + len - 1];
+ //printk("use max %lx\n", max);
+ }
- if (mte_is_root(mas->node)) {
- if (mt_is_alloc(mas->tree))
- mast->bn->type = maple_arange_64;
- else
- mast->bn->type = maple_range_64;
- mas->depth = height;
+ if (dst->offset + len > mt_pivots[dst->type]) {
+ size = mt_pivots[dst->type] - dst->offset;
+ //printk("Avoid overflow, SET max %lx\n", max);
+ } else {
+ //printk("Set piv %u to %lx\n", dst->offset + len - 1, max);
+ dst->pivots[dst->offset + len - 1] = max;
}
- /*
- * Only a single node is used here, could be root.
- * The Big_node data should just fit in a single node.
- */
- ancestor = mas_new_ma_node(mas, mast->bn);
- mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
- mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
- mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
- mast->l->node = ancestor;
- mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
- mas->offset = mast->bn->b_end - 1;
+ size *= sizeof(unsigned long);
+ memcpy(dst->pivots + dst->offset, src->pivots + src->offset, size);
+ dst->max = max;
+ dst->offset += len;
+ src->offset += len;
}
/*
- * mast_fill_bnode() - Copy data into the big node in the subtree state
- * @mast: The maple subtree state
- * @mas: the maple state
- * @skip: The number of entries to skip for new nodes insertion.
+ * mns_finalise() - Finalise a node
+ * @p: The maple node state
+ *
+ * Zero any area that needs to be zeroed and set the metadata.  The metadata
+ * needs the offset of the largest gap for non-leaf nodes.
*/
-static inline void mast_fill_bnode(struct maple_subtree_state *mast,
- struct ma_state *mas,
- unsigned char skip)
+static inline void mns_finalise(struct ma_node_state *p)
{
- bool cp = true;
- unsigned char split;
+ unsigned long max_gap;
+ unsigned char len;
- memset(mast->bn, 0, sizeof(struct maple_big_node));
+ //printk("%s: offset is %u range %lx - %lx\n", __func__,
+ // p->offset, p->min, p->max);
+ len = mt_slots[p->type] - p->offset;
- if (mte_is_root(mas->node)) {
- cp = false;
+ //printk("len is %u %u - %u\n", len, mt_slots[p->type], p->offset);
+
+ if (len) {
+ //printk("zero slots %u to %u\n", p->offset, len + p->offset - 1);
+ memset(p->slots + p->offset, 0, len * sizeof(void *));
+
+ if (p->pivots && len > 1)
+ memset(p->pivots + p->offset, 0,
+ (len - 1) * sizeof(unsigned long));
+ }
+
+ //printk("check %p %u gaps\n", p->node, p->type);
+ max_gap = 0;
+ if (ma_is_leaf(p->type)) {
+ unsigned char offset;
+ unsigned char i;
+ unsigned long gap, pstart;
+
+ if (!p->alloc)
+ goto finalise_leaf;
+ //printk("check gaps for %p\n", p->node);
+ i = 0;
+ offset = p->offset - 2;
+ /*
+ * Check the gap after the last pivot, which can only exist in the
+ * right-most node.
+ */
+ //printk("max is %lx last slot %u\n", p->max, offset + 2);
+ //printk("last slot is %p\n", p->slots[offset + 1]);
+ if (unlikely(p->max == ULONG_MAX) &&
+ !p->slots[offset + 1]) {
+ //printk("last slot\n");
+ max_gap = ULONG_MAX - p->pivots[offset];
+ //printk("set max gap to %lu\n", max_gap);
+ if (max_gap > p->pivots[offset] - p->min)
+ goto finalise_leaf;
+ }
+
+ /* Special case the first slot before the loop */
+ if (likely(!p->slots[0])) {
+ //printk("slot 0 is %p\n", p->slots[0]);
+ //printk("first slot check (%lu - %lu + 1\n", p->pivots[0], p->min);
+ gap = p->pivots[0] - p->min + 1;
+ if (gap > max_gap)
+ max_gap = gap;
+ //printk("gap is now %lu\n", max_gap);
+ i = 2;
+ } else {
+ i = 1;
+ }
+
+ for (; i <= offset; i++) {
+ /* data == no gap. */
+ if (likely(p->slots[i]))
+ continue;
+
+ //printk("empty slot at %u\n", i);
+ pstart = p->pivots[i - 1];
+ gap = p->pivots[i] - pstart;
+ //printk("gap is %lu vs %lu\n", gap, max_gap);
+ if (gap > max_gap)
+ max_gap = gap;
+
+ /* There cannot be two gaps in a row. */
+ i++;
+ }
+finalise_leaf:
+ p->max_gap = max_gap;
+ if (p->offset <= mt_pivots[p->type]) {
+ //printk("%s: set meta %u\n", __func__, p->offset - 1);
+ ma_set_meta(p->node, p->type, 0, p->offset - 1);
+ }
} else {
- mas_ascend(mas);
- mas->offset = mte_parent_slot(mas->node);
+ unsigned long gap_off = 0;
+ //printk("gaps is %p\n", p->gaps);
+ if (p->gaps) {
+ unsigned char offset = p->offset - 1;
+
+ //printk("go through offset %u to 0\n", offset);
+ memset(p->gaps + p->offset, 0,
+ len * sizeof(unsigned long));
+ do {
+ if (p->gaps[offset] > max_gap) {
+ gap_off = offset;
+ max_gap = p->gaps[offset];
+ }
+ } while (offset--);
+
+ p->max_gap = max_gap;
+ //printk("max gap is %lx\n", max_gap);
+ //printk("%s: set meta %u\n", __func__, p->offset - 1);
+ ma_set_meta(p->node, p->type, gap_off, p->offset - 1);
+ } else if (p->offset <= mt_pivots[p->type]) {
+ //printk("%s: set meta %u\n", __func__, p->offset - 1);
+ ma_set_meta(p->node, p->type, 0, p->offset - 1);
+ }
+ }
+}
+
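+/*
+ * mas_wr_new_end() - Calculate the new end (last offset) of the node after
+ * the store completes
+ * @wr_mas: The maple write state
+ *
+ * Returns: The new end offset of the node.
+ */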
+static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+ unsigned char new_end = mas->end + 2;
+
+ new_end -= wr_mas->offset_end - mas->offset;
+ if (wr_mas->r_min == mas->index)
+ new_end--;
+
+ if (wr_mas->end_piv == mas->last)
+ new_end--;
+
+ return new_end;
+}
+
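+/*
+ * mas_wr_converged() - The operation fits in a single node at this level
+ * @src: The source node state
+ * @dst: The destination node state (initialised here)
+ * @ma_part: The node part to insert
+ * @mas: The maple state
+ * @skip: The number of source entries replaced by @ma_part
+ *
+ * Copy @src into a newly allocated node of the same type, inserting @ma_part
+ * at mas->offset and skipping @skip replaced source entries, then take over
+ * the parent pointer of @src and finalise the new node.
+ */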
+static inline void mas_wr_converged(struct ma_node_state *src,
+ struct ma_node_state *dst, struct ma_node_part *ma_part,
+ struct ma_state *mas, unsigned int skip)
+{
+ mns_node_init(dst, mas_pop_node(mas), src->type);
+
+ if (mas->offset)
+ mns_cp(src, dst, mas->offset);
+
+ mns_insert_part(ma_part, dst);
+ src->offset += skip;
+
+ if (src->offset <= mas->end)
+ mns_cp(src, dst, mas->end - src->offset + 1);
+
+ dst->node->parent = src->node->parent;
+ mns_finalise(dst);
+ mas_set_height(mas);
+}
+
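+/*
+ * mas_wr_split_no_null() - Avoid ending the new left leaf on a NULL entry
+ * @src: The source node state
+ * @left: The new left node state
+ * @right: The new right node state
+ * @total: The total amount of data being split
+ * @ma_part: The node part being inserted
+ *
+ * If the left leaf would end in NULL, either pull one more entry into the
+ * left node (from @ma_part or @src) or shift the trailing NULL to the start
+ * of the right node.
+ */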
+static void mas_wr_split_no_null(struct ma_node_state *src,
+ struct ma_node_state *left, struct ma_node_state *right,
+ unsigned char total, struct ma_node_part *ma_part)
+{
+ if (!ma_is_leaf(src->type))
+ return;
+
+ if (!left->slots[left->offset - 1]) {
+ unsigned char min;
+ unsigned char end;
+
+ end = total - left->offset;
+ min = mt_min_slots[right->type];
+ if ((end - 1 > min) &&
+ (left->offset < mt_slots[left->type])) {
+ if (ma_part->unfinished ||
+ src->insert == src->offset) {
+ ma_part->dst_max_off = src->offset;
+ mns_insert_part(ma_part, left);
+ } else {
+ mns_cp(src, left, 1);
+ }
+ } else {
+ left->offset--;
+ right->offset++;
+ right->slots[0] = NULL;
+ if (left->offset < mt_pivots[left->type]) {
+ right->pivots[0] = left->pivots[left->offset];
+ left->pivots[left->offset] = 0;
+ } else {
+ right->pivots[0] = left->max;
+ }
+ left->max = left->pivots[left->offset - 1];
+ }
}
- if (cp && mast->l->offset)
- mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
+ right->min = left->max + 1;
+}
+
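+/*
+ * mns_in_left() - Split a node with the new data starting in the left node
+ * @src: The source node state
+ * @left: The new left node state
+ * @right: The new right node state
+ * @mas: The maple state
+ * @split: The split offset
+ * @new_end: The amount of data being split across the two nodes
+ * @ma_part: The node part being inserted
+ *
+ * The new entries start in the left node and may spill into the right node
+ * via @ma_part.
+ */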
+static inline void mns_in_left(struct ma_node_state *src,
+ struct ma_node_state *left, struct ma_node_state *right,
+ struct ma_state *mas, unsigned char split,
+ unsigned char new_end, struct ma_node_part *ma_part)
+{
+ ma_part->dst_max_off = split;
+ if (mas->offset)
+ mns_cp(src, left, mas->offset);
- split = mast->bn->b_end;
- mab_set_b_end(mast->bn, mast->l, mast->l->node);
- mast->r->offset = mast->bn->b_end;
- mab_set_b_end(mast->bn, mast->r, mast->r->node);
- if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
- cp = false;
+ mns_insert_part(ma_part, left);
+ src->offset++;
+ if (left->offset <= split)
+ mns_cp(src, left, split - left->offset + 1);
+ mas_wr_split_no_null(src, left, right, new_end, ma_part);
+ if (ma_part->unfinished)
+ mns_insert_part(ma_part, right);
+
+ right->min = left->max + 1;
+ mns_cp(src, right, mas->end - src->offset + 1);
+}
+
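+/*
+ * mns_in_right() - Split a node with the new data landing in the right node
+ * @src: The source node state
+ * @left: The new left node state
+ * @right: The new right node state
+ * @mas: The maple state
+ * @split: The split offset
+ * @new_end: The amount of data being split across the two nodes
+ * @ma_part: The node part being inserted
+ */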
+static inline void mns_in_right(struct ma_node_state *src,
+ struct ma_node_state *left, struct ma_node_state *right,
+ struct ma_state *mas, unsigned char split,
+ unsigned char new_end, struct ma_node_part *ma_part)
+{
+ unsigned char cp;
+
+ cp = mas->offset - split - 1;
+ mns_cp(src, left, split + 1);
+ mas_wr_split_no_null(src, left, right, new_end, ma_part);
+ right->min = left->max + 1;
if (cp)
- mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
- mast->bn, mast->bn->b_end);
+ mns_cp(src, right, cp);
- mast->bn->b_end--;
- mast->bn->type = mte_node_type(mas->node);
+ mns_insert_part(ma_part, right);
+ src->offset++;
+ if (src->offset <= mas->end)
+ mns_cp(src, right, mas->end - src->offset + 1);
}
/*
- * mast_split_data() - Split the data in the subtree state big node into regular
- * nodes.
- * @mast: The maple subtree state
- * @mas: The maple state
- * @split: The location to split the big node
+ * mas_wr_rebalance_calc() - Try to calculate a split point for a rebalance
+ * that will work
+ * @data_size: The total amount of data to be written
+ * @mt: The maple node type of the nodes sharing the data
+ *
+ * Returns: 0 on failure, the split location otherwise.
*/
-static inline void mast_split_data(struct maple_subtree_state *mast,
- struct ma_state *mas, unsigned char split)
+static inline
+unsigned char mas_wr_rebalance_calc(unsigned char data_size,
+ enum maple_type mt)
{
- unsigned char p_slot;
+ unsigned char space, split;
+ unsigned char node_size, node_min;
- mab_mas_cp(mast->bn, 0, split, mast->l, true);
- mte_set_pivot(mast->r->node, 0, mast->r->max);
- mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
- mast->l->offset = mte_parent_slot(mas->node);
- mast->l->max = mast->bn->pivot[split];
- mast->r->min = mast->l->max + 1;
- if (mte_is_leaf(mas->node))
- return;
+ node_min = mt_min_slots[mt];
+ node_size = mt_slots[mt];
+
+ space = node_size * 2 - 2;
+ /* Greedy rebalance */
+ if (space <= data_size)
+ return 0;
+
+ split = node_size - 2;
+ if (data_size - split >= node_size)
+ return 0;
+
+ if (data_size - split <= node_min)
+ split = (data_size + 2) / 2;
- p_slot = mast->orig_l->offset;
- mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
- &p_slot, split);
- mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
- &p_slot, split);
+ return split;
+}
+
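+/*
+ * mas_wr_ascend_init() - Ascend to the parent and initialise its node state
+ * @mas: The maple state
+ * @ns: The node state for the parent
+ */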
+static inline
+void mas_wr_ascend_init(struct ma_state *mas,
+ struct ma_node_state *ns)
+{
+ mas_ascend(mas);
+ mns_mas_init(ns, mas);
+ ns->min = mas->min;
+ ns->max = mas->max;
}
/*
- * mas_push_data() - Instead of splitting a node, it is beneficial to push the
- * data to the right or left node if there is room.
- * @mas: The maple state
- * @height: The current height of the maple state
- * @mast: The maple subtree state
- * @left: Push left or not.
+ * mas_wr_try_rebalance() - Try to rebalance two nodes; this may not work out.
+ * @mas: The maple state
+ * @src: The source node state
+ * @new_end: The size of @src after the insert
+ * @left: The new left child
+ * @right: The new right child
+ * @ma_part: The node part that will be inserted
*
- * Keeping the height of the tree low means faster lookups.
- *
- * Return: True if pushed, false otherwise.
+ * Returns: True on rebalance, false otherwise.
*/
-static inline bool mas_push_data(struct ma_state *mas, int height,
- struct maple_subtree_state *mast, bool left)
+static bool mas_wr_try_rebalance(struct ma_state *mas,
+ struct ma_node_state *src, unsigned char new_end,
+ struct ma_node_state *left, struct ma_node_state *right,
+ struct ma_node_part *ma_part)
{
- unsigned char slot_total = mast->bn->b_end;
- unsigned char end, space, split;
+ struct ma_state tmp_mas;
+ struct ma_node_state src2, parent, new_parent;
+ struct ma_node_state *l_src, *r_src;
+ unsigned char l_end, r_end, mas_off;
+ unsigned char split, max;
+ unsigned char p_end, p_off;
+ bool left_store = false;
+
+ /*
+ * It is not yet known whether the rebalance can work, so first determine
+ * if a rebalance operation will succeed.
+ */
- MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
tmp_mas = *mas;
- tmp_mas.depth = mast->l->depth;
+ mas_wr_ascend_init(&tmp_mas, &parent);
+ p_off = tmp_mas.offset;
+ p_end = ma_data_end(parent.node, parent.type, parent.pivots,
+ parent.max);
+ //printk("parent %p has end %u %p\n", parent.node, p_end, parent.slots[p_end]);
+ max = mt_slots[src->type] - 1;
+ if (ma_is_leaf(src->type))
+ max--;
+
+ if (!p_off)
+ goto try_right;
+
+ tmp_mas.offset--;
+ mas_descend(&tmp_mas);
+ mns_mas_init(&src2, &tmp_mas);
+ src2.max = tmp_mas.max;
+ src2.min = tmp_mas.min;
+ src2.insert = 255;
+ l_end = ma_data_end(src2.node, src2.type, src2.pivots,
+ src2.max);
+ split = mas_wr_rebalance_calc(l_end + new_end, src2.type);
+ if (split) {
+ p_off--;
+ l_src = &src2;
+ r_src = src;
+ r_end = mas->end;
+ } else {
+ if (p_end <= p_off)
+ return false;
- if (left && !mas_prev_sibling(&tmp_mas))
- return false;
- else if (!left && !mas_next_sibling(&tmp_mas))
- return false;
+ mas_ascend(&tmp_mas);
+try_right:
+ tmp_mas.offset = p_off + 1;
+ mas_descend(&tmp_mas);
+ mns_mas_init(&src2, &tmp_mas);
+ src2.min = tmp_mas.min;
+ src2.max = tmp_mas.max;
+ src2.insert = 255;
+ r_end = ma_data_end(src2.node, src2.type,
+ src2.pivots, src2.max);
+ l_end = mas->end;
+ split = mas_wr_rebalance_calc(r_end + new_end, src2.type);
+ if (!split)
+ return false;
- end = mas_data_end(&tmp_mas);
- slot_total += end;
- space = 2 * mt_slot_count(mas->node) - 2;
- /* -2 instead of -1 to ensure there isn't a triple split */
- if (ma_is_leaf(mast->bn->type))
- space--;
+ split = r_end + new_end - split;
+ l_src = src;
+ r_src = &src2;
+ left_store = true;
+ }
- if (mas->max == ULONG_MAX)
- space--;
+ /*
+ * At this point, the rebalance operation will succeed.
+ */
- if (slot_total >= space)
- return false;
+ left->min = l_src->min;
+ mas_off = mas->offset;
+ /*
+ * l_src, ma_part, and r_src will be split between the new left and
+ * right nodes.  Where the split and the store offset (mas_off) fall
+ * within the data determines where the new data will end up in the new
+ * nodes (left and right).
+ *
+ * This is further complicated by the insert potentially spanning the
+ * nodes and the left node ending on a NULL.  If left does end in NULL,
+ * then the data is shifted forward one entry (if possible), or back one.
+ * Shifting back means copying the data to the right node.  Shifting
+ * forward is complicated by a potential insert splitting the nodes,
+ * which means the new data going to the left will have to come from
+ * ma_part.  This is all taken care of in mas_wr_split_no_null().
+ */
+ if (left_store) { /* Store is targeting l_src */
+ if (mas_off <= split) { /* Store will end up in left */
+ if (mas_off)
+ mns_cp(l_src, left, mas_off);
+
+ ma_part->dst_max_off = split;
+ mns_insert_part(ma_part, left);
+ l_src->offset++;
+
+ if (left->offset <= split)
+ mns_cp(l_src, left, split - left->offset + 1);
+
+ mas_wr_split_no_null(l_src, left, right,
+ r_end + new_end + 1, ma_part);
+ right->min = left->max + 1;
+ if (ma_part->unfinished)
+ mns_insert_part(ma_part, right);
+
+ if (l_end >= l_src->offset)
+ mns_cp(l_src, right, l_end - l_src->offset + 1);
+
+ } else { /* Store will end up in right */
+ mns_cp(l_src, left, split + 1);
+ mas_wr_split_no_null(l_src, left, right,
+ r_end + new_end + 1, ma_part);
+ right->min = left->max + 1;
+ mns_cp(l_src, right, mas_off - l_src->offset);
+ l_src->offset++;
+ mns_insert_part(ma_part, right);
+ if (l_end >= l_src->offset)
+ mns_cp(l_src, right, l_end - l_src->offset + 1);
+ }
- /* Get the data; Fill mast->bn */
- mast->bn->b_end++;
- if (left) {
- mab_shift_right(mast->bn, end + 1);
- mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
- mast->bn->b_end = slot_total + 1;
- } else {
- mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
+ mns_cp(r_src, right, r_end + 1);
+ } else { /* Store is targeting r_src */
+ if (split <= l_end) { /* Store will end up in right */
+ mns_cp(l_src, left, split + 1);
+ mas_wr_split_no_null(l_src, left, right,
+ l_end + new_end + 1, ma_part);
+
+ mns_cp(l_src, right, l_end - l_src->offset + 1);
+ right->min = left->max + 1;
+ mns_cp(r_src, right, mas_off);
+ mns_insert_part(ma_part, right);
+ r_src->offset++;
+ if (r_src->offset <= r_end)
+ mns_cp(r_src, right, r_end - r_src->offset + 1);
+
+ } else { /* Store will end up in left */
+ unsigned char r_split;
+
+ r_split = split - l_end - 1;
+ mns_cp(l_src, left, l_end + 1);
+ if (mas_off <= r_split) {
+ if (mas_off)
+ mns_cp(r_src, left, mas_off);
+ ma_part->dst_max_off = split;
+ mns_insert_part(ma_part, left);
+ r_src->offset++;
+ if (r_src->offset < r_split)
+ mns_cp(r_src, left, r_split - r_src->offset);
+
+ mas_wr_split_no_null(r_src, left, right,
+ l_end + new_end + 1, ma_part);
+
+ if (ma_part->unfinished)
+ mns_insert_part(ma_part, right);
+
+ right->min = left->max + 1;
+ } else {
+ mns_cp(r_src, left, r_split + 1);
+ mas_wr_split_no_null(r_src, left, right,
+ l_end + new_end + 1, ma_part);
+ right->min = left->max + 1;
+ if (mas_off > r_src->offset)
+ mns_cp(r_src, right, mas_off - r_src->offset);
+ mns_insert_part(ma_part, right);
+ r_src->offset++;
+ }
+
+ if (r_src->offset <= r_end)
+ mns_cp(r_src, right, r_end - r_src->offset + 1);
+ }
}
- /* Configure mast for splitting of mast->bn */
- split = mt_slots[mast->bn->type] - 2;
- if (left) {
- /* Switch mas to prev node */
- *mas = tmp_mas;
- /* Start using mast->l for the left side. */
- tmp_mas.node = mast->l->node;
- *mast->l = tmp_mas;
- } else {
- tmp_mas.node = mast->r->node;
- *mast->r = tmp_mas;
- split = slot_total - split;
- }
- split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
- /* Update parent slot for split calculation. */
- if (left)
- mast->orig_l->offset += end + 1;
-
- mast_split_data(mast, mas, split);
- mast_fill_bnode(mast, mas, 2);
- mas_split_final_node(mast, mas, height + 1);
+ mns_finalise(left);
+ mns_finalise(right);
+ mas_ascend(mas);
+ mas->end = p_end;
+ mas->offset = p_off;
+ mns_node_part_init(ma_part, left, right);
+ mas_wr_converged(&parent, &new_parent, ma_part, mas, /* skip = */ 2);
+ src->enode = parent.enode;
+ mas->node = new_parent.enode;
return true;
}
/*
- * mas_split() - Split data that is too big for one node into two.
- * @mas: The maple state
- * @b_node: The maple big node
+ * mas_wr_split() - Split the data into two nodes
+ * @wr_mas: The maple write state
+ *
+ * There is not enough room to contain the store in one node.
*/
-static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
+static void mas_wr_split(struct ma_wr_state *wr_mas)
{
- struct maple_subtree_state mast;
- int height = 0;
- unsigned char mid_split, split = 0;
- struct maple_enode *old;
-
- /*
- * Splitting is handled differently from any other B-tree; the Maple
- * Tree splits upwards. Splitting up means that the split operation
- * occurs when the walk of the tree hits the leaves and not on the way
- * down. The reason for splitting up is that it is impossible to know
- * how much space will be needed until the leaf is (or leaves are)
- * reached. Since overwriting data is allowed and a range could
- * overwrite more than one range or result in changing one entry into 3
- * entries, it is impossible to know if a split is required until the
- * data is examined.
- *
- * Splitting is a balancing act between keeping allocations to a minimum
- * and avoiding a 'jitter' event where a tree is expanded to make room
- * for an entry followed by a contraction when the entry is removed. To
- * accomplish the balance, there are empty slots remaining in both left
- * and right nodes after a split.
- */
- MA_STATE(l_mas, mas->tree, mas->index, mas->last);
- MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
- MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
+ struct ma_state *mas = wr_mas->mas;
+ struct ma_node_state src, parent, left, right;
+ struct ma_node_part ma_part;
+ int height;
+ unsigned char split, total;
trace_ma_op(__func__, mas);
- mas->depth = mas_mt_height(mas);
- mast.l = &l_mas;
- mast.r = &r_mas;
- mast.orig_l = &prev_l_mas;
- mast.orig_r = &prev_r_mas;
- mast.bn = b_node;
+ height = mas_mt_height(mas);
+ /* FIXME: Save this? */
+ total = mas_wr_new_end(wr_mas);
+ split = (total + 1) / 2;
+ mas->depth = height;
+ mns_node_part_leaf_init(&ma_part, wr_mas);
+
+ /* First split the leaves */
+ mns_node_init(&left, mas_pop_node(mas), wr_mas->type);
+ mns_node_init(&right, mas_pop_node(mas), wr_mas->type);
+ mns_mas_init(&src, mas);
+ src.max = mas->max;
+ src.min = mas->min;
- while (height++ <= mas->depth) {
- if (mt_slots[b_node->type] > b_node->b_end) {
- mas_split_final_node(&mast, mas, height);
- break;
- }
+ if (mt_is_alloc(mas->tree))
+ right.alloc = left.alloc = true;
- l_mas = r_mas = *mas;
- l_mas.node = mas_new_ma_node(mas, b_node);
- r_mas.node = mas_new_ma_node(mas, b_node);
- /*
- * Another way that 'jitter' is avoided is to terminate a split up early if the
- * left or right node has space to spare. This is referred to as "pushing left"
- * or "pushing right" and is similar to the B* tree, except the nodes left or
- * right can rarely be reused due to RCU, but the ripple upwards is halted which
- * is a significant savings.
- */
- /* Try to push left. */
- if (mas_push_data(mas, height, &mast, true))
- break;
- /* Try to push right. */
- if (mas_push_data(mas, height, &mast, false))
- break;
+ if (height > 1 &&
+ mas_wr_try_rebalance(mas, &src, total, &left, &right, &ma_part))
+ goto rebalanced;
- split = mab_calc_split(mas, b_node, &mid_split);
- mast_split_data(&mast, mas, split);
- /*
- * Usually correct, mab_mas_cp in the above call overwrites
- * r->max.
- */
- mast.r->max = mas->max;
- mast_fill_bnode(&mast, mas, 1);
- prev_l_mas = *mast.l;
- prev_r_mas = *mast.r;
+ left.min = mas->min;
+ right.max = mas->max;
+ if (split >= mas->offset)
+ mns_in_left(&src, &left, &right, mas, split, total, &ma_part);
+ else
+ mns_in_right(&src, &left, &right, mas, split, total, &ma_part);
+
+ mns_finalise(&left);
+ mns_finalise(&right);
+ mns_node_part_init(&ma_part, &left, &right);
+
+ if (height == 1) {
+ if (mt_is_alloc(mas->tree))
+ src.type = maple_arange_64;
+ else
+ src.type = maple_range_64;
+
+ goto new_root;
+ }
+
+ //printk("%d height is %d\n", __LINE__, height);
+ while (--height) {
+ mas_wr_ascend_init(mas, &src);
+ mas->end = ma_data_end(src.node, src.type, src.pivots,
+ src.max);
+ total = mas->end + 1;
+ if (mas->end + 1 < mt_slots[src.type])
+ goto converged;
+
+ //printk("\tConsume %p type %u\n", src.node, src.type);
+ mns_node_init(&left, mas_pop_node(mas), src.type);
+ mns_node_init(&right, mas_pop_node(mas), src.type);
+ if ((height > 1) &&
+ (mas_wr_try_rebalance(mas, &src, mas->end + 1, &left,
+ &right, &ma_part)))
+ goto rebalanced;
+
+ left.min = src.min;
+ right.max = src.max;
+ split = (total + 1) / 2;
+ if (split >= mas->offset)
+ mns_in_left(&src, &left, &right, mas, split, total, &ma_part);
+ else
+ mns_in_right(&src, &left, &right, mas, split, total, &ma_part);
+
+ mns_finalise(&left);
+ mns_finalise(&right);
+ mns_node_part_init(&ma_part, &left, &right);
}
- /* Set the original node as dead */
- old = mas->node;
- mas->node = l_mas.node;
- mas_wmb_replace(mas, old);
+new_root:
+ /* Converged on new root */
+ mas->depth++;
+ mas->offset = 0;
+ mas->end = 0;
+ mas_set_height(mas);
+converged:
+ mas_wr_converged(&src, &parent, &ma_part, mas, /* skip = */ 1);
+ mas->node = parent.enode;
+rebalanced:
+ mas_wmb_replace(mas, src.enode);
mtree_range_walk(mas);
return;
}
WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
- if (type == wr_rebalance)
- return mas_rebalance(wr_mas->mas, b_node);
-
- return mas_split(wr_mas->mas, b_node);
+ return mas_rebalance(wr_mas->mas, b_node);
}
/*
wr_mas->end_piv = wr_mas->mas->max;
}
-static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
-{
- struct ma_state *mas = wr_mas->mas;
- unsigned char new_end = mas->end + 2;
-
- new_end -= wr_mas->offset_end - mas->offset;
- if (wr_mas->r_min == mas->index)
- new_end--;
-
- if (wr_mas->end_piv == mas->last)
- new_end--;
-
- return new_end;
-}
-
/*
* mas_wr_append: Attempt to append
* @wr_mas: the maple write state
mas_wr_spanning_store(wr_mas);
break;
case wr_split_store:
+ mas_wr_split(wr_mas);
+ break;
case wr_rebalance:
mas_wr_bnode(wr_mas);
break;
goto retry;
}
if (WARN_ON_ONCE(mas_is_overflow(mas)))
return NULL;
pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
node, last, max, i);
}
}
first = last + 1;
}
}
}
if (gap > max_gap)
max_gap = gap;
p_start = p_end + 1;
if (p_end >= mas->max)
while (!mas_is_overflow(&mas)) {
MAS_WARN_ON(&mas, mte_dead_node(mas.node));
end = mas_data_end(&mas);
+ if (end < mt_min_slot_count(mas.node) &&
+ (mas.max != ULONG_MAX))
+ pr_err("Invalid size %u of " PTR_FMT "\n",
+ end, mas_mn(&mas));
if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
(!mte_is_root(mas.node)))) {
pr_err("Invalid size %u of " PTR_FMT "\n",