From 199fcfbb12ccd45944364f3d6a877baad5b5865e Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett"
Date: Fri, 6 Mar 2020 13:45:54 -0500
Subject: [PATCH] maple_tree: Support freeing empty nodes on gap relocation.

When a gap is created during a low memory situation, coalesce may not be
able to group the gap together.  This can be handled by moving the right
node to the left & using a SKIP entry.  Note that this situation should
not arise unless in a low-memory situation, so a SKIP entry is useful as
it indicates the tree may need to be rebuilt after memory pressure
subsides.

When an empty node is found, move the right node into its place in the
parent, mark the right node's old slot with a retry entry, and free the
empty node.

Signed-off-by: Liam R. Howlett
---
 lib/maple_tree.c | 209 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 165 insertions(+), 44 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 714c7dbb180b..f2d422c5212b 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 //#include  // for task_size
@@ -2198,6 +2199,11 @@ static inline enum maple_type mas_determine_type(struct ma_state *mas,
 /** Private
  * mas_may_move_gap() - May move the gap to the proceeding node.
+ *
+ * 1. Check back for a gap and move it
+ * 2. check front for a gap.
+ * Ensure we cover the scenario where there is an empty node due to allocation
+ * failures - Move the gap wherever it can go and free this node.
  */
 static inline void mas_may_move_gap(struct ma_state *mas)
 {
@@ -2209,38 +2215,76 @@ static inline void mas_may_move_gap(struct ma_state *mas)
 	unsigned char slot = mas_get_slot(mas);
 	void *entry = NULL;
 	void *next_entry = NULL;
+	bool empty = false;
 	// node that starts with NULL
 	MA_STATE(next, mas->tree, mas->index, mas->last);
 	// node that ends with NULL
 	MA_STATE(curr, mas->tree, mas->index, mas->last);
+	MA_STATE(parent, mas->tree, mas->index, mas->last);
 
 	if (mte_is_root(mas->node))
 		return;
 
 	mas_dup_state(&next, mas);
 	mas_dup_state(&curr, mas);
-	end = mas_data_end(&curr);
-	if (end != slot) {
-		mas_prev(&curr, 0);
-		if (curr.node == mas->node) // prev value not in prev node.
-			return;
+
+	/* Start by checking the back of the current node. */
+	end = _mas_data_end(&curr, mte_node_type(curr.node), &last_piv,
+			    &coalesce);
+	new_end = end;
+	do {
+		entry = mas_get_rcu_slot(&curr, new_end);
+		if (entry && !xa_is_deleted(entry))
+			break;
+	} while (new_end--);
+
+	if (new_end == UCHAR_MAX) {
+		new_end = 0;
+		empty = true;
 	}
 
-	if (mas_is_none(&curr))
-		return;
+	if (end == new_end) { // no gap at back.
+		unsigned char new_start = 0;
 
-	end = _mas_data_end(&curr, mte_node_type(curr.node), &last_piv,
+		do {
+			entry = mas_get_rcu_slot(&curr, new_start);
+			if (entry && !xa_is_deleted(entry))
+				break;
+		} while (++new_start < slot);
+
+		if (new_start == 0)
+			return;
+
+		mas_set_slot(&curr, 0);
+		mas_prev(&curr, 0);
+		if (mas_is_none(&curr))
+			return;
+
+		end = _mas_data_end(&curr, mte_node_type(curr.node), &last_piv,
 			    &coalesce);
-	entry = mas_get_rcu_slot(&curr, end);
-	if (entry && (!xa_is_deleted(entry)))
-		return;
+		new_end = end;
+		do {
+			entry = mas_get_rcu_slot(&curr, new_end);
+			if (entry && !xa_is_deleted(entry))
+				break;
+		} while (new_end--);
 
-	if (curr.node == mas->node) // current wasn't shifted to prev.
-		mas_next(&next, ULONG_MAX);
+		if (end == new_end) // no gap at back again.
+			return;
 
-	if (mas_is_none(&next))
-		return;
+		if (new_end == UCHAR_MAX) {
+			new_end = 0;
+			empty = true;
+		}
+	}
+
+	if (curr.node == mas->node) { // current wasn't shifted to prev.
+		mas_set_slot(&next, mt_slot_count(next.node) - 1);
+		mas_next(&next, ULONG_MAX);
+		if (mas_is_none(&next))
+			return;
+	}
 
 	next_entry = mas_get_rcu_slot(&next, 0);
 	if (next_entry && !mt_will_coalesce(next_entry))
@@ -2250,10 +2294,8 @@ static inline void mas_may_move_gap(struct ma_state *mas)
 	 * start, see if there are more to shift.
 	 */
-	new_end = end;
-	while (new_end && (!entry || mt_will_coalesce(entry)))
-		entry = mas_get_rcu_slot(&curr, --new_end);
-
+	if (empty)
+		goto curr_is_empty;
 
 	// Set the slot to 0 in case of deleted or retry.
 	if (next_entry)
 		mte_set_rcu_slot(next.node, 0, NULL);
@@ -2261,11 +2303,9 @@ static inline void mas_may_move_gap(struct ma_state *mas)
 	last_piv = mas_get_safe_pivot(&curr, new_end);
 	while (new_end++ != end) {
 		if (new_end < mt_pivot_count(curr.node))
-			mte_set_pivot(curr.node, new_end, 0);
+			mte_set_pivot(curr.node, new_end, last_piv);
 		// The location storing these values has moved.
-		// FIXME: We need to allocate a new node.
-		// FIXME: What if allocation fails?
-		mte_set_rcu_slot(curr.node, new_end, NULL);
+		mte_set_rcu_slot(curr.node, new_end, XA_RETRY_ENTRY);
 	}
 
 	if (curr.node == mas->node)
@@ -2281,6 +2321,33 @@ static inline void mas_may_move_gap(struct ma_state *mas)
 		mas_update_gap(&curr, false);
 		mas_update_gap(&next, false);
 	}
+
+	return;
+
+curr_is_empty:
+	// Due to 0 being special, we should just move nodes to the left and
+	// set the right to retry.
+	slot = mte_parent_slot(curr.node);
+	mas_dup_state(&parent, &curr);
+	mas_ascend(&parent);
+	mte_set_rcu_slot(parent.node, slot, next.node);
+	mas_set_safe_pivot(&parent, slot, next.max);
+	next.min = curr.min;
+	curr.max = curr.min;
+	mas_shift_pivot(&curr, &next, next.max);
+	if (mt_is_alloc(mas->tree)) {
+		mas_update_gap(&curr, false);
+		mas_update_gap(&next, false);
+	}
+
+
+	slot = mte_parent_slot(next.node);
+	mas_dup_state(&parent, &next);
+	mas_ascend(&parent);
+	mte_set_rcu_slot(parent.node, slot, XA_RETRY_ENTRY);
+
+	mte_free(curr.node);
+	mas_dup_state(mas, &next);
 }
 
 static inline int mas_add(struct ma_state *mas, void *entry, bool overwrite,
@@ -2580,6 +2647,8 @@ static inline int mas_spanning_add(struct ma_state *mas, void *entry,
 	if (!mas_rebalance_node(mas))
 		if (mt_is_alloc(mas->tree))
 			mas_update_gap(&r_mas, true);
+	mas_dup_state(&p_mas, mas); // parent may be replaced.
+	mas_ascend(&p_mas);
 
 	if (mas_is_err(mas))
 		return 0; // FIXME: Broken tree?
@@ -3161,6 +3230,9 @@ static inline bool mas_next_nentry(struct ma_state *mas, unsigned long max,
 		if (r_start > max)
 			goto no_entry;
 
+		if (r_start > mas->max)
+			goto no_entry;
+
 		entry = mas_get_rcu_slot(mas, slot);
 		if (!mt_is_empty(entry))
 			goto found;
@@ -3480,53 +3552,101 @@ static inline bool mas_coalesce(struct ma_state *mas, unsigned char l_end_slot,
 			       struct ma_state *p_mas, unsigned long total_slots)
 {
 	struct maple_node *mn;
-	bool free_left = false;
+	struct maple_enode *this_node = mas->node;
+	bool free_left = false, alloc_failed = false, empty_left = false;
+	int alloc_cnt = 2;
 	MA_STATE(dst, mas->tree, mas->index, mas->last);
 
 	// it is possible that all of the right node can be appended to the
 	// left.
-	if (total_slots + 1 - l_end_slot < mt_slots[l_type] - l_end_slot)
-		goto use_left;
+	if (l_end_slot - l_coalesce + 1 == 0) {
+		void *entry = mte_get_rcu_slot(r_mas->node, 0, r_mas->tree);
 
-	mas_node_cnt(mas, 1); // ensure we have a node, or allocate one.
-	if (mas_is_err(mas)) {
-		// Allocation failed, we could try to append as much
-		// as possible here?
-		return false;
+		if (!entry || xa_is_deleted(entry))
+			empty_left = true;
 	}
 
-	free_left = true;
-	mn = mas_next_alloc(mas);
-	mas_dup_state(&dst, mas);
-	mn->parent = mas_mn(mas)->parent;
-	dst.node = mt_mk_node(mn, l_type);
-	l_end_slot = mas_append(&dst, mas, 0, l_end_slot);
-	//l_end_slot = mas_cp_append(&dst, mas, p_mas, 0, l_end_slot, false);
+	if (empty_left ||
+	    (total_slots + 1 - l_end_slot < mt_slots[l_type] - l_end_slot))
+		alloc_cnt = 1;
+
+	mas_node_cnt(mas, alloc_cnt); // ensure we have a node, or allocate one.
+	if (mas_is_err(mas)) {
+		if (alloc_cnt > 1)
+			return false;
+
+		alloc_failed = true;
+		mas->node = this_node;
+	}
 
-	// If there is no entry or pivot, then set one to avoid a first entry
-	// in r_mas being incorrect.
-	if (!l_end_slot && !mte_get_pivot(dst.node, 0)) {
-		mte_set_pivot(dst.node, 0, mas->max);
-		l_end_slot++;
+	if (empty_left) {
+		//The left can become a skip and the right can take the left
+		//into the first slot which is empty.
+		mte_set_rcu_slot(p_mas->node, mte_parent_slot(mas->node),
+				 XA_SKIP_ENTRY);
+		if (mt_is_alloc(mas->tree))
+			mte_set_gap(p_mas->node, mte_parent_slot(mas->node),
+				    0);
+		free_left = true;
+		mte_free(this_node);
+		mas->node = r_mas->node;
+		this_node = mas->node;
+		goto empty_left;
+	} else if (total_slots + 1 - l_end_slot <
+		   mt_slots[l_type] - l_end_slot) {
+		goto use_left;
+	} else {
+		free_left = true;
+		mn = mas_next_alloc(mas);
+		mas_dup_state(&dst, mas);
+		mn->parent = mas_mn(mas)->parent;
+		dst.node = mt_mk_node(mn, l_type);
+		l_end_slot = mas_append(&dst, mas, 0, l_end_slot);
+
+		// If there is no entry or pivot, then set one to avoid a
+		// first entry in r_mas being incorrect.
+		if (!l_end_slot && !mte_get_pivot(dst.node, 0)) {
+			mte_set_pivot(dst.node, 0, mas->max);
+			l_end_slot++;
+		}
+		mas->node = dst.node;
 	}
-	mas->node = dst.node;
+
 use_left:
+	// Copy data to the left node.
 	mas_append(mas, r_mas, 0, r_end_slot);
 
 	if (!mte_is_leaf(mas->node))
 		mas_adopt_children(mas, mas->node);
 
+	// Redirect reads to the new node.
 	mte_set_pivot(p_mas->node, mte_parent_slot(mas->node), r_mas->max);
+	// indicate to skip this slot.
 	mte_set_rcu_slot(p_mas->node, mte_parent_slot(r_mas->node),
 			 XA_SKIP_ENTRY);
+
 	if (mt_is_alloc(mas->tree))
 		mte_set_gap(p_mas->node, mte_parent_slot(r_mas->node), 0);
 
 	mte_free(r_mas->node);
+
+empty_left:
 	mas->max = r_mas->max; // update limits.
 
+	// Remove the skip entry if the allocation was okay.
+	if (!alloc_failed) {
+		mas_dup_state(&dst, p_mas);
+		mn = mas_next_alloc(mas);
+		dst.node = mt_mk_node(mn, mte_node_type(p_mas->node));
+		mas_append(&dst, p_mas, 0, mas_data_end(p_mas));
+		mte_to_node(dst.node)->parent = mas_mn(p_mas)->parent;
+		p_mas->node = dst.node;
+		mas_replace(p_mas);
+		mas_mn(mas)->parent = mte_to_node(this_node)->parent;
+	}
+
 	return free_left;
 }
 
 /* Private
@@ -3713,6 +3833,7 @@ static inline void mas_rebalance(struct ma_state *mas)
 	} while (!at_root);
 
 	mas_rebalance_gaps(mas);
+	_mas_walk(mas); // return to the updated location in the tree.
 }
 
 static inline bool _mas_rev_awalk(struct ma_state *mas, unsigned long size)
-- 
2.50.1