[ Upstream commit
212ed75dc5fb9d1423b3942c8f872a868cda3466 ]
The pipapo set backend follows copy-on-update approach, maintaining one
clone of the existing datastructure that is being updated. The clone
and current datastructures are swapped via rcu from the commit step.
The existing integration with the commit protocol is flawed because
there is no operation to clean up the clone if the transaction is
aborted. Moreover, the datastructure swap happens on set element
activation.
This patch adds two new operations for sets: commit and abort, these new
operations are invoked from the commit and abort steps, after the
transactions have been digested, and it updates the pipapo set backend
to use it.
This patch adds a new ->pending_update field to sets to maintain a list
of sets that require this new commit and abort operations.
Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
-
+ void (*commit)(const struct nft_set *set);
+ void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
bool (*estimate)(const struct nft_set_desc *desc,
u16 policy;
u16 udlen;
unsigned char *udata;
+ struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags:14,
set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&set->pending_update);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
}
}
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->commit)
+ continue;
+
+ set->ops->commit(set);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM);
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
}
}
+ nft_set_commit_update(&set_update_list);
+
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
kfree(trans);
}
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->abort)
+ continue;
+
+ set->ops->abort(set);
+ }
+}
+
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
if (action == NFNL_ABORT_VALIDATE &&
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem))
atomic_dec(&te->set->nelems);
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
if (!nft_setelem_is_catchall(te->set, &te->elem))
te->set->ndeact--;
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
}
}
+ nft_set_abort_update(&set_update_list);
+
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next,
}
}
-/**
- * pipapo_reclaim_match - RCU callback to free fields from old matching data
- * @rcu: RCU head
- */
-static void pipapo_reclaim_match(struct rcu_head *rcu)
+static void pipapo_free_match(struct nft_pipapo_match *m)
{
- struct nft_pipapo_match *m;
int i;
- m = container_of(rcu, struct nft_pipapo_match, rcu);
-
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
}
/**
- * pipapo_commit() - Replace lookup data with current working copy
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu: RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+ struct nft_pipapo_match *m;
+
+ m = container_of(rcu, struct nft_pipapo_match, rcu);
+ pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
* @set: nftables API set representation
*
* While at it, check if we should perform garbage collection on the working
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
priv->clone = new_clone;
}
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *m;
+
+ if (!priv->dirty)
+ return;
+
+ m = rcu_dereference(priv->match);
+
+ new_clone = pipapo_clone(m);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ pipapo_free_match(priv->clone);
+ priv->clone = new_clone;
+}
+
/**
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
* @elem: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
- * in use for lookups, and not directly inserted into current lookup data, so
- * we'll take care of that by calling pipapo_commit() here. Both
+ * in use for lookups, and not directly inserted into current lookup data. Both
* nft_pipapo_insert() and nft_pipapo_activate() are called once for each
* element, hence we can't purpose either one as a real commit operation.
*/
nft_set_elem_change_active(net, set, &e->ext);
nft_set_elem_clear_busy(&e->ext);
-
- pipapo_commit(set);
}
/**
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
- pipapo_commit(set);
return;
}
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};