www.infradead.org Git - users/hch/misc.git/commitdiff
tc: Ensure we have enough buffer space when sending filter netlink notifications
author Toke Høiland-Jørgensen <toke@redhat.com>
Mon, 7 Apr 2025 10:55:34 +0000 (12:55 +0200)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 8 Apr 2025 11:57:49 +0000 (13:57 +0200)
The tfilter_notify() and tfilter_del_notify() functions assume that
NLMSG_GOODSIZE is always enough to dump the filter chain. This is not
always the case, which can lead to silent notify failures (because the
return code of tfilter_notify() is not always checked). In particular,
this can lead to NLM_F_ECHO not being honoured even though an action
succeeds, which forces userspace to create workarounds[0].

Fix this by increasing the message size if dumping the filter chain into
the allocated skb fails. Use the size of the incoming skb as a size hint
if set, so we can start at a larger value when appropriate.

To trigger this, run the following commands:

 # ip link add type veth
 # tc qdisc replace dev veth0 root handle 1: fq_codel
 # tc -echo filter add dev veth0 parent 1: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done)

Before this fix, tc just returns:

Not a filter(cmd 2)

After the fix, we get the correct echo:

added filter dev veth0 parent 1: protocol all pref 49152 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 terminal flowid not_in_hw
  match 00000000/00000000 at 0
action order 1:  pedit action pass keys 1
  index 1 ref 1 bind 1
key #0  at 20: val 00000016 mask ffff0000
[repeated 32 times]

[0] https://github.com/openvswitch/ovs/commit/106ef21860c935e5e0017a88bf42b94025c4e511

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Frode Nordahl <frode.nordahl@canonical.com>
Closes: https://bugs.launchpad.net/ubuntu/+source/openvswitch/+bug/2018500
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://patch.msgid.link/20250407105542.16601-1-toke@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/sched/cls_api.c

index 4f648af8cfaafef7c715835d6c986e8980e2c9b7..ecec0a1e1c1a07bec1b82b0bf9ab02ea5514084d 100644 (file)
@@ -2057,6 +2057,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        unsigned char *b = skb_tail_pointer(skb);
+       int ret = -EMSGSIZE;
 
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
        if (!nlh)
@@ -2101,11 +2102,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 
        return skb->len;
 
+cls_op_not_supp:
+       ret = -EOPNOTSUPP;
 out_nlmsg_trim:
 nla_put_failure:
-cls_op_not_supp:
        nlmsg_trim(skb, b);
-       return -1;
+       return ret;
+}
+
+static struct sk_buff *tfilter_notify_prep(struct net *net,
+                                          struct sk_buff *oskb,
+                                          struct nlmsghdr *n,
+                                          struct tcf_proto *tp,
+                                          struct tcf_block *block,
+                                          struct Qdisc *q, u32 parent,
+                                          void *fh, int event,
+                                          u32 portid, bool rtnl_held,
+                                          struct netlink_ext_ack *extack)
+{
+       unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE;
+       struct sk_buff *skb;
+       int ret;
+
+retry:
+       skb = alloc_skb(size, GFP_KERNEL);
+       if (!skb)
+               return ERR_PTR(-ENOBUFS);
+
+       ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+                           n->nlmsg_seq, n->nlmsg_flags, event, false,
+                           rtnl_held, extack);
+       if (ret <= 0) {
+               kfree_skb(skb);
+               if (ret == -EMSGSIZE) {
+                       size += NLMSG_GOODSIZE;
+                       goto retry;
+               }
+               return ERR_PTR(-EINVAL);
+       }
+       return skb;
 }
 
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
@@ -2121,16 +2156,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
        if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
                return 0;
 
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb)
-               return -ENOBUFS;
-
-       if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-                         n->nlmsg_seq, n->nlmsg_flags, event,
-                         false, rtnl_held, extack) <= 0) {
-               kfree_skb(skb);
-               return -EINVAL;
-       }
+       skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event,
+                                 portid, rtnl_held, extack);
+       if (IS_ERR(skb))
+               return PTR_ERR(skb);
 
        if (unicast)
                err = rtnl_unicast(skb, net, portid);
@@ -2153,16 +2182,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
        if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
                return tp->ops->delete(tp, fh, last, rtnl_held, extack);
 
-       skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-       if (!skb)
-               return -ENOBUFS;
-
-       if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-                         n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
-                         false, rtnl_held, extack) <= 0) {
+       skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh,
+                                 RTM_DELTFILTER, portid, rtnl_held, extack);
+       if (IS_ERR(skb)) {
                NL_SET_ERR_MSG(extack, "Failed to build del event notification");
-               kfree_skb(skb);
-               return -EINVAL;
+               return PTR_ERR(skb);
        }
 
        err = tp->ops->delete(tp, fh, last, rtnl_held, extack);