From 5f079290e5913a0060e059500b7d440990ac1066 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 11 Mar 2025 21:12:59 +0530 Subject: [PATCH 01/16] net: ethernet: ti: am65-cpsw: Fix NAPI registration sequence Registering the interrupts for TX or RX DMA Channels prior to registering their respective NAPI callbacks can result in a NULL pointer dereference. This is seen in practice as a random occurrence since it depends on the randomness associated with the generation of traffic by Linux and the reception of traffic from the wire. Fixes: 681eb2beb3ef ("net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path") Signed-off-by: Vignesh Raghavendra Co-developed-by: Siddharth Vadapalli Signed-off-by: Siddharth Vadapalli Reviewed-by: Alexander Sverdlin Reviewed-by: Roger Quadros Link: https://patch.msgid.link/20250311154259.102865-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 32 +++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 2806238629f8..bef734c6e5c2 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2306,14 +2306,18 @@ static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) { struct device *dev = common->dev; + struct am65_cpsw_tx_chn *tx_chn; int i, ret = 0; for (i = 0; i < common->tx_ch_num; i++) { - struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; + tx_chn = &common->tx_chns[i]; hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); tx_chn->tx_hrtimer.function = &am65_cpsw_nuss_tx_timer_callback; + netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx, + am65_cpsw_nuss_tx_poll); + ret = devm_request_irq(dev, tx_chn->irq, am65_cpsw_nuss_tx_irq, IRQF_TRIGGER_HIGH, @@ -2323,19 +2327,16 @@ static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) tx_chn->id, tx_chn->irq, ret); goto err; } - - netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx, - am65_cpsw_nuss_tx_poll); } return 0; err: - for (--i ; i >= 0 ; i--) { - struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; - - netif_napi_del(&tx_chn->napi_tx); + netif_napi_del(&tx_chn->napi_tx); + for (--i; i >= 0; i--) { + tx_chn = &common->tx_chns[i]; devm_free_irq(dev, tx_chn->irq, tx_chn); + netif_napi_del(&tx_chn->napi_tx); } return ret; @@ -2569,6 +2570,9 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) HRTIMER_MODE_REL_PINNED); flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback; + netif_napi_add(common->dma_ndev, &flow->napi_rx, + am65_cpsw_nuss_rx_poll); + ret = devm_request_irq(dev, flow->irq, am65_cpsw_nuss_rx_irq, IRQF_TRIGGER_HIGH, @@ -2577,11 +2581,8 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) dev_err(dev, "failure requesting rx %d irq %u, %d\n", i, flow->irq, ret); flow->irq = -EINVAL; - goto err_flow; + goto err_request_irq; } - - netif_napi_add(common->dma_ndev, &flow->napi_rx, - am65_cpsw_nuss_rx_poll); } /* setup classifier to route priorities to flows */ @@ -2589,11 +2590,14 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) return 0; +err_request_irq: + netif_napi_del(&flow->napi_rx); + err_flow: - for (--i; i >= 0 ; i--) { + for (--i; i >= 0; i--) { flow = &rx_chn->flows[i]; - netif_napi_del(&flow->napi_rx); devm_free_irq(dev, flow->irq, flow); + netif_napi_del(&flow->napi_rx); } err: -- 2.50.1 From 2fc8a346625eb1abfe202062c7e6a13d76cde5ea Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 11 Mar 2025 13:12:54 -0700 Subject: [PATCH 02/16] net: mana: Support holes in device list reply msg According to GDMA protocol, holes (zeros) are allowed at the beginning or middle of the gdma_list_devices_resp message. The existing code cannot properly handle this, and may miss some devices in the list. To fix, scan the entire list until the num_of_devs are found, or until the end of the list. Cc: stable@vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang Reviewed-by: Long Li Reviewed-by: Shradha Gupta Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1741723974-1534-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 14 ++++++++++---- include/net/mana/gdma.h | 11 +++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 11457b6296cc..638ef64d639f 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -134,9 +134,10 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) struct gdma_list_devices_resp resp = {}; struct gdma_general_req req = {}; struct gdma_dev_id dev; - u32 i, max_num_devs; + int found_dev = 0; u16 dev_type; int err; + u32 i; mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), sizeof(resp)); @@ -148,12 +149,17 @@ static int mana_gd_detect_devices(struct pci_dev *pdev) return err ? err : -EPROTO; } - max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); - - for (i = 0; i < max_num_devs; i++) { + for (i = 0; i < GDMA_DEV_LIST_SIZE && + found_dev < resp.num_of_devs; i++) { dev = resp.devs[i]; dev_type = dev.type; + /* Skip empty devices */ + if (dev.as_uint32 == 0) + continue; + + found_dev++; + /* HWC is already detected in mana_hwc_create_channel(). */ if (dev_type == GDMA_DEVICE_HWC) continue; diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 90f56656b572..62e9d7673862 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -408,8 +408,6 @@ struct gdma_context { struct gdma_dev mana_ib; }; -#define MAX_NUM_GDMA_DEVICES 4 - static inline bool mana_gd_is_mana(struct gdma_dev *gd) { return gd->dev_id.type == GDMA_DEVICE_MANA; @@ -556,11 +554,15 @@ enum { #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) #define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) +/* Driver can handle holes (zeros) in the device list */ +#define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) + #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ - GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP) #define GDMA_DRV_CAP_FLAGS2 0 @@ -621,11 +623,12 @@ struct gdma_query_max_resources_resp { }; /* HW DATA */ /* GDMA_LIST_DEVICES */ +#define GDMA_DEV_LIST_SIZE 64 struct gdma_list_devices_resp { struct gdma_resp_hdr hdr; u32 num_of_devs; u32 reserved; - struct gdma_dev_id devs[64]; + struct gdma_dev_id devs[GDMA_DEV_LIST_SIZE]; }; /* HW DATA */ /* GDMA_REGISTER_DEVICE */ -- 2.50.1 From daa624d3c2ddffdcbad140a9625a4064371db44f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Mar 2025 22:25:30 +0100 Subject: [PATCH 03/16] net: ipv6: fix TCP GSO segmentation with NAT When updating the source/destination address, the TCP/UDP checksum needs to be updated as well. Fixes: bee88cd5bd83 ("net: add support for segmenting TCP fraglist GSO packets") Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/20250311212530.91519-1-nbd@nbd.name Signed-off-by: Paolo Abeni --- net/ipv6/tcpv6_offload.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index a45bf17cb2a1..ae2da28f9dfb 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -94,14 +94,23 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) } static void __tcpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, __be16 *oldport, __be16 newport) { - struct tcphdr *th; + struct tcphdr *th = tcp_hdr(seg); + + if (!ipv6_addr_equal(oldip, newip)) { + inet_proto_csum_replace16(&th->check, seg, + oldip->s6_addr32, + newip->s6_addr32, + true); + *oldip = *newip; + } if (*oldport == newport) return; - th = tcp_hdr(seg); inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); *oldport = newport; } @@ -129,10 +138,10 @@ static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) th2 = tcp_hdr(seg); iph2 = ipv6_hdr(seg); - iph2->saddr = iph->saddr; - iph2->daddr = iph->daddr; - __tcpv6_gso_segment_csum(seg, &th2->source, th->source); - __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest); + __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &th2->source, th->source); + __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &th2->dest, th->dest); } return segs; -- 2.50.1 From 9740890ee20e01f99ff1dde84c63dcf089fabb98 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 11 Mar 2025 18:03:25 -0700 Subject: [PATCH 04/16] ipv6: Fix memleak of nhc_pcpu_rth_output in fib_check_nh_v6_gw(). fib_check_nh_v6_gw() expects that fib6_nh_init() cleans up everything when it fails. Commit 7dd73168e273 ("ipv6: Always allocate pcpu memory in a fib6_nh") moved fib_nh_common_init() before alloc_percpu_gfp() within fib6_nh_init() but forgot to add cleanup for fib6_nh->nh_common.nhc_pcpu_rth_output in case it fails to allocate fib6_nh->rt6i_pcpu, resulting in memleak. Let's call fib_nh_common_release() and clear nhc_pcpu_rth_output in the error path. Note that we can remove the fib6_nh_release() call in nh_create_ipv6() later in net-next.git. Fixes: 7dd73168e273 ("ipv6: Always allocate pcpu memory in a fib6_nh") Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250312010333.56001-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ef2d23a1e3d5..bc6bcf5d7133 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3644,7 +3644,8 @@ out: in6_dev_put(idev); if (err) { - lwtstate_put(fib6_nh->fib_nh_lws); + fib_nh_common_release(&fib6_nh->nh_common); + fib6_nh->nh_common.nhc_pcpu_rth_output = NULL; fib6_nh->fib_nh_lws = NULL; netdev_put(dev, dev_tracker); } -- 2.50.1 From 9a81fc3480bf5dbe2bf80e278c440770f6ba2692 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 11 Mar 2025 18:38:48 -0700 Subject: [PATCH 05/16] ipv6: Set errno after ip_fib_metrics_init() in ip6_route_info_create(). While creating a new IPv6, we could get a weird -ENOMEM when RTA_NH_ID is set and either of the conditions below is true: 1) CONFIG_IPV6_SUBTREES is enabled and rtm_src_len is specified 2) nexthop_get() fails e.g.) # strace ip -6 route add fe80::dead:beef:dead:beef nhid 1 from :: recvmsg(3, {msg_iov=[{iov_base=[...[ {error=-ENOMEM, msg=[... [...]]}, [{nla_len=49, nla_type=NLMSGERR_ATTR_MSG}, "Nexthops can not be used with so"...] ]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 148 Let's set err explicitly after ip_fib_metrics_init() in ip6_route_info_create(). Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: Kuniyuki Iwashima Reviewed-by: David Ahern Link: https://patch.msgid.link/20250312013854.61125-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bc6bcf5d7133..15ce21afc8c6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3803,10 +3803,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (nh) { if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + err = -EINVAL; goto out_free; } if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + err = -ENOENT; goto out_free; } rt->nh = nh; -- 2.50.1 From f3b97b7d4bf316c3991e5634c9f4847c2df35478 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 12 Mar 2025 10:52:49 +0100 Subject: [PATCH 06/16] devlink: fix xa_alloc_cyclic() error handling In case of returning 1 from xa_alloc_cyclic() (wrapping) ERR_PTR(1) will be returned, which will cause IS_ERR() to be false. Which can lead to dereference not allocated pointer (rel). Fix it by checking if err is lower than zero. This wasn't found in real usecase, only noticed. Credit to Pierre. Fixes: c137743bce02 ("devlink: introduce object and nested devlink relationship infra") Signed-off-by: Michal Swiatkowski Reviewed-by: Andrew Lunn Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/devlink/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index f49cd83f1955..7203c39532fc 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -117,7 +117,7 @@ static struct devlink_rel *devlink_rel_alloc(void) err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel, xa_limit_32b, &next, GFP_KERNEL); - if (err) { + if (err < 0) { kfree(rel); return ERR_PTR(err); } -- 2.50.1 From 3614bf90130d60f191a5fe218d04f6251c678e13 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 12 Mar 2025 10:52:50 +0100 Subject: [PATCH 07/16] dpll: fix xa_alloc_cyclic() error handling In case of returning 1 from xa_alloc_cyclic() (wrapping) ERR_PTR(1) will be returned, which will cause IS_ERR() to be false. Which can lead to dereference not allocated pointer (pin). Fix it by checking if err is lower than zero. This wasn't found in real usecase, only noticed. Credit to Pierre. Fixes: 97f265ef7f5b ("dpll: allocate pin ids in cycle") Signed-off-by: Michal Swiatkowski Reviewed-by: Vadim Fedorenko Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/dpll/dpll_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 32019dc33cca..1877201d1aa9 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -505,7 +505,7 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC); ret = xa_alloc_cyclic(&dpll_pin_xa, &pin->id, pin, xa_limit_32b, &dpll_pin_xa_id, GFP_KERNEL); - if (ret) + if (ret < 0) goto err_xa_alloc; return pin; err_xa_alloc: -- 2.50.1 From 3178d2b048365fe2c078cd53f85f2abf1487733b Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Wed, 12 Mar 2025 10:52:51 +0100 Subject: [PATCH 08/16] phy: fix xa_alloc_cyclic() error handling xa_alloc_cyclic() can return 1, which isn't an error. To prevent situation when the caller of this function will treat it as no error do a check only for negative here. Fixes: 384968786909 ("net: phy: Introduce ethernet link topology representation") Signed-off-by: Michal Swiatkowski Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/phy/phy_link_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c index 4a5d73002a1a..0e9e987f37dd 100644 --- a/drivers/net/phy/phy_link_topology.c +++ b/drivers/net/phy/phy_link_topology.c @@ -73,7 +73,7 @@ int phy_link_topo_add_phy(struct net_device *dev, xa_limit_32b, &topo->next_phy_index, GFP_KERNEL); - if (ret) + if (ret < 0) goto err; return 0; -- 2.50.1 From acf10a8c0b3a36d5ff35f91585e9755ea0b62ac3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Mar 2025 14:10:40 +0100 Subject: [PATCH 09/16] selftests: drv-net: use defer in the ping test Make sure the test cleans up after itself. The XDP off statements at the end of the test may not be reached. Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250312131040.660386-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/ping.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 93f4b411b378..fc69bfcc37c4 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -7,7 +7,7 @@ from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port -from lib.py import ethtool, ip +from lib.py import defer, ethtool, ip remote_ifname="" no_sleep=False @@ -60,6 +60,7 @@ def _set_xdp_generic_sb_on(cfg) -> None: prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") if no_sleep != True: time.sleep(10) @@ -68,7 +69,9 @@ def _set_xdp_generic_mb_on(cfg) -> None: test_dir = os.path.dirname(os.path.realpath(__file__)) prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") if no_sleep != True: time.sleep(10) @@ -78,6 +81,7 @@ def _set_xdp_native_sb_on(cfg) -> None: prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if xdp_info['xdp']['mode'] != 1: """ @@ -94,10 +98,11 @@ def _set_xdp_native_mb_on(cfg) -> None: test_dir = os.path.dirname(os.path.realpath(__file__)) prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") except Exception as e: - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) raise KsftSkipEx('device does not support native-multi-buffer XDP') if no_sleep != True: @@ -111,6 +116,7 @@ def _set_xdp_offload_on(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') + defer(ip, f"link set dev {cfg.ifname} xdpoffload off") cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: @@ -157,7 +163,6 @@ def test_xdp_generic_sb(cfg, netnl) -> None: _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpgeneric off" % cfg.ifname) def test_xdp_generic_mb(cfg, netnl) -> None: _set_xdp_generic_mb_on(cfg) @@ -169,7 +174,6 @@ def test_xdp_generic_mb(cfg, netnl) -> None: _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpgeneric off" % cfg.ifname) def test_xdp_native_sb(cfg, netnl) -> None: _set_xdp_native_sb_on(cfg) @@ -181,7 +185,6 @@ def test_xdp_native_sb(cfg, netnl) -> None: _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdp off" % cfg.ifname) def test_xdp_native_mb(cfg, netnl) -> None: _set_xdp_native_mb_on(cfg) @@ -193,14 +196,12 @@ def test_xdp_native_mb(cfg, netnl) -> None: _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdp off" % cfg.ifname) def test_xdp_offload(cfg, netnl) -> None: _set_xdp_offload_on(cfg) _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: @@ -213,7 +214,6 @@ def main() -> None: test_xdp_native_mb, test_xdp_offload], args=(cfg, EthtoolFamily())) - set_interface_init(cfg) ksft_exit() -- 2.50.1 From c9cb135bc604ad6e457e06c19e0857c9cd0eef7f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 12 Mar 2025 19:43:09 +0000 Subject: [PATCH 10/16] net: stmmac: dwc-qos-eth: use devm_kzalloc() for AXI data Everywhere else in the driver uses devm_kzalloc() when allocating the AXI data, so there is no kfree() of this structure. However, dwc-qos-eth uses kzalloc(), which leads to this memory being leaked. Switch to use devm_kzalloc(). Fixes: d8256121a91a ("stmmac: adding new glue driver dwmac-dwc-qos-eth") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tsRyv-0064nU-O9@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bd4eb187f8c6..b5a7e05ab7a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -46,7 +46,9 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, u32 a_index = 0; if (!plat_dat->axi) { - plat_dat->axi = kzalloc(sizeof(struct stmmac_axi), GFP_KERNEL); + plat_dat->axi = devm_kzalloc(&pdev->dev, + sizeof(struct stmmac_axi), + GFP_KERNEL); if (!plat_dat->axi) return -ENOMEM; -- 2.50.1 From 559847f56769037e5b2e0474d3dbff985b98083d Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 13 Mar 2025 08:50:08 +0000 Subject: [PATCH 11/16] xsk: fix an integer overflow in xp_create_and_assign_umem() Since the i and pool->chunk_size variables are of type 'u32', their product can wrap around and then be cast to 'u64'. This can lead to two different XDP buffers pointing to the same memory area. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 94033cd8e73b ("xsk: Optimize for aligned case") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Link: https://patch.msgid.link/20250313085007.3116044-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Paolo Abeni --- net/xdp/xsk_buff_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 1f7975b49657..d158cb6dd391 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -105,7 +105,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (pool->unaligned) pool->free_heads[i] = xskb; else - xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); + xp_init_xskb_addr(xskb, pool, (u64)i * pool->chunk_size); } return pool; -- 2.50.1 From f3009d0d6ab78053117f8857b921a8237f4d17b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Mar 2025 13:10:57 +0300 Subject: [PATCH 12/16] net: atm: fix use after free in lec_send() The ->send() operation frees skb so save the length before calling ->send() to avoid a use after free. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Link: https://patch.msgid.link/c751531d-4af4-42fe-affe-6104b34b791d@stanley.mountain Signed-off-by: Paolo Abeni --- net/atm/lec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index ffef658862db..a948dd47c3f3 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -181,6 +181,7 @@ static void lec_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_device *dev = skb->dev; + unsigned int len = skb->len; ATM_SKB(skb)->vcc = vcc; atm_account_tx(vcc, skb); @@ -191,7 +192,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) } dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += len; } static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue) -- 2.50.1 From 47a9b5e52abd2b717dfc8b9460589f89936d93cf Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 14 Mar 2025 15:57:21 +0530 Subject: [PATCH 13/16] net: ti: icssg-prueth: Add lock to stats Currently the API emac_update_hardware_stats() reads different ICSSG stats without any lock protection. This API gets called by .ndo_get_stats64() which is only under RCU protection and nothing else. Add lock to this API so that the reading of statistics happens during lock. Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: MD Danish Anwar Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250314102721.1394366-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 1 + drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 ++ drivers/net/ethernet/ti/icssg/icssg_stats.c | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 00ed97860547..9a75733e3f8f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1679,6 +1679,7 @@ static int prueth_probe(struct platform_device *pdev) } spin_lock_init(&prueth->vtbl_lock); + spin_lock_init(&prueth->stats_lock); /* setup netdev interfaces */ if (eth0_node) { ret = prueth_netdev_init(prueth, eth0_node); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 329b46e9ee53..f41786b05741 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -305,6 +305,8 @@ struct prueth { int default_vlan; /** @vtbl_lock: Lock for vtbl in shared memory */ spinlock_t vtbl_lock; + /** @stats_lock: Lock for reading icssg stats */ + spinlock_t stats_lock; }; struct emac_tx_ts_response { diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 8800bd3a8d07..6f0edae38ea2 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -26,6 +26,8 @@ void emac_update_hardware_stats(struct prueth_emac *emac) u32 val, reg; int i; + spin_lock(&prueth->stats_lock); + for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) { regmap_read(prueth->miig_rt, base + icssg_all_miig_stats[i].offset, @@ -51,6 +53,8 @@ void emac_update_hardware_stats(struct prueth_emac *emac) emac->pa_stats[i] += val; } } + + spin_unlock(&prueth->stats_lock); } void icssg_stats_work_handler(struct work_struct *work) -- 2.50.1 From 986ffb3a57c5650fb8bf6d59a8f0f07046abfeb6 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Fri, 14 Mar 2025 13:00:46 +0100 Subject: [PATCH 14/16] net: lwtunnel: fix recursion loops This patch acts as a parachute, catch all solution, by detecting recursion loops in lwtunnel users and taking care of them (e.g., a loop between routes, a loop within the same route, etc). In general, such loops are the consequence of pathological configurations. Each lwtunnel user is still free to catch such loops early and do whatever they want with them. It will be the case in a separate patch for, e.g., seg6 and seg6_local, in order to provide drop reasons and update statistics. Another example of a lwtunnel user taking care of loops is ioam6, which has valid use cases that include loops (e.g., inline mode), and which is addressed by the next patch in this series. Overall, this patch acts as a last resort to catch loops and drop packets, since we don't want to leak something unintentionally because of a pathological configuration in lwtunnels. The solution in this patch reuses dev_xmit_recursion(), dev_xmit_recursion_inc(), and dev_xmit_recursion_dec(), which seems fine considering the context. Closes: https://lore.kernel.org/netdev/2bc9e2079e864a9290561894d2a602d6@akamai.com/ Closes: https://lore.kernel.org/netdev/Z7NKYMY7fJT5cYWu@shredder/ Fixes: ffce41962ef6 ("lwtunnel: support dst output redirect function") Fixes: 2536862311d2 ("lwt: Add support to redirect dst.input") Fixes: 14972cbd34ff ("net: lwtunnel: Handle fragmentation") Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250314120048.12569-2-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/core/lwtunnel.c | 65 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 711cd3b4347a..4417a18b3e95 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,8 @@ #include #include +#include "dev.h" + DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); @@ -325,13 +327,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -341,8 +353,11 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->output)) + if (likely(ops && ops->output)) { + dev_xmit_recursion_inc(); ret = ops->output(net, sk, skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -359,13 +374,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; @@ -376,8 +401,11 @@ int lwtunnel_xmit(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->xmit)) + if (likely(ops && ops->xmit)) { + dev_xmit_recursion_inc(); ret = ops->xmit(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -394,13 +422,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; - if (!dst) + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; goto drop; + } + + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; + goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -410,8 +448,11 @@ int lwtunnel_input(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->input)) + if (likely(ops && ops->input)) { + dev_xmit_recursion_inc(); ret = ops->input(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) -- 2.50.1 From 3e7a60b368eadf6c30a4a79dea1eb8f88b6d620d Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Fri, 14 Mar 2025 13:00:47 +0100 Subject: [PATCH 15/16] net: ipv6: ioam6: fix lwtunnel_output() loop Fix the lwtunnel_output() reentry loop in ioam6_iptunnel when the destination is the same after transformation. Note that a check on the destination address was already performed, but it was not enough. This is the example of a lwtunnel user taking care of loops without relying only on the last resort detection offered by lwtunnel. Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation") Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250314120048.12569-3-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/ipv6/ioam6_iptunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 2c383c12a431..09065187378e 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -337,7 +337,6 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; - struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; u32 pkt_cnt; @@ -352,8 +351,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) goto out; - orig_daddr = ipv6_hdr(skb)->daddr; - local_bh_disable(); cache_dst = dst_cache_get(&ilwt->cache); local_bh_enable(); @@ -422,7 +419,10 @@ do_encap: goto drop; } - if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + /* avoid lwtunnel_output() reentry loop when destination is the same + * after transformation (e.g., with the inline mode) + */ + if (dst->lwtstate != cache_dst->lwtstate) { skb_dst_drop(skb); skb_dst_set(skb, cache_dst); return dst_output(net, sk, skb); -- 2.50.1 From 3ed61b8938c66680e13a1d1929afb9b145c26a86 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Fri, 14 Mar 2025 13:00:48 +0100 Subject: [PATCH 16/16] selftests: net: test for lwtunnel dst ref loops As recently specified by commit 0ea09cbf8350 ("docs: netdev: add a note on selftest posting") in net-next, the selftest is therefore shipped in this series. However, this selftest does not really test this series. It needs this series to avoid crashing the kernel. What it really tests, thanks to kmemleak, is what was fixed by the following commits: - commit c71a192976de ("net: ipv6: fix dst refleaks in rpl, seg6 and ioam6 lwtunnels") - commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels") - commit c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt") - commit 13e55fbaec17 ("net: ipv6: fix dst ref loop on input in rpl lwt") - commit 0e7633d7b95b ("net: ipv6: fix dst ref loop in ila lwtunnel") - commit 5da15a9c11c1 ("net: ipv6: fix missing dst ref drop in ila lwtunnel") Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20250314120048.12569-4-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 2 + .../selftests/net/lwt_dst_cache_ref_loop.sh | 246 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100755 tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5916f3b81c39..843ab747645d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -101,6 +101,7 @@ TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh +TEST_PROGS += lwt_dst_cache_ref_loop.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..61e5116987f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..881eb399798f --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ]; then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? + [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? + + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Setup failed." + exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ]; then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ]; then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ]; then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_seg6() +{ + if [ $skip_seg6 != 0 ]; then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility + +trap cleanup EXIT + +setup +run + +exit $ksft_pass -- 2.50.1