From 890bde75a2360d80712f394a31982fbd93fa0891 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 21 Oct 2024 10:49:34 -0700 Subject: [PATCH 01/16] net: systemport: Remove unused txchk accessors Vladimir reported the following warning with clang-16 and W=1: warning: unused function 'txchk_readl' [-Wunused-function] BCM_SYSPORT_IO_MACRO(txchk, SYS_PORT_TXCHK_OFFSET); note: expanded from macro 'BCM_SYSPORT_IO_MACRO' warning: unused function 'txchk_writel' [-Wunused-function] note: expanded from macro 'BCM_SYSPORT_IO_MACRO' warning: unused function 'tbuf_readl' [-Wunused-function] BCM_SYSPORT_IO_MACRO(tbuf, SYS_PORT_TBUF_OFFSET); note: expanded from macro 'BCM_SYSPORT_IO_MACRO' warning: unused function 'tbuf_writel' [-Wunused-function] note: expanded from macro 'BCM_SYSPORT_IO_MACRO' The TXCHK and RBUF blocks are not being accessed, remove the IO macros used to access those blocks. No functional impact. Reported-by: Vladimir Oltean Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241021174935.57658-2-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0b7088ca4822..9e42b5db721e 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -46,9 +46,7 @@ BCM_SYSPORT_IO_MACRO(umac, SYS_PORT_UMAC_OFFSET); BCM_SYSPORT_IO_MACRO(gib, SYS_PORT_GIB_OFFSET); BCM_SYSPORT_IO_MACRO(tdma, SYS_PORT_TDMA_OFFSET); BCM_SYSPORT_IO_MACRO(rxchk, SYS_PORT_RXCHK_OFFSET); -BCM_SYSPORT_IO_MACRO(txchk, SYS_PORT_TXCHK_OFFSET); BCM_SYSPORT_IO_MACRO(rbuf, SYS_PORT_RBUF_OFFSET); -BCM_SYSPORT_IO_MACRO(tbuf, SYS_PORT_TBUF_OFFSET); BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET); /* On SYSTEMPORT Lite, any register after RDMA_STATUS has the exact -- 2.51.0 From e69fbd287d5a8b1038e50612545d7e3fc6db2b8a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 21 Oct 2024 10:49:35 -0700 Subject: [PATCH 02/16] net: systemport: Move IO macros to header file Move the BCM_SYSPORT_IO_MACRO() definition and its use to bcmsysport.h where it is more appropriate and where static inline helpers are acceptable. While at it, make sure that the macro 'offset' argument does not trigger a checkpatch warning due to possible argument re-use. Suggested-by: Vladimir Oltean Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241021174935.57658-3-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 22 --------------------- drivers/net/ethernet/broadcom/bcmsysport.h | 23 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 9e42b5db721e..caff6e87a488 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -27,28 +27,6 @@ #include "bcmsysport.h" -/* I/O accessors register helpers */ -#define BCM_SYSPORT_IO_MACRO(name, offset) \ -static inline u32 name##_readl(struct bcm_sysport_priv *priv, u32 off) \ -{ \ - u32 reg = readl_relaxed(priv->base + offset + off); \ - return reg; \ -} \ -static inline void name##_writel(struct bcm_sysport_priv *priv, \ - u32 val, u32 off) \ -{ \ - writel_relaxed(val, priv->base + offset + off); \ -} \ - -BCM_SYSPORT_IO_MACRO(intrl2_0, SYS_PORT_INTRL2_0_OFFSET); -BCM_SYSPORT_IO_MACRO(intrl2_1, SYS_PORT_INTRL2_1_OFFSET); -BCM_SYSPORT_IO_MACRO(umac, SYS_PORT_UMAC_OFFSET); -BCM_SYSPORT_IO_MACRO(gib, SYS_PORT_GIB_OFFSET); -BCM_SYSPORT_IO_MACRO(tdma, SYS_PORT_TDMA_OFFSET); -BCM_SYSPORT_IO_MACRO(rxchk, SYS_PORT_RXCHK_OFFSET); -BCM_SYSPORT_IO_MACRO(rbuf, SYS_PORT_RBUF_OFFSET); -BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET); - /* On SYSTEMPORT Lite, any register after RDMA_STATUS has the exact * same layout, except it has been moved by 4 bytes up, *sigh* */ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 335cf6631db5..a34296f989f1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -773,4 +773,27 @@ struct bcm_sysport_priv { struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8]; }; + +/* I/O accessors register helpers */ +#define BCM_SYSPORT_IO_MACRO(name, offset) \ +static inline u32 name##_readl(struct bcm_sysport_priv *priv, u32 off) \ +{ \ + u32 reg = readl_relaxed(priv->base + (offset) + off); \ + return reg; \ +} \ +static inline void name##_writel(struct bcm_sysport_priv *priv, \ + u32 val, u32 off) \ +{ \ + writel_relaxed(val, priv->base + (offset) + off); \ +} \ + +BCM_SYSPORT_IO_MACRO(intrl2_0, SYS_PORT_INTRL2_0_OFFSET); +BCM_SYSPORT_IO_MACRO(intrl2_1, SYS_PORT_INTRL2_1_OFFSET); +BCM_SYSPORT_IO_MACRO(umac, SYS_PORT_UMAC_OFFSET); +BCM_SYSPORT_IO_MACRO(gib, SYS_PORT_GIB_OFFSET); +BCM_SYSPORT_IO_MACRO(tdma, SYS_PORT_TDMA_OFFSET); +BCM_SYSPORT_IO_MACRO(rxchk, SYS_PORT_RXCHK_OFFSET); +BCM_SYSPORT_IO_MACRO(rbuf, SYS_PORT_RBUF_OFFSET); +BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET); + #endif /* __BCM_SYSPORT_H */ -- 2.51.0 From a42f3076648e0b507de9039f8085edcc10b35fb7 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 21 Oct 2024 17:14:03 +0200 Subject: [PATCH 03/16] mptcp: pm: send ACK on non-stale subflows If the subflow is considered as "staled", it is better to avoid it to send an ACK carrying an ADD_ADDR or RM_ADDR. Another subflow, if any, will then be selected. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241021-net-next-mptcp-misc-6-13-v1-1-1ef02746504a@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index db586a5b3866..618289aac0ab 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -781,7 +781,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *alt = NULL; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); @@ -792,10 +792,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) { if (__mptcp_subflow_active(subflow)) { - mptcp_pm_send_ack(msk, subflow, false, false); - break; + if (!subflow->stale) { + mptcp_pm_send_ack(msk, subflow, false, false); + return; + } + + if (!alt) + alt = subflow; } } + + if (alt) + mptcp_pm_send_ack(msk, alt, false, false); } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, -- 2.51.0 From 581c8cbfa934aaa555daa4e843242fcecc160f05 Mon Sep 17 00:00:00 2001 From: Gang Yan Date: Mon, 21 Oct 2024 17:14:04 +0200 Subject: [PATCH 04/16] mptcp: annotate data-races around subflow->fully_established We introduce the same handling for potential data races with the 'fully_established' flag in subflow as previously done for msk->fully_established. Additionally, we make a crucial change: convert the subflow's 'fully_established' from 'bit_field' to 'bool' type. This is necessary because methods for avoiding data races don't work well with 'bit_field'. Specifically, the 'READ_ONCE' needs to know the size of the variable being accessed, which is not supported in 'bit_field'. Also, 'test_bit' expect the address of 'bit_field'. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/516 Signed-off-by: Gang Yan Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241021-net-next-mptcp-misc-6-13-v1-2-1ef02746504a@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/diag.c | 2 +- net/mptcp/options.c | 4 ++-- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 6 +++--- net/mptcp/subflow.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 2d3efb405437..02205f7994d7 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM; if (sf->request_bkup) flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC; - if (sf->fully_established) + if (READ_ONCE(sf->fully_established)) flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED; if (sf->conn_finished) flags |= MPTCP_SUBFLOW_FLAG_CONNECTED; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 370c3836b771..1603b3702e22 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, return false; /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ - if (subflow->fully_established || snd_data_fin_enable || + if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable || subflow->snd_isn != TCP_SKB_CB(skb)->seq || sk->sk_state != TCP_ESTABLISHED) return false; @@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established */ - if (likely(subflow->fully_established)) { + if (likely(READ_ONCE(subflow->fully_established))) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1f5c63eb21f0..a6c9661a4c45 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3511,7 +3511,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk) struct tcp_sock *tp = tcp_sk(ssk); unsigned long timeout; - if (mptcp_subflow_ctx(ssk)->fully_established) + if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established)) return; /* reschedule with a timeout above RTT, as we must look only for drop */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 568a72702b08..a93e661ef5c4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -513,7 +513,6 @@ struct mptcp_subflow_context { request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ - fully_established : 1, /* path validated */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, @@ -532,10 +531,11 @@ struct mptcp_subflow_context { is_mptfo : 1, /* subflow is doing TFO */ close_event_done : 1, /* has done the post-closed part */ mpc_drop : 1, /* the MPC option has been dropped in a rtx */ - __unused : 8; + __unused : 9; bool data_avail; bool scheduled; bool pm_listener; /* a listener managed by the kernel PM? */ + bool fully_established; /* path validated */ u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk) static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ - if (subflow->request_join && !subflow->fully_established) + if (subflow->request_join && !READ_ONCE(subflow->fully_established)) return false; return __tcp_can_send(mptcp_subflow_tcp_sock(subflow)); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6170f2fff71e..860903e06422 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk, const struct mptcp_options_received *mp_opt) { subflow_set_remote_key(msk, subflow, mp_opt); - subflow->fully_established = 1; + WRITE_ONCE(subflow->fully_established, true); WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) @@ -2062,7 +2062,7 @@ static void subflow_ulp_clone(const struct request_sock *req, } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; - new_ctx->fully_established = 1; + WRITE_ONCE(new_ctx->fully_established, true); new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; new_ctx->request_bkup = subflow_req->request_bkup; -- 2.51.0 From 5add80bfdc46f9ad6857c80e3af109177e59a280 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 21 Oct 2024 17:14:05 +0200 Subject: [PATCH 05/16] mptcp: implement mptcp_pm_connection_closed The MPTCP path manager event handler mptcp_pm_connection_closed interface has been added in the commit 1b1c7a0ef7f3 ("mptcp: Add path manager interface") but it was an empty function from then on. With such name, it sounds good to invoke mptcp_event with the MPTCP_EVENT_CLOSED event type from it. It also removes a bit of duplicated code. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241021-net-next-mptcp-misc-6-13-v1-3-1ef02746504a@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 +++ net/mptcp/protocol.c | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 620264c75dc2..16c336c51940 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) void mptcp_pm_connection_closed(struct mptcp_sock *msk) { pr_debug("msk=%p\n", msk); + + if (msk->token) + mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); } void mptcp_pm_subflow_established(struct mptcp_sock *msk) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a6c9661a4c45..e978e05ec8d1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3139,8 +3139,7 @@ cleanup: sock_hold(sk); pr_debug("msk=%p state=%d\n", sk, sk->sk_state); - if (msk->token) - mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); + mptcp_pm_connection_closed(msk); if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); @@ -3206,8 +3205,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); - if (msk->token) - mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); + mptcp_pm_connection_closed(msk); /* msk->subflow is still intact, the following will not free the first * subflow -- 2.51.0 From 46a3282b87b1f9a88534eba59ecf852b2a21289c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 21 Oct 2024 17:14:06 +0200 Subject: [PATCH 06/16] mptcp: use "middlebox interference" RST when no DSS RFC8684 suggests use of "Middlebox interference (code 0x06)" in case of fully established subflow that carries data at TCP level with no DSS sub-option. This is generally the case when mpext is NULL or mpext->use_map is 0: use a dedicated value of 'mapping_status' and use it before closing the socket in subflow_check_data_avail(). Link: https://github.com/multipath-tcp/mptcp_net-next/issues/518 Signed-off-by: Davide Caratti Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241021-net-next-mptcp-misc-6-13-v1-4-1ef02746504a@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 860903e06422..07352b15f145 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -971,7 +971,8 @@ enum mapping_status { MAPPING_EMPTY, MAPPING_DATA_FIN, MAPPING_DUMMY, - MAPPING_BAD_CSUM + MAPPING_BAD_CSUM, + MAPPING_NODSS }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) @@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, return MAPPING_EMPTY; } + /* If the required DSS has likely been dropped by a middlebox */ if (!subflow->map_valid) - return MAPPING_INVALID; + return MAPPING_NODSS; goto validate_seq; } @@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || - status == MAPPING_BAD_CSUM)) + status == MAPPING_BAD_CSUM || status == MAPPING_NODSS)) goto fallback; if (status != MAPPING_OK) @@ -1396,7 +1398,9 @@ fallback: * subflow_error_report() will introduce the appropriate barriers */ subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; + subflow->reset_reason = status == MAPPING_NODSS ? + MPTCP_RST_EMIDDLEBOX : + MPTCP_RST_EMPTCP; reset: WRITE_ONCE(ssk->sk_err, EBADMSG); -- 2.51.0 From 22ccb684c1cae37411450e6e86a379cd3c29cb8f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Oct 2024 03:12:10 +0000 Subject: [PATCH 07/16] bonding: return detailed error when loading native XDP fails MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Bonding only supports native XDP for specific modes, which can lead to confusion for users regarding why XDP loads successfully at times and fails at others. This patch enhances error handling by returning detailed error messages, providing users with clearer insights into the specific reasons for the failure when loading native XDP. Reviewed-by: Nikolay Aleksandrov Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20241021031211.814-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3928287f5865..5812e8eaccf1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5676,8 +5676,11 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, ASSERT_RTNL(); - if (!bond_xdp_check(bond)) + if (!bond_xdp_check(bond)) { + BOND_NL_ERR(dev, extack, + "No native XDP support for the current bonding mode"); return -EOPNOTSUPP; + } old_prog = bond->xdp_prog; bond->xdp_prog = prog; -- 2.51.0 From 9f59eccd9dd5a4c6a974e02e70b9eed0d3b14245 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Oct 2024 03:12:11 +0000 Subject: [PATCH 08/16] Documentation: bonding: add XDP support explanation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add document about which modes have native XDP support. Reviewed-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241021031211.814-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/bonding.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index e774b48de9f5..7c8d22d68682 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -2916,6 +2916,17 @@ from the bond (``ifenslave -d bond0 eth0``). The bonding driver will then restore the MAC addresses that the slaves had before they were enslaved. +9. What bonding modes support native XDP? +------------------------------------------ + + * balance-rr (0) + * active-backup (1) + * balance-xor (2) + * 802.3ad (4) + +Note that the vlan+srcmac hash policy does not support native XDP. +For other bonding modes, the XDP program must be loaded with generic mode. + 16. Resources and Links ======================= -- 2.51.0 From 25872a079bbbe952eb660249cc9f40fa75623e68 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 23 Oct 2024 15:41:46 +0200 Subject: [PATCH 09/16] net/mlx5: unique names for per device caches Add the device name to the per device kmem_cache names to ensure their uniqueness. This fixes warnings like this: "kmem_cache of name 'mlx5_fs_fgs' already exists". Signed-off-by: Sebastian Ott Reviewed-by: Breno Leitao Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20241023134146.28448-1-sebott@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8505d5e241e1..c2db0a1c132b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3689,6 +3689,7 @@ void mlx5_fs_core_free(struct mlx5_core_dev *dev) int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering; + char name[80]; int err = 0; err = mlx5_init_fc_stats(dev); @@ -3713,10 +3714,12 @@ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) else steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; - steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + snprintf(name, sizeof(name), "%s-mlx5_fs_fgs", dev_name(dev->device)); + steering->fgs_cache = kmem_cache_create(name, sizeof(struct mlx5_flow_group), 0, 0, NULL); - steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + snprintf(name, sizeof(name), "%s-mlx5_fs_ftes", dev_name(dev->device)); + steering->ftes_cache = kmem_cache_create(name, sizeof(struct fs_fte), 0, 0, NULL); if (!steering->ftes_cache || !steering->fgs_cache) { err = -ENOMEM; -- 2.51.0 From ba4e469e42fe1a771b5653d179eb12dc4be6b6a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2024 13:48:19 +0000 Subject: [PATCH 10/16] vsock: do not leave dangling sk pointer in vsock_create() syzbot was able to trigger the following warning after recent core network cleanup. On error vsock_create() frees the allocated sk object, but sock_init_data() has already attached it to the provided sock object. We must clear sock->sk to avoid possible use-after-free later. WARNING: CPU: 0 PID: 5282 at net/socket.c:1581 __sock_create+0x897/0x950 net/socket.c:1581 Modules linked in: CPU: 0 UID: 0 PID: 5282 Comm: syz.2.43 Not tainted 6.12.0-rc2-syzkaller-00667-g53bac8330865 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__sock_create+0x897/0x950 net/socket.c:1581 Code: 7f 06 01 65 48 8b 34 25 00 d8 03 00 48 81 c6 b0 08 00 00 48 c7 c7 60 0b 0d 8d e8 d4 9a 3c 02 e9 11 f8 ff ff e8 0a ab 0d f8 90 <0f> 0b 90 e9 82 fd ff ff 89 e9 80 e1 07 fe c1 38 c1 0f 8c c7 f8 ff RSP: 0018:ffffc9000394fda8 EFLAGS: 00010293 RAX: ffffffff89873c46 RBX: ffff888079f3c818 RCX: ffff8880314b9e00 RDX: 0000000000000000 RSI: 00000000ffffffed RDI: 0000000000000000 RBP: ffffffff8d3337f0 R08: ffffffff8987384e R09: ffffffff8989473a R10: dffffc0000000000 R11: fffffbfff203a276 R12: 00000000ffffffed R13: ffff888079f3c8c0 R14: ffffffff898736e7 R15: dffffc0000000000 FS: 00005555680ab500(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f22b11196d0 CR3: 00000000308c0000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sock_create net/socket.c:1632 [inline] __sys_socket_create net/socket.c:1669 [inline] __sys_socket+0x150/0x3c0 net/socket.c:1716 __do_sys_socket net/socket.c:1730 [inline] __se_sys_socket net/socket.c:1728 [inline] __x64_sys_socket+0x7a/0x90 net/socket.c:1728 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f22b117dff9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff56aec0e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000029 RAX: ffffffffffffffda RBX: 00007f22b1335f80 RCX: 00007f22b117dff9 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000028 RBP: 00007f22b11f0296 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f22b1335f80 R14: 00007f22b1335f80 R15: 00000000000012dd Fixes: 48156296a08c ("net: warn, if pf->create does not clear sock->sk on error") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20241022134819.1085254-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 35681adedd9a..109b7a0bd071 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2417,6 +2417,7 @@ static int vsock_create(struct net *net, struct socket *sock, if (sock->type == SOCK_DGRAM) { ret = vsock_assign_transport(vsk, NULL); if (ret < 0) { + sock->sk = NULL; sock_put(sk); return ret; } -- 2.51.0 From 63afe0c217dc21457dbccca1da61266c47886e61 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 22 Oct 2024 17:14:18 +0200 Subject: [PATCH 11/16] netlink: specs: Add missing phy-ntf command to ethtool spec ETHTOOL_MSG_PHY_NTF description is missing in the ethtool netlink spec. Add it to the spec. Signed-off-by: Kory Maincent Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20241022151418.875424-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index f6c5d8214c7e..93369f0eb816 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1956,3 +1956,7 @@ operations: - upstream-sfp-name - downstream-sfp-name dump: *phy-get-op + - + name: phy-ntf + doc: Notification for change in PHY devices. + notify: phy-get -- 2.51.0 From ab101c553bc1f76a839163d1dc0d1e715ad6bb4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Oct 2024 15:00:59 +0000 Subject: [PATCH 12/16] neighbour: use kvzalloc()/kvfree() mm layer is providing convenient functions, we do not have to work around old limitations. Signed-off-by: Eric Dumazet Cc: Gilad Naaman Reviewed-by: Joe Damato Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241022150059.1345406-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 395ae1626eef..4b871cecd2ce 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -14,7 +14,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -538,14 +537,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) { - buckets = kzalloc(size, GFP_ATOMIC); - } else { - buckets = (struct neighbour __rcu **) - __get_free_pages(GFP_ATOMIC | __GFP_ZERO, - get_order(size)); - kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); - } + buckets = kvzalloc(size, GFP_ATOMIC); if (!buckets) { kfree(ret); return NULL; @@ -562,15 +554,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); - size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); - struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) { - kfree(buckets); - } else { - kmemleak_free(buckets); - free_pages((unsigned long)buckets, get_order(size)); - } + kvfree(nht->hash_buckets); kfree(nht); } -- 2.51.0 From 9cb7e40d388d6c0e4677809c6b2950bc67fd8830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:28 -0700 Subject: [PATCH 13/16] rtnetlink: Make per-netns RTNL dereference helpers to macro. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When CONFIG_DEBUG_NET_SMALL_RTNL is off, rtnl_net_dereference() is the static inline wrapper of rtnl_dereference() returning a plain (void *) pointer to make sure net is always evaluated as requested in [0]. But, it makes sparse complain [1] when the pointer has __rcu annotation: net/ipv4/devinet.c:674:47: sparse: warning: incorrect type in argument 2 (different address spaces) net/ipv4/devinet.c:674:47: sparse: expected void *p net/ipv4/devinet.c:674:47: sparse: got struct in_ifaddr [noderef] __rcu * Also, if we evaluate net as (void *) in a macro, then the compiler in turn fails to build due to -Werror=unused-value. #define rtnl_net_dereference(net, p) \ ({ \ (void *)net; \ rtnl_dereference(p); \ }) net/ipv4/devinet.c: In function ‘inet_rtm_deladdr’: ./include/linux/rtnetlink.h:154:17: error: statement with no effect [-Werror=unused-value] 154 | (void *)net; \ net/ipv4/devinet.c:674:21: note: in expansion of macro ‘rtnl_net_dereference’ 674 | (ifa = rtnl_net_dereference(net, *ifap)) != NULL; | ^~~~~~~~~~~~~~~~~~~~ Let's go back to the original simplest macro. Note that checkpatch complains about this approach, but it's one-shot and less noisy than the other two. WARNING: Argument 'net' is not used in function-like macro #76: FILE: include/linux/rtnetlink.h:142: +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) Fixes: 844e5e7e656d ("rtnetlink: Add assertion helpers for per-netns RTNL.") Link: https://lore.kernel.org/netdev/20241004132145.7fd208e9@kernel.org/ [0] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410200325.SaEJmyZS-lkp@intel.com/ [1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8468a4ce8510..0e62918de63b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -137,21 +137,12 @@ static inline void ASSERT_RTNL_NET(struct net *net) ASSERT_RTNL(); } -static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) -{ - return rcu_dereference_rtnl(p); -} - -static inline void *rtnl_net_dereference(struct net *net, void *p) -{ - return rtnl_dereference(p); -} - -static inline void *rcu_replace_pointer_rtnl_net(struct net *net, - void *rp, void *p) -{ - return rcu_replace_pointer_rtnl(rp, p); -} +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_rtnl(p) +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) -- 2.51.0 From 26d8db55eeacb7dc78672523f57825916d203de4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:29 -0700 Subject: [PATCH 14/16] rtnetlink: Define RTNL_FLAG_DOIT_PERNET for per-netns RTNL doit(). We will push RTNL down to each doit() as rtnl_net_lock(). We can use RTNL_FLAG_DOIT_UNLOCKED to call doit() without RTNL, but doit() will still hold RTNL. Let's define RTNL_FLAG_DOIT_PERNET as an alias of RTNL_FLAG_DOIT_UNLOCKED. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e0d9a8eae6b6..b260c0cc9671 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), +#define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), RTNL_FLAG_DUMP_UNLOCKED = BIT(2), RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */ -- 2.51.0 From 2d34429d14f9d09b38a8bee6a972a07228378df6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:30 -0700 Subject: [PATCH 15/16] ipv4: Factorise RTM_NEWADDR validation to inet_validate_rtm(). rtm_to_ifaddr() validates some attributes, looks up a netdev, allocates struct in_ifaddr, and validates IFA_CACHEINFO. There is no reason to delay IFA_CACHEINFO validation. We will push RTNL down to inet_rtm_newaddr(), and then we want to complete rtnetlink validation before rtnl_net_lock(). Let's factorise the validation parts. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 79 ++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5f859d01cbbe..da5412fb34e7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -846,35 +846,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, - __u32 *pvalid_lft, __u32 *pprefered_lft, - struct netlink_ext_ack *extack) +static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb, + struct netlink_ext_ack *extack, + __u32 *valid_lft, __u32 *prefered_lft) { - struct nlattr *tb[IFA_MAX+1]; - struct in_ifaddr *ifa; - struct ifaddrmsg *ifm; - struct net_device *dev; - struct in_device *in_dev; + struct ifaddrmsg *ifm = nlmsg_data(nlh); int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) - goto errout; - - ifm = nlmsg_data(nlh); - err = -EINVAL; + return err; if (ifm->ifa_prefixlen > 32) { NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length"); - goto errout; + return -EINVAL; } if (!tb[IFA_LOCAL]) { NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied"); - goto errout; + return -EINVAL; } + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); + return -EINVAL; + } + + *valid_lft = ci->ifa_valid; + *prefered_lft = ci->ifa_prefered; + } + + return 0; +} + +static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm = nlmsg_data(nlh); + struct in_device *in_dev; + struct net_device *dev; + struct in_ifaddr *ifa; + int err; + dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; if (!dev) { @@ -923,23 +942,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, if (tb[IFA_PROTO]) ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); - if (tb[IFA_CACHEINFO]) { - struct ifa_cacheinfo *ci; - - ci = nla_data(tb[IFA_CACHEINFO]); - if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { - NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); - err = -EINVAL; - goto errout_free; - } - *pvalid_lft = ci->ifa_valid; - *pprefered_lft = ci->ifa_prefered; - } - return ifa; -errout_free: - inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -964,15 +968,21 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + __u32 prefered_lft = INFINITY_LIFE_TIME; + __u32 valid_lft = INFINITY_LIFE_TIME; struct net *net = sock_net(skb->sk); - struct in_ifaddr *ifa; struct in_ifaddr *ifa_existing; - __u32 valid_lft = INFINITY_LIFE_TIME; - __u32 prefered_lft = INFINITY_LIFE_TIME; + struct nlattr *tb[IFA_MAX + 1]; + struct in_ifaddr *ifa; + int ret; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack); + ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft); + if (ret < 0) + return ret; + + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -983,8 +993,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_autojoin_config(net, true, ifa); - + ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); -- 2.51.0 From abd0deff03d854cb34818e1e01490296d0314ea1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:31 -0700 Subject: [PATCH 16/16] ipv4: Don't allocate ifa for 0.0.0.0 in inet_rtm_newaddr(). When we pass 0.0.0.0 to __inet_insert_ifa(), it frees ifa and returns 0. We can do this check much earlier for RTM_NEWADDR even before allocating struct in_ifaddr. Let's move the validation to 1. inet_insert_ifa() for ioctl() 2. inet_rtm_newaddr() for RTM_NEWADDR Now, we can remove the same check in find_matching_ifa(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index da5412fb34e7..8db84c70ebed 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -508,11 +508,6 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ASSERT_RTNL(); - if (!ifa->ifa_local) { - inet_free_ifa(ifa); - return 0; - } - ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; @@ -584,6 +579,11 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, static int inet_insert_ifa(struct in_ifaddr *ifa) { + if (!ifa->ifa_local) { + inet_free_ifa(ifa); + return 0; + } + return __inet_insert_ifa(ifa, NULL, 0, NULL); } @@ -953,15 +953,13 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1; - if (!ifa->ifa_local) - return NULL; - in_dev_for_each_ifa_rtnl(ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) return ifa1; } + return NULL; } @@ -982,6 +980,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret < 0) return ret; + if (!nla_get_in_addr(tb[IFA_LOCAL])) + return 0; + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); if (IS_ERR(ifa)) return PTR_ERR(ifa); -- 2.51.0