From 6538c8ca8ee182a8b8233e0e57b5bffd8318d60c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 24 Feb 2025 00:36:11 +0100 Subject: [PATCH 01/16] net: ethernet: renesas: rcar_gen4_ptp: Remove bool conversion MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Remove the unnecessary bool conversion and simplify the code. Signed-off-by: Thorsten Blum Reviewed-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20250223233613.100518-2-thorsten.blum@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rcar_gen4_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c index 72e7fcc56693..4c3e8cc5046f 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c @@ -29,8 +29,8 @@ static const struct rcar_gen4_ptp_reg_offset gen4_offs = { static int rcar_gen4_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct rcar_gen4_ptp_private *ptp_priv = ptp_to_priv(ptp); - bool neg_adj = scaled_ppm < 0 ? true : false; s64 addend = ptp_priv->default_addend; + bool neg_adj = scaled_ppm < 0; s64 diff; if (neg_adj) -- 2.51.0 From 2e5af6b2ae85328051f2ecb1b7fe64cdd835c8e9 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Fri, 21 Feb 2025 14:57:17 +0800 Subject: [PATCH 02/16] net: txgbe: Add basic support for new AML devices There is a new 40/25/10 Gigabit Ethernet device. To support basic functions, PHYLINK is temporarily skipped, as these configurations are intended to be implemented in the firmware. The associated link IRQ is also skipped. Also implement the new SW-FW interaction interface, which uses a 64-byte message buffer.
Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20250221065718.197544-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/wangxun/libwx/wx_ethtool.c | 44 +++- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 213 +++++++++++++++--- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 25 +- drivers/net/ethernet/wangxun/libwx/wx_type.h | 29 ++- drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 6 + .../net/ethernet/wangxun/txgbe/txgbe_irq.c | 7 + .../net/ethernet/wangxun/txgbe/txgbe_main.c | 43 +++- .../net/ethernet/wangxun/txgbe/txgbe_phy.c | 6 + .../net/ethernet/wangxun/txgbe/txgbe_type.h | 14 ++ 9 files changed, 333 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 28f982fbc64c..6d3b57233a39 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -219,6 +219,9 @@ int wx_nway_reset(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); + if (wx->mac.type == wx_mac_aml) + return -EOPNOTSUPP; + return phylink_ethtool_nway_reset(wx->phylink); } EXPORT_SYMBOL(wx_nway_reset); @@ -228,6 +231,9 @@ int wx_get_link_ksettings(struct net_device *netdev, { struct wx *wx = netdev_priv(netdev); + if (wx->mac.type == wx_mac_aml) + return -EOPNOTSUPP; + return phylink_ethtool_ksettings_get(wx->phylink, cmd); } EXPORT_SYMBOL(wx_get_link_ksettings); @@ -237,6 +243,9 @@ int wx_set_link_ksettings(struct net_device *netdev, { struct wx *wx = netdev_priv(netdev); + if (wx->mac.type == wx_mac_aml) + return -EOPNOTSUPP; + return phylink_ethtool_ksettings_set(wx->phylink, cmd); } EXPORT_SYMBOL(wx_set_link_ksettings); @@ -246,6 +255,9 @@ void wx_get_pauseparam(struct net_device *netdev, { struct wx *wx = netdev_priv(netdev); + if (wx->mac.type == wx_mac_aml) + return; + phylink_ethtool_get_pauseparam(wx->phylink, pause); } EXPORT_SYMBOL(wx_get_pauseparam); @@ -255,6 +267,9 @@ int wx_set_pauseparam(struct 
net_device *netdev, { struct wx *wx = netdev_priv(netdev); + if (wx->mac.type == wx_mac_aml) + return -EOPNOTSUPP; + return phylink_ethtool_set_pauseparam(wx->phylink, pause); } EXPORT_SYMBOL(wx_set_pauseparam); @@ -325,10 +340,17 @@ int wx_set_coalesce(struct net_device *netdev, if (ec->tx_max_coalesced_frames_irq) wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; - if (wx->mac.type == wx_mac_sp) + switch (wx->mac.type) { + case wx_mac_sp: max_eitr = WX_SP_MAX_EITR; - else + break; + case wx_mac_aml: + max_eitr = WX_AML_MAX_EITR; + break; + default: max_eitr = WX_EM_MAX_EITR; + break; + } if ((ec->rx_coalesce_usecs > (max_eitr >> 2)) || (ec->tx_coalesce_usecs > (max_eitr >> 2))) @@ -350,10 +372,15 @@ int wx_set_coalesce(struct net_device *netdev, wx->tx_itr_setting = ec->tx_coalesce_usecs; if (wx->tx_itr_setting == 1) { - if (wx->mac.type == wx_mac_sp) + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: tx_itr_param = WX_12K_ITR; - else + break; + default: tx_itr_param = WX_20K_ITR; + break; + } } else { tx_itr_param = wx->tx_itr_setting; } @@ -386,10 +413,15 @@ static unsigned int wx_max_channels(struct wx *wx) max_combined = 1; } else { /* support up to max allowed queues with RSS */ - if (wx->mac.type == wx_mac_sp) + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: max_combined = 63; - else + break; + default: max_combined = 8; + break; + } } return max_combined; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 907d13ade404..b5f35b187077 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -112,10 +112,15 @@ static void wx_intr_disable(struct wx *wx, u64 qmask) if (mask) wr32(wx, WX_PX_IMS(0), mask); - if (wx->mac.type == wx_mac_sp) { + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: mask = (qmask >> 32); if (mask) wr32(wx, WX_PX_IMS(1), mask); + break; + default: + break; } } @@ -126,10 +131,16 @@ void 
wx_intr_enable(struct wx *wx, u64 qmask) mask = (qmask & U32_MAX); if (mask) wr32(wx, WX_PX_IMC(0), mask); - if (wx->mac.type == wx_mac_sp) { + + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: mask = (qmask >> 32); if (mask) wr32(wx, WX_PX_IMC(1), mask); + break; + default: + break; } } EXPORT_SYMBOL(wx_intr_enable); @@ -278,22 +289,8 @@ static int wx_acquire_sw_sync(struct wx *wx, u32 mask) return ret; } -/** - * wx_host_interface_command - Issue command to manageability block - * @wx: pointer to the HW structure - * @buffer: contains the command to write and where the return status will - * be placed - * @length: length of buffer, must be multiple of 4 bytes - * @timeout: time in ms to wait for command completion - * @return_data: read and return data from the buffer (true) or not (false) - * Needed because FW structures are big endian and decoding of - * these fields can be 8 bit or 16 bit based on command. Decoding - * is not easily understood without making a table of commands. - * So we will leave this up to the caller to read back the data - * in these cases. - **/ -int wx_host_interface_command(struct wx *wx, u32 *buffer, - u32 length, u32 timeout, bool return_data) +static int wx_host_interface_command_s(struct wx *wx, u32 *buffer, + u32 length, u32 timeout, bool return_data) { u32 hdr_size = sizeof(struct wx_hic_hdr); u32 hicr, i, bi, buf[64] = {}; @@ -301,22 +298,10 @@ int wx_host_interface_command(struct wx *wx, u32 *buffer, u32 dword_len; u16 buf_len; - if (length == 0 || length > WX_HI_MAX_BLOCK_BYTE_LENGTH) { - wx_err(wx, "Buffer length failure buffersize=%d.\n", length); - return -EINVAL; - } - status = wx_acquire_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_MB); if (status != 0) return status; - /* Calculate length in DWORDs. 
We must be DWORD aligned */ - if ((length % (sizeof(u32))) != 0) { - wx_err(wx, "Buffer length failure, not aligned to dword"); - status = -EINVAL; - goto rel_out; - } - dword_len = length >> 2; /* The device driver writes the relevant command block @@ -391,6 +376,139 @@ rel_out: wx_release_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_MB); return status; } + +static bool wx_poll_fw_reply(struct wx *wx, u32 *buffer, u8 send_cmd) +{ + u32 dword_len = sizeof(struct wx_hic_hdr) >> 2; + struct wx_hic_hdr *recv_hdr; + u32 i; + + /* read hdr */ + for (i = 0; i < dword_len; i++) { + buffer[i] = rd32a(wx, WX_FW2SW_MBOX, i); + le32_to_cpus(&buffer[i]); + } + + /* check hdr */ + recv_hdr = (struct wx_hic_hdr *)buffer; + if (recv_hdr->cmd == send_cmd && + recv_hdr->index == wx->swfw_index) + return true; + + return false; +} + +static int wx_host_interface_command_r(struct wx *wx, u32 *buffer, + u32 length, u32 timeout, bool return_data) +{ + struct wx_hic_hdr *hdr = (struct wx_hic_hdr *)buffer; + u32 hdr_size = sizeof(struct wx_hic_hdr); + bool busy, reply; + u32 dword_len; + u16 buf_len; + int err = 0; + u8 send_cmd; + u32 i; + + /* wait to get lock */ + might_sleep(); + err = read_poll_timeout(test_and_set_bit, busy, !busy, 1000, timeout * 1000, + false, WX_STATE_SWFW_BUSY, wx->state); + if (err) + return err; + + /* index to unique seq id for each mbox message */ + hdr->index = wx->swfw_index; + send_cmd = hdr->cmd; + + dword_len = length >> 2; + /* write data to SW-FW mbox array */ + for (i = 0; i < dword_len; i++) { + wr32a(wx, WX_SW2FW_MBOX, i, (__force u32)cpu_to_le32(buffer[i])); + /* write flush */ + rd32a(wx, WX_SW2FW_MBOX, i); + } + + /* generate interrupt to notify FW */ + wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, 0); + wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, WX_SW2FW_MBOX_CMD_VLD); + + /* polling reply from FW */ + err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 1000, 50000, + true, wx, buffer, send_cmd); + if (err) { + wx_err(wx, "Polling from 
FW messages timeout, cmd: 0x%x, index: %d\n", + send_cmd, wx->swfw_index); + goto rel_out; + } + + /* expect no reply from FW then return */ + if (!return_data) + goto rel_out; + + /* If there is any thing in data position pull it in */ + buf_len = hdr->buf_len; + if (buf_len == 0) + goto rel_out; + + if (length < buf_len + hdr_size) { + wx_err(wx, "Buffer not large enough for reply message.\n"); + err = -EFAULT; + goto rel_out; + } + + /* Calculate length in DWORDs, add 3 for odd lengths */ + dword_len = (buf_len + 3) >> 2; + for (i = hdr_size >> 2; i <= dword_len; i++) { + buffer[i] = rd32a(wx, WX_FW2SW_MBOX, i); + le32_to_cpus(&buffer[i]); + } + +rel_out: + /* index++, index replace wx_hic_hdr.checksum */ + if (wx->swfw_index == WX_HIC_HDR_INDEX_MAX) + wx->swfw_index = 0; + else + wx->swfw_index++; + + clear_bit(WX_STATE_SWFW_BUSY, wx->state); + return err; +} + +/** + * wx_host_interface_command - Issue command to manageability block + * @wx: pointer to the HW structure + * @buffer: contains the command to write and where the return status will + * be placed + * @length: length of buffer, must be multiple of 4 bytes + * @timeout: time in ms to wait for command completion + * @return_data: read and return data from the buffer (true) or not (false) + * Needed because FW structures are big endian and decoding of + * these fields can be 8 bit or 16 bit based on command. Decoding + * is not easily understood without making a table of commands. + * So we will leave this up to the caller to read back the data + * in these cases. + **/ +int wx_host_interface_command(struct wx *wx, u32 *buffer, + u32 length, u32 timeout, bool return_data) +{ + if (length == 0 || length > WX_HI_MAX_BLOCK_BYTE_LENGTH) { + wx_err(wx, "Buffer length failure buffersize=%d.\n", length); + return -EINVAL; + } + + /* Calculate length in DWORDs. 
We must be DWORD aligned */ + if ((length % (sizeof(u32))) != 0) { + wx_err(wx, "Buffer length failure, not aligned to dword"); + return -EINVAL; + } + + if (test_bit(WX_FLAG_SWFW_RING, wx->flags)) + return wx_host_interface_command_r(wx, buffer, length, + timeout, return_data); + + return wx_host_interface_command_s(wx, buffer, length, timeout, return_data); +} EXPORT_SYMBOL(wx_host_interface_command); int wx_set_pps(struct wx *wx, bool enable, u64 nsec, u64 cycles) @@ -442,7 +560,10 @@ static int wx_read_ee_hostif_data(struct wx *wx, u16 offset, u16 *data) if (status != 0) return status; - *data = (u16)rd32a(wx, WX_MNG_MBOX, FW_NVM_DATA_OFFSET); + if (!test_bit(WX_FLAG_SWFW_RING, wx->flags)) + *data = (u16)rd32a(wx, WX_MNG_MBOX, FW_NVM_DATA_OFFSET); + else + *data = (u16)rd32a(wx, WX_FW2SW_MBOX, FW_NVM_DATA_OFFSET); return status; } @@ -486,6 +607,7 @@ int wx_read_ee_hostif_buffer(struct wx *wx, u16 words_to_read; u32 value = 0; int status; + u32 mbox; u32 i; /* Take semaphore for the entire operation. 
*/ @@ -518,8 +640,12 @@ int wx_read_ee_hostif_buffer(struct wx *wx, goto out; } + if (!test_bit(WX_FLAG_SWFW_RING, wx->flags)) + mbox = WX_MNG_MBOX; + else + mbox = WX_FW2SW_MBOX; for (i = 0; i < words_to_read; i++) { - u32 reg = WX_MNG_MBOX + (FW_NVM_DATA_OFFSET << 2) + 2 * i; + u32 reg = mbox + (FW_NVM_DATA_OFFSET << 2) + 2 * i; value = rd32(wx, reg); data[current_word] = (u16)(value & 0xffff); @@ -569,12 +695,17 @@ void wx_init_eeprom_params(struct wx *wx) } } - if (wx->mac.type == wx_mac_sp) { + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: if (wx_read_ee_hostif(wx, WX_SW_REGION_PTR, &data)) { wx_err(wx, "NVM Read Error\n"); return; } data = data >> 1; + break; + default: + break; } eeprom->sw_region_offset = data; @@ -635,8 +766,15 @@ static int wx_set_rar(struct wx *wx, u32 index, u8 *addr, u64 pools, /* setup VMDq pool mapping */ wr32(wx, WX_PSR_MAC_SWC_VM_L, pools & 0xFFFFFFFF); - if (wx->mac.type == wx_mac_sp) + + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: wr32(wx, WX_PSR_MAC_SWC_VM_H, pools >> 32); + break; + default: + break; + } /* HW expects these in little endian so we reverse the byte * order from network order (big endian) to little endian @@ -774,9 +912,14 @@ void wx_init_rx_addrs(struct wx *wx) wx_set_rar(wx, 0, wx->mac.addr, 0, WX_PSR_MAC_SWC_AD_H_AV); - if (wx->mac.type == wx_mac_sp) { + switch (wx->mac.type) { + case wx_mac_sp: + case wx_mac_aml: /* clear VMDq pool/queue selection for RAR 0 */ wx_clear_vmdq(wx, 0, WX_CLEAR_VMDQ_ALL); + break; + default: + break; } } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 42f90802870b..895f3c96a678 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1823,10 +1823,16 @@ static int wx_alloc_q_vector(struct wx *wx, /* initialize pointer to rings */ ring = q_vector->ring; - if (wx->mac.type == wx_mac_sp) + switch (wx->mac.type) { + case wx_mac_sp: + case 
wx_mac_aml: default_itr = WX_12K_ITR; - else + break; + default: default_itr = WX_7K_ITR; + break; + } + /* initialize ITR */ if (txr_count && !rxr_count) /* tx only vector */ @@ -2182,10 +2188,17 @@ void wx_write_eitr(struct wx_q_vector *q_vector) int v_idx = q_vector->v_idx; u32 itr_reg; - if (wx->mac.type == wx_mac_sp) + switch (wx->mac.type) { + case wx_mac_sp: itr_reg = q_vector->itr & WX_SP_MAX_EITR; - else + break; + case wx_mac_aml: + itr_reg = (q_vector->itr >> 3) & WX_AML_MAX_EITR; + break; + default: itr_reg = q_vector->itr & WX_EM_MAX_EITR; + break; + } itr_reg |= WX_PX_ITR_CNT_WDIS; @@ -2761,7 +2774,7 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) netdev->features = features; - if (wx->mac.type == wx_mac_sp && changed & NETIF_F_HW_VLAN_CTAG_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX && wx->do_reset) wx->do_reset(netdev); else if (changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER)) wx_set_rx_mode(netdev); @@ -2793,7 +2806,7 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) break; } - if (need_reset) + if (need_reset && wx->do_reset) wx->do_reset(netdev); return 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index db446e690dc7..f79746ac6aca 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -309,6 +309,10 @@ #define WX_MNG_MBOX_CTL_FWRDY BIT(2) #define WX_MNG_BMC2OS_CNT 0x1E090 #define WX_MNG_OS2BMC_CNT 0x1E094 +#define WX_SW2FW_MBOX_CMD 0x1E0A0 +#define WX_SW2FW_MBOX_CMD_VLD BIT(31) +#define WX_SW2FW_MBOX 0x1E200 +#define WX_FW2SW_MBOX 0x1E300 /************************************* ETH MAC *****************************/ #define WX_MAC_TX_CFG 0x11000 @@ -372,6 +376,7 @@ enum WX_MSCA_CMD_value { #define WX_12K_ITR 336 #define WX_20K_ITR 200 #define WX_SP_MAX_EITR 0x00000FF8U +#define WX_AML_MAX_EITR 0x00000FFFU #define WX_EM_MAX_EITR 0x00007FFCU /* transmit DMA 
Registers */ @@ -415,6 +420,7 @@ enum WX_MSCA_CMD_value { /****************** Manageablility Host Interface defines ********************/ #define WX_HI_MAX_BLOCK_BYTE_LENGTH 256 /* Num of bytes in range */ #define WX_HI_COMMAND_TIMEOUT 1000 /* Process HI command limit */ +#define WX_HIC_HDR_INDEX_MAX 255 #define FW_READ_SHADOW_RAM_CMD 0x31 #define FW_READ_SHADOW_RAM_LEN 0x6 @@ -711,21 +717,30 @@ struct wx_hic_hdr { u8 cmd_resv; u8 ret_status; } cmd_or_resp; - u8 checksum; + union { + u8 checksum; + u8 index; + }; }; struct wx_hic_hdr2_req { u8 cmd; u8 buf_lenh; u8 buf_lenl; - u8 checksum; + union { + u8 checksum; + u8 index; + }; }; struct wx_hic_hdr2_rsp { u8 cmd; u8 buf_lenl; u8 buf_lenh_status; /* 7-5: high bits of buf_len, 4-0: status */ - u8 checksum; + union { + u8 checksum; + u8 index; + }; }; union wx_hic_hdr2 { @@ -773,7 +788,8 @@ struct wx_thermal_sensor_data { enum wx_mac_type { wx_mac_unknown = 0, wx_mac_sp, - wx_mac_em + wx_mac_em, + wx_mac_aml, }; enum sp_media_type { @@ -1085,12 +1101,14 @@ struct wx_hw_stats { enum wx_state { WX_STATE_RESETTING, + WX_STATE_SWFW_BUSY, WX_STATE_PTP_RUNNING, WX_STATE_PTP_TX_IN_PROGRESS, - WX_STATE_NBITS, /* must be last */ + WX_STATE_NBITS /* must be last */ }; enum wx_pf_flags { + WX_FLAG_SWFW_RING, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1130,6 +1148,7 @@ struct wx { char eeprom_id[32]; char *driver_name; enum wx_reset_type reset_type; + u8 swfw_index; /* PHY stuff */ unsigned int link; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index cd1372da92a9..4b9921b7bb11 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -197,6 +197,12 @@ int txgbe_reset_hw(struct wx *wx) txgbe_reset_misc(wx); + if (wx->mac.type != wx_mac_sp) { + wr32(wx, TXGBE_PX_PF_BME, 0x1); + wr32m(wx, TXGBE_RDM_RSC_CTL, TXGBE_RDM_RSC_CTL_FREE_CTL, + TXGBE_RDM_RSC_CTL_FREE_CTL); + } + 
wx_clear_hw_cntrs(wx); /* Store the permanent mac address */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 0ee73a265545..8658a51ee810 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -166,6 +166,9 @@ static void txgbe_del_irq_domain(struct txgbe *txgbe) void txgbe_free_misc_irq(struct txgbe *txgbe) { + if (txgbe->wx->mac.type == wx_mac_aml) + return; + free_irq(txgbe->link_irq, txgbe); free_irq(txgbe->misc.irq, txgbe); txgbe_del_irq_domain(txgbe); @@ -177,6 +180,9 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) struct wx *wx = txgbe->wx; int hwirq, err; + if (wx->mac.type == wx_mac_aml) + goto skip_sp_irq; + txgbe->misc.nirqs = 1; txgbe->misc.domain = irq_domain_add_simple(NULL, txgbe->misc.nirqs, 0, &txgbe_misc_irq_domain_ops, txgbe); @@ -206,6 +212,7 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) if (err) goto free_msic_irq; +skip_sp_irq: wx->misc_irq_domain = true; return 0; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 734450af9a43..ce83811a45e2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -35,6 +35,12 @@ char txgbe_driver_name[] = "txgbe"; static const struct pci_device_id txgbe_pci_tbl[] = { { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_SP1000), 0}, { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_WX1820), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5010), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5110), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5025), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5125), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5040), 0}, + { PCI_VDEVICE(WANGXUN, TXGBE_DEV_ID_AML5140), 0}, /* required last entry */ { .device = 0 } }; @@ -90,7 +96,18 @@ static void txgbe_up_complete(struct wx *wx) smp_mb__before_atomic(); wx_napi_enable_all(wx); - phylink_start(wx->phylink); + 
if (wx->mac.type == wx_mac_aml) { + u32 reg; + + reg = rd32(wx, TXGBE_AML_MAC_TX_CFG); + reg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK; + reg |= TXGBE_AML_MAC_TX_CFG_SPEED_25G; + wr32(wx, WX_MAC_TX_CFG, reg); + txgbe_enable_sec_tx_path(wx); + netif_carrier_on(wx->netdev); + } else { + phylink_start(wx->phylink); + } /* clear any pending interrupts, may auto mask */ rd32(wx, WX_PX_IC(0)); @@ -171,7 +188,10 @@ void txgbe_down(struct wx *wx) { txgbe_disable_device(wx); txgbe_reset(wx); - phylink_stop(wx->phylink); + if (wx->mac.type == wx_mac_aml) + netif_carrier_off(wx->netdev); + else + phylink_stop(wx->phylink); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); @@ -197,6 +217,14 @@ static void txgbe_init_type_code(struct wx *wx) case TXGBE_DEV_ID_WX1820: wx->mac.type = wx_mac_sp; break; + case TXGBE_DEV_ID_AML5010: + case TXGBE_DEV_ID_AML5110: + case TXGBE_DEV_ID_AML5025: + case TXGBE_DEV_ID_AML5125: + case TXGBE_DEV_ID_AML5040: + case TXGBE_DEV_ID_AML5140: + wx->mac.type = wx_mac_aml; + break; default: wx->mac.type = wx_mac_unknown; break; @@ -284,6 +312,17 @@ static int txgbe_sw_init(struct wx *wx) wx->do_reset = txgbe_do_reset; + switch (wx->mac.type) { + case wx_mac_sp: + break; + case wx_mac_aml: + set_bit(WX_FLAG_SWFW_RING, wx->flags); + wx->swfw_index = 0; + break; + default: + break; + } + return 0; } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 7e17d727c2ba..85f022ceef4f 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -567,6 +567,9 @@ int txgbe_init_phy(struct txgbe *txgbe) struct wx *wx = txgbe->wx; int ret; + if (wx->mac.type == wx_mac_aml) + return 0; + if (txgbe->wx->media_type == sp_media_copper) return txgbe_ext_phy_init(txgbe); @@ -631,6 +634,9 @@ err_unregister_swnode: void txgbe_remove_phy(struct txgbe *txgbe) { + if (txgbe->wx->mac.type == wx_mac_aml) + return; + if (txgbe->wx->media_type == sp_media_copper) { 
phylink_disconnect_phy(txgbe->wx->phylink); phylink_destroy(txgbe->wx->phylink); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 629a13e96b85..9c1c26234cad 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -10,6 +10,12 @@ /* Device IDs */ #define TXGBE_DEV_ID_SP1000 0x1001 #define TXGBE_DEV_ID_WX1820 0x2001 +#define TXGBE_DEV_ID_AML5010 0x5010 +#define TXGBE_DEV_ID_AML5110 0x5110 +#define TXGBE_DEV_ID_AML5025 0x5025 +#define TXGBE_DEV_ID_AML5125 0x5125 +#define TXGBE_DEV_ID_AML5040 0x5040 +#define TXGBE_DEV_ID_AML5140 0x5140 /* Subsystem IDs */ /* SFP */ @@ -137,6 +143,14 @@ #define TXGBE_RDB_FDIR_FLEX_CFG_MSK BIT(2) #define TXGBE_RDB_FDIR_FLEX_CFG_OFST(v) FIELD_PREP(GENMASK(7, 3), v) +/*************************** Amber Lite Registers ****************************/ +#define TXGBE_PX_PF_BME 0x4B8 +#define TXGBE_AML_MAC_TX_CFG 0x11000 +#define TXGBE_AML_MAC_TX_CFG_SPEED_MASK GENMASK(30, 27) +#define TXGBE_AML_MAC_TX_CFG_SPEED_25G BIT(28) +#define TXGBE_RDM_RSC_CTL 0x1200C +#define TXGBE_RDM_RSC_CTL_FREE_CTL BIT(7) + /* Checksum and EEPROM pointers */ #define TXGBE_EEPROM_LAST_WORD 0x800 #define TXGBE_EEPROM_CHECKSUM 0x2F -- 2.51.0 From a3ad653c915990fcbb72f9d41c94628255278abc Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Fri, 21 Feb 2025 14:57:18 +0800 Subject: [PATCH 03/16] net: wangxun: Replace the judgement of MAC type with flags Since new device MAC types are constantly being added, the checks on wx->mac.type are becoming complex. Convert these type checks to flags based on the functions each device supports.
Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20250221065718.197544-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_ethtool.c | 8 ++++---- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 4 ++-- drivers/net/ethernet/wangxun/libwx/wx_type.h | 1 + drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 2 ++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 6d3b57233a39..43019ec9329c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -72,7 +72,7 @@ int wx_get_sset_count(struct net_device *netdev, int sset) switch (sset) { case ETH_SS_STATS: - return (wx->mac.type == wx_mac_sp) ? + return (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) ? WX_STATS_LEN + WX_FDIR_STATS_LEN : WX_STATS_LEN; default: return -EOPNOTSUPP; @@ -90,7 +90,7 @@ void wx_get_strings(struct net_device *netdev, u32 stringset, u8 *data) case ETH_SS_STATS: for (i = 0; i < WX_GLOBAL_STATS_LEN; i++) ethtool_puts(&p, wx_gstrings_stats[i].stat_string); - if (wx->mac.type == wx_mac_sp) { + if (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) { for (i = 0; i < WX_FDIR_STATS_LEN; i++) ethtool_puts(&p, wx_gstrings_fdir_stats[i].stat_string); } @@ -124,7 +124,7 @@ void wx_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; } - if (wx->mac.type == wx_mac_sp) { + if (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) { for (k = 0; k < WX_FDIR_STATS_LEN; k++) { p = (char *)wx + wx_gstrings_fdir_stats[k].stat_offset; data[i++] = *(u64 *)p; @@ -199,7 +199,7 @@ void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) unsigned int stats_len = WX_STATS_LEN; struct wx *wx = netdev_priv(netdev); - if (wx->mac.type == wx_mac_sp) + if (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) stats_len += WX_FDIR_STATS_LEN; strscpy(info->driver, wx->driver_name, sizeof(info->driver)); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index b5f35b187077..aed45abafb1b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1861,7 +1861,7 @@ void wx_configure_rx(struct wx *wx) /* enable hw crc stripping */ wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_CRC_STRIP, WX_RSC_CTL_CRC_STRIP); - if (wx->mac.type == wx_mac_sp) { + if (test_bit(WX_FLAG_RSC_CAPABLE, wx->flags)) { u32 psrctl; /* RSC Setup */ @@ -2513,7 +2513,7 @@ void wx_update_stats(struct wx *wx) hwstats->b2ogprc += rd32(wx, WX_RDM_BMC2OS_CNT); hwstats->rdmdrop += rd32(wx, WX_RDM_DRP_PKT); - if (wx->mac.type == wx_mac_sp) { + if (test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)) { hwstats->fdirmatch += rd32(wx, WX_RDB_FDIR_MATCH); hwstats->fdirmiss += rd32(wx, WX_RDB_FDIR_MISS); } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index f79746ac6aca..5b230ecbbabb 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1112,6 +1112,7 @@ enum wx_pf_flags { WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, + WX_FLAG_RSC_CAPABLE, WX_FLAG_RX_HWTSTAMP_ENABLED, WX_FLAG_RX_HWTSTAMP_IN_REGISTER, WX_FLAG_PTP_PPS_ENABLED, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c 
b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index ce83811a45e2..a2e245e3b016 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -298,6 +298,8 @@ static int txgbe_sw_init(struct wx *wx) wx->atr = txgbe_atr; wx->configure_fdir = txgbe_configure_fdir; + set_bit(WX_FLAG_RSC_CAPABLE, wx->flags); + /* enable itr by default in dynamic mode */ wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; -- 2.51.0 From 3400ae49cd1a5a2a638c31498a5f5d0a0e11d8f8 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:51 +0200 Subject: [PATCH 04/16] net/mlx5e: Add helper function to update IPSec default destination MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The default destination of IPSec steering rules for MPV mode will be updated when the master device is brought up or down. Move the common code into the helper function. It’s convenient to update destinations in later patches. Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e7b64679f121..7f82d530d3e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -493,6 +493,14 @@ out: return err; } +static void ipsec_rx_update_default_dest(struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *old_dest, + struct mlx5_flow_destination *new_dest) +{ + mlx5_modify_rule_destination(rx->status.rule, new_dest, old_dest); + mlx5_modify_rule_destination(rx->sa.rule, new_dest, old_dest); +} + static void 
handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); @@ -507,8 +515,7 @@ static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) new_dest.ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); - mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); + ipsec_rx_update_default_dest(rx, &old_dest, &new_dest); } static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) @@ -520,8 +527,7 @@ static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) old_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; new_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), family2tt(family)); - mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); - mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); + ipsec_rx_update_default_dest(rx, &old_dest, &new_dest); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, ipsec->mdev); } -- 2.51.0 From 20d5fdc8951a1ed63af2b5b1671afb3b4f5a3293 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:52 +0200 Subject: [PATCH 05/16] net/mlx5e: Change the destination of IPSec RX SA miss rule For eswitch in legacy mode, the packets decrypted in RX SA table will continue to be processed for RoCE. But this is not necessary for the un-decrypted packets, which don't match any decryption rules but hit the miss rule at the end of the table. So, change the destination of miss rule to TTC default one and skip RoCE. For eswitch in switchdev mode, the destination is unchanged. 
Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7f82d530d3e1..7c9fdea21366 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -498,7 +498,6 @@ static void ipsec_rx_update_default_dest(struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *new_dest) { mlx5_modify_rule_destination(rx->status.rule, new_dest, old_dest); - mlx5_modify_rule_destination(rx->sa.rule, new_dest, old_dest); } static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) @@ -658,6 +657,20 @@ static int ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, return 0; } +static void ipsec_rx_sa_miss_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest, + struct mlx5_flow_destination *miss_dest) +{ + if (rx == ipsec->rx_esw) + *miss_dest = *dest; + else + *miss_dest = + mlx5_ttc_get_default_dest(attr->ttc, + family2tt(attr->family)); +} + static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, struct mlx5e_ipsec_rx_create_attr *attr) @@ -672,8 +685,8 @@ static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { + struct mlx5_flow_destination dest[2], miss_dest; struct mlx5e_ipsec_rx_create_attr attr; - struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; u32 flags = 0; int err; @@ -709,7 +722,8 @@ static int 
rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, } rx->ft.sa = ft; - err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, dest); + ipsec_rx_sa_miss_dest_get(ipsec, rx, &attr, &dest[0], &miss_dest); + err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, &miss_dest); if (err) goto err_fs; -- 2.51.0 From 85e4a808af2545fefaf18c8fe50071b06fcbdabc Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:53 +0200 Subject: [PATCH 06/16] net/mlx5e: Add correct match to check IPSec syndromes for switchdev mode In commit dddb49b63d86 ("net/mlx5e: Add IPsec and ASO syndromes check in HW"), IPSec and ASO syndromes checks after decryption for the specified ASO object were added. But they are correct only for eswitch in legacy mode. For switchdev mode, metadata register c1 is used to save the mapped id (not ASO object id). So, need to change the match accordingly for the check rules in status table. Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 28 ++++++++++++++----- .../mellanox/mlx5/core/esw/ipsec_fs.c | 13 +++++++++ .../mellanox/mlx5/core/esw/ipsec_fs.h | 5 ++++ include/linux/mlx5/eswitch.h | 2 ++ 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7c9fdea21366..e1b518aedee8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -165,6 +165,25 @@ static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, #endif } +static void ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_spec *spec) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + + if
(rx == ipsec->rx_esw) { + mlx5_esw_ipsec_rx_rule_add_match_obj(sa_entry, spec); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } +} + static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5e_ipsec_rx *rx) { @@ -200,11 +219,8 @@ static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 1); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); - MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.metadata_reg_c_2, - sa_entry->ipsec_obj_id | BIT(31)); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -281,10 +297,8 @@ static int rx_add_rule_drop_replay(struct mlx5e_ipsec_sa_entry *sa_entry, struct MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 1); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_2, - sa_entry->ipsec_obj_id | BIT(31)); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index ed977ae75fab..4bba2884c1c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -85,6 +85,19 @@ err_header_alloc: return err; } +void mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec) +{ + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, + ESW_IPSEC_RX_MAPPED_ID_MATCH_MASK); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_1, + sa_entry->rx_mapped_id << ESW_ZONE_ID_BITS); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5e_ipsec *ipsec = sa_entry->ipsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h index ac9c65b89166..514c15258b1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -20,6 +20,8 @@ int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr); void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); +void mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec); #else static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) {} @@ -48,5 +50,8 @@ static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr) {} static inline void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) {} +static inline void +mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec) {} 
#endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESW_IPSEC_FS_H__ */ diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index df73a2ccc9af..67256e776566 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -147,6 +147,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, /* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */ #define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0) +#define ESW_IPSEC_RX_MAPPED_ID_MATCH_MASK \ + GENMASK(31 - ESW_RESERVED_BITS, ESW_ZONE_ID_BITS) u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); -- 2.51.0 From 7d9e292ecd678269fd11ec8d09610fd1eda2742e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:54 +0200 Subject: [PATCH 07/16] net/mlx5e: Move IPSec policy check after decryption MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently, xfrm policy check is done before decryption in mlx5 driver. If matching any policy, packets are forwarded to xfrm state table for decryption. But this is the exact opposite of what software does. For kernel implementation, xfrm decode is unconditionally activated whenever an IPSec packet reaches the input flow if there’s a matching state rule. This patch changes the order, moving the policy check after decryption. Besides, a miss flow table is added at the end for legacy mode, to make it easier to update the default destination of the steering rules. So ESP packets are firstly forwarded to SA table for decryption, then the result is checked in status table. If the decryption succeeds, packets are forwarded to another table to check xfrm policy rules. When a policy with allow action is matched, if in legacy mode packets are forwarded to miss flow table with one rule to forward them to RoCE tables, if in switchdev mode they are forwarded directly to TC root chain instead.
Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 4 +- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 195 +++++++++++++----- .../mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 3 files changed, 145 insertions(+), 56 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1e8b7d330701..b5c3a2a9d2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -84,9 +84,9 @@ enum { MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_IPSEC - MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, - MLX5E_ACCEL_FS_ESP_FT_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, + MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e1b518aedee8..3d9d7aa2a06a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -45,6 +45,8 @@ struct mlx5e_ipsec_rx { struct mlx5e_ipsec_status_checks status_drops; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; + struct mlx5_flow_table *pol_miss_ft; + struct mlx5_flow_handle *pol_miss_rule; u8 allow_tunnel_mode : 1; }; @@ -156,13 +158,6 @@ static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { mlx5_del_flow_rules(rx->status.rule); - - if (rx != ipsec->rx_esw) - return; - -#ifdef CONFIG_MLX5_ESWITCH - mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); -#endif } static void ipsec_rx_rule_add_match_obj(struct 
mlx5e_ipsec_sa_entry *sa_entry, @@ -415,7 +410,7 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); @@ -596,13 +591,8 @@ static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); } -static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, u32 family) +static void ipsec_rx_policy_destroy(struct mlx5e_ipsec_rx *rx) { - /* disconnect */ - if (rx != ipsec->rx_esw) - ipsec_rx_ft_disconnect(ipsec, family); - if (rx->chains) { ipsec_chains_destroy(rx->chains); } else { @@ -611,6 +601,19 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.pol); } + if (rx->pol_miss_rule) { + mlx5_del_flow_rules(rx->pol_miss_rule); + mlx5_destroy_flow_table(rx->pol_miss_ft); + } +} + +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + /* disconnect */ + if (rx != ipsec->rx_esw) + ipsec_rx_ft_disconnect(ipsec, family); + mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); mlx5_destroy_flow_table(rx->ft.sa); @@ -619,7 +622,15 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); + ipsec_rx_policy_destroy(rx); + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); + +#ifdef CONFIG_MLX5_ESWITCH + if (rx == ipsec->rx_esw) + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), + 0, 1, 
0); +#endif } static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, @@ -685,6 +696,14 @@ static void ipsec_rx_sa_miss_dest_get(struct mlx5e_ipsec *ipsec, family2tt(attr->family)); } +static void ipsec_rx_default_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = rx->pol_miss_ft; +} + static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, struct mlx5e_ipsec_rx_create_attr *attr) @@ -692,10 +711,105 @@ static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5_flow_destination dest = {}; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx->ft.pol; + dest.ft = rx->ft.sa; mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest); } +static int ipsec_rx_chains_create_miss(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table_attr ft_attr = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int err; + + if (rx == ipsec->rx_esw) { + /* No need to create miss table for switchdev mode, + * just set it to the root chain table. 
+ */ + rx->pol_miss_ft = dest->ft; + return 0; + } + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = attr->pol_level; + ft_attr.prio = attr->prio; + + ft = mlx5_create_auto_grouped_flow_table(attr->ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_rule; + } + + rx->pol_miss_ft = ft; + rx->pol_miss_rule = rule; + + return 0; + +err_rule: + mlx5_destroy_flow_table(ft); + return err; +} + +static int ipsec_rx_policy_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_destination default_dest; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_table *ft; + int err; + + err = ipsec_rx_chains_create_miss(ipsec, rx, attr, dest); + if (err) + return err; + + ipsec_rx_default_dest_get(ipsec, rx, &default_dest); + + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + rx->chains = ipsec_chains_create(mdev, + default_dest.ft, + attr->chains_ns, + attr->prio, + attr->sa_level, + &rx->ft.pol); + if (IS_ERR(rx->chains)) + err = PTR_ERR(rx->chains); + } else { + ft = ipsec_ft_create(attr->ns, attr->pol_level, + attr->prio, 2, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_out; + } + rx->ft.pol = ft; + + err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, + &default_dest); + if (err) + mlx5_destroy_flow_table(rx->ft.pol); + } + + if (!err) + return 0; + +err_out: + if (rx->pol_miss_rule) { + mlx5_del_flow_rules(rx->pol_miss_rule); + mlx5_destroy_flow_table(rx->pol_miss_ft); + } + return err; +} + static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { @@ -718,12 +832,6 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, } rx->ft.status = ft; - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - 
dest[1].counter = rx->fc->cnt; - err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); - if (err) - goto err_add; - /* Create FT */ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); @@ -741,51 +849,33 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) goto err_fs; - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { - rx->chains = ipsec_chains_create(mdev, rx->ft.sa, - attr.chains_ns, - attr.prio, - attr.pol_level, - &rx->ft.pol); - if (IS_ERR(rx->chains)) { - err = PTR_ERR(rx->chains); - goto err_pol_ft; - } - - goto connect; - } + err = ipsec_rx_policy_create(ipsec, rx, &attr, &dest[0]); + if (err) + goto err_policy; - ft = ipsec_ft_create(attr.ns, attr.pol_level, attr.prio, 2, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - goto err_pol_ft; - } - rx->ft.pol = ft; - memset(dest, 0x00, 2 * sizeof(*dest)); dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = rx->ft.sa; - err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, dest); + dest[0].ft = rx->ft.pol; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter = rx->fc->cnt; + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) - goto err_pol_miss; + goto err_add; -connect: /* connect */ if (rx != ipsec->rx_esw) ipsec_rx_ft_connect(ipsec, rx, &attr); return 0; -err_pol_miss: - mlx5_destroy_flow_table(rx->ft.pol); -err_pol_ft: +err_add: + ipsec_rx_policy_destroy(rx); +err_policy: mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); err_fs: mlx5_destroy_flow_table(rx->ft.sa); -err_fs_ft: if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - mlx5_ipsec_rx_status_destroy(ipsec, rx); -err_add: +err_fs_ft: mlx5_destroy_flow_table(rx->ft.status); err_fs_ft_status: mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -1957,8 +2047,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) flow_act.flags |= FLOW_ACT_NO_APPEND; if (rx 
== ipsec->rx_esw && rx->chains) flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dstn].ft = rx->ft.sa; + ipsec_rx_default_dest_get(ipsec, rx, &dest[dstn]); dstn++; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); if (IS_ERR(rule)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 4bba2884c1c0..3cfe743610d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -10,9 +10,9 @@ #endif enum { - MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL, MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL, + MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, }; enum { -- 2.51.0 From aa2961e19ff60f2b69c0e35320221f2827d0bac6 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:55 +0200 Subject: [PATCH 08/16] net/mlx5e: Skip IPSec RX policy check for crypto offload For crypto offload, there is no xfrm policy rule offloaded to hardware, so no need to continue with policy check for it. Previously, for crypto offload, the hardware metadata reg c4 is not used and not changed, but set to ASO_OK(0) before decryption to avoid garbage data. Then a default rule is added to check ipsec_syndrome and this register. Packets are forwarded to the policy table if the check succeeds, or dropped if it fails. According to hardware document, this register value could be 0 or 1. So a special value (0xAA), which is not used by hardware, is chosen as an indication for crypto offload. It is set to c4 before decryption. Then a default rule, which matches on 0xAA (and ipsec_syndrome on 0), is added; a match means the packets were handled by crypto offload, and the rule sends them to the kernel directly, thus skipping the policy check.
Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 81 +++++++++++++------ 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 3d9d7aa2a06a..e72b365f24be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -16,6 +16,14 @@ #define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 #define IPSEC_TUNNEL_DEFAULT_TTL 0x40 +enum { + MLX5_IPSEC_ASO_OK, + MLX5_IPSEC_ASO_BAD_REPLY, + + /* For crypto offload, set by driver */ + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD = 0xAA, +}; + struct mlx5e_ipsec_fc { struct mlx5_fc *cnt; struct mlx5_fc *drop; @@ -33,6 +41,8 @@ struct mlx5e_ipsec_tx { }; struct mlx5e_ipsec_status_checks { + struct mlx5_flow_handle *packet_offload_pass_rule; + struct mlx5_flow_handle *crypto_offload_pass_rule; struct mlx5_flow_group *drop_all_group; struct mlx5e_ipsec_drop all; }; @@ -41,8 +51,7 @@ struct mlx5e_ipsec_rx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; struct mlx5e_ipsec_miss sa; - struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_status_checks status_drops; + struct mlx5e_ipsec_status_checks status_checks; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; struct mlx5_flow_table *pol_miss_ft; @@ -149,15 +158,16 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status_drops.all.rule); - mlx5_fc_destroy(ipsec->mdev, rx->status_drops.all.fc); - mlx5_destroy_flow_group(rx->status_drops.drop_all_group); + mlx5_del_flow_rules(rx->status_checks.all.rule); 
+ mlx5_fc_destroy(ipsec->mdev, rx->status_checks.all.fc); + mlx5_destroy_flow_group(rx->status_checks.drop_all_group); } static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status.rule); + mlx5_del_flow_rules(rx->status_checks.packet_offload_pass_rule); + mlx5_del_flow_rules(rx->status_checks.crypto_offload_pass_rule); } static void ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, @@ -368,9 +378,9 @@ static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status_drops.drop_all_group = g; - rx->status_drops.all.rule = rule; - rx->status_drops.all.fc = flow_counter; + rx->status_checks.drop_all_group = g; + rx->status_checks.all.rule = rule; + rx->status_checks.all.fc = flow_counter; kvfree(flow_group_in); kvfree(spec); @@ -386,9 +396,11 @@ err_out: return err; } -static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle * +ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest, + u8 aso_ok) { struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; @@ -397,7 +409,7 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) - return -ENOMEM; + return ERR_PTR(-ENOMEM); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); @@ -406,7 +418,7 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 0); MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.metadata_reg_c_4, 0); + misc_parameters_2.metadata_reg_c_4, aso_ok); if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; spec->match_criteria_enable = 
MLX5_MATCH_MISC_PARAMETERS_2; @@ -421,13 +433,12 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status.rule = rule; kvfree(spec); - return 0; + return rule; err_rule: kvfree(spec); - return err; + return ERR_PTR(err); } static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, @@ -441,19 +452,38 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *dest) { + struct mlx5_flow_destination pol_dest[2]; + struct mlx5_flow_handle *rule; int err; err = ipsec_rx_status_drop_all_create(ipsec, rx); if (err) return err; - err = ipsec_rx_status_pass_create(ipsec, rx, dest); - if (err) - goto err_pass_create; + rule = ipsec_rx_status_pass_create(ipsec, rx, dest, + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_crypto_offload_pass_create; + } + rx->status_checks.crypto_offload_pass_rule = rule; + + pol_dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + pol_dest[0].ft = rx->ft.pol; + pol_dest[1] = dest[1]; + rule = ipsec_rx_status_pass_create(ipsec, rx, pol_dest, + MLX5_IPSEC_ASO_OK); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_packet_offload_pass_create; + } + rx->status_checks.packet_offload_pass_rule = rule; return 0; -err_pass_create: +err_packet_offload_pass_create: + mlx5_del_flow_rules(rx->status_checks.crypto_offload_pass_rule); +err_crypto_offload_pass_create: ipsec_rx_status_drop_destroy(ipsec, rx); return err; } @@ -506,7 +536,9 @@ static void ipsec_rx_update_default_dest(struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *old_dest, struct mlx5_flow_destination *new_dest) { - mlx5_modify_rule_destination(rx->status.rule, new_dest, old_dest); + mlx5_modify_rule_destination(rx->pol_miss_rule, new_dest, old_dest); + mlx5_modify_rule_destination(rx->status_checks.crypto_offload_pass_rule, + new_dest, old_dest); } static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 
family) @@ -853,8 +885,6 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) goto err_policy; - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = rx->ft.pol; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter = rx->fc->cnt; err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); @@ -1464,7 +1494,8 @@ static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action[2], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); - MLX5_SET(set_action_in, action[2], data, 0); + MLX5_SET(set_action_in, action[2], data, + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD); MLX5_SET(set_action_in, action[2], offset, 0); MLX5_SET(set_action_in, action[2], length, 32); } -- 2.51.0 From e20674a7e5b1599a6b581206da8b3dbaadca423e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:56 +0200 Subject: [PATCH 09/16] net/mlx5e: Add num_reserved_entries param for ipsec_ft_create() Add parameter for ipsec_ft_create() to pass the number of the reserved entries when creating auto-grouped flow table. It's used to create table with pre-defined group(s) which may have more than one rule. 
Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e72b365f24be..2ee4c7bfd7e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -141,11 +141,12 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio) static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, int level, int prio, + int num_reserved_entries, int max_num_groups, u32 flags) { struct mlx5_flow_table_attr ft_attr = {}; - ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.num_reserved_entries = num_reserved_entries; ft_attr.autogroup.max_num_groups = max_num_groups; ft_attr.max_fte = NUM_IPSEC_FTE; ft_attr.level = level; @@ -818,7 +819,7 @@ static int ipsec_rx_policy_create(struct mlx5e_ipsec *ipsec, err = PTR_ERR(rx->chains); } else { ft = ipsec_ft_create(attr->ns, attr->pol_level, - attr->prio, 2, 0); + attr->prio, 1, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_out; @@ -857,7 +858,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 3, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; @@ -869,7 +870,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (rx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(attr.ns, 
attr.sa_level, attr.prio, 2, flags); + ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 1, 2, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft; @@ -1095,7 +1096,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, int err; ipsec_tx_create_attr_set(ipsec, tx, &attr); - ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 0); + ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 1, 0); if (IS_ERR(ft)) return PTR_ERR(ft); tx->ft.status = ft; @@ -1108,7 +1109,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (tx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 4, flags); + ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 1, 4, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_sa_ft; @@ -1136,7 +1137,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, goto connect_roce; } - ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 2, 0); + ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 1, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; -- 2.51.0 From 78e77a41e4019bc4538d09a3738180077220fa77 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:57 +0200 Subject: [PATCH 10/16] net/mlx5e: Add pass flow group for IPSec RX status table This flow group is added for the pass rules for both crypto offload and packet offload. It is placed at the end of the table, and right before the miss group. There are two entries, and the default pass rules for both offloads are added in this group. 
Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 2ee4c7bfd7e6..840d9e0514d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -41,6 +41,7 @@ struct mlx5e_ipsec_tx { }; struct mlx5e_ipsec_status_checks { + struct mlx5_flow_group *pass_group; struct mlx5_flow_handle *packet_offload_pass_rule; struct mlx5_flow_handle *crypto_offload_pass_rule; struct mlx5_flow_group *drop_all_group; @@ -397,6 +398,47 @@ err_out: return err; } +static int ipsec_rx_status_pass_group_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_flow_group *fg; + void *match_criteria; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_4); + + MLX5_SET(create_flow_group_in, flow_group_in, + start_flow_index, ft->max_fte - 3); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ft->max_fte - 2); + + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + 
mlx5_core_warn(ipsec->mdev, + "Failed to create rx status pass flow group, err=%d\n", + err); + } + rx->status_checks.pass_group = fg; + + kvfree(flow_group_in); + return err; +} + static struct mlx5_flow_handle * ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, @@ -446,6 +488,7 @@ static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { ipsec_rx_status_pass_destroy(ipsec, rx); + mlx5_destroy_flow_group(rx->status_checks.pass_group); ipsec_rx_status_drop_destroy(ipsec, rx); } @@ -461,6 +504,10 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, if (err) return err; + err = ipsec_rx_status_pass_group_create(ipsec, rx); + if (err) + goto err_pass_group_create; + rule = ipsec_rx_status_pass_create(ipsec, rx, dest, MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD); if (IS_ERR(rule)) { @@ -485,6 +532,8 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, err_packet_offload_pass_create: mlx5_del_flow_rules(rx->status_checks.crypto_offload_pass_rule); err_crypto_offload_pass_create: + mlx5_destroy_flow_group(rx->status_checks.pass_group); +err_pass_group_create: ipsec_rx_status_drop_destroy(ipsec, rx); return err; } @@ -858,7 +907,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 1, 3, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 3, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; -- 2.51.0 From c69046c3f2dcef3fe65eb771544547286934a865 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 20 Feb 2025 23:39:58 +0200 Subject: [PATCH 11/16] net/mlx5e: Support RX xfrm state selector's UPSPEC for packet offload Previously, the upper layer matches are added for the decryption rule when xfrm selector's UPSPEC is specified in the command. 
However, this is impossible because the packets have not been decrypted yet, so there is no way to match on the upper protocol (TCP/UDP) with a specific source/destination port. The result is that packets are not decrypted by hardware because of this mismatch. Instead, they are forwarded to the kernel, and decryption is done by software. To resolve this issue, this patch adds a new table (sa_sel) after the status table and before the policy table. When UPSPEC's proto is specified in the xfrm state's selector, a rule is added in the status table to forward the decrypted packets to the sa_sel table, where the corresponding rule for the selector's UPSPEC is added, and the packets' upper headers are checked there. If matched, they will be forwarded to the policy table to do the policy check. Otherwise, they are dropped immediately. In addition, add a global counter for this kind of packet drop. Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Patrisious Haddad Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250220213959.504304-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec.h | 5 + .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 238 +++++++++++++++++- .../mellanox/mlx5/core/en_accel/ipsec_stats.c | 1 + 3 files changed, 242 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 7d943e93cf6d..ad8db9e1fd1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -128,6 +128,7 @@ struct mlx5e_ipsec_hw_stats { u64 ipsec_rx_bytes; u64 ipsec_rx_drop_pkts; u64 ipsec_rx_drop_bytes; + u64 ipsec_rx_drop_mismatch_sa_sel; u64 ipsec_tx_pkts; u64 ipsec_tx_bytes; u64 ipsec_tx_drop_pkts; @@ -184,6 +185,7 @@ struct mlx5e_ipsec_ft { struct mutex mutex; /* Protect changes to this struct */ struct mlx5_flow_table *pol; struct mlx5_flow_table *sa; + struct mlx5_flow_table *sa_sel; struct
mlx5_flow_table *status; u32 refcnt; }; @@ -195,6 +197,8 @@ struct mlx5e_ipsec_drop { struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; + struct mlx5_flow_handle *status_pass; + struct mlx5_flow_handle *sa_sel; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_fc *fc; @@ -206,6 +210,7 @@ struct mlx5e_ipsec_rule { struct mlx5e_ipsec_miss { struct mlx5_flow_group *group; struct mlx5_flow_handle *rule; + struct mlx5_fc *fc; }; struct mlx5e_ipsec_tx_create_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 840d9e0514d3..d51ace739637 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -16,6 +16,8 @@ #define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 #define IPSEC_TUNNEL_DEFAULT_TTL 0x40 +#define MLX5_IPSEC_FS_SA_SELECTOR_MAX_NUM_GROUPS 16 + enum { MLX5_IPSEC_ASO_OK, MLX5_IPSEC_ASO_BAD_REPLY, @@ -52,6 +54,7 @@ struct mlx5e_ipsec_rx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; struct mlx5e_ipsec_miss sa; + struct mlx5e_ipsec_miss sa_sel; struct mlx5e_ipsec_status_checks status_checks; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; @@ -689,6 +692,16 @@ static void ipsec_rx_policy_destroy(struct mlx5e_ipsec_rx *rx) } } +static void ipsec_rx_sa_selector_destroy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->sa_sel.rule); + mlx5_fc_destroy(mdev, rx->sa_sel.fc); + rx->sa_sel.fc = NULL; + mlx5_destroy_flow_group(rx->sa_sel.group); + mlx5_destroy_flow_table(rx->ft.sa_sel); +} + static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { @@ -704,6 +717,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); + 
ipsec_rx_sa_selector_destroy(mdev, rx); + ipsec_rx_policy_destroy(rx); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -892,6 +907,115 @@ err_out: return err; } +static int ipsec_rx_sa_selector_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dest; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + u32 *flow_group_in; + struct mlx5_fc *fc; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft = ipsec_ft_create(attr->ns, attr->status_level, attr->prio, 1, + MLX5_IPSEC_FS_SA_SELECTOR_MAX_NUM_GROUPS, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Failed to create RX SA selector flow table, err=%d\n", + err); + goto err_ft; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_err(mdev, "Failed to create RX SA selector miss group, err=%d\n", + err); + goto err_fg; + } + + fc = mlx5_fc_create(mdev, false); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + mlx5_core_err(mdev, + "Failed to create ipsec RX SA selector miss rule counter, err=%d\n", + err); + goto err_cnt; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = fc; + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_DROP; + + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to create RX SA selector miss drop rule, err=%d\n", + err); + goto err_rule; + } + + rx->ft.sa_sel = ft; + rx->sa_sel.group = fg; + 
rx->sa_sel.fc = fc; + rx->sa_sel.rule = rule; + + kvfree(flow_group_in); + + return 0; + +err_rule: + mlx5_fc_destroy(mdev, fc); +err_cnt: + mlx5_destroy_flow_group(fg); +err_fg: + mlx5_destroy_flow_table(ft); +err_ft: + kvfree(flow_group_in); + return err; +} + +/* The decryption processing is as follows: + * + * +----------+ +-------------+ + * | | | | + * | Kernel <--------------+----------+ policy miss <------------+ + * | | ^ | | ^ + * +----^-----+ | +-------------+ | + * | crypto | + * miss offload ok allow/default + * ^ ^ ^ + * | | packet | + * +----+---------+ +----+-------------+ offload ok +------+---+ + * | | | | (no UPSPEC) | | + * | SA (decrypt) +-----> status +--->------->----+ policy | + * | | | | | | + * +--------------+ ++---------+-------+ +-^----+---+ + * | | | | + * v packet +-->->---+ v + * | offload ok match | + * fails (with UPSPEC) | block + * | | +-------------+-+ | + * v v | | miss v + * drop +---> SA sel +--------->drop + * | | + * +---------------+ + */ + static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { @@ -907,13 +1031,17 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 3, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 4, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; } rx->ft.status = ft; + err = ipsec_rx_sa_selector_create(ipsec, rx, &attr); + if (err) + goto err_fs_ft_sa_sel; + /* Create FT */ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); @@ -956,6 +1084,8 @@ err_fs: if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); err_fs_ft: + ipsec_rx_sa_selector_destroy(mdev, rx); +err_fs_ft_sa_sel: mlx5_destroy_flow_table(rx->ft.status); err_fs_ft_status: mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -1781,6 +1911,85 @@ static int 
setup_pkt_reformat(struct mlx5e_ipsec *ipsec, return 0; } +static int rx_add_rule_sa_selector(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx, + struct upspec *upspec) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_4, 0); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.sa_sel; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter = rx->fc->cnt; + + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx pass rule, err=%d\n", + err); + goto err_add_status_pass_rule; + } + + sa_entry->ipsec_rule.status_pass = rule; + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4, 0); + + setup_fte_upper_proto_match(spec, upspec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.pol; + + rule = 
mlx5_add_flow_rules(rx->ft.sa_sel, spec, &flow_act, &dest[0], 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx sa selector rule, err=%d\n", + err); + goto err_add_sa_sel_rule; + } + + sa_entry->ipsec_rule.sa_sel = rule; + + kvfree(spec); + return 0; + +err_add_sa_sel_rule: + mlx5_del_flow_rules(sa_entry->ipsec_rule.status_pass); +err_add_status_pass_rule: + kvfree(spec); + return err; +} + static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; @@ -1813,7 +2022,6 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) if (!attrs->encap) setup_fte_esp(spec); setup_fte_no_frags(spec); - setup_fte_upper_proto_match(spec, &attrs->upspec); if (!attrs->drop) { if (rx != ipsec->rx_esw) @@ -1861,6 +2069,13 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); goto err_add_flow; } + + if (attrs->upspec.proto && attrs->type == XFRM_DEV_OFFLOAD_PACKET) { + err = rx_add_rule_sa_selector(sa_entry, rx, &attrs->upspec); + if (err) + goto err_add_sa_sel; + } + if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) err = rx_add_rule_drop_replay(sa_entry, rx); if (err) @@ -1884,6 +2099,11 @@ err_drop_reason: mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.replay.fc); } err_add_replay: + if (sa_entry->ipsec_rule.sa_sel) { + mlx5_del_flow_rules(sa_entry->ipsec_rule.sa_sel); + mlx5_del_flow_rules(sa_entry->ipsec_rule.status_pass); + } +err_add_sa_sel: mlx5_del_flow_rules(rule); err_add_flow: mlx5_fc_destroy(mdev, counter); @@ -2265,6 +2485,7 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) stats->ipsec_rx_bytes = 0; stats->ipsec_rx_drop_pkts = 0; stats->ipsec_rx_drop_bytes = 0; + stats->ipsec_rx_drop_mismatch_sa_sel = 0; stats->ipsec_tx_pkts = 0; stats->ipsec_tx_bytes = 0; stats->ipsec_tx_drop_pkts = 0; @@ -2274,6 +2495,9 @@ void mlx5e_accel_ipsec_fs_read_stats(struct 
mlx5e_priv *priv, void *ipsec_stats) mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_rx_pkts, &stats->ipsec_rx_bytes); mlx5_fc_query(mdev, fc->drop, &stats->ipsec_rx_drop_pkts, &stats->ipsec_rx_drop_bytes); + if (ipsec->rx_ipv4->sa_sel.fc) + mlx5_fc_query(mdev, ipsec->rx_ipv4->sa_sel.fc, + &stats->ipsec_rx_drop_mismatch_sa_sel, &bytes); fc = ipsec->tx->fc; mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_tx_pkts, &stats->ipsec_tx_bytes); @@ -2302,6 +2526,11 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) stats->ipsec_tx_drop_pkts += packets; stats->ipsec_tx_drop_bytes += bytes; } + + if (ipsec->rx_esw->sa_sel.fc && + !mlx5_fc_query(mdev, ipsec->rx_esw->sa_sel.fc, + &packets, &bytes)) + stats->ipsec_rx_drop_mismatch_sa_sel += packets; } } @@ -2399,6 +2628,11 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_del_flow_rules(ipsec_rule->auth.rule); mlx5_fc_destroy(mdev, ipsec_rule->auth.fc); + if (ipsec_rule->sa_sel) { + mlx5_del_flow_rules(ipsec_rule->sa_sel); + mlx5_del_flow_rules(ipsec_rule->status_pass); + } + if (ipsec_rule->replay.rule) { mlx5_del_flow_rules(ipsec_rule->replay.rule); mlx5_fc_destroy(mdev, ipsec_rule->replay.fc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 92bf3fa44a3b..93be388068f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -42,6 +42,7 @@ static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_pkts) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_mismatch_sa_sel) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_pkts) }, { 
MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_pkts) }, -- 2.51.0 From 8f3f4464ff08f70e959c026fad2f3790abe84be6 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 21 Feb 2025 09:53:22 +0100 Subject: [PATCH 12/16] net/mlx5: Use secs_to_jiffies() instead of msecs_to_jiffies() Use secs_to_jiffies() and simplify the code. Reviewed-by: Jacob Keller Reviewed-by: Saeed Mahameed Signed-off-by: Thorsten Blum Reviewed-by: Tariq Toukan Reviewed-by: Somnath Kotur Link: https://patch.msgid.link/20250221085350.198024-3-thorsten.blum@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 3dbd4efa21a2..19dce1ba512d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -220,7 +220,7 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, bool drain) { unsigned long timeout = jiffies + - msecs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT * MSEC_PER_SEC); + secs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT); struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH]; u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id); bool got_comp = *pending_rules >= burst_th; -- 2.51.0 From bc337e8c0e762b0c1eaca00aa6955cd0e7013ba1 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 21 Feb 2025 16:43:54 +0100 Subject: [PATCH 13/16] mptcp: pm: remove unused ret value to set flags The returned value is not used, it can then be dropped. 
Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250221-net-next-mptcp-pm-misc-cleanup-3-v1-1-2b70ab1cee79@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 99705a9c2238..ff1e5695dc1d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1922,13 +1922,11 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); } -static int mptcp_nl_set_flags(struct net *net, - struct mptcp_addr_info *addr, - u8 bkup, u8 changed) +static void mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr, + u8 bkup, u8 changed) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; - int ret = -EINVAL; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; @@ -1938,7 +1936,7 @@ static int mptcp_nl_set_flags(struct net *net, lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); + mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) mptcp_pm_nl_fullmesh(msk, addr); release_sock(sk); @@ -1948,7 +1946,7 @@ next: cond_resched(); } - return ret; + return; } int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, -- 2.51.0 From 145dc6cc4abdb3b76eb01a0943a540db2a01ebe6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 21 Feb 2025 16:43:55 +0100 Subject: [PATCH 14/16] mptcp: pm: change to fullmesh only for 'subflow' If an endpoint doesn't have the 'subflow' flag -- in fact, has no type, so not 'subflow', 'signal', nor 'implicit' -- there are then no subflows created from this local endpoint to at least the initial destination address. 
In this case, no need to call mptcp_pm_nl_fullmesh() which is there to recreate the subflows to reflect the new value of the fullmesh attribute. Similarly, there is then no need to iterate over all connections to do nothing, if only the 'fullmesh' flag has been changed, and the endpoint doesn't have the 'subflow' one. So stop early when dealing with this specific case. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250221-net-next-mptcp-pm-misc-cleanup-3-v1-2-2b70ab1cee79@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ff1e5695dc1d..1a0695e087af 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1923,11 +1923,16 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, } static void mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr, - u8 bkup, u8 changed) + u8 flags, u8 changed) { + u8 is_subflow = !!(flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); + u8 bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); long s_slot = 0, s_num = 0; struct mptcp_sock *msk; + if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) + return; + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; @@ -1937,7 +1942,8 @@ static void mptcp_nl_set_flags(struct net *net, struct mptcp_addr_info *addr, lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); - if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) + /* Subflows will only be recreated if the SUBFLOW flag is set */ + if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) mptcp_pm_nl_fullmesh(msk, addr); release_sock(sk); @@ -1959,7 +1965,6 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; - u8 bkup = 0; pernet 
= pm_nl_get_pernet(net); @@ -1972,9 +1977,6 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, } } - if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) - bkup = 1; - spin_lock_bh(&pernet->lock); entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : __lookup_addr(pernet, &local->addr); @@ -1996,7 +1998,7 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, *local = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, &local->addr, bkup, changed); + mptcp_nl_set_flags(net, &local->addr, entry->flags, changed); return 0; } -- 2.51.0 From 63132fb054744e58de61d45a6d4f2a707cdfcfb3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 21 Feb 2025 16:43:56 +0100 Subject: [PATCH 15/16] mptcp: pm: add a build check for userspace_pm_dump_addr This patch adds a build check for mptcp_userspace_pm_dump_addr() to make sure there is enough space in 'cb->ctx' to store an address id bitmap. This guards against the information stored in 'cb->ctx' growing later. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250221-net-next-mptcp-pm-misc-cleanup-3-v1-3-2b70ab1cee79@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 277cf092a870..b69fb5b18130 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -643,6 +643,8 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct sock *sk; void *hdr; + BUILD_BUG_ON(sizeof(struct id_bitmap) > sizeof(cb->ctx)); + bitmap = (struct id_bitmap *)cb->ctx; msk = mptcp_userspace_pm_get_sock(info); -- 2.51.0 From f8fe8174657329609a80f66da1d3dd80a80de76b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 21 Feb 2025 16:43:57 +0100 Subject: [PATCH 16/16] mptcp: pm: add mptcp_pm_genl_fill_addr helper To save some redundant code in dump_addr() interfaces of both the netlink PM and
userspace PM, the code that calls netlink message helpers (genlmsg_put/cancel/end) and mptcp_nl_fill_addr() is wrapped into a new helper mptcp_pm_genl_fill_addr(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250221-net-next-mptcp-pm-misc-cleanup-3-v1-4-2b70ab1cee79@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 21 +++++++++++++++++++++ net/mptcp/pm_netlink.c | 12 +----------- net/mptcp/pm_userspace.c | 12 +----------- net/mptcp/protocol.h | 3 +++ 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index b1f36dc1a091..16cacce6c10f 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -489,6 +489,27 @@ fail: return ret; } +int mptcp_pm_genl_fill_addr(struct sk_buff *msg, + struct netlink_callback *cb, + struct mptcp_pm_addr_entry *entry) +{ + void *hdr; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + return -EINVAL; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + return -EINVAL; + } + + genlmsg_end(msg, hdr); + return 0; +} + static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1a0695e087af..98fcbf8b1465 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1798,7 +1798,6 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; int id = cb->args[0]; - void *hdr; int i; pernet = pm_nl_get_pernet(net); @@ -1813,19 +1812,10 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, if (entry->addr.id <= id) continue; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) + if 
(mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) break; - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; - } - id = entry->addr.id; - genlmsg_end(msg, hdr); } } rcu_read_unlock(); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b69fb5b18130..bedd6f9ebc8b 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -641,7 +641,6 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; - void *hdr; BUILD_BUG_ON(sizeof(struct id_bitmap) > sizeof(cb->ctx)); @@ -659,19 +658,10 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, if (test_bit(entry->addr.id, bitmap->map)) continue; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, &mptcp_genl_family, - NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); - if (!hdr) + if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) break; - if (mptcp_nl_fill_addr(msg, entry) < 0) { - genlmsg_cancel(msg, hdr); - break; - } - __set_bit(entry->addr.id, bitmap->map); - genlmsg_end(msg, hdr); } spin_unlock_bh(&msk->pm.lock); release_sock(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ca65f8bff632..256677c43ca6 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1057,6 +1057,9 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf struct request_sock *req); int mptcp_nl_fill_addr(struct sk_buff *skb, struct mptcp_pm_addr_entry *entry); +int mptcp_pm_genl_fill_addr(struct sk_buff *msg, + struct netlink_callback *cb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { -- 2.51.0