From 127c49624a0980ee7b8a5ba9094d6942332a48da Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 4 Jun 2025 18:06:04 +0200 Subject: [PATCH 01/16] can: add drop reasons in the receive path of AF_CAN Besides the existing pr_warn_once(), use skb drop reasons in case AF_CAN layer drops non-conformant CAN{,FD,XL} frames, or conformant frames received by "wrong" devices, so that it's possible to debug (and count) such events using existing tracepoints: | # perf record -e skb:kfree_skb -aR -- ./drv/canfdtest -v -g -l 1 vcan0 | # perf script | [...] | canfdtest 1123 [000] 3893.271264: skb:kfree_skb: skbaddr=0xffff975703c9f700 rx_sk=(nil) protocol=12 location=can_rcv+0x4b reason: CAN_RX_INVALID_FRAME Signed-off-by: Davide Caratti Link: https://patch.msgid.link/20250604160605.1005704-2-dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- include/net/dropreason-core.h | 18 ++++++++++++++++++ net/can/af_can.c | 6 +++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index bcf9d7467e1a1..b9e78290269e6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -121,6 +121,9 @@ FN(ARP_PVLAN_DISABLE) \ FN(MAC_IEEE_MAC_CONTROL) \ FN(BRIDGE_INGRESS_STP_STATE) \ + FN(CAN_RX_INVALID_FRAME) \ + FN(CANFD_RX_INVALID_FRAME) \ + FN(CANXL_RX_INVALID_FRAME) \ FNe(MAX) /** @@ -573,6 +576,21 @@ enum skb_drop_reason { * ingress bridge port does not allow frames to be forwarded. */ SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE, + /** + * @SKB_DROP_REASON_CAN_RX_INVALID_FRAME: received + * non conform CAN frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CAN_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_CANFD_RX_INVALID_FRAME: received + * non conform CAN-FD frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CANFD_RX_INVALID_FRAME, + /** + * @SKB_DROP_REASON_CANXL_RX_INVALID_FRAME: received + * non conform CAN-XL frame (or device is unable to receive CAN frames) + */ + SKB_DROP_REASON_CANXL_RX_INVALID_FRAME, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/net/can/af_can.c b/net/can/af_can.c index 4aab7033c9330..b2387a46794a5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -683,7 +683,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CAN_RX_INVALID_FRAME); return NET_RX_DROP; } @@ -698,7 +698,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CANFD_RX_INVALID_FRAME); return NET_RX_DROP; } @@ -713,7 +713,7 @@ static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CANXL_RX_INVALID_FRAME); return NET_RX_DROP; } -- 2.51.0 From 81807451c2a6af59bbc58adfd0da69870c30d4ab Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 4 Jun 2025 18:06:05 +0200 Subject: [PATCH 02/16] can: add drop reasons in CAN protocols receive path sock_queue_rcv_skb() can fail because of lack of memory resources: use drop reasons and pass the receiving socket to the tracepoint, so that it's possible to better locate/debug such events. Tested with: | # modprobe vcan echo=1 | # ip link add name vcan2 type vcan | # ip link set dev vcan2 up | # ./netlayer/tst-proc 1 & | # bg | # while true ; do perf record -e skb:kfree_skb -aR -- \ | > ./raw/tst-raw-sendto vcan2 ; perf script ; done | [...] | tst-raw-sendto 10942 [000] 506428.431856: skb:kfree_skb: skbaddr=0xffff97cec38b4200 rx_sk=0xffff97cf0f75a800 protocol=12 location=raw_rcv+0x20e reason: SOCKET_RCVBUF Signed-off-by: Davide Caratti Link: https://patch.msgid.link/20250604160605.1005704-3-dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 5 +++-- net/can/isotp.c | 5 +++-- net/can/j1939/socket.c | 5 +++-- net/can/raw.c | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 6bc1cc4c94c5e..5e690a2377e48 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -359,6 +359,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, unsigned int datalen = head->nframes * op->cfsiz; int err; unsigned int *pflags; + enum skb_drop_reason reason; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) @@ -413,11 +414,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; - err = sock_queue_rcv_skb(sk, skb); + err = sock_queue_rcv_skb_reason(sk, skb, &reason); if (err < 0) { struct bcm_sock *bo = bcm_sk(sk); - kfree_skb(skb); + sk_skb_reason_drop(sk, skb, reason); /* don't care about overflows in this statistic */ bo->dropped_usr_msgs++; } diff --git a/net/can/isotp.c b/net/can/isotp.c index 1efa377f002ed..dee1412b3c9c1 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -278,6 +278,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus) static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) { struct sockaddr_can *addr = (struct sockaddr_can *)skb->cb; + enum skb_drop_reason reason; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); @@ -285,8 +286,8 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + sk_skb_reason_drop(sk, skb, reason); } static u8 padlen(u8 datalen) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6fefe7a687611..3d8b588822f9d 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -311,6 +311,7 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) { const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb); struct j1939_sk_buff_cb *skcb; + enum skb_drop_reason reason; struct sk_buff *skb; if (oskb->sk == &jsk->sk) @@ -331,8 +332,8 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) if (skb->sk) skcb->msg_flags |= MSG_DONTROUTE; - if (sock_queue_rcv_skb(&jsk->sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(&jsk->sk, skb, &reason) < 0) + sk_skb_reason_drop(&jsk->sk, skb, reason); } bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) diff --git a/net/can/raw.c b/net/can/raw.c index 020f21430b1d8..76b867d21def2 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -129,6 +129,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) { struct sock *sk = (struct sock *)data; struct raw_sock *ro = raw_sk(sk); + enum skb_drop_reason reason; struct sockaddr_can *addr; struct sk_buff *skb; unsigned int *pflags; @@ -205,8 +206,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (oskb->sk == sk) *pflags |= MSG_CONFIRM; - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + sk_skb_reason_drop(sk, skb, reason); } static int raw_enable_filters(struct net *net, struct net_device *dev, -- 2.51.0 From bc0cb64db1c765a81f69997d5a28f539e1731bc0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:26 -0700 Subject: [PATCH 03/16] netconsole: Only register console drivers when targets are configured The netconsole driver currently registers the basic console driver unconditionally during initialization, even when only extended targets are configured. This results in unnecessary console registration and performance overhead, as the write_msg() callback is invoked for every log message only to return early when no matching targets are found. Optimize the driver by conditionally registering console drivers based on the actual target configuration. The basic console driver is now registered only when non-extended targets exist, same as the extended console. The implementation also handles dynamic target creation through the configfs interface. This change eliminates unnecessary console driver registrations, redundant write_msg() callbacks for unused console types, and associated lock contention and target list iterations. The optimization is particularly beneficial for systems using only the most common extended console type. Fixes: e2f15f9a79201 ("netconsole: implement extended console support") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-1-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41bf..01baa45221b4b 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -86,10 +86,10 @@ static DEFINE_SPINLOCK(target_list_lock); static DEFINE_MUTEX(target_cleanup_list_lock); /* - * Console driver for extended netconsoles. Registered on the first use to - * avoid unnecessarily enabling ext message formatting. + * Console driver for netconsoles. Register only consoles that have + * an associated target of the same type. */ -static struct console netconsole_ext; +static struct console netconsole_ext, netconsole; struct netconsole_target_stats { u64_stats_t xmit_drop_count; @@ -97,6 +97,11 @@ struct netconsole_target_stats { struct u64_stats_sync syncp; }; +enum console_type { + CONS_BASIC = BIT(0), + CONS_EXTENDED = BIT(1), +}; + /* Features enabled in sysdata. Contrary to userdata, this data is populated by * the kernel. The fields are designed as bitwise flags, allowing multiple * features to be set in sysdata_fields. @@ -491,6 +496,12 @@ static ssize_t enabled_store(struct config_item *item, if (nt->extended && !console_is_registered(&netconsole_ext)) register_console(&netconsole_ext); + /* User might be enabling the basic format target for the very + * first time, make sure the console is registered. + */ + if (!nt->extended && !console_is_registered(&netconsole)) + register_console(&netconsole); + /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. @@ -1691,8 +1702,8 @@ static int __init init_netconsole(void) { int err; struct netconsole_target *nt, *tmp; + u32 console_type_needed = 0; unsigned int count = 0; - bool extended = false; unsigned long flags; char *target_config; char *input = config; @@ -1708,9 +1719,10 @@ static int __init init_netconsole(void) } /* Dump existing printks when we register */ if (nt->extended) { - extended = true; + console_type_needed |= CONS_EXTENDED; netconsole_ext.flags |= CON_PRINTBUFFER; } else { + console_type_needed |= CONS_BASIC; netconsole.flags |= CON_PRINTBUFFER; } @@ -1729,9 +1741,10 @@ static int __init init_netconsole(void) if (err) goto undonotifier; - if (extended) + if (console_type_needed & CONS_EXTENDED) register_console(&netconsole_ext); - register_console(&netconsole); + if (console_type_needed & CONS_BASIC) + register_console(&netconsole); pr_info("network logging started\n"); return err; @@ -1761,7 +1774,8 @@ static void __exit cleanup_netconsole(void) if (console_is_registered(&netconsole_ext)) unregister_console(&netconsole_ext); - unregister_console(&netconsole); + if (console_is_registered(&netconsole)) + unregister_console(&netconsole); dynamic_netconsole_exit(); unregister_netdevice_notifier(&netconsole_netdev_notifier); -- 2.51.0 From e99d938f867173937373b1bb08cbc2d102a07d0c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:27 -0700 Subject: [PATCH 04/16] netconsole: Add automatic console unregistration on target removal Add unregister_netcons_consoles() function to automatically unregister console handlers when no targets of the corresponding type remain active. The function iterates through the target list to determine which console types (basic vs extended) are still needed, and unregisters any console handlers that are no longer required. This prevents having registered console handlers without corresponding active targets. The function is called when a target is disabled and moved to the cleanup list, ensuring proper cleanup of unused console registrations. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-2-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 01baa45221b4b..21077aff061c5 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -460,6 +460,33 @@ static ssize_t sysdata_release_enabled_show(struct config_item *item, return sysfs_emit(buf, "%d\n", release_enabled); } +/* Iterate in the list of target, and make sure we don't have any console + * register without targets of the same type + */ +static void unregister_netcons_consoles(void) +{ + struct netconsole_target *nt; + u32 console_type_needed = 0; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); + list_for_each_entry(nt, &target_list, list) { + if (nt->extended) + console_type_needed |= CONS_EXTENDED; + else + console_type_needed |= CONS_BASIC; + } + spin_unlock_irqrestore(&target_list_lock, flags); + + if (!(console_type_needed & CONS_EXTENDED) && + console_is_registered(&netconsole_ext)) + unregister_console(&netconsole_ext); + + if (!(console_type_needed & CONS_BASIC) && + console_is_registered(&netconsole)) + unregister_console(&netconsole); +} + /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. @@ -493,14 +520,18 @@ static ssize_t enabled_store(struct config_item *item, goto out_unlock; } - if (nt->extended && !console_is_registered(&netconsole_ext)) + if (nt->extended && !console_is_registered(&netconsole_ext)) { + netconsole_ext.flags |= CON_ENABLED; register_console(&netconsole_ext); + } /* User might be enabling the basic format target for the very * first time, make sure the console is registered. */ - if (!nt->extended && !console_is_registered(&netconsole)) + if (!nt->extended && !console_is_registered(&netconsole)) { + netconsole.flags |= CON_ENABLED; register_console(&netconsole); + } /* * Skip netpoll_parse_options() -- all the attributes are @@ -528,6 +559,10 @@ static ssize_t enabled_store(struct config_item *item, list_move(&nt->list, &target_cleanup_list); spin_unlock_irqrestore(&target_list_lock, flags); mutex_unlock(&target_cleanup_list_lock); + /* Unregister consoles, whose the last target of that type got + * disabled. + */ + unregister_netcons_consoles(); } ret = strnlen(buf, count); -- 2.51.0 From 69b25dd20c8368750d1e8b12cfe31387c545bdd1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:28 -0700 Subject: [PATCH 05/16] selftests: netconsole: Do not exit from inside the validation function Remove the exit call from validate_result() function and move the test exit logic to the main script. This allows the function to be reused in scenarios where the test needs to continue execution after validation, rather than terminating immediately. The validate_result() function should focus on validation logic only, while the calling script maintains control over program flow and exit conditions. This change improves code modularity and prepares for potential future enhancements where multiple validations might be needed in a single test run. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-3-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh | 1 - tools/testing/selftests/drivers/net/netcons_basic.sh | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 29b01b8e2215c..2d5dd3297693c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -185,7 +185,6 @@ function validate_result() { # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" - exit "${ksft_pass}" } function check_for_dependencies() { diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index fe765da498e84..d2f0685d24ba3 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -50,3 +50,5 @@ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" # Make sure the message was received in the dst part # and exit validate_result "${OUTPUT_FILE}" + +exit "${ksft_pass}" -- 2.51.0 From 224a6e602fb371b42ba5f854f32191d23b7e140a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 9 Jun 2025 02:46:29 -0700 Subject: [PATCH 06/16] selftests: netconsole: Add support for basic netconsole target format Extend the netconsole selftest to validate both basic and extended target formats. The basic format is a simpler variant that doesn't support userdata or release functionality. The test now validates that netconsole works correctly in both configurations, improving test coverage for different netconsole deployment scenarios. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250609-netcons_ext-v3-4-5336fa670326@debian.org Signed-off-by: Jakub Kicinski --- .../drivers/net/lib/sh/lib_netcons.sh | 26 ++++++++-- .../selftests/drivers/net/netcons_basic.sh | 48 ++++++++++++------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 2d5dd3297693c..71a5a8b1712c0 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -95,6 +95,8 @@ function set_network() { } function create_dynamic_target() { + local FORMAT=${1:-"extended"} + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') @@ -106,6 +108,16 @@ function create_dynamic_target() { echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > "${NETCONS_PATH}"/release + echo 0 > "${NETCONS_PATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NETCONS_PATH}"/extended + fi + echo 1 > "${NETCONS_PATH}"/enabled } @@ -159,6 +171,7 @@ function listen_port_and_save_to() { function validate_result() { local TMPFILENAME="$1" + local FORMAT=${2:-"extended"} # TMPFILENAME will contain something like: # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM @@ -176,10 +189,15 @@ function validate_result() { exit "${ksft_fail}" fi - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi fi # Delete the file once it is validated, otherwise keep it diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index d2f0685d24ba3..40a6ac6191b8b 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -32,23 +32,35 @@ check_for_dependencies echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT -# Create one namespace and two interfaces -set_network -# Create a dynamic target for netconsole -create_dynamic_target -# Set userdata "key" with the "value" value -set_user_data -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - -# Make sure the message was received in the dst part -# and exit -validate_result "${OUTPUT_FILE}" +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + echo "Running with target mode: ${FORMAT}" + # Create one namespace and two interfaces + set_network + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and destination interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + cleanup +done + +trap - EXIT exit "${ksft_pass}" -- 2.51.0 From c09ef59e17c6921c577d54bc8da4331b955d01a7 Mon Sep 17 00:00:00 2001 From: Dipayaan Roy Date: Mon, 9 Jun 2025 03:01:03 -0700 Subject: [PATCH 07/16] net: mana: Expose additional hardware counters for drop and TC via ethtool. Add support for reporting additional hardware counters for drop and TC using the ethtool -S interface. These counters include: - Aggregate Rx/Tx drop counters - Per-TC Rx/Tx packet counters - Per-TC Rx/Tx byte counters - Per-TC Rx/Tx pause frame counters The counters are exposed using ethtool_ops->get_ethtool_stats and ethtool_ops->get_strings. This feature/counters are not available to all versions of hardware. Signed-off-by: Dipayaan Roy Reviewed-by: Subbaraya Sundeep Reviewed-by: Haiyang Zhang Link: https://patch.msgid.link/20250609100103.GA7102@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Jakub Kicinski --- .../net/ethernet/microsoft/mana/hw_channel.c | 6 +- drivers/net/ethernet/microsoft/mana/mana_en.c | 87 +++++++++++- .../ethernet/microsoft/mana/mana_ethtool.c | 76 +++++++++- include/net/mana/mana.h | 131 ++++++++++++++++++ 4 files changed, 292 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index a8c4d8db75a56..3d3677c0d0147 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021, Microsoft Corporation. */ #include +#include #include #include @@ -890,8 +891,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, } if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { - dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", - ctx->status_code); + if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); err = -EPROTO; goto out; } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ccd2885c939e0..e68b8190bb7a8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -774,8 +774,9 @@ static int mana_send_request(struct mana_context *ac, void *in_buf, err = mana_gd_send_request(gc, in_len, in_buf, out_len, out_buf); if (err || resp->status) { - dev_err(dev, "Failed to send mana message: %d, 0x%x\n", - err, resp->status); + if (req->req.msg_type != MANA_QUERY_PHY_STAT) + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); return err ? err : -EPROTO; } @@ -2611,6 +2612,88 @@ void mana_query_gf_stats(struct mana_port_context *apc) apc->eth_stats.hc_tx_err_gdma = resp.tx_err_gdma; } +void mana_query_phy_stats(struct mana_port_context *apc) +{ + struct mana_query_phy_stat_resp resp = {}; + struct mana_query_phy_stat_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_PHY_STAT, + sizeof(req), sizeof(resp)); + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_PHY_STAT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, + "Failed to query PHY stats: %d, resp:0x%x\n", + err, resp.hdr.status); + return; + } + + /* Aggregate drop counters */ + apc->phy_stats.rx_pkt_drop_phy = resp.rx_pkt_drop_phy; + apc->phy_stats.tx_pkt_drop_phy = resp.tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + apc->phy_stats.rx_pkt_tc0_phy = resp.rx_pkt_tc0_phy; + apc->phy_stats.tx_pkt_tc0_phy = resp.tx_pkt_tc0_phy; + apc->phy_stats.rx_pkt_tc1_phy = resp.rx_pkt_tc1_phy; + apc->phy_stats.tx_pkt_tc1_phy = resp.tx_pkt_tc1_phy; + apc->phy_stats.rx_pkt_tc2_phy = resp.rx_pkt_tc2_phy; + apc->phy_stats.tx_pkt_tc2_phy = resp.tx_pkt_tc2_phy; + apc->phy_stats.rx_pkt_tc3_phy = resp.rx_pkt_tc3_phy; + apc->phy_stats.tx_pkt_tc3_phy = resp.tx_pkt_tc3_phy; + apc->phy_stats.rx_pkt_tc4_phy = resp.rx_pkt_tc4_phy; + apc->phy_stats.tx_pkt_tc4_phy = resp.tx_pkt_tc4_phy; + apc->phy_stats.rx_pkt_tc5_phy = resp.rx_pkt_tc5_phy; + apc->phy_stats.tx_pkt_tc5_phy = resp.tx_pkt_tc5_phy; + apc->phy_stats.rx_pkt_tc6_phy = resp.rx_pkt_tc6_phy; + apc->phy_stats.tx_pkt_tc6_phy = resp.tx_pkt_tc6_phy; + apc->phy_stats.rx_pkt_tc7_phy = resp.rx_pkt_tc7_phy; + apc->phy_stats.tx_pkt_tc7_phy = resp.tx_pkt_tc7_phy; + + /* Per TC byte Counters */ + apc->phy_stats.rx_byte_tc0_phy = resp.rx_byte_tc0_phy; + apc->phy_stats.tx_byte_tc0_phy = resp.tx_byte_tc0_phy; + apc->phy_stats.rx_byte_tc1_phy = resp.rx_byte_tc1_phy; + apc->phy_stats.tx_byte_tc1_phy = resp.tx_byte_tc1_phy; + apc->phy_stats.rx_byte_tc2_phy = resp.rx_byte_tc2_phy; + apc->phy_stats.tx_byte_tc2_phy = resp.tx_byte_tc2_phy; + apc->phy_stats.rx_byte_tc3_phy = resp.rx_byte_tc3_phy; + apc->phy_stats.tx_byte_tc3_phy = resp.tx_byte_tc3_phy; + apc->phy_stats.rx_byte_tc4_phy = resp.rx_byte_tc4_phy; + apc->phy_stats.tx_byte_tc4_phy = resp.tx_byte_tc4_phy; + apc->phy_stats.rx_byte_tc5_phy = resp.rx_byte_tc5_phy; + apc->phy_stats.tx_byte_tc5_phy = resp.tx_byte_tc5_phy; + apc->phy_stats.rx_byte_tc6_phy = resp.rx_byte_tc6_phy; + apc->phy_stats.tx_byte_tc6_phy = resp.tx_byte_tc6_phy; + apc->phy_stats.rx_byte_tc7_phy = resp.rx_byte_tc7_phy; + apc->phy_stats.tx_byte_tc7_phy = resp.tx_byte_tc7_phy; + + /* Per TC pause Counters */ + apc->phy_stats.rx_pause_tc0_phy = resp.rx_pause_tc0_phy; + apc->phy_stats.tx_pause_tc0_phy = resp.tx_pause_tc0_phy; + apc->phy_stats.rx_pause_tc1_phy = resp.rx_pause_tc1_phy; + apc->phy_stats.tx_pause_tc1_phy = resp.tx_pause_tc1_phy; + apc->phy_stats.rx_pause_tc2_phy = resp.rx_pause_tc2_phy; + apc->phy_stats.tx_pause_tc2_phy = resp.tx_pause_tc2_phy; + apc->phy_stats.rx_pause_tc3_phy = resp.rx_pause_tc3_phy; + apc->phy_stats.tx_pause_tc3_phy = resp.tx_pause_tc3_phy; + apc->phy_stats.rx_pause_tc4_phy = resp.rx_pause_tc4_phy; + apc->phy_stats.tx_pause_tc4_phy = resp.tx_pause_tc4_phy; + apc->phy_stats.rx_pause_tc5_phy = resp.rx_pause_tc5_phy; + apc->phy_stats.tx_pause_tc5_phy = resp.tx_pause_tc5_phy; + apc->phy_stats.rx_pause_tc6_phy = resp.rx_pause_tc6_phy; + apc->phy_stats.tx_pause_tc6_phy = resp.tx_pause_tc6_phy; + apc->phy_stats.rx_pause_tc7_phy = resp.rx_pause_tc7_phy; + apc->phy_stats.tx_pause_tc7_phy = resp.tx_pause_tc7_phy; +} + static int mana_init_port(struct net_device *ndev) { struct mana_port_context *apc = netdev_priv(ndev); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c419626073f5f..4fb3a04994a2d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -7,10 +7,12 @@ #include -static const struct { +struct mana_stats_desc { char name[ETH_GSTRING_LEN]; u16 offset; -} mana_eth_stats[] = { +}; + +static const struct mana_stats_desc mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, {"hc_rx_discards_no_wqe", offsetof(struct mana_ethtool_stats, @@ -75,6 +77,59 @@ static const struct { rx_cqe_unknown_type)}, }; +static const struct mana_stats_desc mana_phy_stats[] = { + { "hc_rx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_drop_phy) }, + { "hc_tx_pkt_drop_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_drop_phy) }, + { "hc_tc0_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc0_phy) }, + { "hc_tc0_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc0_phy) }, + { "hc_tc0_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc0_phy) }, + { "hc_tc0_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc0_phy) }, + { "hc_tc1_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc1_phy) }, + { "hc_tc1_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc1_phy) }, + { "hc_tc1_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc1_phy) }, + { "hc_tc1_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc1_phy) }, + { "hc_tc2_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc2_phy) }, + { "hc_tc2_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc2_phy) }, + { "hc_tc2_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc2_phy) }, + { "hc_tc2_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc2_phy) }, + { "hc_tc3_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc3_phy) }, + { "hc_tc3_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc3_phy) }, + { "hc_tc3_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc3_phy) }, + { "hc_tc3_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc3_phy) }, + { "hc_tc4_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc4_phy) }, + { "hc_tc4_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc4_phy) }, + { "hc_tc4_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc4_phy) }, + { "hc_tc4_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc4_phy) }, + { "hc_tc5_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc5_phy) }, + { "hc_tc5_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc5_phy) }, + { "hc_tc5_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc5_phy) }, + { "hc_tc5_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc5_phy) }, + { "hc_tc6_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc6_phy) }, + { "hc_tc6_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc6_phy) }, + { "hc_tc6_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc6_phy) }, + { "hc_tc6_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc6_phy) }, + { "hc_tc7_rx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, rx_pkt_tc7_phy) }, + { "hc_tc7_rx_byte_phy", offsetof(struct mana_ethtool_phy_stats, rx_byte_tc7_phy) }, + { "hc_tc7_tx_pkt_phy", offsetof(struct mana_ethtool_phy_stats, tx_pkt_tc7_phy) }, + { "hc_tc7_tx_byte_phy", offsetof(struct mana_ethtool_phy_stats, tx_byte_tc7_phy) }, + { "hc_tc0_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc0_phy) }, + { "hc_tc0_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc0_phy) }, + { "hc_tc1_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc1_phy) }, + { "hc_tc1_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc1_phy) }, + { "hc_tc2_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc2_phy) }, + { "hc_tc2_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc2_phy) }, + { "hc_tc3_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc3_phy) }, + { "hc_tc3_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc3_phy) }, + { "hc_tc4_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc4_phy) }, + { "hc_tc4_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc4_phy) }, + { "hc_tc5_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc5_phy) }, + { "hc_tc5_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc5_phy) }, + { "hc_tc6_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc6_phy) }, + { "hc_tc6_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc6_phy) }, + { "hc_tc7_rx_pause_phy", offsetof(struct mana_ethtool_phy_stats, rx_pause_tc7_phy) }, + { "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) }, +}; + static int mana_get_sset_count(struct net_device *ndev, int stringset) { struct mana_port_context *apc = netdev_priv(ndev); @@ -83,8 +138,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) if (stringset != ETH_SS_STATS) return -EINVAL; - return ARRAY_SIZE(mana_eth_stats) + num_queues * - (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); + return ARRAY_SIZE(mana_eth_stats) + ARRAY_SIZE(mana_phy_stats) + + num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); } static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) @@ -99,6 +154,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) ethtool_puts(&data, mana_eth_stats[i].name); + for (i = 0; i < ARRAY_SIZE(mana_phy_stats); i++) + ethtool_puts(&data, mana_phy_stats[i].name); + for (i = 0; i < num_queues; i++) { ethtool_sprintf(&data, "rx_%d_packets", i); ethtool_sprintf(&data, "rx_%d_bytes", i); @@ -128,6 +186,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev, struct mana_port_context *apc = netdev_priv(ndev); unsigned int num_queues = apc->num_queues; void *eth_stats = &apc->eth_stats; + void *phy_stats = &apc->phy_stats; struct mana_stats_rx *rx_stats; struct mana_stats_tx *tx_stats; unsigned int start; @@ -151,9 +210,18 @@ static void mana_get_ethtool_stats(struct net_device *ndev, /* we call mana function to update stats from GDMA */ mana_query_gf_stats(apc); + /* We call this mana function to get the phy stats from GDMA and includes + * aggregate tx/rx drop counters, Per-TC(Traffic Channel) tx/rx and pause + * counters. + */ + mana_query_phy_stats(apc); + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + for (q = 0; q < ARRAY_SIZE(mana_phy_stats); q++) + data[i++] = *(u64 *)(phy_stats + mana_phy_stats[q].offset); + for (q = 0; q < num_queues; q++) { rx_stats = &apc->rxqs[q]->stats; diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 9abb664612110..4176edf1be719 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -404,6 +404,65 @@ struct mana_ethtool_stats { u64 rx_cqe_unknown_type; }; +struct mana_ethtool_phy_stats { + /* Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; + struct mana_context { struct gdma_dev *gdma_dev; @@ -474,6 +533,8 @@ struct mana_port_context { struct mana_ethtool_stats eth_stats; + struct mana_ethtool_phy_stats phy_stats; + /* Debugfs */ struct dentry *mana_port_debugfs; }; @@ -501,6 +562,7 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); void mana_query_gf_stats(struct mana_port_context *apc); +void mana_query_phy_stats(struct mana_port_context *apc); int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); @@ -527,6 +589,7 @@ enum mana_command_code { MANA_FENCE_RQ = 0x20006, MANA_CONFIG_VPORT_RX = 0x20007, MANA_QUERY_VPORT_CONFIG = 0x20008, + MANA_QUERY_PHY_STAT = 0x2000c, /* Privileged commands for the PF mode */ MANA_REGISTER_FILTER = 0x28000, @@ -689,6 +752,74 @@ struct mana_query_gf_stat_resp { u64 tx_err_gdma; }; /* HW DATA */ +/* Query phy stats */ +struct mana_query_phy_stat_req { + struct gdma_req_hdr hdr; + u64 req_stats; +}; /* HW DATA */ + +struct mana_query_phy_stat_resp { + struct gdma_resp_hdr hdr; + u64 reported_stats; + + /* Aggregate Drop Counters */ + u64 rx_pkt_drop_phy; + u64 tx_pkt_drop_phy; + + /* Per TC(Traffic class) traffic Counters */ + u64 rx_pkt_tc0_phy; + u64 tx_pkt_tc0_phy; + u64 rx_pkt_tc1_phy; + u64 tx_pkt_tc1_phy; + u64 rx_pkt_tc2_phy; + u64 tx_pkt_tc2_phy; + u64 rx_pkt_tc3_phy; + u64 tx_pkt_tc3_phy; + u64 rx_pkt_tc4_phy; + u64 tx_pkt_tc4_phy; + u64 rx_pkt_tc5_phy; + u64 tx_pkt_tc5_phy; + u64 rx_pkt_tc6_phy; + u64 tx_pkt_tc6_phy; + u64 rx_pkt_tc7_phy; + u64 tx_pkt_tc7_phy; + + u64 rx_byte_tc0_phy; + u64 tx_byte_tc0_phy; + u64 rx_byte_tc1_phy; + u64 tx_byte_tc1_phy; + u64 rx_byte_tc2_phy; + u64 tx_byte_tc2_phy; + u64 rx_byte_tc3_phy; + u64 tx_byte_tc3_phy; + u64 rx_byte_tc4_phy; + u64 tx_byte_tc4_phy; + u64 rx_byte_tc5_phy; + u64 tx_byte_tc5_phy; + u64 rx_byte_tc6_phy; + u64 tx_byte_tc6_phy; + u64 rx_byte_tc7_phy; + u64 tx_byte_tc7_phy; + + /* Per TC(Traffic Class) pause Counters */ + u64 rx_pause_tc0_phy; + u64 tx_pause_tc0_phy; + u64 rx_pause_tc1_phy; + u64 tx_pause_tc1_phy; + u64 rx_pause_tc2_phy; + u64 tx_pause_tc2_phy; + u64 rx_pause_tc3_phy; + u64 tx_pause_tc3_phy; + u64 rx_pause_tc4_phy; + u64 tx_pause_tc4_phy; + u64 rx_pause_tc5_phy; + u64 tx_pause_tc5_phy; + u64 rx_pause_tc6_phy; + u64 tx_pause_tc6_phy; + u64 rx_pause_tc7_phy; + u64 tx_pause_tc7_phy; +}; /* HW DATA */ + /* Configure vPort Rx Steering */ struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; -- 2.51.0 From 31557b3487b349464daf42bc4366153743c1e727 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 07:39:33 -0700 Subject: [PATCH 08/16] uapi: in6: restore visibility of most IPv6 socket options MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A decade ago commit 6d08acd2d32e ("in6: fix conflict with glibc") hid the definitions of IPV6 options, because GCC was complaining about duplicates. The commit did not list the warnings seen, but trying to recreate them now I think they are (building iproute2): In file included from ./include/uapi/rdma/rdma_user_cm.h:39, from rdma.h:16, from res.h:9, from res-ctx.c:7: ../include/uapi/linux/in6.h:171:9: warning: ‘IPV6_ADD_MEMBERSHIP’ redefined 171 | #define IPV6_ADD_MEMBERSHIP 20 | ^~~~~~~~~~~~~~~~~~~ In file included from /usr/include/netinet/in.h:37, from rdma.h:13: /usr/include/bits/in.h:233:10: note: this is the location of the previous definition 233 | # define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP | ^~~~~~~~~~~~~~~~~~~ ../include/uapi/linux/in6.h:172:9: warning: ‘IPV6_DROP_MEMBERSHIP’ redefined 172 | #define IPV6_DROP_MEMBERSHIP 21 | ^~~~~~~~~~~~~~~~~~~~ /usr/include/bits/in.h:234:10: note: this is the location of the previous definition 234 | # define IPV6_DROP_MEMBERSHIP IPV6_LEAVE_GROUP | ^~~~~~~~~~~~~~~~~~~~ Compilers don't complain about redefinition if the defines are identical, but here we have the kernel using the literal value, and glibc using an indirection (defining to a name of another define, with the same numerical value). Problem is, the commit in question hid all the IPV6 socket options, and glibc has a pretty sparse list. For instance it lacks Flow Label related options. Willem called this out in commit 3fb321fde22d ("selftests/net: ipv6 flowlabel"): /* uapi/glibc weirdness may leave this undefined */ #ifndef IPV6_FLOWINFO #define IPV6_FLOWINFO 11 #endif More interestingly some applications (socat) use a #ifdef IPV6_FLOWINFO to gate compilation of thier rudimentary flow label support. (For added confusion socat misspells it as IPV4_FLOWINFO in some places.) Hide only the two defines we know glibc has a problem with. If we discover more warnings we can hide more but we should avoid covering the entire block of defines for "IPV6 socket options". Link: https://patch.msgid.link/20250609143933.1654417-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/in6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index ff8d21f9e95b7..5a47339ef7d76 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -152,7 +152,6 @@ struct in6_flowlabel_req { /* * IPV6 socket options */ -#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADDRFORM 1 #define IPV6_2292PKTINFO 2 #define IPV6_2292HOPOPTS 3 @@ -169,8 +168,10 @@ struct in6_flowlabel_req { #define IPV6_MULTICAST_IF 17 #define IPV6_MULTICAST_HOPS 18 #define IPV6_MULTICAST_LOOP 19 +#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADD_MEMBERSHIP 20 #define IPV6_DROP_MEMBERSHIP 21 +#endif #define IPV6_ROUTER_ALERT 22 #define IPV6_MTU_DISCOVER 23 #define IPV6_MTU 24 @@ -203,7 +204,6 @@ struct in6_flowlabel_req { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 #define IPV6_HDRINCL 36 -#endif /* * Multicast: -- 2.51.0 From 1f07789152b8d1e646d6bfecd96a2cf7bd2b9a05 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 9 Jun 2025 16:23:30 +0100 Subject: [PATCH 09/16] cxgb3/l2t: Remove unused t3_l2t_send_event The last use of t3_l2t_send_event() was removed in 2019 by commit 30e0f6cf5acb ("RDMA/iw_cxgb3: Remove the iw_cxgb3 module from kernel") Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20250609152330.24027-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/l2t.c | 37 ------------------------ drivers/net/ethernet/chelsio/cxgb3/l2t.h | 1 - 2 files changed, 38 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c index 9749d1239f58e..5d5f3380ecca8 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c @@ -176,43 +176,6 @@ again: EXPORT_SYMBOL(t3_l2t_send_slow); -void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e) -{ -again: - switch (e->state) { - case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ - neigh_event_send(e->neigh, NULL); - spin_lock_bh(&e->lock); - if (e->state == L2T_STATE_STALE) { - e->state = L2T_STATE_VALID; - } - spin_unlock_bh(&e->lock); - return; - case L2T_STATE_VALID: /* fast-path, send the packet on */ - return; - case L2T_STATE_RESOLVING: - spin_lock_bh(&e->lock); - if (e->state != L2T_STATE_RESOLVING) { - /* ARP already completed */ - spin_unlock_bh(&e->lock); - goto again; - } - spin_unlock_bh(&e->lock); - - /* - * Only the first packet added to the arpq should kick off - * resolution. However, because the alloc_skb below can fail, - * we allow each packet added to the arpq to retry resolution - * as a way of recovering from transient memory exhaustion. - * A better way would be to use a work request to retry L2T - * entries when there's no memory. - */ - neigh_event_send(e->neigh, NULL); - } -} - -EXPORT_SYMBOL(t3_l2t_send_event); - /* * Allocate a free L2T entry. Must be called with l2t_data.lock held. */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h index 646ca0bc25bd0..33558f177497e 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h @@ -113,7 +113,6 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, struct net_device *dev, const void *daddr); int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, struct l2t_entry *e); -void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e); struct l2t_data *t3_init_l2t(unsigned int l2t_capacity); int cxgb3_ofld_send(struct t3cdev *dev, struct sk_buff *skb); -- 2.51.0 From 561939ed44932da639ba703ffcd4d4d5ff2c7569 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 9 Jun 2025 11:32:35 -0400 Subject: [PATCH 10/16] net: remove unused sock_enable_timestamps This function was introduced in commit 783da70e8396 ("net: add sock_enable_timestamps"), with one caller in rxrpc. That only caller was removed in commit 7903d4438b3f ("rxrpc: Don't use received skbuff timestamps"). Signed-off-by: Willem de Bruijn Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250609153254.3504909-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 1 - net/core/sock.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cca..85e17da5c9db1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2982,7 +2982,6 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool); int sock_set_timestamping(struct sock *sk, int optname, struct so_timestamping timestamping); -void sock_enable_timestamps(struct sock *sk); #if defined(CONFIG_CGROUP_BPF) void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op); #else diff --git a/net/core/sock.c b/net/core/sock.c index 3b409bc8ef6d8..502042a0d3b5f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -837,14 +837,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) } } -void sock_enable_timestamps(struct sock *sk) -{ - lock_sock(sk); - __sock_set_timestamps(sk, true, false, true); - release_sock(sk); -} -EXPORT_SYMBOL(sock_enable_timestamps); - void sock_set_timestamp(struct sock *sk, int optname, bool valbool) { switch (optname) { -- 2.51.0 From 2bc64b89c4c4073ee8f9543373c64da9b6bbe5e0 Mon Sep 17 00:00:00 2001 From: Gur Stavi Date: Mon, 9 Jun 2025 18:07:52 +0300 Subject: [PATCH 11/16] queue_api: add subqueue variant netif_subqueue_sent Add a new function, netif_subqueue_sent, which is a wrapper for netdev_tx_sent_queue. Drivers that use the subqueue variant macros, netif_subqueue_xxx, identify queue by index and are not required to obtain struct netdev_queue explicitly. Such drivers still need to call netdev_tx_sent_queue which is a counterpart of netif_subqueue_completed_wake. Allowing drivers to use a subqueue variant for this purpose improves their code consistency by always referring to queue by its index. Signed-off-by: Gur Stavi Link: https://patch.msgid.link/909a5c92db49cad39f0954d6cb86775e6480ef4c.1749038081.git.gur.stavi@huawei.com Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index ba2eaf39089b3..6e835972abd13 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -294,6 +294,15 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue, netif_txq_try_stop(_txq, get_desc, start_thrs); \ }) +static inline void netif_subqueue_sent(const struct net_device *dev, + unsigned int idx, unsigned int bytes) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, idx); + netdev_tx_sent_queue(txq, bytes); +} + #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ({ \ struct netdev_queue *_txq; \ -- 2.51.0 From eb89bc3744f35383b34b9df055622831ce24dc40 Mon Sep 17 00:00:00 2001 From: Gur Stavi Date: Mon, 9 Jun 2025 18:07:53 +0300 Subject: [PATCH 12/16] hinic3: use netif_subqueue_sent api Improve consistency of code by using only netif_subqueue variant apis Signed-off-by: Gur Stavi Link: https://patch.msgid.link/5fd897b75729cf078385aacd9ed40091314ea63d.1749038081.git.gur.stavi@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic3/hinic3_tx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c index ae08257dd1d23..7b6f101da3139 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c @@ -542,8 +542,7 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb, goto err_drop_pkt; } - netdev_tx_sent_queue(netdev_get_tx_queue(netdev, txq->sq->q_id), - skb->len); + netif_subqueue_sent(netdev, txq->sq->q_id, skb->len); netif_subqueue_maybe_stop(netdev, tx_q->sq->q_id, hinic3_wq_free_wqebbs(&tx_q->sq->wq), tx_q->tx_stop_thrs, -- 2.51.0 From 48b9ce0a7c721c4c65697de8396468fae67631de Mon Sep 17 00:00:00 2001 From: Gur Stavi Date: Mon, 9 Jun 2025 18:07:54 +0300 Subject: [PATCH 13/16] hinic3: remove tx_q name collision hack A local variable of tx_q worked around name collision with internal txq variable in netif_subqueue macros. This workaround is no longer needed. Signed-off-by: Gur Stavi Link: https://patch.msgid.link/6376db2a39b8d3bf2fa893f58f56246bed128d5d.1749038081.git.gur.stavi@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/huawei/hinic3/hinic3_tx.c | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c index 7b6f101da3139..3f7f73430be41 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c @@ -482,7 +482,6 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb, { struct hinic3_sq_wqe_combo wqe_combo = {}; struct hinic3_tx_info *tx_info; - struct hinic3_txq *tx_q = txq; u32 offload, queue_info = 0; struct hinic3_sq_task task; u16 wqebb_cnt, num_sge; @@ -506,9 +505,9 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb, if (likely(wqebb_cnt > txq->tx_stop_thrs)) txq->tx_stop_thrs = min(wqebb_cnt, txq->tx_start_thrs); - netif_subqueue_try_stop(netdev, tx_q->sq->q_id, - hinic3_wq_free_wqebbs(&tx_q->sq->wq), - tx_q->tx_start_thrs); + netif_subqueue_try_stop(netdev, txq->sq->q_id, + hinic3_wq_free_wqebbs(&txq->sq->wq), + txq->tx_start_thrs); return NETDEV_TX_BUSY; } @@ -543,10 +542,10 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb, } netif_subqueue_sent(netdev, txq->sq->q_id, skb->len); - netif_subqueue_maybe_stop(netdev, tx_q->sq->q_id, - hinic3_wq_free_wqebbs(&tx_q->sq->wq), - tx_q->tx_stop_thrs, - tx_q->tx_start_thrs); + netif_subqueue_maybe_stop(netdev, txq->sq->q_id, + hinic3_wq_free_wqebbs(&txq->sq->wq), + txq->tx_stop_thrs, + txq->tx_start_thrs); hinic3_prepare_sq_ctrl(&wqe_combo, queue_info, num_sge, owner); hinic3_write_db(txq->sq, 0, DB_CFLAG_DP_SQ, @@ -630,7 +629,6 @@ bool hinic3_tx_poll(struct hinic3_txq *txq, int budget) struct net_device *netdev = txq->netdev; u16 hw_ci, sw_ci, q_id = txq->sq->q_id; struct hinic3_tx_info *tx_info; - struct hinic3_txq *tx_q = txq; unsigned int bytes_compl = 0; unsigned int pkts = 0; u16 wqebb_cnt = 0; @@ -662,8 +660,8 @@ bool hinic3_tx_poll(struct hinic3_txq *txq, int budget) hinic3_wq_put_wqebbs(&txq->sq->wq, wqebb_cnt); netif_subqueue_completed_wake(netdev, q_id, pkts, bytes_compl, - hinic3_wq_free_wqebbs(&tx_q->sq->wq), - tx_q->tx_start_thrs); + hinic3_wq_free_wqebbs(&txq->sq->wq), + txq->tx_start_thrs); return pkts == HINIC3_TX_POLL_WEIGHT; } -- 2.51.0 From d0976b43956ee8c8bd093223df9115bfcf63dfe5 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 9 Jun 2025 21:21:49 +0530 Subject: [PATCH 14/16] octeontx2: Annotate mmio regions as __iomem This patch removes unnecessary typecasts by marking the mbox_regions array as __iomem since it is used to store pointers to memory-mapped I/O (MMIO) regions. Also simplified the call to readq() in PF driver by removing redundant type casts. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1749484309-3434-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 12 ++++++------ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index a8025f0486c9f..43eea74bf5413 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2364,7 +2364,7 @@ static inline void rvu_afvf_mbox_up_handler(struct work_struct *work) __rvu_mbox_up_handler(mwork, TYPE_AFVF); } -static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, +static int rvu_get_mbox_regions(struct rvu *rvu, void __iomem **mbox_addr, int num, int type, unsigned long *pf_bmap) { struct rvu_hwinfo *hw = rvu->hw; @@ -2389,7 +2389,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, bar4 = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR); bar4 += region * MBOX_SIZE; } - mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE); + mbox_addr[region] = ioremap_wc(bar4, MBOX_SIZE); if (!mbox_addr[region]) goto error; } @@ -2412,7 +2412,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, RVU_AF_PF_BAR4_ADDR); bar4 += region * MBOX_SIZE; } - mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE); + mbox_addr[region] = ioremap_wc(bar4, MBOX_SIZE); if (!mbox_addr[region]) goto error; } @@ -2420,7 +2420,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, error: while (region--) - iounmap((void __iomem *)mbox_addr[region]); + iounmap(mbox_addr[region]); return -ENOMEM; } @@ -2430,10 +2430,10 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, void (mbox_up_handler)(struct work_struct *)) { int err = -EINVAL, i, dir, dir_up; + void __iomem **mbox_regions; void __iomem *reg_base; struct rvu_work *mwork; unsigned long *pf_bmap; - void **mbox_regions; const char *name; u64 cfg; @@ -2456,7 +2456,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, mutex_init(&rvu->mbox_lock); - mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); + mbox_regions = kcalloc(num, sizeof(void __iomem *), GFP_KERNEL); if (!mbox_regions) { err = -ENOMEM; goto free_bitmap; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index db7c466fdc39e..83deebc37b34d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -602,8 +602,7 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + MBOX_SIZE; else - base = readq((void __iomem *)((u64)pf->reg_base + - RVU_PF_VF_BAR4_ADDR)); + base = readq(pf->reg_base + RVU_PF_VF_BAR4_ADDR); hwbase = ioremap_wc(base, MBOX_SIZE * pf->total_vfs); if (!hwbase) { -- 2.51.0 From c4246f4cce05f1134fb9ea82460b5690ab6710a5 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 9 Jun 2025 21:23:41 +0530 Subject: [PATCH 15/16] octeontx2-pf: Avoid typecasts by simplifying otx2_atomic64_add macro Just because otx2_atomic64_add is using u64 pointer as argument all callers has to typecast __iomem void pointers which inturn causing sparse warnings. Fix those by changing otx2_atomic64_add argument to void pointer. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1749484421-3607-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/nic/otx2_common.c | 17 +++++++++-------- .../marvell/octeontx2/nic/otx2_common.h | 11 ++++++++--- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 4 ++-- .../net/ethernet/marvell/octeontx2/nic/qos_sq.c | 5 +++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6f572589f1e5c..713928d81b9e1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -28,12 +28,12 @@ static void otx2_nix_rq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) { u64 incr = (u64)qidx << 32; - u64 *ptr; + void __iomem *ptr; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS); stats->bytes = otx2_atomic64_add(incr, ptr); - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_PKTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_PKTS); stats->pkts = otx2_atomic64_add(incr, ptr); } @@ -41,12 +41,12 @@ static void otx2_nix_sq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) { u64 incr = (u64)qidx << 32; - u64 *ptr; + void __iomem *ptr; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_OCTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_OCTS); stats->bytes = otx2_atomic64_add(incr, ptr); - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_PKTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_PKTS); stats->pkts = otx2_atomic64_add(incr, ptr); } @@ -860,9 +860,10 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) { int qidx, sqe_tail, sqe_head; struct otx2_snd_queue *sq; - u64 incr, *ptr, val; + void __iomem *ptr; + u64 incr, val; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { sq = &pfvf->qset.sq[qidx]; if (!sq->sqb_ptrs) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index ca0e6ab12cebe..a2a7fc99695d7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -730,8 +730,9 @@ static inline void otx2_write128(u64 lo, u64 hi, void __iomem *addr) ::[x0]"r"(lo), [x1]"r"(hi), [p1]"r"(addr)); } -static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) +static inline u64 otx2_atomic64_add(u64 incr, void __iomem *addr) { + u64 __iomem *ptr = addr; u64 result; __asm__ volatile(".cpu generic+lse\n" @@ -744,7 +745,11 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) #else #define otx2_write128(lo, hi, addr) writeq((hi) | (lo), addr) -#define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) + +static inline u64 otx2_atomic64_add(u64 incr, void __iomem *addr) +{ + return 0; +} #endif static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, @@ -794,7 +799,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { - u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); + void __iomem *ptr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); u64 incr = (u64)aura | BIT_ULL(63); return otx2_atomic64_add(incr, ptr); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 83deebc37b34d..07da4d6dbbc99 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1322,8 +1322,8 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) { struct otx2_nic *pf = data; struct otx2_snd_queue *sq; - u64 val, *ptr; - u64 qidx = 0; + void __iomem *ptr; + u64 val, qidx = 0; /* CQ */ for (qidx = 0; qidx < pf->qset.cq_cnt; qidx++) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index 58d572ce08eff..2872adabc8305 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -151,9 +151,10 @@ static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx) static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx) { int sqe_tail, sqe_head; - u64 incr, *ptr, val; + void __iomem *ptr; + u64 incr, val; - ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); incr = (u64)qidx << 32; val = otx2_atomic64_add(incr, ptr); sqe_head = (val >> 20) & 0x3F; -- 2.51.0 From 7fc18f9476256c03755d93b1cec0d42cf64d850a Mon Sep 17 00:00:00 2001 From: Moon Yeounsu Date: Tue, 10 Jun 2025 09:01:30 +0900 Subject: [PATCH 16/16] net: dlink: enable RMON MMIO access on supported devices Enable memory-mapped I/O access to RMON statistics registers for devices known to work correctly. Currently, only the D-Link DGE-550T (`0x4000`) with PCI revision A3 (`0x0c`) is allowed. To avoid issues on other hardware, a runtime check was added to restrict MMIO usage. The `MEM_MAPPING` macro was removed in favor of runtime detection. To access RMON registers, the code `dw32(RmonStatMask, 0x0007ffff);` must also be skipped, so this patch conditionally disables it as well. Tested-on: D-Link DGE-550T Rev-A3 Signed-off-by: Moon Yeounsu Link: https://patch.msgid.link/20250610000130.49065-2-yyyynoom@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 57 ++++++++++++++++--------------- drivers/net/ethernet/dlink/dl2k.h | 2 ++ 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 038a0400c1f95..ea8361ba6cad0 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -99,6 +99,13 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = rio_tx_timeout, }; +static bool is_support_rmon_mmio(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_DLINK && + pdev->device == 0x4000 && + pdev->revision == 0x0c; +} + static int rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -131,18 +138,22 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) np = netdev_priv(dev); + if (is_support_rmon_mmio(pdev)) + np->rmon_enable = true; + /* IO registers range. */ ioaddr = pci_iomap(pdev, 0, 0); if (!ioaddr) goto err_out_dev; np->eeprom_addr = ioaddr; -#ifdef MEM_MAPPING - /* MM registers range. */ - ioaddr = pci_iomap(pdev, 1, 0); - if (!ioaddr) - goto err_out_iounmap; -#endif + if (np->rmon_enable) { + /* MM registers range. */ + ioaddr = pci_iomap(pdev, 1, 0); + if (!ioaddr) + goto err_out_iounmap; + } + np->ioaddr = ioaddr; np->chip_id = chip_idx; np->pdev = pdev; @@ -289,9 +300,8 @@ err_out_unmap_tx: dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, np->tx_ring_dma); err_out_iounmap: -#ifdef MEM_MAPPING - pci_iounmap(pdev, np->ioaddr); -#endif + if (np->rmon_enable) + pci_iounmap(pdev, np->ioaddr); pci_iounmap(pdev, np->eeprom_addr); err_out_dev: free_netdev (dev); @@ -578,7 +588,8 @@ static void rio_hw_init(struct net_device *dev) dw8(TxDMAPollPeriod, 0xff); dw8(RxDMABurstThresh, 0x30); dw8(RxDMAUrgentThresh, 0x30); - dw32(RmonStatMask, 0x0007ffff); + if (!np->rmon_enable) + dw32(RmonStatMask, 0x0007ffff); /* clear statistics */ clear_stats (dev); @@ -1076,9 +1087,6 @@ get_stats (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; -#ifdef MEM_MAPPING - int i; -#endif unsigned int stat_reg; unsigned long flags; @@ -1123,10 +1131,10 @@ get_stats (struct net_device *dev) dr16(MacControlFramesXmtd); dr16(FramesWEXDeferal); -#ifdef MEM_MAPPING - for (i = 0x100; i <= 0x150; i += 4) - dr32(i); -#endif + if (np->rmon_enable) + for (int i = 0x100; i <= 0x150; i += 4) + dr32(i); + dr16(TxJumboFrames); dr16(RxJumboFrames); dr16(TCPCheckSumErrors); @@ -1143,9 +1151,6 @@ clear_stats (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; -#ifdef MEM_MAPPING - int i; -#endif /* All statistics registers need to be acknowledged, else statistic overflow could cause problems */ @@ -1181,10 +1186,9 @@ clear_stats (struct net_device *dev) dr16(BcstFramesXmtdOk); dr16(MacControlFramesXmtd); dr16(FramesWEXDeferal); -#ifdef MEM_MAPPING - for (i = 0x100; i <= 0x150; i += 4) - dr32(i); -#endif + if (np->rmon_enable) + for (int i = 0x100; i <= 0x150; i += 4) + dr32(i); dr16(TxJumboFrames); dr16(RxJumboFrames); dr16(TCPCheckSumErrors); @@ -1810,9 +1814,8 @@ rio_remove1 (struct pci_dev *pdev) np->rx_ring_dma); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring, np->tx_ring_dma); -#ifdef MEM_MAPPING - pci_iounmap(pdev, np->ioaddr); -#endif + if (np->rmon_enable) + pci_iounmap(pdev, np->ioaddr); pci_iounmap(pdev, np->eeprom_addr); free_netdev (dev); pci_release_regions (pdev); diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index ba679025e8667..4788cc94639d1 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -403,6 +403,8 @@ struct netdev_private { u16 negotiate; /* Negotiated media */ int phy_addr; /* PHY addresses. */ u16 led_mode; /* LED mode read from EEPROM (IP1000A only) */ + + bool rmon_enable; }; /* The station address location in the EEPROM. */ -- 2.51.0