From 3fccba8fdc1b69a2a5a6f3100c3eacf6125aca7e Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Thu, 7 Nov 2024 14:36:35 +0800 Subject: [PATCH 01/16] net: stmmac: dwmac4: Fix the MTL_OP_MODE_*_MASK operation In order to mask off the bits, we need to use the '~' operator to invert all the bits of _MASK and clear them. Signed-off-by: Ley Foon Tan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241107063637.2122726-3-leyfoon.tan@starfivetech.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 22a044d93e17..0cb84a0041a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -274,7 +274,7 @@ static void dwmac4_dma_rx_chan_op_mode(struct stmmac_priv *priv, } else { pr_debug("GMAC: disable RX SF mode (threshold %d)\n", mode); mtl_rx_op &= ~MTL_OP_MODE_RSF; - mtl_rx_op &= MTL_OP_MODE_RTC_MASK; + mtl_rx_op &= ~MTL_OP_MODE_RTC_MASK; if (mode <= 32) mtl_rx_op |= MTL_OP_MODE_RTC_32; else if (mode <= 64) @@ -343,7 +343,7 @@ static void dwmac4_dma_tx_chan_op_mode(struct stmmac_priv *priv, } else { pr_debug("GMAC: disabling TX SF (threshold %d)\n", mode); mtl_tx_op &= ~MTL_OP_MODE_TSF; - mtl_tx_op &= MTL_OP_MODE_TTC_MASK; + mtl_tx_op &= ~MTL_OP_MODE_TTC_MASK; /* Set the transmit threshold */ if (mode <= 32) mtl_tx_op |= MTL_OP_MODE_TTC_32; -- 2.51.0 From 671672977012b8ef89fe4e6d6965a2e6b45f3523 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Thu, 7 Nov 2024 14:36:36 +0800 Subject: [PATCH 02/16] net: stmmac: dwmac4: Receive Watchdog Timeout is not in abnormal interrupt summary The Receive Watchdog Timeout (RWT, bit[9]) is not part of Abnormal Interrupt Summary (AIS). Move the RWT handling out of the AIS condition statement. 
From databook, the AIS is the logical OR of the following interrupt bits: - Bit 1: Transmit Process Stopped - Bit 7: Receive Buffer Unavailable - Bit 8: Receive Process Stopped - Bit 10: Early Transmit Interrupt - Bit 12: Fatal Bus Error - Bit 13: Context Descriptor Error Signed-off-by: Ley Foon Tan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241107063637.2122726-4-leyfoon.tan@starfivetech.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 0d185e54eb7e..57c03d491774 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -185,8 +185,6 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, x->rx_buf_unav_irq++; if (unlikely(intr_status & DMA_CHAN_STATUS_RPS)) x->rx_process_stopped_irq++; - if (unlikely(intr_status & DMA_CHAN_STATUS_RWT)) - x->rx_watchdog_irq++; if (unlikely(intr_status & DMA_CHAN_STATUS_ETI)) x->tx_early_irq++; if (unlikely(intr_status & DMA_CHAN_STATUS_TPS)) { @@ -198,6 +196,10 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, ret = tx_hard_error; } } + + if (unlikely(intr_status & DMA_CHAN_STATUS_RWT)) + x->rx_watchdog_irq++; + /* TX/RX NORMAL interrupts */ if (likely(intr_status & DMA_CHAN_STATUS_RI)) { u64_stats_update_begin(&stats->syncp); -- 2.51.0 From 6891f0b523e1ef452523ba43d67ca2a654760e78 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:00 -0800 Subject: [PATCH 03/16] selftests: ncdevmem: Redirect all non-payload output to stderr That should make it possible to do expected payload validation on the caller side. 
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 61 +++++++++++++------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 64d6805381c5..9245d3f158dd 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -88,7 +88,6 @@ void print_nonzero_bytes(void *ptr, size_t size) for (i = 0; i < size; i++) putchar(p[i]); - printf("\n"); } void validate_buffer(void *line, size_t size) @@ -120,7 +119,7 @@ void validate_buffer(void *line, size_t size) char command[256]; \ memset(command, 0, sizeof(command)); \ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - printf("Running: %s\n", command); \ + fprintf(stderr, "Running: %s\n", command); \ system(command); \ }) @@ -128,22 +127,22 @@ static int reset_flow_steering(void) { int ret = 0; - ret = run_command("sudo ethtool -K %s ntuple off", ifname); + ret = run_command("sudo ethtool -K %s ntuple off >&2", ifname); if (ret) return ret; - return run_command("sudo ethtool -K %s ntuple on", ifname); + return run_command("sudo ethtool -K %s ntuple on >&2", ifname); } static int configure_headersplit(bool on) { - return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, + return run_command("sudo ethtool -G %s tcp-data-split %s >&2", ifname, on ? 
"on" : "off"); } static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); + return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); } static int configure_channels(unsigned int rx, unsigned int tx) @@ -153,7 +152,7 @@ static int configure_channels(unsigned int rx, unsigned int tx) static int configure_flow_steering(void) { - return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", + return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d >&2", ifname, client_ip, server_ip, port, port, start_queue); } @@ -187,7 +186,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, goto err_close; } - printf("got dmabuf id=%d\n", rsp->id); + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); dmabuf_id = rsp->id; netdev_bind_rx_req_free(req); @@ -314,8 +313,8 @@ int do_server(void) if (ret) error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - printf("binding to address %s:%d\n", server_ip, - ntohs(server_sin.sin_port)); + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); if (ret) @@ -329,14 +328,14 @@ int do_server(void) inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, sizeof(buffer)); - printf("Waiting or connection on %s:%d\n", buffer, - ntohs(server_sin.sin_port)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, sizeof(buffer)); - printf("Got connection from %s:%d\n", buffer, - ntohs(client_addr.sin_port)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin_port)); while (1) { struct iovec iov = { .iov_base = iobuf, @@ -349,14 +348,13 @@ int do_server(void) ssize_t ret; 
is_devmem = false; - printf("\n\n"); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data); ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); - printf("recvmsg ret=%ld\n", ret); + fprintf(stderr, "recvmsg ret=%ld\n", ret); if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) continue; if (ret < 0) { @@ -364,7 +362,7 @@ int do_server(void) continue; } if (ret == 0) { - printf("client exited\n"); + fprintf(stderr, "client exited\n"); goto cleanup; } @@ -373,7 +371,7 @@ int do_server(void) if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && cm->cmsg_type != SCM_DEVMEM_LINEAR)) { - fprintf(stdout, "skipping non-devmem cmsg\n"); + fprintf(stderr, "skipping non-devmem cmsg\n"); continue; } @@ -384,7 +382,7 @@ int do_server(void) /* TODO: process data copied from skb's linear * buffer. */ - fprintf(stdout, + fprintf(stderr, "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", dmabuf_cmsg->frag_size); @@ -395,12 +393,13 @@ int do_server(void) token.token_count = 1; total_received += dmabuf_cmsg->frag_size; - printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", - dmabuf_cmsg->frag_offset >> PAGE_SHIFT, - dmabuf_cmsg->frag_offset % getpagesize(), - dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, - dmabuf_cmsg->frag_token, total_received, - dmabuf_cmsg->dmabuf_id); + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); if (dmabuf_cmsg->dmabuf_id != dmabuf_id) error(1, 0, @@ -438,15 +437,15 @@ int do_server(void) if (!is_devmem) error(1, 0, "flow steering error\n"); - printf("total_received=%lu\n", total_received); + 
fprintf(stderr, "total_received=%lu\n", total_received); } - fprintf(stdout, "%s: ok\n", TEST_PREFIX); + fprintf(stderr, "%s: ok\n", TEST_PREFIX); - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); cleanup: @@ -551,7 +550,7 @@ int main(int argc, char *argv[]) ifname = optarg; break; case '?': - printf("unknown option: %c\n", optopt); + fprintf(stderr, "unknown option: %c\n", optopt); break; } } @@ -559,7 +558,7 @@ int main(int argc, char *argv[]) ifindex = if_nametoindex(ifname); for (; optind < argc; optind++) - printf("extra arguments: %s\n", argv[optind]); + fprintf(stderr, "extra arguments: %s\n", argv[optind]); run_devmem_tests(); -- 2.51.0 From 8b9049af8066b4705d83bb7847ee3c960fc58d09 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:01 -0800 Subject: [PATCH 04/16] selftests: ncdevmem: Separate out dmabuf provider So we can plug the other ones in the future if needed. 
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 203 +++++++++++++++---------- 1 file changed, 119 insertions(+), 84 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 9245d3f158dd..3e7ef2eedd60 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -71,17 +71,101 @@ static char *ifname = "eth1"; static unsigned int ifindex; static unsigned int dmabuf_id; -void print_bytes(void *ptr, size_t size) +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static struct memory_buffer *udmabuf_alloc(size_t size) { - unsigned char *p = ptr; - int i; + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; - for (i = 0; i < size; i++) - printf("%02hhX ", p[i]); - printf("\n"); + ctx = malloc(sizeof(*ctx)); + if (!ctx) + error(1, ENOMEM, "malloc failed"); + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) + error(1, errno, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) + error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) + error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 
0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) + error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) + error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + + return ctx; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); } -void print_nonzero_bytes(void *ptr, size_t size) +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) { unsigned char *p = ptr; unsigned int i; @@ -201,42 +285,7 @@ err_close: return -1; } -static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) -{ - struct udmabuf_create create; - int ret; - - *devfd = open("/dev/udmabuf", O_RDWR); - if (*devfd < 0) { - error(70, 0, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - } - - *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (*memfd < 0) - error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); - - /* Required for udmabuf */ - ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); - - ret = ftruncate(*memfd, dmabuf_size); - if (ret == -1) - error(74, 0, "%s: [FAIL,memfd-truncate]\n", 
TEST_PREFIX); - - memset(&create, 0, sizeof(create)); - - create.memfd = *memfd; - create.offset = 0; - create.size = dmabuf_size; - *buf = ioctl(*devfd, UDMABUF_CREATE, &create); - if (*buf < 0) - error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); -} - -int do_server(void) +int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; @@ -244,23 +293,18 @@ int do_server(void) struct sockaddr_in client_addr; struct sockaddr_in server_sin; size_t page_aligned_frags = 0; - int devfd, memfd, buf, ret; size_t total_received = 0; socklen_t client_addr_len; bool is_devmem = false; - char *buf_mem = NULL; + char *tmp_mem = NULL; struct ynl_sock *ys; - size_t dmabuf_size; char iobuf[819200]; char buffer[256]; int socket_fd; int client_fd; size_t i = 0; int opt = 1; - - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + int ret; if (reset_flow_steering()) error(1, 0, "Failed to reset flow steering\n"); @@ -284,13 +328,12 @@ int do_server(void) queues[i].id = start_queue + i; } - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) error(1, 0, "Failed to bind\n"); - buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, - buf, 0); - if (buf_mem == MAP_FAILED) - error(1, 0, "mmap()"); + tmp_mem = malloc(mem->size); + if (!tmp_mem) + error(1, ENOMEM, "malloc failed"); server_sin.sin_family = AF_INET; server_sin.sin_port = htons(atoi(port)); @@ -341,7 +384,6 @@ int do_server(void) struct iovec iov = { .iov_base = iobuf, .iov_len = sizeof(iobuf) }; struct dmabuf_cmsg *dmabuf_cmsg = NULL; - struct dma_buf_sync sync = { 0 }; struct cmsghdr *cm = NULL; struct msghdr msg = { 0 }; struct dmabuf_token token; @@ -410,22 +452,16 @@ int do_server(void) else page_aligned_frags++; - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + 
provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); if (do_validation) - validate_buffer( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); + validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size); else - print_nonzero_bytes( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, @@ -450,12 +486,9 @@ int do_server(void) cleanup: - munmap(buf_mem, dmabuf_size); + free(tmp_mem); close(client_fd); close(socket_fd); - close(buf); - close(memfd); - close(devfd); ynl_sock_destroy(ys); return 0; @@ -464,14 +497,11 @@ cleanup: void run_devmem_tests(void) { struct netdev_queue_id *queues; - int devfd, memfd, buf; + struct memory_buffer *mem; struct ynl_sock *ys; - size_t dmabuf_size; size_t i = 0; - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + mem = provider->alloc(getpagesize() * NUM_PAGES); /* Configure RSS to divert all traffic from our devmem queues */ if (configure_rss()) @@ -482,7 +512,7 @@ void run_devmem_tests(void) if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); for (i = 0; i < num_queues; i++) { @@ -495,7 +525,7 @@ void run_devmem_tests(void) if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) @@ -508,7 +538,7 @@ 
void run_devmem_tests(void) queues[i].id = start_queue + i; } - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ @@ -517,11 +547,15 @@ void run_devmem_tests(void) /* Closing the netlink socket does an implicit unbind */ ynl_sock_destroy(ys); + + provider->free(mem); } int main(int argc, char *argv[]) { + struct memory_buffer *mem; int is_server = 0, opt; + int ret; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { switch (opt) { @@ -562,8 +596,9 @@ int main(int argc, char *argv[]) run_devmem_tests(); - if (is_server) - return do_server(); + mem = provider->alloc(getpagesize() * NUM_PAGES); + ret = is_server ? do_server(mem) : 1; + provider->free(mem); - return 0; + return ret; } -- 2.51.0 From bfccbaac1b45f9af7d76589d7e31ad921b50c0d7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:02 -0800 Subject: [PATCH 05/16] selftests: ncdevmem: Unify error handling There is a bunch of places where error() calls look out of place. Use the same error(1, errno, ...) pattern everywhere. 
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 3e7ef2eedd60..4733d1a0aab5 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -339,33 +339,33 @@ int do_server(struct memory_buffer *mem) server_sin.sin_port = htons(atoi(port)); ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); - if (socket < 0) - error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (ret < 0) + error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); - if (socket < 0) - error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) + error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); if (ret) - error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); ret = listen(socket_fd, 1); if (ret) - error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); client_addr_len = sizeof(client_addr); -- 2.51.0 From 
0ebd75f5f2392c2ada04c6e11447415911fe1506 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:03 -0800 Subject: [PATCH 06/16] selftests: ncdevmem: Make client_ip optional Support 3-tuple filtering by making client_ip optional. When -c is not passed, don't specify src-ip/src-port in the filter. Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-5-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 4733d1a0aab5..faa9dce121c7 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -62,7 +62,7 @@ */ static char *server_ip = "192.168.1.4"; -static char *client_ip = "192.168.1.2"; +static char *client_ip; static char *port = "5201"; static size_t do_validation; static int start_queue = 8; @@ -236,8 +236,14 @@ static int configure_channels(unsigned int rx, unsigned int tx) static int configure_flow_steering(void) { - return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d >&2", - ifname, client_ip, server_ip, port, port, start_queue); + return run_command("sudo ethtool -N %s flow-type tcp4 %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + ifname, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_ip, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, -- 2.51.0 From d3ca35c64d48daf3451851043cffe2bda3913648 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:04 -0800 Subject: [PATCH 07/16] selftests: ncdevmem: Remove default arguments To make it clear what's required and what's not. 
Also, some of the values don't seem like good defaults; for example eth1. Move the invocation comment to the top, add missing -s to the client and clean up the client invocation a bit to make it more readable. Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 61 ++++++++++++++++---------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index faa9dce121c7..0feeca56c049 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -1,4 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 + * + * On client: + * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * + * Test data validation: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * tr \\n \\0 | \ + * head -c 5G | \ + * nc <server IP> 5201 -p 5201 + * + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + */ #define _GNU_SOURCE #define __EXPORTED_HEADERS__ @@ -42,32 +69,13 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif -/* - * tcpdevmem netcat. Works similarly to netcat but does device memory TCP - * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. - * - * Usage: - * - * On server: - * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7 - * - * On client: - * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ - * nc <server IP> 5201 -p 5201 - * - * Note this is compatible with regular netcat. 
i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. - */ - -static char *server_ip = "192.168.1.4"; +static char *server_ip; static char *client_ip; -static char *port = "5201"; +static char *port; static size_t do_validation; static int start_queue = 8; static int num_queues = 8; -static char *ifname = "eth1"; +static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; @@ -595,6 +603,15 @@ int main(int argc, char *argv[]) } } + if (!server_ip) + error(1, 0, "Missing -s argument\n"); + + if (!port) + error(1, 0, "Missing -p argument\n"); + + if (!ifname) + error(1, 0, "Missing -f argument\n"); + ifindex = if_nametoindex(ifname); for (; optind < argc; optind++) -- 2.51.0 From 933056357a8cf0c9b3fb2ecc4d2d8d142614f0a3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:05 -0800 Subject: [PATCH 08/16] selftests: ncdevmem: Switch to AF_INET6 Use dualstack socket to support both v4 and v6. v4-mapped-v6 address can be used to do v4. 
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 97 ++++++++++++++++++-------- 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 0feeca56c049..645ef0bb63ec 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -242,13 +242,26 @@ static int configure_channels(unsigned int rx, unsigned int tx) return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); } -static int configure_flow_steering(void) +static int configure_flow_steering(struct sockaddr_in6 *server_sin) { - return run_command("sudo ethtool -N %s flow-type tcp4 %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", ifname, + type, client_ip ? "src-ip" : "", client_ip ?: "", - server_ip, + server_addr, client_ip ? "src-port" : "", client_ip ? 
port : "", port, start_queue); @@ -299,13 +312,51 @@ err_close: return -1; } +static void enable_reuseaddr(int fd) +{ + int opt = 1; + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; size_t non_page_aligned_frags = 0; - struct sockaddr_in client_addr; - struct sockaddr_in server_sin; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; size_t page_aligned_frags = 0; size_t total_received = 0; socklen_t client_addr_len; @@ -317,9 +368,12 @@ int do_server(struct memory_buffer *mem) int socket_fd; int client_fd; size_t i = 0; - int opt = 1; int ret; + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) + error(1, 0, "parse server address"); + if (reset_flow_steering()) error(1, 0, "Failed to reset flow steering\n"); @@ -328,7 +382,7 @@ int do_server(struct memory_buffer *mem) error(1, 0, "Failed to configure rss\n"); /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering()) + if (configure_flow_steering(&server_sin)) error(1, 0, "Failed to configure flow steering\n"); sleep(1); @@ -349,29 
+403,14 @@ int do_server(struct memory_buffer *mem) if (!tmp_mem) error(1, ENOMEM, "malloc failed"); - server_sin.sin_family = AF_INET; - server_sin.sin_port = htons(atoi(port)); - - ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); - if (ret < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); - - socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); if (socket_fd < 0) error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, - sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, - sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + enable_reuseaddr(socket_fd); fprintf(stderr, "binding to address %s:%d\n", server_ip, - ntohs(server_sin.sin_port)); + ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); if (ret) @@ -383,16 +422,16 @@ int do_server(struct memory_buffer *mem) client_addr_len = sizeof(client_addr); - inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, sizeof(buffer)); fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, - ntohs(server_sin.sin_port)); + ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); - inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); fprintf(stderr, "Got connection from %s:%d\n", buffer, - ntohs(client_addr.sin_port)); + ntohs(client_addr.sin6_port)); while (1) { struct iovec iov = { .iov_base = iobuf, -- 2.51.0 From e3c09623a53b8d11ff9e3c0f435ce1e8f52134ba Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:06 -0800 Subject: [PATCH 09/16] selftests: ncdevmem: Properly reset flow steering 
ntuple off/on might not be enough to do it on all NICs. Add a bunch of shell crap to explicitly remove the rules. Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 645ef0bb63ec..ad6de8e0e97b 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -217,13 +217,18 @@ void validate_buffer(void *line, size_t size) static int reset_flow_steering(void) { - int ret = 0; - - ret = run_command("sudo ethtool -K %s ntuple off >&2", ifname); - if (ret) - return ret; - - return run_command("sudo ethtool -K %s ntuple on >&2", ifname); + /* Depending on the NIC, toggling ntuple off and on might not + * be allowed. Additionally, attempting to delete existing filters + * will fail if no filters are present. Therefore, do not enforce + * the exit status. + */ + + run_command("sudo ethtool -K %s ntuple off >&2", ifname); + run_command("sudo ethtool -K %s ntuple on >&2", ifname); + run_command( + "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", + ifname, ifname); + return 0; } static int configure_headersplit(bool on) -- 2.51.0 From 798d822e5d34ffe3f25b66b2573928962a5d3c11 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:07 -0800 Subject: [PATCH 10/16] selftests: ncdevmem: Use YNL to enable TCP header split In the next patch the hard-coded queue numbers are going to be removed. So introduce some initial support for ethtool YNL and use it to enable header split. Also, tcp-data-split requires the latest ethtool, which is unlikely to be present in the distros right now. (ideally, we should not shell out to ethtool at all).
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-9-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/ncdevmem.c | 57 +++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8c4db5199a42..61cce028f105 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -112,7 +112,7 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk # YNL build -YNL_GENS := netdev +YNL_GENS := ethtool netdev include ynl.mk $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index ad6de8e0e97b..9ca2da3a2f63 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -55,10 +55,12 @@ #include #include #include +#include #include #include #include "netdev-user.h" +#include "ethtool-user.h" #include #define PAGE_SHIFT 12 @@ -231,10 +233,58 @@ static int reset_flow_steering(void) return 0; } +static const char *tcp_data_split_str(int val) +{ + switch (val) { + case 0: + return "off"; + case 1: + return "auto"; + case 2: + return "on"; + default: + return "?"; + } +} + static int configure_headersplit(bool on) { - return run_command("sudo ethtool -G %s tcp-data-split %s >&2", ifname, - on ? 
"on" : "off"); + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + /* 0 - off, 1 - auto, 2 - on */ + ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL failed: %s\n", ys->err.msg); + ethtool_rings_set_req_free(req); + + if (ret == 0) { + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + if (get_rsp) + fprintf(stderr, "TCP header split: %s\n", + tcp_data_split_str(get_rsp->tcp_data_split)); + ethtool_rings_get_rsp_free(get_rsp); + } + + ynl_sock_destroy(ys); + + return ret; } static int configure_rss(void) @@ -382,6 +432,9 @@ int do_server(struct memory_buffer *mem) if (reset_flow_steering()) error(1, 0, "Failed to reset flow steering\n"); + if (configure_headersplit(1)) + error(1, 0, "Failed to enable TCP header split\n"); + /* Configure RSS to divert all traffic from our devmem queues */ if (configure_rss()) error(1, 0, "Failed to configure rss\n"); -- 2.51.0 From d4ef05d211315395974fa846308c693ab2ea1ff2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:08 -0800 Subject: [PATCH 11/16] selftests: ncdevmem: Remove hard-coded queue numbers Use single last queue of the device and probe it dynamically. 
Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-10-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 40 ++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 9ca2da3a2f63..1ea62c129ddc 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -75,8 +75,8 @@ static char *server_ip; static char *client_ip; static char *port; static size_t do_validation; -static int start_queue = 8; -static int num_queues = 8; +static int start_queue = -1; +static int num_queues = 1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; @@ -208,6 +208,33 @@ void validate_buffer(void *line, size_t size) fprintf(stdout, "Validated buffer\n"); } +static int rxq_num(int ifindex) +{ + struct ethtool_channels_get_req *req; + struct ethtool_channels_get_rsp *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + int num = -1; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_channels_get_req_alloc(); + ethtool_channels_get_req_set_header_dev_index(req, ifindex); + rsp = ethtool_channels_get(ys, req); + if (rsp) + num = rsp->rx_count + rsp->combined_count; + ethtool_channels_get_req_free(req); + ethtool_channels_get_rsp_free(rsp); + + ynl_sock_destroy(ys); + + return num; +} + #define run_command(cmd, ...) 
\ ({ \ char command[256]; \ @@ -711,6 +738,15 @@ int main(int argc, char *argv[]) ifindex = if_nametoindex(ifname); + if (start_queue < 0) { + start_queue = rxq_num(ifindex) - 1; + + if (start_queue < 0) + error(1, 0, "couldn't detect number of queues\n"); + + fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); + } + for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); -- 2.51.0 From 77f870a000165f364082e06bfd8fd16d331219d8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:09 -0800 Subject: [PATCH 12/16] selftests: ncdevmem: Run selftest when none of the -s or -c has been provided This will be used as a 'probe' mode in the selftest to check whether the device supports the devmem or not. Use hard-coded queue layout (two last queues) and prevent user from passing custom -q and/or -t. Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-11-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ncdevmem.c | 49 ++++++++++++++++++++------ 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 1ea62c129ddc..8e502a1f8f9b 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -76,7 +76,7 @@ static char *client_ip; static char *port; static size_t do_validation; static int start_queue = -1; -static int num_queues = 1; +static int num_queues = -1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; @@ -727,19 +727,38 @@ int main(int argc, char *argv[]) } } - if (!server_ip) - error(1, 0, "Missing -s argument\n"); - - if (!port) - error(1, 0, "Missing -p argument\n"); - if (!ifname) error(1, 0, "Missing -f argument\n"); ifindex = if_nametoindex(ifname); - if (start_queue < 0) { - start_queue = rxq_num(ifindex) - 1; 
+ if (!server_ip && !client_ip) { + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 0) + error(1, 0, "couldn't detect number of queues\n"); + if (num_queues < 2) + error(1, 0, + "number of device queues is too low\n"); + /* make sure can bind to multiple queues */ + start_queue = num_queues / 2; + num_queues /= 2; + } + + if (start_queue < 0 || num_queues < 0) + error(1, 0, "Both -t and -q are required\n"); + + run_devmem_tests(); + return 0; + } + + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 2) + error(1, 0, "number of device queues is too low\n"); + + num_queues = 1; + start_queue = rxq_num(ifindex) - num_queues; if (start_queue < 0) error(1, 0, "couldn't detect number of queues\n"); @@ -750,7 +769,17 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - run_devmem_tests(); + if (start_queue < 0) + error(1, 0, "Missing -t argument\n"); + + if (num_queues < 0) + error(1, 0, "Missing -q argument\n"); + + if (!server_ip) + error(1, 0, "Missing -s argument\n"); + + if (!port) + error(1, 0, "Missing -p argument\n"); mem = provider->alloc(getpagesize() * NUM_PAGES); ret = is_server ? do_server(mem) : 1; -- 2.51.0 From be43a6b2382983c89b59166ba2c32ec0f1092cfe Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:10 -0800 Subject: [PATCH 13/16] selftests: ncdevmem: Move ncdevmem under drivers/net/hw This is where all the tests that depend on the HW functionality live in and this is where the automated test is gonna be added in the next patch. 
Reviewed-by: Mina Almasry Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241107181211.3934153-12-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/.gitignore | 1 + tools/testing/selftests/drivers/net/hw/Makefile | 8 ++++++++ .../testing/selftests/{net => drivers/net/hw}/ncdevmem.c | 0 tools/testing/selftests/net/.gitignore | 1 - tools/testing/selftests/net/Makefile | 8 -------- 5 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/hw/.gitignore rename tools/testing/selftests/{net => drivers/net/hw}/ncdevmem.c (100%) diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore new file mode 100644 index 000000000000..e9fe6ede681a --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -0,0 +1 @@ +ncdevmem diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index c9f2f48fc30f..182348f4bd40 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -26,4 +26,12 @@ TEST_INCLUDES := \ ../../../net/forwarding/tc_common.sh \ # +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := ncdevmem +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../../lib.mk + +# YNL build +YNL_GENS := ethtool netdev +include ../../../net/ynl.mk diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c similarity index 100% rename from tools/testing/selftests/net/ncdevmem.c rename to tools/testing/selftests/drivers/net/hw/ncdevmem.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 217d8b7a7365..a78debbd1fe7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -18,7 +18,6 @@ ipv6_flowlabel_mgr log.txt msg_oob msg_zerocopy -ncdevmem nettest psock_fanout 
psock_snd diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 61cce028f105..9322b904ad00 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -98,10 +98,6 @@ TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh -# YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := ncdevmem -TEST_GEN_FILES += $(YNL_GEN_FILES) - TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh @@ -111,10 +107,6 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk -# YNL build -YNL_GENS := ethtool netdev -include ynl.mk - $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto -- 2.51.0 From 80230864b7b0fd9b54b294ab08a28f01d4193aa2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 7 Nov 2024 10:12:11 -0800 Subject: [PATCH 14/16] selftests: ncdevmem: Add automated test Only RX side for now and small message to test the setup. In the future, we can extend it to TX side and to testing both sides with a couple of megs of data. 
make \ -C tools/testing/selftests \ TARGETS="drivers/net/hw" \ install INSTALL_PATH=~/tmp/ksft scp -r ~/tmp/ksft ${HOST}: scp -r ~/tmp/ksft ${PEER}: cfg+="NETIF=${DEV}\n" cfg+="LOCAL_V6=${HOST_IP}\n" cfg+="REMOTE_V6=${PEER_IP}\n" cfg+="REMOTE_TYPE=ssh\n" cfg+="REMOTE_ARGS=root@${PEER}\n" echo -e "$cfg" | ssh root@${HOST} "cat > ksft/drivers/net/net.config" ssh root@${HOST} "cd ksft && ./run_kselftest.sh -t drivers/net/hw:devmem.py" Reviewed-by: Mina Almasry Signed-off-by: Stanislav Fomichev Reviewed-by: Joe Damato Link: https://patch.msgid.link/20241107181211.3934153-13-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/hw/Makefile | 1 + .../selftests/drivers/net/hw/devmem.py | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/devmem.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 182348f4bd40..1c6a77480923 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = \ csum.py \ devlink_port_split.py \ + devmem.py \ ethtool.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py new file mode 100755 index 000000000000..1223f0f5c10c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ksft_disruptive + + +def require_devmem(cfg): + if not hasattr(cfg, "_devmem_probed"): + port = rand_port() + probe_command = f"./ncdevmem -f {cfg.ifname}" + cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg._devmem_probed = True +
+ if not cfg._devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +@ksft_disruptive +def check_rx(cfg) -> None: + cfg.require_v6() + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + + with bkg(listen_cmd) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([check_rx], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() -- 2.51.0 From 7a3bcd39ae1f0e3ab896d9df62339ab4297a0bfd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Nov 2024 23:12:12 +0100 Subject: [PATCH 15/16] r8169: use helper r8169_mod_reg8_cond to simplify rtl_jumbo_config Use recently added helper r8169_mod_reg8_cond() to simplify jumbo mode configuration. Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/3df1d484-a02e-46e7-8f75-db5b428e422e@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 77 ++++------------------- 1 file changed, 11 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6578a9947b82..907a482c012f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2543,86 +2543,31 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0; } -static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1); -} - -static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1); -} - -static void 
r8168dp_hw_jumbo_enable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); -} - -static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); -} - -static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp) -{ - RTL_W8(tp, MaxTxPacketSize, 0x24); - RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01); -} - -static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp) -{ - RTL_W8(tp, MaxTxPacketSize, 0x3f); - RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01); -} - -static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0)); -} - -static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp) -{ - RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0)); -} - static void rtl_jumbo_config(struct rtl8169_private *tp) { bool jumbo = tp->dev->mtu > ETH_DATA_LEN; int readrq = 4096; + if (jumbo && tp->mac_version >= RTL_GIGA_MAC_VER_17 && + tp->mac_version <= RTL_GIGA_MAC_VER_26) + readrq = 512; + rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_17: - if (jumbo) { - readrq = 512; - r8168b_1_hw_jumbo_enable(tp); - } else { - r8168b_1_hw_jumbo_disable(tp); - } + r8169_mod_reg8_cond(tp, Config4, BIT(0), jumbo); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: - if (jumbo) { - readrq = 512; - r8168c_hw_jumbo_enable(tp); - } else { - r8168c_hw_jumbo_disable(tp); - } + r8169_mod_reg8_cond(tp, Config3, Jumbo_En0, jumbo); + r8169_mod_reg8_cond(tp, Config4, Jumbo_En1, jumbo); break; case RTL_GIGA_MAC_VER_28: - if (jumbo) - r8168dp_hw_jumbo_enable(tp); - else - r8168dp_hw_jumbo_disable(tp); + r8169_mod_reg8_cond(tp, Config3, Jumbo_En0, jumbo); break; case RTL_GIGA_MAC_VER_31 ... 
RTL_GIGA_MAC_VER_33: - if (jumbo) - r8168e_hw_jumbo_enable(tp); - else - r8168e_hw_jumbo_disable(tp); + RTL_W8(tp, MaxTxPacketSize, jumbo ? 0x24 : 0x3f); + r8169_mod_reg8_cond(tp, Config3, Jumbo_En0, jumbo); + r8169_mod_reg8_cond(tp, Config4, BIT(0), jumbo); break; default: break; -- 2.51.0 From d5ec8d91f82ef78405b506737952dec8af95a95b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 7 Nov 2024 16:48:14 -0800 Subject: [PATCH 16/16] rtnetlink: Remove __rtnl_link_unregister(). rtnl_link_unregister() holds RTNL and calls __rtnl_link_unregister(), where we call synchronize_srcu() to wait inflight RTM_NEWLINK requests for per-netns RTNL. We put synchronize_srcu() in __rtnl_link_unregister() due to ifb.ko and dummy.ko. However, rtnl_newlink() will acquire SRCU before RTNL later in this series. Then, lockdep will detect the deadlock: rtnl_link_unregister() rtnl_newlink() ---- ---- lock(rtnl_mutex); lock(&ops->srcu); lock(rtnl_mutex); sync(&ops->srcu); To avoid the problem, we must call synchronize_srcu() before RTNL in rtnl_link_unregister(). As a preparation, let's remove __rtnl_link_unregister(). 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241108004823.29419-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/dummy.c | 6 +++++- drivers/net/ifb.c | 6 +++++- include/net/rtnetlink.h | 1 - net/core/rtnetlink.c | 32 ++++++++++---------------------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index e9c5e1e11fa0..72618b6af44e 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -166,6 +166,7 @@ err: static int __init dummy_init_module(void) { + bool need_unregister = false; int i, err = 0; down_write(&pernet_ops_rwsem); @@ -179,12 +180,15 @@ static int __init dummy_init_module(void) cond_resched(); } if (err < 0) - __rtnl_link_unregister(&dummy_link_ops); + need_unregister = true; out: rtnl_unlock(); up_write(&pernet_ops_rwsem); + if (need_unregister) + rtnl_link_unregister(&dummy_link_ops); + return err; } diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 2c1b5def4a0b..a4b9ec4e8f30 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -424,6 +424,7 @@ err: static int __init ifb_init_module(void) { + bool need_unregister = false; int i, err; down_write(&pernet_ops_rwsem); @@ -437,12 +438,15 @@ static int __init ifb_init_module(void) cond_resched(); } if (err) - __rtnl_link_unregister(&ifb_link_ops); + need_unregister = true; out: rtnl_unlock(); up_write(&pernet_ops_rwsem); + if (need_unregister) + rtnl_link_unregister(&ifb_link_ops); + return err; } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index b260c0cc9671..3ebfcc6e56fd 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -165,7 +165,6 @@ struct rtnl_link_ops { }; int __rtnl_link_register(struct rtnl_link_ops *ops); -void __rtnl_link_unregister(struct rtnl_link_ops *ops); int rtnl_link_register(struct rtnl_link_ops *ops); void rtnl_link_unregister(struct rtnl_link_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 
a5c386a45501..f0246ecec7fa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -568,27 +568,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) unregister_netdevice_many(&list_kill); } -/** - * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. - * @ops: struct rtnl_link_ops * to unregister - * - * The caller must hold the rtnl_mutex and guarantee net_namespace_list - * integrity (hold pernet_ops_rwsem for writing to close the race - * with setup_net() and cleanup_net()). - */ -void __rtnl_link_unregister(struct rtnl_link_ops *ops) -{ - struct net *net; - - list_del_rcu(&ops->list); - synchronize_srcu(&ops->srcu); - cleanup_srcu_struct(&ops->srcu); - - for_each_net(net) - __rtnl_kill_links(net, ops); -} -EXPORT_SYMBOL_GPL(__rtnl_link_unregister); - /* Return with the rtnl_lock held when there are no network * devices unregistering in any network namespace. */ @@ -617,10 +596,19 @@ static void rtnl_lock_unregistering_all(void) */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { + struct net *net; + /* Close the race with setup_net() and cleanup_net() */ down_write(&pernet_ops_rwsem); rtnl_lock_unregistering_all(); - __rtnl_link_unregister(ops); + + list_del_rcu(&ops->list); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); + + for_each_net(net) + __rtnl_kill_links(net, ops); + rtnl_unlock(); up_write(&pernet_ops_rwsem); } -- 2.51.0