From 46f2a11cb82b657fd15bab1c47821b635e03838b Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:00 +0100 Subject: [PATCH 01/16] af_packet: avoid erroring out after sock_init_data() in packet_create() After sock_init_data() the allocated sk object is attached to the provided sock object. On error, packet_create() frees the sk object leaving the dangling pointer in the sock object on return. Some other code may try to use this pointer and cause use-after-free. Suggested-by: Eric Dumazet Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-2-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2ff4b251842d..886c0dd47b66 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3422,17 +3422,17 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; + po = pkt_sk(sk); + err = packet_alloc_pending(po); + if (err) + goto out_sk_free; + sock_init_data(sock, sk); - po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; - err = packet_alloc_pending(po); - if (err) - goto out2; - packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -3464,7 +3464,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_prot_inuse_add(net, &packet_proto, 1); return 0; -out2: +out_sk_free: sk_free(sk); out: return err; -- 2.51.0 From 7c4f78cdb8e7501e9f92d291a7d956591bf73be9 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:01 +0100 Subject: [PATCH 02/16] Bluetooth: L2CAP: do not leave dangling sk pointer on error in l2cap_sock_create() bt_sock_alloc() allocates the sk object and attaches it to the provided sock object. On error l2cap_sock_alloc() frees the sk object, but the dangling pointer is still attached to the sock object, which may create use-after-free in other code. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-3-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ba437c6f6ee5..18e89e764f3b 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1886,6 +1886,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); + sock->sk = NULL; return NULL; } -- 2.51.0 From 3945c799f12b8d1f49a3b48369ca494d981ac465 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:02 +0100 Subject: [PATCH 03/16] Bluetooth: RFCOMM: avoid leaving dangling sk pointer in rfcomm_sock_alloc() bt_sock_alloc() attaches allocated sk object to the provided sock object. If rfcomm_dlc_alloc() fails, we release the sk object, but leave the dangling pointer in the sock object, which may cause use-after-free. Fix this by swapping calls to bt_sock_alloc() and rfcomm_dlc_alloc(). Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-4-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/bluetooth/rfcomm/sock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index f48250e3f2e1..355e1a1698f5 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -274,13 +274,13 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, struct rfcomm_dlc *d; struct sock *sk; - sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); - if (!sk) + d = rfcomm_dlc_alloc(prio); + if (!d) return NULL; - d = rfcomm_dlc_alloc(prio); - if (!d) { - sk_free(sk); + sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); + if (!sk) { + rfcomm_dlc_free(d); return NULL; } -- 2.51.0 From 811a7ca7320c062e15d0f5b171fe6ad8592d1434 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:03 +0100 Subject: [PATCH 04/16] net: af_can: do not leave a dangling sk pointer in can_create() On error can_create() frees the allocated sk object, but sock_init_data() has already attached it to the provided sock object. This will leave a dangling sk pointer in the sock object and may cause use-after-free later. Signed-off-by: Ignat Korchagin Reviewed-by: Vincent Mailhol Reviewed-by: Kuniyuki Iwashima Reviewed-by: Marc Kleine-Budde Link: https://patch.msgid.link/20241014153808.51894-5-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/can/af_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/af_can.c b/net/can/af_can.c index 707576eeeb58..01f3fbb3b67d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -171,6 +171,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, /* release sk on errors */ sock_orphan(sk); sock_put(sk); + sock->sk = NULL; } errout: -- 2.51.0 From b4fcd63f6ef79c73cafae8cf4a114def5fc3d80d Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:04 +0100 Subject: [PATCH 05/16] net: ieee802154: do not leave a dangling sk pointer in ieee802154_create() sock_init_data() attaches the allocated sk object to the provided sock object. If ieee802154_create() fails later, the allocated sk object is freed, but the dangling pointer remains in the provided sock object, which may allow use-after-free. Clear the sk pointer in the sock object on error. Signed-off-by: Ignat Korchagin Reviewed-by: Miquel Raynal Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-6-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ieee802154/socket.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 990a83455dcf..18d267921bb5 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1043,19 +1043,21 @@ static int ieee802154_create(struct net *net, struct socket *sock, if (sk->sk_prot->hash) { rc = sk->sk_prot->hash(sk); - if (rc) { - sk_common_release(sk); - goto out; - } + if (rc) + goto out_sk_release; } if (sk->sk_prot->init) { rc = sk->sk_prot->init(sk); if (rc) - sk_common_release(sk); + goto out_sk_release; } out: return rc; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static const struct net_proto_family ieee802154_family_ops = { -- 2.51.0 From 9365fa510c6f82e3aa550a09d0c5c6b44dbc78ff Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:05 +0100 Subject: [PATCH 06/16] net: inet: do not leave a dangling sk pointer in inet_create() sock_init_data() attaches the allocated sk object to the provided sock object. If inet_create() fails later, the sk object is freed, but the sock object retains the dangling pointer, which may create use-after-free later. Clear the sk pointer in the sock object on error. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-7-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b24d74616637..8095e82de808 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -376,32 +376,30 @@ lookup_protocol: inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } -- 2.51.0 From 9df99c395d0f55fb444ef39f4d6f194ca437d884 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:06 +0100 Subject: [PATCH 07/16] net: inet6: do not leave a dangling sk pointer in inet6_create() sock_init_data() attaches the allocated sk pointer to the provided sock object. If inet6_create() fails later, the sk object is released, but the sock object retains the dangling sk pointer, which may cause use-after-free later. Clear the sock sk pointer on error. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-8-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/ipv6/af_inet6.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ba69b86f1c7d..f60ec8b0f8ea 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -252,31 +252,29 @@ lookup_protocol: */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, -- 2.51.0 From 48156296a08c615a6baae514096c4b2e543d1157 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:07 +0100 Subject: [PATCH 08/16] net: warn, if pf->create does not clear sock->sk on error All pf->create implementations have been fixed now to clear sock->sk on error, when they deallocate the allocated sk object. Put a warning in place to make sure we don't break this promise in the future. Suggested-by: Kuniyuki Iwashima Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-9-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index 24b404299015..9a8e4452b9b2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1576,9 +1576,9 @@ int __sock_create(struct net *net, int family, int type, int protocol, err = pf->create(net, sock, protocol, kern); if (err < 0) { /* ->create should release the allocated sock->sk object on error - * but it may leave the dangling pointer + * and make sure sock->sk is set to NULL to avoid use-after-free */ - sock->sk = NULL; + DEBUG_NET_WARN_ON_ONCE(sock->sk); goto out_module_put; } -- 2.51.0 From 18429e6e0c2ad26250862a786964d8c73400d9a0 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:08 +0100 Subject: [PATCH 09/16] Revert "net: do not leave a dangling sk pointer, when socket creation fails" This reverts commit 6cd4a78d962bebbaf8beb7d2ead3f34120e3f7b2. inet/inet6->create() implementations have been fixed to explicitly NULL the allocated sk object on error. A warning was put in place to make sure any future changes will not leave a dangling pointer in pf->create() implementations. So this code is now redundant. Suggested-by: Kuniyuki Iwashima Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-10-ignat@cloudflare.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index f8c0d4eda888..756f8e8e0ac7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3827,9 +3827,6 @@ void sk_common_release(struct sock *sk) sk->sk_prot->unhash(sk); - if (sk->sk_socket) - sk->sk_socket->sk = NULL; - /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and -- 2.51.0 From 93c68f1275f9e21ccfed9ee292aedb11c3f6241b Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 14 Oct 2024 13:21:06 -0700 Subject: [PATCH 10/16] gve: move DQO rx buffer management related code to a new file In preparation for the upcoming page pool adoption for DQO raw addressing mode, move RX buffer management code to a new file. In the follow on patches, page pool code will be added to this file. No functional change, just movement of code. Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241014202108.1051963-2-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/Makefile | 3 +- drivers/net/ethernet/google/gve/gve.h | 18 ++ .../ethernet/google/gve/gve_buffer_mgmt_dqo.c | 230 ++++++++++++++++++ drivers/net/ethernet/google/gve/gve_rx_dqo.c | 225 ----------------- 4 files changed, 250 insertions(+), 226 deletions(-) create mode 100644 drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile index 9ed07080b38a..4520f1c07a63 100644 --- a/drivers/net/ethernet/google/gve/Makefile +++ b/drivers/net/ethernet/google/gve/Makefile @@ -1,4 +1,5 @@ # Makefile for the Google virtual Ethernet (gve) driver obj-$(CONFIG_GVE) += gve.o -gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o +gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ + gve_buffer_mgmt_dqo.o diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 301fa1ea4f51..bd684c7d996a 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1162,6 +1162,24 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); bool gve_header_split_supported(const struct gve_priv *priv); int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); +/* rx buffer handling */ +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs); +void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, + bool free_page); +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx); +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list); +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); +int gve_alloc_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c new file mode 100644 index 000000000000..8e50f0e4bb2e --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2024 Google, Inc. + */ + +#include "gve.h" +#include "gve_utils.h" + +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) +{ + return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; +} + +void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, + bool free_page) +{ + page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); + if (free_page) + gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, + DMA_FROM_DEVICE); + bs->page_info.page = NULL; +} + +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = rx->dqo.free_buf_states; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from free list */ + rx->dqo.free_buf_states = buf_state->next; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + return buf_state->next == buffer_id; +} + +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = rx->dqo.free_buf_states; + rx->dqo.free_buf_states = buffer_id; +} + +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = list->head; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from list */ + list->head = buf_state->next; + if (buf_state->next == -1) + list->tail = -1; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = -1; + + if (list->head == -1) { + list->head = buffer_id; + list->tail = buffer_id; + } else { + int tail = list->tail; + + rx->dqo.buf_states[tail].next = buffer_id; + list->tail = buffer_id; + } +} + +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + int i; + + /* Recycled buf states are immediately usable. */ + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); + if (likely(buf_state)) + return buf_state; + + if (unlikely(rx->dqo.used_buf_states.head == -1)) + return NULL; + + /* Used buf states are only usable when ref count reaches 0, which means + * no SKBs refer to them. + * + * Search a limited number before giving up. + */ + for (i = 0; i < 5; i++) { + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); + if (gve_buf_ref_cnt(buf_state) == 0) { + rx->dqo.used_buf_states_cnt--; + return buf_state; + } + + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + } + + /* For QPL, we cannot allocate any new buffers and must + * wait for the existing ones to be available. + */ + if (rx->dqo.qpl) + return NULL; + + /* If there are no free buf states discard an entry from + * `used_buf_states` so it can be used. + */ + if (unlikely(rx->dqo.free_buf_states == -1)) { + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); + if (gve_buf_ref_cnt(buf_state) == 0) + return buf_state; + + gve_free_page_dqo(rx->gve, buf_state, true); + gve_free_buf_state(rx, buf_state); + } + + return NULL; +} + +int gve_alloc_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + struct gve_priv *priv = rx->gve; + u32 idx; + + if (!rx->dqo.qpl) { + int err; + + err = gve_alloc_page(priv, &priv->pdev->dev, + &buf_state->page_info.page, + &buf_state->addr, + DMA_FROM_DEVICE, GFP_ATOMIC); + if (err) + return err; + } else { + idx = rx->dqo.next_qpl_page_idx; + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; + } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; + } + buf_state->page_info.page_offset = 0; + buf_state->page_info.page_address = + page_address(buf_state->page_info.page); + buf_state->last_single_ref_offset = 0; + + /* The page already has 1 ref. */ + page_ref_add(buf_state->page_info.page, INT_MAX - 1); + buf_state->page_info.pagecnt_bias = INT_MAX; + + return 0; +} + +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + const u16 data_buffer_size = priv->data_buffer_size_dqo; + int pagecount; + + /* Can't reuse if we only fit one buffer per page */ + if (data_buffer_size * 2 > PAGE_SIZE) + goto mark_used; + + pagecount = gve_buf_ref_cnt(buf_state); + + /* Record the offset when we have a single remaining reference. + * + * When this happens, we know all of the other offsets of the page are + * usable. + */ + if (pagecount == 1) { + buf_state->last_single_ref_offset = + buf_state->page_info.page_offset; + } + + /* Use the next buffer sized chunk in the page. */ + buf_state->page_info.page_offset += data_buffer_size; + buf_state->page_info.page_offset &= (PAGE_SIZE - 1); + + /* If we wrap around to the same offset without ever dropping to 1 + * reference, then we don't know if this offset was ever freed. + */ + if (buf_state->page_info.page_offset == + buf_state->last_single_ref_offset) { + goto mark_used; + } + + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + return; + +mark_used: + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + rx->dqo.used_buf_states_cnt++; +} diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 1154c1d8f66f..b343be2fb118 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -16,189 +16,6 @@ #include #include -static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) -{ - return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; -} - -static void gve_free_page_dqo(struct gve_priv *priv, - struct gve_rx_buf_state_dqo *bs, - bool free_page) -{ - page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - if (free_page) - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); - bs->page_info.page = NULL; -} - -static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) -{ - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = rx->dqo.free_buf_states; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from free list */ - rx->dqo.free_buf_states = buf_state->next; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} - -static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; - - return buf_state->next == buffer_id; -} - -static void gve_free_buf_state(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; - - buf_state->next = rx->dqo.free_buf_states; - rx->dqo.free_buf_states = buffer_id; -} - -static struct gve_rx_buf_state_dqo * -gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list) -{ - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = list->head; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from list */ - list->head = buf_state->next; - if (buf_state->next == -1) - list->tail = -1; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} - -static void gve_enqueue_buf_state(struct gve_rx_ring *rx, - struct gve_index_list *list, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; - - buf_state->next = -1; - - if (list->head == -1) { - list->head = buffer_id; - list->tail = buffer_id; - } else { - int tail = list->tail; - - rx->dqo.buf_states[tail].next = buffer_id; - list->tail = buffer_id; - } -} - -static struct gve_rx_buf_state_dqo * -gve_get_recycled_buf_state(struct gve_rx_ring *rx) -{ - struct gve_rx_buf_state_dqo *buf_state; - int i; - - /* Recycled buf states are immediately usable. */ - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); - if (likely(buf_state)) - return buf_state; - - if (unlikely(rx->dqo.used_buf_states.head == -1)) - return NULL; - - /* Used buf states are only usable when ref count reaches 0, which means - * no SKBs refer to them. - * - * Search a limited number before giving up. - */ - for (i = 0; i < 5; i++) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) { - rx->dqo.used_buf_states_cnt--; - return buf_state; - } - - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); - } - - /* For QPL, we cannot allocate any new buffers and must - * wait for the existing ones to be available. - */ - if (rx->dqo.qpl) - return NULL; - - /* If there are no free buf states discard an entry from - * `used_buf_states` so it can be used. - */ - if (unlikely(rx->dqo.free_buf_states == -1)) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) - return buf_state; - - gve_free_page_dqo(rx->gve, buf_state, true); - gve_free_buf_state(rx, buf_state); - } - - return NULL; -} - -static int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - struct gve_priv *priv = rx->gve; - u32 idx; - - if (!rx->dqo.qpl) { - int err; - - err = gve_alloc_page(priv, &priv->pdev->dev, - &buf_state->page_info.page, - &buf_state->addr, - DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; - } else { - idx = rx->dqo.next_qpl_page_idx; - if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { - net_err_ratelimited("%s: Out of QPL pages\n", - priv->dev->name); - return -ENOMEM; - } - buf_state->page_info.page = rx->dqo.qpl->pages[idx]; - buf_state->addr = rx->dqo.qpl->page_buses[idx]; - rx->dqo.next_qpl_page_idx++; - } - buf_state->page_info.page_offset = 0; - buf_state->page_info.page_address = - page_address(buf_state->page_info.page); - buf_state->last_single_ref_offset = 0; - - /* The page already has 1 ref. */ - page_ref_add(buf_state->page_info.page, INT_MAX - 1); - buf_state->page_info.pagecnt_bias = INT_MAX; - - return 0; -} - static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { struct device *hdev = &priv->pdev->dev; @@ -557,48 +374,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) rx->fill_cnt += num_posted; } -static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - const u16 data_buffer_size = priv->data_buffer_size_dqo; - int pagecount; - - /* Can't reuse if we only fit one buffer per page */ - if (data_buffer_size * 2 > PAGE_SIZE) - goto mark_used; - - pagecount = gve_buf_ref_cnt(buf_state); - - /* Record the offset when we have a single remaining reference. - * - * When this happens, we know all of the other offsets of the page are - * usable. - */ - if (pagecount == 1) { - buf_state->last_single_ref_offset = - buf_state->page_info.page_offset; - } - - /* Use the next buffer sized chunk in the page. */ - buf_state->page_info.page_offset += data_buffer_size; - buf_state->page_info.page_offset &= (PAGE_SIZE - 1); - - /* If we wrap around to the same offset without ever dropping to 1 - * reference, then we don't know if this offset was ever freed. - */ - if (buf_state->page_info.page_offset == - buf_state->last_single_ref_offset) { - goto mark_used; - } - - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); - return; - -mark_used: - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); - rx->dqo.used_buf_states_cnt++; -} - static void gve_rx_skb_csum(struct sk_buff *skb, const struct gve_rx_compl_desc_dqo *desc, struct gve_ptype ptype) -- 2.51.0 From ebdfae0d377b487eabb739c55a13a2ab29f21f36 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 14 Oct 2024 13:21:07 -0700 Subject: [PATCH 11/16] gve: adopt page pool for DQ RDA mode For DQ queue format in raw DMA addressing(RDA) mode, implement page pool recycling of buffers by leveraging a few helper functions. DQ QPL mode will continue to use the exisiting recycling logic. This is because in QPL mode, the pages come from a constant set of pages that the driver pre-allocates and registers with the device. Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241014202108.1051963-3-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/Kconfig | 1 + drivers/net/ethernet/google/gve/gve.h | 22 ++- .../ethernet/google/gve/gve_buffer_mgmt_dqo.c | 180 +++++++++++++----- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 89 ++++----- 4 files changed, 198 insertions(+), 94 deletions(-) diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig index 8641a00f8e63..564862a57124 100644 --- a/drivers/net/ethernet/google/Kconfig +++ b/drivers/net/ethernet/google/Kconfig @@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE config GVE tristate "Google Virtual NIC (gVNIC) support" depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN)) + select PAGE_POOL help This driver supports Google Virtual NIC (gVNIC)" diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index bd684c7d996a..dd92949bb214 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "gve_desc.h" @@ -60,6 +61,8 @@ #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 +#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 + #define GVE_FLOW_RULES_CACHE_SIZE \ (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule)) #define GVE_FLOW_RULE_IDS_CACHE_SIZE \ @@ -102,6 +105,7 @@ struct gve_rx_slot_page_info { struct page *page; void *page_address; u32 page_offset; /* offset to write to in page */ + unsigned int buf_size; int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ u16 pad; /* adjustment for rx padding */ u8 can_flip; /* tracks if the networking stack is using the page */ @@ -273,6 +277,8 @@ struct gve_rx_ring { /* Address info of the buffers for header-split */ struct gve_header_buf hdr_bufs; + + struct page_pool *page_pool; } dqo; }; @@ -1176,10 +1182,22 @@ struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, struct gve_rx_buf_state_dqo *buf_state); struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); -int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state); void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state); +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct); +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state); +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx); + /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 8e50f0e4bb2e..05bf1f80a79c 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -12,16 +12,6 @@ int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; } -void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, - bool free_page) -{ - page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - if (free_page) - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); - bs->page_info.page = NULL; -} - struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) { struct gve_rx_buf_state_dqo *buf_state; @@ -128,56 +118,28 @@ struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx) gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); } - /* For QPL, we cannot allocate any new buffers and must - * wait for the existing ones to be available. - */ - if (rx->dqo.qpl) - return NULL; - - /* If there are no free buf states discard an entry from - * `used_buf_states` so it can be used. - */ - if (unlikely(rx->dqo.free_buf_states == -1)) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) - return buf_state; - - gve_free_page_dqo(rx->gve, buf_state, true); - gve_free_buf_state(rx, buf_state); - } - return NULL; } -int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) { struct gve_priv *priv = rx->gve; u32 idx; - if (!rx->dqo.qpl) { - int err; - - err = gve_alloc_page(priv, &priv->pdev->dev, - &buf_state->page_info.page, - &buf_state->addr, - DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; - } else { - idx = rx->dqo.next_qpl_page_idx; - if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { - net_err_ratelimited("%s: Out of QPL pages\n", - priv->dev->name); - return -ENOMEM; - } - buf_state->page_info.page = rx->dqo.qpl->pages[idx]; - buf_state->addr = rx->dqo.qpl->page_buses[idx]; - rx->dqo.next_qpl_page_idx++; + idx = rx->dqo.next_qpl_page_idx; + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; buf_state->page_info.page_offset = 0; buf_state->page_info.page_address = page_address(buf_state->page_info.page); + buf_state->page_info.buf_size = priv->data_buffer_size_dqo; buf_state->last_single_ref_offset = 0; /* The page already has 1 ref. */ @@ -187,6 +149,16 @@ int gve_alloc_page_dqo(struct gve_rx_ring *rx, return 0; } +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state) +{ + if (!buf_state->page_info.page) + return; + + page_ref_sub(buf_state->page_info.page, + buf_state->page_info.pagecnt_bias - 1); + buf_state->page_info.page = NULL; +} + void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { @@ -228,3 +200,113 @@ mark_used: gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); rx->dqo.used_buf_states_cnt++; } + +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct) +{ + struct page *page = buf_state->page_info.page; + + if (!page) + return; + + page_pool_put_page(page->pp, page, buf_state->page_info.buf_size, + allow_direct); + buf_state->page_info.page = NULL; +} + +static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + struct gve_priv *priv = rx->gve; + struct page *page; + + buf_state->page_info.buf_size = priv->data_buffer_size_dqo; + page = page_pool_alloc(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, GFP_ATOMIC); + + if (!page) + return -ENOMEM; + + buf_state->page_info.page = page; + buf_state->page_info.page_address = page_address(page); + buf_state->addr = page_pool_get_dma_addr(page); + + return 0; +} + +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx) +{ + u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num); + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .dev = &priv->pdev->dev, + .netdev = priv->dev, + .napi = &priv->ntfy_blocks[ntfy_id].napi, + .max_len = PAGE_SIZE, + .dma_dir = DMA_FROM_DEVICE, + }; + + return page_pool_create(&pp); +} + +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + gve_free_to_page_pool(rx, buf_state, true); + gve_free_buf_state(rx, buf_state); + } else { + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, + buf_state); + } +} + +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + buf_state->page_info.page = NULL; + gve_free_buf_state(rx, buf_state); + } else { + gve_dec_pagecnt_bias(&buf_state->page_info); + gve_try_recycle_buf(rx->gve, rx, buf_state); + } +} + +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) +{ + struct gve_rx_buf_state_dqo *buf_state; + + if (rx->dqo.page_pool) { + buf_state = gve_alloc_buf_state(rx); + if (WARN_ON_ONCE(!buf_state)) + return -ENOMEM; + + if (gve_alloc_from_page_pool(rx, buf_state)) + goto free_buf_state; + } else { + buf_state = gve_get_recycled_buf_state(rx); + if (unlikely(!buf_state)) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state))) + goto free_buf_state; + } + } + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = cpu_to_le64(buf_state->addr + + buf_state->page_info.page_offset); + + return 0; + +free_buf_state: + gve_free_buf_state(rx, buf_state); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index b343be2fb118..8ac0047f1ada 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -95,8 +95,10 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } } @@ -138,9 +140,11 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - /* Only free page for RDA. QPL pages are freed in gve_main. */ - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } if (rx->dqo.qpl) { @@ -167,6 +171,11 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + if (rx->dqo.page_pool) { + page_pool_destroy(rx->dqo.page_pool); + rx->dqo.page_pool = NULL; + } + gve_rx_free_hdr_bufs(priv, rx); netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); @@ -199,6 +208,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; + struct page_pool *pool; int qpl_page_cnt; size_t size; u32 qpl_id; @@ -212,8 +222,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->gve = priv; rx->q_num = idx; - rx->dqo.num_buf_states = cfg->raw_addressing ? - min_t(s16, S16_MAX, buffer_queue_slots * 4) : + rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), @@ -241,7 +250,13 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->dqo.bufq.desc_ring) goto err; - if (!cfg->raw_addressing) { + if (cfg->raw_addressing) { + pool = gve_rx_create_page_pool(priv, rx); + if (IS_ERR(pool)) + goto err; + + rx->dqo.page_pool = pool; + } else { qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); @@ -338,26 +353,14 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots); while (num_posted < num_avail_slots) { struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail]; - struct gve_rx_buf_state_dqo *buf_state; - - buf_state = gve_get_recycled_buf_state(rx); - if (unlikely(!buf_state)) { - buf_state = gve_alloc_buf_state(rx); - if (unlikely(!buf_state)) - break; - - if (unlikely(gve_alloc_page_dqo(rx, buf_state))) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); - gve_free_buf_state(rx, buf_state); - break; - } + + if (unlikely(gve_alloc_buffer(rx, desc))) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); + break; } - desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); - desc->buf_addr = cpu_to_le64(buf_state->addr + - buf_state->page_info.page_offset); if (rx->dqo.hdr_bufs.data) desc->header_buf_addr = cpu_to_le64(rx->dqo.hdr_bufs.addr + @@ -488,6 +491,9 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; + if (rx->dqo.page_pool) + skb_mark_for_recycle(skb); + if (rx->ctx.skb_tail == rx->ctx.skb_head) skb_shinfo(rx->ctx.skb_head)->frag_list = skb; else @@ -498,7 +504,7 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (rx->ctx.skb_tail != rx->ctx.skb_head) { rx->ctx.skb_head->len += buf_len; rx->ctx.skb_head->data_len += buf_len; - rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; + rx->ctx.skb_head->truesize += buf_state->page_info.buf_size; } /* Trigger ondemand page allocation if we are running low on buffers */ @@ -508,13 +514,8 @@ static int gve_rx_append_frags(struct napi_struct *napi, skb_add_rx_frag(rx->ctx.skb_tail, num_frags, buf_state->page_info.page, buf_state->page_info.page_offset, - buf_len, priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - /* Advances buffer page-offset if page is partially used. - * Marks buffer as used if page is full. - */ - gve_try_recycle_buf(priv, rx, buf_state); + buf_len, buf_state->page_info.buf_size); + gve_reuse_buffer(rx, buf_state); return 0; } @@ -548,8 +549,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } if (unlikely(compl_desc->rx_error)) { - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return -EINVAL; } @@ -573,6 +573,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (unlikely(!rx->ctx.skb_head)) goto error; rx->ctx.skb_tail = rx->ctx.skb_head; + + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); } else { unsplit = 1; } @@ -609,8 +612,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_copybreak_pkt++; u64_stats_update_end(&rx->statss); - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return 0; } @@ -625,16 +627,17 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); + skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, buf_state->page_info.page_offset, buf_len, - priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - gve_try_recycle_buf(priv, rx, buf_state); + buf_state->page_info.buf_size); + gve_reuse_buffer(rx, buf_state); return 0; error: - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + gve_free_buffer(rx, buf_state); return -ENOMEM; } -- 2.51.0 From 2e5e0932dff5dbda657de6f4b661cdab46cf7c1b Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 14 Oct 2024 13:21:08 -0700 Subject: [PATCH 12/16] gve: add support for basic queue stats Implement netdev_stats_ops to export basic per-queue stats. With page pool support for DQO added in the previous patches, rx-alloc-fail captures failures in page pool allocations as well since the rx_buf_alloc_fail stat tracked in the driver is incremented when gve_alloc_buffer returns error. Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241014202108.1051963-4-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 49 ++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 294ddcd0bf6c..e171ca248f9a 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2561,6 +2561,54 @@ static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { .ndo_queue_stop = gve_rx_queue_stop, }; +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2616,6 +2664,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. * -- 2.51.0 From 09aec57d8379f14ffde566621b920d97cc0c46e1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:18 -0700 Subject: [PATCH 13/16] rtnetlink: Panic when __rtnl_register_many() fails for builtin callers. We will replace all rtnl_register() and rtnl_register_module() with rtnl_register_many(). Currently, rtnl_register() returns nothing and prints an error message when it fails to register a rtnetlink message type and handlers. The failure happens only when rtnl_register_internal() fails to allocate rtnl_msg_handlers[protocol][msgtype], but it's unlikely for built-in callers on boot time. rtnl_register_many() unwinds the previous successful registrations on failure and returns an error, but it will be useless for built-in callers, especially some subsystems that do not have the legacy ioctl() interface and do not work without rtnetlink. Instead of booting up without rtnetlink functionality, let's panic on failure for built-in rtnl_register_many() callers. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index edcb6d43723e..8f2cdb0de4a9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -464,6 +464,10 @@ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) handler->msgtype, handler->doit, handler->dumpit, handler->flags); if (err) { + if (!handler->owner) + panic("Unable to register rtnetlink message " + "handlers, %pS\n", handlers); + __rtnl_unregister_many(handlers, i); break; } -- 2.51.0 From 181bc7875b71e75a49d75fb6f50915ef28ddcc49 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:19 -0700 Subject: [PATCH 14/16] rtnetlink: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241014201828.91221-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 63 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8f2cdb0de4a9..0fbbfeb2cb50 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6843,6 +6843,38 @@ static struct pernet_operations rtnetlink_net_ops = { .exit = rtnetlink_net_exit, }; +static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink}, + {.msgtype = RTM_DELLINK, .doit = rtnl_dellink}, + {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.msgtype = RTM_SETLINK, .doit = rtnl_setlink}, + {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get, + .dumpit = rtnl_stats_dump}, + {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set}, + {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop}, + {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, + .dumpit = rtnl_bridge_getlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK, + .doit = rtnl_bridge_dellink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK, + .doit = rtnl_bridge_setlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get, + .dumpit = rtnl_fdb_dump}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get, + .dumpit = rtnl_mdb_dump}, +}; + void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) @@ -6850,34 +6882,5 @@ void __init rtnetlink_init(void) register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - - rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); - - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - 0); - rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); + rtnl_register_many(rtnetlink_rtnl_msg_handlers); } -- 2.51.0 From d0d14aef50a6184426c5a05b9815fb2697d6d42c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:20 -0700 Subject: [PATCH 15/16] neighbour: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 77b819cd995b..395ae1626eef 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3886,17 +3886,18 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ +static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEIGH, .doit = neigh_add}, + {.msgtype = RTM_DELNEIGH, .doit = neigh_delete}, + {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info}, + {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set}, +}; + static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, - RTNL_FLAG_DUMP_UNLOCKED); - - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, - 0); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); - + rtnl_register_many(neigh_rtnl_msg_handlers); return 0; } -- 2.51.0 From cc72bb03032568f034c9fb82c63ec847938d6b99 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:21 -0700 Subject: [PATCH 16/16] net: sched: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20241014201828.91221-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 13 ++++++++----- net/sched/cls_api.c | 25 ++++++++++++++----------- net/sched/sch_api.c | 20 ++++++++++++-------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2714c4ed928e..5bbfb83ed600 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -2243,13 +2243,16 @@ out_module_put: return skb->len; } +static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_DELACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_GETACTION, .doit = tc_ctl_action, + .dumpit = tc_dump_action}, +}; + static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - 0); - + rtnl_register_many(tc_action_rtnl_msg_handlers); return 0; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 17d97bbe890f..7637f979d689 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -4055,6 +4055,19 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter, + .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain, + .dumpit = tc_dump_chain}, +}; + static int __init tc_filter_init(void) { int err; @@ -4068,17 +4081,7 @@ static int __init tc_filter_init(void) goto err_register_pernet_subsys; xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); - - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, - tc_dump_chain, 0); + rtnl_register_many(tc_filter_rtnl_msg_handlers); return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2eefa4783879..da2da2ab858b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2420,6 +2420,17 @@ static struct pernet_operations psched_net_ops = { DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif +static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc}, + {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc}, + {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc, + .dumpit = tc_dump_qdisc}, + {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass, + .dumpit = tc_dump_tclass}, +}; + static int __init pktsched_init(void) { int err; @@ -2438,14 +2449,7 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - 0); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - 0); + rtnl_register_many(psched_rtnl_msg_handlers); tc_wrapper_init(); -- 2.51.0