From ebdfae0d377b487eabb739c55a13a2ab29f21f36 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 14 Oct 2024 13:21:07 -0700 Subject: [PATCH 01/16] gve: adopt page pool for DQ RDA mode For DQ queue format in raw DMA addressing(RDA) mode, implement page pool recycling of buffers by leveraging a few helper functions. DQ QPL mode will continue to use the exisiting recycling logic. This is because in QPL mode, the pages come from a constant set of pages that the driver pre-allocates and registers with the device. Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241014202108.1051963-3-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/Kconfig | 1 + drivers/net/ethernet/google/gve/gve.h | 22 ++- .../ethernet/google/gve/gve_buffer_mgmt_dqo.c | 180 +++++++++++++----- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 89 ++++----- 4 files changed, 198 insertions(+), 94 deletions(-) diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig index 8641a00f8e63..564862a57124 100644 --- a/drivers/net/ethernet/google/Kconfig +++ b/drivers/net/ethernet/google/Kconfig @@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE config GVE tristate "Google Virtual NIC (gVNIC) support" depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN)) + select PAGE_POOL help This driver supports Google Virtual NIC (gVNIC)" diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index bd684c7d996a..dd92949bb214 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "gve_desc.h" @@ -60,6 +61,8 @@ #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 +#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 + #define GVE_FLOW_RULES_CACHE_SIZE \ (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule)) #define GVE_FLOW_RULE_IDS_CACHE_SIZE \ @@ -102,6 +105,7 @@ struct gve_rx_slot_page_info { struct page *page; void *page_address; u32 page_offset; /* offset to write to in page */ + unsigned int buf_size; int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ u16 pad; /* adjustment for rx padding */ u8 can_flip; /* tracks if the networking stack is using the page */ @@ -273,6 +277,8 @@ struct gve_rx_ring { /* Address info of the buffers for header-split */ struct gve_header_buf hdr_bufs; + + struct page_pool *page_pool; } dqo; }; @@ -1176,10 +1182,22 @@ struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, struct gve_rx_buf_state_dqo *buf_state); struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); -int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state); void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state); +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct); +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state); +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +int gve_alloc_buffer(struct 
gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx); + /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 8e50f0e4bb2e..05bf1f80a79c 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -12,16 +12,6 @@ int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; } -void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, - bool free_page) -{ - page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - if (free_page) - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); - bs->page_info.page = NULL; -} - struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) { struct gve_rx_buf_state_dqo *buf_state; @@ -128,56 +118,28 @@ struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx) gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); } - /* For QPL, we cannot allocate any new buffers and must - * wait for the existing ones to be available. - */ - if (rx->dqo.qpl) - return NULL; - - /* If there are no free buf states discard an entry from - * `used_buf_states` so it can be used. - */ - if (unlikely(rx->dqo.free_buf_states == -1)) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) - return buf_state; - - gve_free_page_dqo(rx->gve, buf_state, true); - gve_free_buf_state(rx, buf_state); - } - return NULL; } -int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) { struct gve_priv *priv = rx->gve; u32 idx; - if (!rx->dqo.qpl) { - int err; - - err = gve_alloc_page(priv, &priv->pdev->dev, - &buf_state->page_info.page, - &buf_state->addr, - DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; - } else { - idx = rx->dqo.next_qpl_page_idx; - if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { - net_err_ratelimited("%s: Out of QPL pages\n", - priv->dev->name); - return -ENOMEM; - } - buf_state->page_info.page = rx->dqo.qpl->pages[idx]; - buf_state->addr = rx->dqo.qpl->page_buses[idx]; - rx->dqo.next_qpl_page_idx++; + idx = rx->dqo.next_qpl_page_idx; + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; buf_state->page_info.page_offset = 0; buf_state->page_info.page_address = page_address(buf_state->page_info.page); + buf_state->page_info.buf_size = priv->data_buffer_size_dqo; buf_state->last_single_ref_offset = 0; /* The page already has 1 ref. 
*/ @@ -187,6 +149,16 @@ int gve_alloc_page_dqo(struct gve_rx_ring *rx, return 0; } +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state) +{ + if (!buf_state->page_info.page) + return; + + page_ref_sub(buf_state->page_info.page, + buf_state->page_info.pagecnt_bias - 1); + buf_state->page_info.page = NULL; +} + void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { @@ -228,3 +200,113 @@ mark_used: gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); rx->dqo.used_buf_states_cnt++; } + +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct) +{ + struct page *page = buf_state->page_info.page; + + if (!page) + return; + + page_pool_put_page(page->pp, page, buf_state->page_info.buf_size, + allow_direct); + buf_state->page_info.page = NULL; +} + +static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + struct gve_priv *priv = rx->gve; + struct page *page; + + buf_state->page_info.buf_size = priv->data_buffer_size_dqo; + page = page_pool_alloc(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, GFP_ATOMIC); + + if (!page) + return -ENOMEM; + + buf_state->page_info.page = page; + buf_state->page_info.page_address = page_address(page); + buf_state->addr = page_pool_get_dma_addr(page); + + return 0; +} + +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx) +{ + u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num); + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .dev = &priv->pdev->dev, + .netdev = priv->dev, + .napi = &priv->ntfy_blocks[ntfy_id].napi, + .max_len = PAGE_SIZE, + .dma_dir = DMA_FROM_DEVICE, + }; + + return page_pool_create(&pp); +} + +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + gve_free_to_page_pool(rx, buf_state, true); + gve_free_buf_state(rx, buf_state); + } else { + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, + buf_state); + } +} + +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + buf_state->page_info.page = NULL; + gve_free_buf_state(rx, buf_state); + } else { + gve_dec_pagecnt_bias(&buf_state->page_info); + gve_try_recycle_buf(rx->gve, rx, buf_state); + } +} + +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) +{ + struct gve_rx_buf_state_dqo *buf_state; + + if (rx->dqo.page_pool) { + buf_state = gve_alloc_buf_state(rx); + if (WARN_ON_ONCE(!buf_state)) + return -ENOMEM; + + if (gve_alloc_from_page_pool(rx, buf_state)) + goto free_buf_state; + } else { + buf_state = gve_get_recycled_buf_state(rx); + if (unlikely(!buf_state)) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state))) + goto free_buf_state; + } + } + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = cpu_to_le64(buf_state->addr + + buf_state->page_info.page_offset); + + return 0; + +free_buf_state: + gve_free_buf_state(rx, buf_state); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index b343be2fb118..8ac0047f1ada 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c 
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -95,8 +95,10 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } } @@ -138,9 +140,11 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - /* Only free page for RDA. QPL pages are freed in gve_main. */ - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } if (rx->dqo.qpl) { @@ -167,6 +171,11 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + if (rx->dqo.page_pool) { + page_pool_destroy(rx->dqo.page_pool); + rx->dqo.page_pool = NULL; + } + gve_rx_free_hdr_bufs(priv, rx); netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); @@ -199,6 +208,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; + struct page_pool *pool; int qpl_page_cnt; size_t size; u32 qpl_id; @@ -212,8 +222,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, rx->gve = priv; rx->q_num = idx; - rx->dqo.num_buf_states = cfg->raw_addressing ? - min_t(s16, S16_MAX, buffer_queue_slots * 4) : + rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), @@ -241,7 +250,13 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->dqo.bufq.desc_ring) goto err; - if (!cfg->raw_addressing) { + if (cfg->raw_addressing) { + pool = gve_rx_create_page_pool(priv, rx); + if (IS_ERR(pool)) + goto err; + + rx->dqo.page_pool = pool; + } else { qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); @@ -338,26 +353,14 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots); while (num_posted < num_avail_slots) { struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail]; - struct gve_rx_buf_state_dqo *buf_state; - - buf_state = gve_get_recycled_buf_state(rx); - if (unlikely(!buf_state)) { - buf_state = gve_alloc_buf_state(rx); - if (unlikely(!buf_state)) - break; - - if (unlikely(gve_alloc_page_dqo(rx, buf_state))) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); - gve_free_buf_state(rx, buf_state); - break; - } + + if (unlikely(gve_alloc_buffer(rx, desc))) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); + break; } - desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); - desc->buf_addr = cpu_to_le64(buf_state->addr + - buf_state->page_info.page_offset); if (rx->dqo.hdr_bufs.data) desc->header_buf_addr = cpu_to_le64(rx->dqo.hdr_bufs.addr + @@ -488,6 +491,9 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; + if (rx->dqo.page_pool) + skb_mark_for_recycle(skb); + if (rx->ctx.skb_tail == rx->ctx.skb_head) skb_shinfo(rx->ctx.skb_head)->frag_list = skb; else @@ -498,7 +504,7 @@ static int 
gve_rx_append_frags(struct napi_struct *napi, if (rx->ctx.skb_tail != rx->ctx.skb_head) { rx->ctx.skb_head->len += buf_len; rx->ctx.skb_head->data_len += buf_len; - rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; + rx->ctx.skb_head->truesize += buf_state->page_info.buf_size; } /* Trigger ondemand page allocation if we are running low on buffers */ @@ -508,13 +514,8 @@ static int gve_rx_append_frags(struct napi_struct *napi, skb_add_rx_frag(rx->ctx.skb_tail, num_frags, buf_state->page_info.page, buf_state->page_info.page_offset, - buf_len, priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - /* Advances buffer page-offset if page is partially used. - * Marks buffer as used if page is full. - */ - gve_try_recycle_buf(priv, rx, buf_state); + buf_len, buf_state->page_info.buf_size); + gve_reuse_buffer(rx, buf_state); return 0; } @@ -548,8 +549,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } if (unlikely(compl_desc->rx_error)) { - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return -EINVAL; } @@ -573,6 +573,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (unlikely(!rx->ctx.skb_head)) goto error; rx->ctx.skb_tail = rx->ctx.skb_head; + + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); } else { unsplit = 1; } @@ -609,8 +612,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_copybreak_pkt++; u64_stats_update_end(&rx->statss); - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return 0; } @@ -625,16 +627,17 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); + skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, buf_state->page_info.page_offset, buf_len, - priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - gve_try_recycle_buf(priv, rx, buf_state); + buf_state->page_info.buf_size); + gve_reuse_buffer(rx, buf_state); return 0; error: - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + gve_free_buffer(rx, buf_state); return -ENOMEM; } -- 2.51.0 From 2e5e0932dff5dbda657de6f4b661cdab46cf7c1b Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 14 Oct 2024 13:21:08 -0700 Subject: [PATCH 02/16] gve: add support for basic queue stats Implement netdev_stats_ops to export basic per-queue stats. With page pool support for DQO added in the previous patches, rx-alloc-fail captures failures in page pool allocations as well since the rx_buf_alloc_fail stat tracked in the driver is incremented when gve_alloc_buffer returns error. 
Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: Harshitha Ramamurthy Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241014202108.1051963-4-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 49 ++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 294ddcd0bf6c..e171ca248f9a 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2561,6 +2561,54 @@ static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { .ndo_queue_stop = gve_rx_queue_stop, }; +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2616,6 +2664,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. * -- 2.51.0 From 09aec57d8379f14ffde566621b920d97cc0c46e1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:18 -0700 Subject: [PATCH 03/16] rtnetlink: Panic when __rtnl_register_many() fails for builtin callers. We will replace all rtnl_register() and rtnl_register_module() with rtnl_register_many(). Currently, rtnl_register() returns nothing and prints an error message when it fails to register a rtnetlink message type and handlers. The failure happens only when rtnl_register_internal() fails to allocate rtnl_msg_handlers[protocol][msgtype], but it's unlikely for built-in callers on boot time. rtnl_register_many() unwinds the previous successful registrations on failure and returns an error, but it will be useless for built-in callers, especially some subsystems that do not have the legacy ioctl() interface and do not work without rtnetlink. Instead of booting up without rtnetlink functionality, let's panic on failure for built-in rtnl_register_many() callers. 
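As a minimal, hypothetical illustration of the two calling conventions this series relies on — a built-in caller that leaves .owner NULL (and therefore panics if registration fails at boot) versus a module caller that sets .owner = THIS_MODULE and must handle the returned error — consider the sketch below. The names my_doit, my_builtin_handlers, my_module_handlers and the init functions are invented for the example and are not part of these patches.

```c
#include <linux/module.h>
#include <net/rtnetlink.h>

/* my_doit() is a hypothetical request handler, for illustration only. */
static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack)
{
	return 0;
}

/* Built-in style: .owner stays NULL, so a registration failure at boot
 * panics instead of leaving a partially working rtnetlink family.
 */
static const struct rtnl_msg_handler my_builtin_handlers[] __initconst = {
	{.msgtype = RTM_GETROUTE, .doit = my_doit},
};

static int __init my_builtin_init(void)
{
	/* Built-in callers simply ignore the return value. */
	rtnl_register_many(my_builtin_handlers);
	return 0;
}

/* Module style: .owner = THIS_MODULE, so on failure the entries already
 * registered are unwound and an error is returned to the module.
 */
static const struct rtnl_msg_handler my_module_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE,
	 .doit = my_doit},
};

static int __init my_module_init(void)
{
	/* A module caller must propagate the error. */
	return rtnl_register_many(my_module_handlers);
}
module_init(my_module_init);
```
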
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index edcb6d43723e..8f2cdb0de4a9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -464,6 +464,10 @@ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) handler->msgtype, handler->doit, handler->dumpit, handler->flags); if (err) { + if (!handler->owner) + panic("Unable to register rtnetlink message " + "handlers, %pS\n", handlers); + __rtnl_unregister_many(handlers, i); break; } -- 2.51.0 From 181bc7875b71e75a49d75fb6f50915ef28ddcc49 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:19 -0700 Subject: [PATCH 04/16] rtnetlink: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241014201828.91221-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 63 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8f2cdb0de4a9..0fbbfeb2cb50 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6843,6 +6843,38 @@ static struct pernet_operations rtnetlink_net_ops = { .exit = rtnetlink_net_exit, }; +static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink}, + {.msgtype = RTM_DELLINK, .doit = rtnl_dellink}, + {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, + .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.msgtype = RTM_SETLINK, .doit = rtnl_setlink}, + {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all}, + {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get, + .dumpit = rtnl_stats_dump}, + {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set}, + {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop}, + {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, + .dumpit = rtnl_bridge_getlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK, + .doit = rtnl_bridge_dellink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK, + .doit = rtnl_bridge_setlink}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWNEIGH, .doit = rtnl_fdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get, + .dumpit = rtnl_fdb_dump}, + {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add}, + {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del, + .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, + {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get, + .dumpit = rtnl_mdb_dump}, +}; + void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) @@ -6850,34 +6882,5 @@ void __init rtnetlink_init(void) register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - 
rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - - rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); - - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); - - rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - 0); - rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); - - rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, - RTNL_FLAG_BULK_DEL_SUPPORTED); + rtnl_register_many(rtnetlink_rtnl_msg_handlers); } -- 2.51.0 From d0d14aef50a6184426c5a05b9815fb2697d6d42c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:20 -0700 Subject: [PATCH 05/16] neighbour: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 77b819cd995b..395ae1626eef 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3886,17 +3886,18 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ +static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEIGH, .doit = neigh_add}, + {.msgtype = RTM_DELNEIGH, .doit = neigh_delete}, + {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info}, + {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set}, +}; + static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, - RTNL_FLAG_DUMP_UNLOCKED); - - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, - 0); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); - + rtnl_register_many(neigh_rtnl_msg_handlers); return 0; } -- 2.51.0 From cc72bb03032568f034c9fb82c63ec847938d6b99 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:21 -0700 Subject: [PATCH 06/16] net: sched: Use rtnl_register_many(). 
We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20241014201828.91221-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 13 ++++++++----- net/sched/cls_api.c | 25 ++++++++++++++----------- net/sched/sch_api.c | 20 ++++++++++++-------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2714c4ed928e..5bbfb83ed600 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -2243,13 +2243,16 @@ out_module_put: return skb->len; } +static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_DELACTION, .doit = tc_ctl_action}, + {.msgtype = RTM_GETACTION, .doit = tc_ctl_action, + .dumpit = tc_dump_action}, +}; + static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - 0); - + rtnl_register_many(tc_action_rtnl_msg_handlers); return 0; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 17d97bbe890f..7637f979d689 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -4055,6 +4055,19 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter, + .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain}, + {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain, + .dumpit = tc_dump_chain}, +}; + static int __init tc_filter_init(void) { int err; @@ -4068,17 +4081,7 @@ static int __init tc_filter_init(void) goto err_register_pernet_subsys; xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); - - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, - tc_dump_chain, 0); + rtnl_register_many(tc_filter_rtnl_msg_handlers); return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2eefa4783879..da2da2ab858b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2420,6 +2420,17 @@ static struct pernet_operations psched_net_ops = { DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif +static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc}, + {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc}, + {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc, 
+ .dumpit = tc_dump_qdisc}, + {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass}, + {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass, + .dumpit = tc_dump_tclass}, +}; + static int __init pktsched_init(void) { int err; @@ -2438,14 +2449,7 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - 0); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - 0); + rtnl_register_many(psched_rtnl_msg_handlers); tc_wrapper_init(); -- 2.51.0 From 803838a5f6c8d0f0cfc29e9eaa768ad88485ac7f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:22 -0700 Subject: [PATCH 07/16] net: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0a86aff17f51..809b48c0a528 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -1169,6 +1169,14 @@ static void __init netns_ipv4_struct_check(void) } #endif +static const struct rtnl_msg_handler net_ns_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNSID, .doit = rtnl_net_newid, + .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.msgtype = RTM_GETNSID, .doit = rtnl_net_getid, + .dumpit = rtnl_net_dumpid, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + void __init net_ns_init(void) { struct net_generic *ng; @@ -1206,11 +1214,7 @@ void __init net_ns_init(void) if (register_pernet_subsys(&net_ns_ops)) panic("Could not register network namespace subsystems"); - rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, - RTNL_FLAG_DOIT_UNLOCKED); - rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); + rtnl_register_many(net_ns_rtnl_msg_handlers); } static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) -- 2.51.0 From 465bac91f953d343f5906db1d5f2d58e31b9ab4f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:23 -0700 Subject: [PATCH 08/16] ipv4: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 17 ++++++++++------- net/ipv4/devinet.c | 18 +++++++++++------- net/ipv4/fib_frontend.c | 14 ++++++++++---- net/ipv4/nexthop.c | 31 ++++++++++++++++++------------- net/ipv4/route.c | 8 ++++++-- 5 files changed, 55 insertions(+), 33 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 82ef090c0037..d0de9677f450 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -1291,13 +1291,18 @@ static struct pernet_operations fib_rules_net_ops = { .exit = fib_rules_net_exit, }; +static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, +}; + static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, - RTNL_FLAG_DUMP_UNLOCKED); + + rtnl_register_many(fib_rules_rtnl_msg_handlers); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) @@ -1312,9 +1317,7 @@ static int __init fib_rules_init(void) fail_unregister: unregister_pernet_subsys(&fib_rules_net_ops); fail: - rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); - rtnl_unregister(PF_UNSPEC, RTM_DELRULE); - rtnl_unregister(PF_UNSPEC, RTM_GETRULE); + rtnl_unregister_many(fib_rules_rtnl_msg_handlers); return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7c156f85b7d2..d81fff93d208 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2797,6 +2797,16 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { .set_link_af = inet_set_link_af, }; +static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr}, + {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr}, + {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, + {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, + .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + void __init devinet_init(void) { register_pernet_subsys(&devinet_ops); @@ -2804,11 +2814,5 @@ void __init devinet_init(void) rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, - RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); - rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); + rtnl_register_many(devinet_rtnl_msg_handlers); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8353518b110a..53bd26315df5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1649,6 +1649,15 @@ static struct pernet_operations fib_net_ops = { .exit_batch = fib_net_exit_batch, }; +static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWROUTE, + .doit = 
inet_rtm_newroute}, + {.protocol = PF_INET, .msgtype = RTM_DELROUTE, + .doit = inet_rtm_delroute}, + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, +}; + void __init ip_fib_init(void) { fib_trie_init(); @@ -1658,8 +1667,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, - RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE); + rtnl_register_many(fib_rtnl_msg_handlers); } diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 93aaea0006ba..570e450e008c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -4042,25 +4042,30 @@ static struct pernet_operations nexthop_net_ops = { .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; +static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop}, + {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop}, + {.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop, + .dumpit = rtm_dump_nexthop}, + {.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket, + .dumpit = rtm_dump_nexthop_bucket}, + {.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP, + .doit = rtm_new_nexthop}, + {.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP, + .dumpit = rtm_dump_nexthop}, +}; + static int __init nexthop_init(void) { register_pernet_subsys(&nexthop_net_ops); register_netdevice_notifier(&nh_netdev_notifier); - rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop, - rtm_dump_nexthop, 0); - - rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0); - rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0); - - rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket, - rtm_dump_nexthop_bucket, 0); + rtnl_register_many(nexthop_rtnl_msg_handlers); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a0b091a7df87..18a08b4f4a5a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3632,6 +3632,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ +static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, + .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, +}; + int __init ip_rt_init(void) { void *idents_hash; @@ -3689,8 +3694,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register_many(ip_rt_rtnl_msg_handlers); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); -- 2.51.0 From a37b0e4eca0436ebc17d512d70b1409956340688 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:24 -0700 Subject: [PATCH 
09/16] ipv6: Use rtnl_register_many(). We will remove rtnl_register_module() in favour of rtnl_register_many(). rtnl_register_many() will unwind the previous successful registrations on failure and simplify module error handling. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 57 ++++++++++++++++++-------------------------- net/ipv6/addrlabel.c | 28 +++++++++------------- net/ipv6/ip6_fib.c | 10 +++++--- net/ipv6/route.c | 23 ++++++++---------- 4 files changed, 51 insertions(+), 67 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f31528d4f694..ac8645ad2537 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7406,6 +7406,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = { .set_link_af = inet6_set_link_af, }; +static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK, + .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR, + .doit = inet6_rtm_newaddr}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR, + .doit = inet6_rtm_deladdr}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR, + .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST, + .dumpit = inet6_dump_ifmcaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST, + .dumpit = inet6_dump_ifacaddr, + .flags = RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF, + .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + /* * Init / cleanup code */ @@ -7449,42 +7470,10 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, - NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) + err = rtnl_register_many(addrconf_rtnl_msg_handlers); + if (err) goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR, - inet6_rtm_newaddr, NULL, 0); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR, - inet6_rtm_deladdr, NULL, 0); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, - inet6_rtm_getaddr, inet6_dump_ifaddr, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, - NULL, inet6_dump_ifmcaddr, - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, - NULL, inet6_dump_ifacaddr, - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; - err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, - inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - if (err < 0) - goto errout; err = ipv6_addr_label_rtnl_register(); if (err < 0) goto errout; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index acd70b5992a7..ab054f329e12 100644 --- a/net/ipv6/addrlabel.c 
+++ b/net/ipv6/addrlabel.c @@ -634,23 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; } +static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL, + .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL, + .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL, + .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, +}; + int __init ipv6_addr_label_rtnl_register(void) { - int ret; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL, - ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - if (ret < 0) - return ret; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL, - ip6addrlbl_newdel, - NULL, RTNL_FLAG_DOIT_UNLOCKED); - if (ret < 0) - return ret; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL, - ip6addrlbl_get, - ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED | - RTNL_FLAG_DUMP_UNLOCKED); - return ret; + return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index c9da10d971fa..6383263bfd04 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2493,6 +2493,12 @@ static struct pernet_operations fib6_net_ops = { .exit = fib6_net_exit, }; +static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE, + .dumpit = inet6_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, +}; + int __init fib6_init(void) { int ret = -ENOMEM; @@ -2506,9 +2512,7 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED | - RTNL_FLAG_DUMP_SPLIT_NLM_DONE); + ret = rtnl_register_many(fib6_rtnl_msg_handlers); if (ret) goto out_unregister_subsys; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b4251915585f..d7ce5cf2017a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6680,6 +6680,15 @@ static void bpf_iter_unregister(void) #endif #endif +static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE, + .doit = inet6_rtm_newroute}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE, + .doit = inet6_rtm_delroute}, + {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE, + .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, +}; + int __init ip6_route_init(void) { int ret; @@ -6722,19 +6731,7 @@ int __init ip6_route_init(void) if (ret) goto fib6_rules_init; - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, - inet6_rtm_newroute, NULL, 0); - if (ret < 0) - goto out_register_late_subsys; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, - inet6_rtm_delroute, NULL, 0); - if (ret < 0) - goto out_register_late_subsys; - - ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, - inet6_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED); + ret = rtnl_register_many(ip6_route_rtnl_msg_handlers); if (ret < 0) goto out_register_late_subsys; -- 2.51.0 From 
3ac84e31b33e2051e59245b8ceb25d707fa0e553 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:25 -0700 Subject: [PATCH 10/16] ipmr: Use rtnl_register_many(). We will remove rtnl_register() and rtnl_register_module() in favour of rtnl_register_many(). When it succeeds for built-in callers, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-9-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 22 +++++++++++++--------- net/ipv6/ip6mr.c | 13 +++++++++---- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7a95daeb1946..b4fc443481ce 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3137,6 +3137,17 @@ static struct pernet_operations ipmr_net_ops = { .exit_batch = ipmr_net_exit_batch, }; +static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, + .dumpit = ipmr_rtm_dumplink}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, + .doit = ipmr_rtm_route}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, + .doit = ipmr_rtm_route}, + {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, + .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute}, +}; + int __init ip_mr_init(void) { int err; @@ -3157,15 +3168,8 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); - rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, - ipmr_rtm_route, NULL, 0); - rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, - ipmr_rtm_route, NULL, 0); - - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, - NULL, ipmr_rtm_dumplink, 0); + rtnl_register_many(ipmr_rtnl_msg_handlers); + return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9528e17665fd..437a9fdb67f5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1367,6 +1367,12 @@ static struct pernet_operations ip6mr_net_ops = { .exit_batch = ip6mr_net_exit_batch, }; +static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR, + .msgtype = RTM_GETROUTE, + .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute}, +}; + int __init ip6_mr_init(void) { int err; @@ -1389,9 +1395,8 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif - err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE, - ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0); - if (err == 0) + err = rtnl_register_many(ip6mr_rtnl_msg_handlers); + if (!err) return 0; #ifdef CONFIG_IPV6_PIMSM_V2 @@ -1408,7 +1413,7 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { - rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); + rtnl_unregister_many(ip6mr_rtnl_msg_handlers); #ifdef CONFIG_IPV6_PIMSM_V2 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); #endif -- 2.51.0 From c82b031dcb19d0c899c6e209c0ae8c0f3fffcd39 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:26 -0700 Subject: [PATCH 11/16] dcb: Use rtnl_register_many(). We will remove rtnl_register() in favour of rtnl_register_many(). 
When it succeeds, rtnl_register_many() guarantees all rtnetlink types in the passed array are supported, and there is no chance that a part of message types is not supported. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-10-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2e6b8c8fd2de..03eb1d941fca 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2408,6 +2408,11 @@ static struct notifier_block dcbnl_nb __read_mostly = { .notifier_call = dcbnl_netdevice_event, }; +static const struct rtnl_msg_handler dcbnl_rtnl_msg_handlers[] __initconst = { + {.msgtype = RTM_GETDCB, .doit = dcb_doit}, + {.msgtype = RTM_SETDCB, .doit = dcb_doit}, +}; + static int __init dcbnl_init(void) { int err; @@ -2416,8 +2421,7 @@ static int __init dcbnl_init(void) if (err) return err; - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); + rtnl_register_many(dcbnl_rtnl_msg_handlers); return 0; } -- 2.51.0 From df96b8f45aa5808052088bbd2337f837784f06de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 13:18:27 -0700 Subject: [PATCH 12/16] can: gw: Use rtnl_register_many(). We will remove rtnl_register_module() in favour of rtnl_register_many(). rtnl_register_many() will unwind the previous successful registrations on failure and simplify module error handling. Let's use rtnl_register_many() instead. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Marc Kleine-Budde Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-11-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/can/gw.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index 37528826935e..ef93293c1fae 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1265,6 +1265,15 @@ static struct pernet_operations cangw_pernet_ops = { .exit_batch = cangw_pernet_exit_batch, }; +static const struct rtnl_msg_handler cgw_rtnl_msg_handlers[] __initconst_or_module = { + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE, + .doit = cgw_create_job}, + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_DELROUTE, + .doit = cgw_remove_job}, + {.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE, + .dumpit = cgw_dump_jobs}, +}; + static __init int cgw_module_init(void) { int ret; @@ -1290,27 +1299,13 @@ static __init int cgw_module_init(void) if (ret) goto out_register_notifier; - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, - NULL, cgw_dump_jobs, 0); - if (ret) - goto out_rtnl_register1; - - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, - cgw_create_job, NULL, 0); - if (ret) - goto out_rtnl_register2; - ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, - cgw_remove_job, NULL, 0); + ret = rtnl_register_many(cgw_rtnl_msg_handlers); if (ret) - goto out_rtnl_register3; + goto out_rtnl_register; return 0; -out_rtnl_register3: - rtnl_unregister(PF_CAN, RTM_NEWROUTE); -out_rtnl_register2: - rtnl_unregister(PF_CAN, RTM_GETROUTE); -out_rtnl_register1: +out_rtnl_register: unregister_netdevice_notifier(¬ifier); out_register_notifier: kmem_cache_destroy(cgw_cache); -- 2.51.0 From e1c6c383123ab1caadbfe39b3362ce0cc09dd766 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 14 Oct 2024 
13:18:28 -0700 Subject: [PATCH 13/16] rtnetlink: Remove rtnl_register() and rtnl_register_module(). No one uses rtnl_register() and rtnl_register_module(). Let's remove them. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014201828.91221-12-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rtnetlink.h | 15 ++++++--- net/core/rtnetlink.c | 74 ++++++++++++----------------------------- 2 files changed, 31 insertions(+), 58 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2d3eb7cb4dff..bb49c5708ce7 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -29,6 +29,16 @@ static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) return msgtype & RTNL_KIND_MASK; } +/** + * struct rtnl_msg_handler - rtnetlink message type and handlers + * + * @owner: NULL for built-in, THIS_MODULE for module + * @protocol: Protocol family or PF_UNSPEC + * @msgtype: rtnetlink message type + * @doit: Function pointer called for each request message + * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions + */ struct rtnl_msg_handler { struct module *owner; int protocol; @@ -38,11 +48,6 @@ struct rtnl_msg_handler { int flags; }; -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_register_module(struct module *owner, int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); -int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0fbbfeb2cb50..a9c92392fb1d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -338,57 +338,6 @@ unlock: return ret; } -/** - * rtnl_register_module - Register a rtnetlink message type - * - * @owner: module registering the hook (THIS_MODULE) - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Like rtnl_register, but for use by removable modules. - */ -int rtnl_register_module(struct module *owner, - int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - return rtnl_register_internal(owner, protocol, msgtype, - doit, dumpit, flags); -} -EXPORT_SYMBOL_GPL(rtnl_register_module); - -/** - * rtnl_register - Register a rtnetlink message type - * @protocol: Protocol family or PF_UNSPEC - * @msgtype: rtnetlink message type - * @doit: Function pointer called for each request message - * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions - * - * Registers the specified function pointers (at least one of them has - * to be non-NULL) to be called whenever a request message for the - * specified protocol family and message type is received. - * - * The special protocol family PF_UNSPEC may be used to define fallback - * function pointers for the case when no entry for the specific protocol - * family exists. 
- */ -void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit, - unsigned int flags) -{ - int err; - - err = rtnl_register_internal(NULL, protocol, msgtype, doit, dumpit, - flags); - if (err) - pr_err("Unable to register rtnetlink message handler, " - "protocol = %d, message type = %d\n", protocol, msgtype); -} - /** * rtnl_unregister - Unregister a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC @@ -396,7 +345,7 @@ void rtnl_register(int protocol, int msgtype, * * Returns 0 on success or a negative error code. */ -int rtnl_unregister(int protocol, int msgtype) +static int rtnl_unregister(int protocol, int msgtype) { struct rtnl_link __rcu **tab; struct rtnl_link *link; @@ -419,7 +368,6 @@ int rtnl_unregister(int protocol, int msgtype) return 0; } -EXPORT_SYMBOL_GPL(rtnl_unregister); /** * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol @@ -454,6 +402,26 @@ void rtnl_unregister_all(int protocol) } EXPORT_SYMBOL_GPL(rtnl_unregister_all); +/** + * __rtnl_register_many - Register rtnetlink message types + * @handlers: Array of struct rtnl_msg_handlers + * @n: The length of @handlers + * + * Registers the specified function pointers (at least one of them has + * to be non-NULL) to be called whenever a request message for the + * specified protocol family and message type is received. + * + * The special protocol family PF_UNSPEC may be used to define fallback + * function pointers for the case when no entry for the specific protocol + * family exists. + * + * When one element of @handlers fails to register, + * 1) built-in: panics. + * 2) modules : the previous successful registrations are unwinded + * and an error is returned. + * + * Use rtnl_register_many(). + */ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) { const struct rtnl_msg_handler *handler; -- 2.51.0 From 081c9c0265c91b8333165aa6230c20bcbc6f7cbf Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 10 Oct 2024 14:07:16 +0100 Subject: [PATCH 14/16] net: phy: realtek: read duplex and gbit master from PHYSR register The PHYSR MMD register is present and defined equally for all RTL82xx Ethernet PHYs. Read duplex and Gbit master bits from rtlgen_decode_speed() and rename it to rtlgen_decode_physr(). 
Signed-off-by: Daniel Golle Link: https://patch.msgid.link/b9a76341da851a18c985bc4774fa295babec79bb.1728565530.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 41 +++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 166f6a728373..91656061ebfa 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -80,15 +80,18 @@ #define RTL822X_VND2_GANLPAR 0xa414 -#define RTL822X_VND2_PHYSR 0xa434 - #define RTL8366RB_POWER_SAVE 0x15 #define RTL8366RB_POWER_SAVE_ON BIT(12) #define RTL9000A_GINMR 0x14 #define RTL9000A_GINMR_LINK_STATUS BIT(4) -#define RTLGEN_SPEED_MASK 0x0630 +#define RTL_VND2_PHYSR 0xa434 +#define RTL_VND2_PHYSR_DUPLEX BIT(3) +#define RTL_VND2_PHYSR_SPEEDL GENMASK(5, 4) +#define RTL_VND2_PHYSR_SPEEDH GENMASK(10, 9) +#define RTL_VND2_PHYSR_MASTER BIT(11) +#define RTL_VND2_PHYSR_SPEED_MASK (RTL_VND2_PHYSR_SPEEDL | RTL_VND2_PHYSR_SPEEDH) #define RTL_GENERIC_PHYID 0x001cc800 #define RTL_8211FVD_PHYID 0x001cc878 @@ -660,9 +663,18 @@ static int rtl8366rb_config_init(struct phy_device *phydev) } /* get actual speed to cover the downshift case */ -static void rtlgen_decode_speed(struct phy_device *phydev, int val) +static void rtlgen_decode_physr(struct phy_device *phydev, int val) { - switch (val & RTLGEN_SPEED_MASK) { + /* bit 3 + * 0: Half Duplex + * 1: Full Duplex + */ + if (val & RTL_VND2_PHYSR_DUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + switch (val & RTL_VND2_PHYSR_SPEED_MASK) { case 0x0000: phydev->speed = SPEED_10; break; @@ -684,6 +696,19 @@ static void rtlgen_decode_speed(struct phy_device *phydev, int val) default: break; } + + /* bit 11 + * 0: Slave Mode + * 1: Master Mode + */ + if (phydev->speed >= 1000) { + if (val & RTL_VND2_PHYSR_MASTER) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + } else { + phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; + } } static int rtlgen_read_status(struct phy_device *phydev) @@ -701,7 +726,7 @@ static int rtlgen_read_status(struct phy_device *phydev) if (val < 0) return val; - rtlgen_decode_speed(phydev, val); + rtlgen_decode_physr(phydev, val); return 0; } @@ -1007,11 +1032,11 @@ static int rtl822x_c45_read_status(struct phy_device *phydev) return 0; /* Read actual speed from vendor register. */ - val = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL822X_VND2_PHYSR); + val = phy_read_mmd(phydev, MDIO_MMD_VEND2, RTL_VND2_PHYSR); if (val < 0) return val; - rtlgen_decode_speed(phydev, val); + rtlgen_decode_physr(phydev, val); return 0; } -- 2.51.0 From 68d5cd09e8919679ce13b85950debea4b2e98e04 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 10 Oct 2024 14:07:26 +0100 Subject: [PATCH 15/16] net: phy: realtek: change order of calls in C22 read_status() Always call rtlgen_read_status() first, so genphy_read_status() which is called by it clears bits in case auto-negotiation has not completed. Also clear 10GBT link-partner advertisement bits in case auto-negotiation is disabled or has not completed. 
Suggested-by: Russell King (Oracle) Signed-off-by: Daniel Golle Link: https://patch.msgid.link/b15929a41621d215c6b2b57393368086589569ec.1728565530.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 91656061ebfa..dd4801d82a84 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -949,17 +949,25 @@ static void rtl822xb_update_interface(struct phy_device *phydev) static int rtl822x_read_status(struct phy_device *phydev) { - if (phydev->autoneg == AUTONEG_ENABLE) { - int lpadv = phy_read_paged(phydev, 0xa5d, 0x13); + int lpadv, ret; - if (lpadv < 0) - return lpadv; + ret = rtlgen_read_status(phydev); + if (ret < 0) + return ret; - mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, - lpadv); + if (phydev->autoneg == AUTONEG_DISABLE || + !phydev->autoneg_complete) { + mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, 0); + return 0; } - return rtlgen_read_status(phydev); + lpadv = phy_read_paged(phydev, 0xa5d, 0x13); + if (lpadv < 0) + return lpadv; + + mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, lpadv); + + return 0; } static int rtl822xb_read_status(struct phy_device *phydev) -- 2.51.0 From 5cb409b3960e75467cbb0a8e1e5596b4490570e3 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 10 Oct 2024 14:07:39 +0100 Subject: [PATCH 16/16] net: phy: realtek: clear 1000Base-T link partner advertisement Clear 1000Base-T link partner advertisement bits in Clause-45 read_status() function in case auto-negotiation is disabled or has not been completed. Signed-off-by: Daniel Golle Link: https://patch.msgid.link/9dc9b47b2d675708afef3ad366bfd78eb584d958.1728565530.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index dd4801d82a84..905038b1bb64 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1026,6 +1026,10 @@ static int rtl822x_c45_read_status(struct phy_device *phydev) if (ret < 0) return ret; + if (phydev->autoneg == AUTONEG_DISABLE || + !genphy_c45_aneg_done(phydev)) + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, 0); + /* Vendor register as C45 has no standardized support for 1000BaseT */ if (phydev->autoneg == AUTONEG_ENABLE) { val = phy_read_mmd(phydev, MDIO_MMD_VEND2, -- 2.51.0
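As a companion to the rtnl_register_many() conversions earlier in this series, the sketch below illustrates the registration pattern a loadable module is left with once rtnl_register_module() and the per-msgtype unwind labels are gone. It is only an illustration, not code from the series: the foo_* names and stub handlers are hypothetical, the PF_CAN/RTM_* pairing simply mirrors the can-gw conversion above, and the rtnl_unregister_many() teardown at module exit is assumed to be the counterpart of rtnl_register_many() rather than something shown in the patches here.

/* Minimal sketch of array-based rtnetlink registration in a module.
 * foo_doit()/foo_dumpit() are hypothetical stubs; the protocol and
 * message types are illustrative only and mirror the can-gw example.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <net/rtnetlink.h>

static int foo_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		    struct netlink_ext_ack *extack)
{
	return 0;	/* handle a single request message here */
}

static int foo_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	return 0;	/* fill skb for an NLM_F_DUMP request here */
}

static const struct rtnl_msg_handler foo_rtnl_msg_handlers[] __initconst_or_module = {
	{.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_NEWROUTE,
	 .doit = foo_doit},
	{.owner = THIS_MODULE, .protocol = PF_CAN, .msgtype = RTM_GETROUTE,
	 .dumpit = foo_dumpit},
};

static int __init foo_init(void)
{
	/* Either every entry registers or none does: on failure the
	 * earlier successful registrations are unwound internally,
	 * so a single error check replaces per-msgtype goto labels.
	 */
	return rtnl_register_many(foo_rtnl_msg_handlers);
}

static void __exit foo_exit(void)
{
	/* Assumed counterpart macro; unregisters the same array. */
	rtnl_unregister_many(foo_rtnl_msg_handlers);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

Because rtnl_register_many() either registers the whole array or unwinds its own partial registrations, the init path needs only one error check instead of one unwind label per message type, which is exactly the simplification visible in the can-gw and dcbnl patches above.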