www.infradead.org Git - users/dwmw2/linux.git/commitdiff
net: napi: Make napi_defer_hard_irqs per-NAPI
author Joe Damato <jdamato@fastly.com>
Fri, 11 Oct 2024 18:44:56 +0000 (18:44 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 15 Oct 2024 00:54:28 +0000 (17:54 -0700)
Add defer_hard_irqs to napi_struct in preparation for per-NAPI
settings.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device napi_defer_hard_irqs field.
Reads from sysfs show the net_device field.

The ability to set defer_hard_irqs on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Documentation/networking/net_cachelines/net_device.rst
include/linux/netdevice.h
net/core/dev.c
net/core/dev.h
net/core/net-sysfs.c

index 1b018ac35e9a63c311ae72a1afc04964728bdbee..5a7388b2ab6f0a568fd67ae4057006d81adf640d 100644 (file)
@@ -186,4 +186,5 @@ struct dpll_pin*                    dpll_pin
 struct hlist_head                   page_pools
 struct dim_irq_moder*               irq_moder
 u64                                 max_pacing_offload_horizon
+u32                                 napi_defer_hard_irqs
 =================================== =========================== =================== =================== ===================================================================================
index e6b93d01e631437a2cc7066e9ecec50345f6fef8..2e7bc23660ecc490dd2b1cee9b91f10c91c224f6 100644 (file)
@@ -373,6 +373,7 @@ struct napi_struct {
        unsigned int            napi_id;
        struct hrtimer          timer;
        struct task_struct      *thread;
+       u32                     defer_hard_irqs;
        /* control-path-only fields follow */
        struct list_head        dev_list;
        struct hlist_node       napi_hash_node;
@@ -2085,7 +2086,6 @@ struct net_device {
        unsigned int            real_num_rx_queues;
        struct netdev_rx_queue  *_rx;
        unsigned long           gro_flush_timeout;
-       u32                     napi_defer_hard_irqs;
        unsigned int            gro_max_size;
        unsigned int            gro_ipv4_max_size;
        rx_handler_func_t __rcu *rx_handler;
@@ -2413,6 +2413,7 @@ struct net_device {
        struct dim_irq_moder    *irq_moder;
 
        u64                     max_pacing_offload_horizon;
+       u32                     napi_defer_hard_irqs;
 
        /**
         * @lock: protects @net_shaper_hierarchy, feel free to use for other
index b590eefce3b41379d86edcfa8315ee316dbe8fce..fbaa9eabf77f80a9703c4a3e4ef3afec4278ae56 100644 (file)
@@ -6233,7 +6233,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
        if (work_done) {
                if (n->gro_bitmask)
                        timeout = READ_ONCE(n->dev->gro_flush_timeout);
-               n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+               n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
        }
        if (n->defer_hard_irqs_count > 0) {
                n->defer_hard_irqs_count--;
@@ -6371,7 +6371,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
        bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 
        if (flags & NAPI_F_PREFER_BUSY_POLL) {
-               napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+               napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
                timeout = READ_ONCE(napi->dev->gro_flush_timeout);
                if (napi->defer_hard_irqs_count && timeout) {
                        hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
@@ -6653,6 +6653,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
        INIT_HLIST_NODE(&napi->napi_hash_node);
        hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
        napi->timer.function = napi_watchdog;
+       napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
        init_gro_hash(napi);
        napi->skb = NULL;
        INIT_LIST_HEAD(&napi->rx_list);
@@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
 
        if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
                dev->gro_flush_timeout = 20000;
-               dev->napi_defer_hard_irqs = 1;
+               netdev_set_defer_hard_irqs(dev, 1);
        }
 }
 EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
@@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void)
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
-       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void)
 #ifdef CONFIG_NET_XGRESS
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
 #endif
-       CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+       CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100);
 }
 
 /*
index d3ea92949ff38d4a1e6df4e6e1c27fbd79ecc35a..0716b1048261880a731dd03969534621dec15aba 100644 (file)
@@ -148,6 +148,42 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
        WRITE_ONCE(dev->gro_ipv4_max_size, size);
 }
 
+/**
+ * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
+ * @n: napi struct to get the defer_hard_irqs field from
+ *
+ * The field is read with READ_ONCE().
+ *
+ * Return: the per-NAPI value of the defer_hard_irqs field.
+ */
+static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n)
+{
+       return READ_ONCE(n->defer_hard_irqs);
+}
+
+/**
+ * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
+ * @n: napi_struct whose defer_hard_irqs field is to be set
+ * @defer: the value the field should be set to
+ *
+ * The value is stored with WRITE_ONCE(); only this one napi_struct is
+ * modified.
+ */
+static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer)
+{
+       WRITE_ONCE(n->defer_hard_irqs, defer);
+}
+
+/**
+ * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
+ * @netdev: the net_device for which all NAPIs will have defer_hard_irqs set
+ * @defer: the defer_hard_irqs value to set
+ *
+ * Writes @defer to @netdev->napi_defer_hard_irqs first, then to each
+ * napi_struct linked on @netdev->napi_list.
+ *
+ * NOTE(review): the list walk takes no lock in this helper; presumably
+ * callers serialize against NAPI add/delete (e.g. by holding RTNL) -
+ * confirm against the call sites.
+ */
+static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
+                                             u32 defer)
+{
+       struct napi_struct *napi;
+
+       WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
+       list_for_each_entry(napi, &netdev->napi_list, dev_list)
+               napi_set_defer_hard_irqs(napi, defer);
+}
+
 int rps_cpumask_housekeeping(struct cpumask *mask);
 
 #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
index 05cf5347f25e89f095c346c0503cfd8ab4766615..25125f356a154b9cc0b95e9a256bc9e62cd6d52b 100644 (file)
@@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val
        if (val > S32_MAX)
                return -ERANGE;
 
-       WRITE_ONCE(dev->napi_defer_hard_irqs, val);
+       netdev_set_defer_hard_irqs(dev, (u32)val);
        return 0;
 }