net: no longer assume RTNL is held in flush_all_backlogs()
author Eric Dumazet <edumazet@google.com>
Tue, 14 Jan 2025 20:55:28 +0000 (20:55 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 16 Jan 2025 03:17:04 +0000 (19:17 -0800)
flush_all_backlogs() uses per-cpu and static data to hold its
temporary state, on the assumption that it is called under RTNL
protection.

The following patch in the series will break this assumption.

Instead, use dynamically allocated memory.

In the unlikely case the allocation fails, fall back to memory
allocated once at boot time (a simplified, standalone sketch of
this pattern follows the trailers below).

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jesse Brandeburg <jbrandeburg@cloudflare.com>
Link: https://patch.msgid.link/20250114205531.967841-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
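
The pattern described above, and implemented in the diff below, is: try to kmalloc() a flush_backlogs structure sized for nr_cpu_ids CPUs; if that fails, take flush_backlogs_mutex and reuse the single fallback instance allocated in net_dev_init(); on the way out, either kfree() the private copy or release the mutex. The user-space C sketch below illustrates the same allocate-or-fall-back shape; the names flush_ctx, flush_ctx_fallback, flush_ctx_alloc() and n_cpus are illustrative stand-ins rather than kernel identifiers, and malloc()/pthreads stand in for kmalloc()/struct_size_t() and the kernel mutex.

/*
 * Minimal sketch of the fallback pattern: prefer a freshly allocated
 * buffer, and fall back to a single pre-allocated one (serialized by
 * a mutex) when the allocation fails.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct flush_ctx {
	unsigned int nr;	/* stands in for the cpumask */
	int work[];		/* stands in for struct work_struct w[] */
};

static struct flush_ctx *flush_ctx_fallback;	/* allocated once at startup */
static pthread_mutex_t flush_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;

static struct flush_ctx *flush_ctx_alloc(unsigned int n_cpus)
{
	/* header plus n_cpus trailing elements, like struct_size_t() */
	return malloc(sizeof(struct flush_ctx) + n_cpus * sizeof(int));
}

static void flush_all(unsigned int n_cpus)
{
	struct flush_ctx *ptr = flush_ctx_alloc(n_cpus);

	if (!ptr) {
		/* serialize users of the shared fallback buffer */
		pthread_mutex_lock(&flush_ctx_mutex);
		ptr = flush_ctx_fallback;
	}

	ptr->nr = n_cpus;
	for (unsigned int cpu = 0; cpu < n_cpus; cpu++)
		ptr->work[cpu] = 0;	/* the kernel queues real work here */

	/* ... wait for the per-CPU work to complete ... */

	if (ptr != flush_ctx_fallback)
		free(ptr);
	else
		pthread_mutex_unlock(&flush_ctx_mutex);
}

int main(void)
{
	unsigned int n_cpus = 4;

	flush_ctx_fallback = flush_ctx_alloc(n_cpus);	/* "boot-time" copy */
	if (!flush_ctx_fallback)
		return 1;

	flush_all(n_cpus);
	free(flush_ctx_fallback);
	printf("done\n");
	return 0;
}

Because the fallback sits behind a mutex, a failed allocation only serializes concurrent flushers; it never makes the flush itself fail.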
net/core/dev.c

index 115a7a0a110425d5fd3eaf68f7b0e63122a92165..41da51f95486aa4d68f1cc76e92f0836af3c5807 100644
@@ -6013,8 +6013,6 @@ void netif_receive_skb_list(struct list_head *head)
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
-static DEFINE_PER_CPU(struct work_struct, flush_works);
-
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
@@ -6071,36 +6069,54 @@ static bool flush_required(int cpu)
        return true;
 }
 
+struct flush_backlogs {
+       cpumask_t               flush_cpus;
+       struct work_struct      w[];
+};
+
+static struct flush_backlogs *flush_backlogs_alloc(void)
+{
+       return kmalloc(struct_size_t(struct flush_backlogs, w, nr_cpu_ids),
+                      GFP_KERNEL);
+}
+
+static struct flush_backlogs *flush_backlogs_fallback;
+static DEFINE_MUTEX(flush_backlogs_mutex);
+
 static void flush_all_backlogs(void)
 {
-       static cpumask_t flush_cpus;
+       struct flush_backlogs *ptr = flush_backlogs_alloc();
        unsigned int cpu;
 
-       /* since we are under rtnl lock protection we can use static data
-        * for the cpumask and avoid allocating on stack the possibly
-        * large mask
-        */
-       ASSERT_RTNL();
+       if (!ptr) {
+               mutex_lock(&flush_backlogs_mutex);
+               ptr = flush_backlogs_fallback;
+       }
+       cpumask_clear(&ptr->flush_cpus);
 
        cpus_read_lock();
 
-       cpumask_clear(&flush_cpus);
        for_each_online_cpu(cpu) {
                if (flush_required(cpu)) {
-                       queue_work_on(cpu, system_highpri_wq,
-                                     per_cpu_ptr(&flush_works, cpu));
-                       cpumask_set_cpu(cpu, &flush_cpus);
+                       INIT_WORK(&ptr->w[cpu], flush_backlog);
+                       queue_work_on(cpu, system_highpri_wq, &ptr->w[cpu]);
+                       __cpumask_set_cpu(cpu, &ptr->flush_cpus);
                }
        }
 
        /* we can have in flight packet[s] on the cpus we are not flushing,
         * synchronize_net() in unregister_netdevice_many() will take care of
-        * them
+        * them.
         */
-       for_each_cpu(cpu, &flush_cpus)
-               flush_work(per_cpu_ptr(&flush_works, cpu));
+       for_each_cpu(cpu, &ptr->flush_cpus)
+               flush_work(&ptr->w[cpu]);
 
        cpus_read_unlock();
+
+       if (ptr != flush_backlogs_fallback)
+               kfree(ptr);
+       else
+               mutex_unlock(&flush_backlogs_mutex);
 }
 
 static void net_rps_send_ipi(struct softnet_data *remsd)
@@ -12313,12 +12329,13 @@ static int __init net_dev_init(void)
         *      Initialise the packet receive queues.
         */
 
+       flush_backlogs_fallback = flush_backlogs_alloc();
+       if (!flush_backlogs_fallback)
+               goto out;
+
        for_each_possible_cpu(i) {
-               struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-               INIT_WORK(flush, flush_backlog);
-
                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
 #ifdef CONFIG_XFRM_OFFLOAD