workqueue: Initialize unbound CPU pods later in the boot

author Tejun Heo <tj@kernel.org>

Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)

committer Tejun Heo <tj@kernel.org>

Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)
author Tejun Heo <tj@kernel.org>
Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)
committer Tejun Heo <tj@kernel.org>
Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h

index f0c10f491b15a39c97cecfa6c9cfef6421c2e52c..bab9fa3453eddf25edafa0416329a2bd9c07390f 100644 (file)
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -672,5 +672,6 @@ int workqueue_offline_cpu(unsigned int cpu);
  
  void __init workqueue_init_early(void);
  void __init workqueue_init(void);
+void __init workqueue_init_topology(void);
  
  #endif
diff --git a/init/main.c b/init/main.c

index ad920fac325c367d813f7e9a552deb89b7e0a933..436d73261810bd8ccd7e65a121fdc1556eb67ae9 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -1540,6 +1540,7 @@ static noinline void __init kernel_init_freeable(void)
         smp_init();
         sched_init_smp();
  
+       workqueue_init_topology();
         padata_init();
         page_alloc_init_late();
  
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 1e528b7e12c562683b537b25af337467a4a93d9f..5914c820a4f1c0ca16c8d976fe137b72cdd83068 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -6256,17 +6256,15 @@ static inline void wq_watchdog_init(void) { }
  
  #endif /* CONFIG_WQ_WATCHDOG */
  
-static void wq_pod_init(void);
-
  /**
   * workqueue_init_early - early init for workqueue subsystem
   *
- * This is the first half of two-staged workqueue subsystem initialization
- * and invoked as soon as the bare basics - memory allocation, cpumasks and
- * idr are up.  It sets up all the data structures and system workqueues
- * and allows early boot code to create workqueues and queue/cancel work
- * items.  Actual work item execution starts only after kthreads can be
- * created and scheduled right before early initcalls.
+ * This is the first step of three-staged workqueue subsystem initialization and
+ * invoked as soon as the bare basics - memory allocation, cpumasks and idr are
+ * up. It sets up all the data structures and system workqueues and allows early
+ * boot code to create workqueues and queue/cancel work items. Actual work item
+ * execution starts only after kthreads can be created and scheduled right
+ * before early initcalls.
   */
  void __init workqueue_init_early(void)
  {
@@ -6284,6 +6282,9 @@ void __init workqueue_init_early(void)
  
         pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
  
+       wq_update_pod_attrs_buf = alloc_workqueue_attrs();
+       BUG_ON(!wq_update_pod_attrs_buf);
+
         /* initialize CPU pools */
         for_each_possible_cpu(cpu) {
                 struct worker_pool *pool;
@@ -6381,11 +6382,11 @@ static void __init wq_cpu_intensive_thresh_init(void)
  /**
   * workqueue_init - bring workqueue subsystem fully online
   *
- * This is the latter half of two-staged workqueue subsystem initialization
- * and invoked as soon as kthreads can be created and scheduled.
- * Workqueues have been created and work items queued on them, but there
- * are no kworkers executing the work items yet.  Populate the worker pools
- * with the initial workers and enable future kworker creations.
+ * This is the second step of three-staged workqueue subsystem initialization
+ * and invoked as soon as kthreads can be created and scheduled. Workqueues have
+ * been created and work items queued on them, but there are no kworkers
+ * executing the work items yet. Populate the worker pools with the initial
+ * workers and enable future kworker creations.
   */
  void __init workqueue_init(void)
  {
@@ -6395,18 +6396,12 @@ void __init workqueue_init(void)
  
         wq_cpu_intensive_thresh_init();
  
-       /*
-        * It'd be simpler to initialize pods in workqueue_init_early() but CPU
-        * to node mapping may not be available that early on some archs such as
-        * power and arm64. As per-cpu pools created previously could be missing
-        * node hint and unbound pool pod affinity, fix them up.
-        *
-        * Also, while iterating workqueues, create rescuers if requested.
-        */
-       wq_pod_init();
-
         mutex_lock(&wq_pool_mutex);
  
+       /*
+        * Per-cpu pools created earlier could be missing node hint. Fix them
+        * up. Also, create a rescuer for workqueues that requested it.
+        */
         for_each_possible_cpu(cpu) {
                 for_each_cpu_worker_pool(pool, cpu) {
                         pool->node = cpu_to_node(cpu);
@@ -6414,7 +6409,6 @@ void __init workqueue_init(void)
         }
  
         list_for_each_entry(wq, &workqueues, list) {
-               wq_update_pod(wq, smp_processor_id(), smp_processor_id(), true);
                 WARN(init_rescuer(wq),
                      "workqueue: failed to create early rescuer for %s",
                      wq->name);
@@ -6437,8 +6431,16 @@ void __init workqueue_init(void)
         wq_watchdog_init();
  }
  
-static void __init wq_pod_init(void)
+/**
+ * workqueue_init_topology - initialize CPU pods for unbound workqueues
+ *
+ * This is the third step of three-staged workqueue subsystem initialization and
+ * invoked after SMP and topology information are fully initialized. It
+ * initializes the unbound CPU pods accordingly.
+ */
+void __init workqueue_init_topology(void)
  {
+       struct workqueue_struct *wq;
         cpumask_var_t *tbl;
         int node, cpu;
  
@@ -6452,8 +6454,7 @@ static void __init wq_pod_init(void)
                 }
         }
  
-       wq_update_pod_attrs_buf = alloc_workqueue_attrs();
-       BUG_ON(!wq_update_pod_attrs_buf);
+       mutex_lock(&wq_pool_mutex);
  
         /*
          * We want masks of possible CPUs of each node which isn't readily
@@ -6474,6 +6475,19 @@ static void __init wq_pod_init(void)
  
         wq_pod_cpus = tbl;
         wq_pod_enabled = true;
+
+       /*
+        * Workqueues allocated earlier would have all CPUs sharing the default
+        * worker pool. Explicitly call wq_update_pod() on all workqueue and CPU
+        * combinations to apply per-pod sharing.
+        */
+       list_for_each_entry(wq, &workqueues, list) {
+               for_each_online_cpu(cpu) {
+                       wq_update_pod(wq, cpu, cpu, true);
+               }
+       }
+
+       mutex_unlock(&wq_pool_mutex);
  }
  
  void __warn_flushing_systemwide_wq(void)
author	Tejun Heo <tj@kernel.org>
	Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)
committer	Tejun Heo <tj@kernel.org>
	Tue, 8 Aug 2023 01:57:24 +0000 (15:57 -1000)
include/linux/workqueue.h		patch \| blob \| history
init/main.c		patch \| blob \| history
kernel/workqueue.c		patch \| blob \| history