mm/page_alloc: fix deadlock on cpu_hotplug_lock in __accept_page()
author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
	Sat, 29 Mar 2025 17:10:29 +0000 (19:10 +0200)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 18 Apr 2025 03:10:05 +0000 (20:10 -0700)
When the last page in the zone is accepted, __accept_page() calls
static_branch_dec().  This function takes cpu_hotplug_lock, which can lead
to a deadlock if the allocation occurs on the CPU bringup path, as
_cpu_up() also takes the lock.
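
To make the circular wait concrete, here is a minimal userspace model of
the deadlock (an illustrative sketch, not kernel code: cpu_hotplug_lock
is modeled as a pthread rwlock, main() plays the role of cpu_up() holding
the writer side, and bringup_path() stands in for the allocation that
accepts the last page):

#include <pthread.h>

static pthread_rwlock_t cpu_hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Models the bringup-path allocation: accepting the last page ends in
 * static_branch_dec(), which takes cpus_read_lock() internally. */
static void *bringup_path(void *arg)
{
	pthread_rwlock_rdlock(&cpu_hotplug_lock);	/* blocks: writer held */
	pthread_rwlock_unlock(&cpu_hotplug_lock);
	return NULL;
}

int main(void)
{
	pthread_t bringup;

	/* Models cpu_up(): cpus_write_lock() is held across _cpu_up(). */
	pthread_rwlock_wrlock(&cpu_hotplug_lock);
	pthread_create(&bringup, NULL, bringup_path, NULL);
	pthread_join(bringup, NULL);	/* waits on a blocked reader: deadlock */
	pthread_rwlock_unlock(&cpu_hotplug_lock);
	return 0;
}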

To prevent this deadlock, defer static_branch_dec() to a workqueue.

Call static_branch_dec() directly only when workqueues are not yet
initialized.  Workqueues are initialized before CPU bringup, so the
direct call cannot hit the deadlock described above.
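
Condensed, the fix has this shape (a sketch of the pattern applied in the
patch below; the static key and handler names mirror the patch, while
last_page_accepted() and the standalone work item are hypothetical
stand-ins for the per-zone plumbing):

#include <linux/workqueue.h>
#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
static struct work_struct unaccepted_cleanup;	/* INIT_WORK()ed during init */

static void unaccepted_cleanup_work(struct work_struct *work)
{
	/* Runs from a workqueue, where taking cpu_hotplug_lock is safe. */
	static_branch_dec(&zones_with_unaccepted_pages);
}

static void last_page_accepted(void)
{
	if (system_wq)
		/* Workqueues are up: defer, so cpu_hotplug_lock is not
		 * taken on a path that may already hold it. */
		schedule_work(&unaccepted_cleanup);
	else
		/* Pre-workqueue boot is also pre-CPU-bringup, so the
		 * direct call cannot deadlock. */
		unaccepted_cleanup_work(&unaccepted_cleanup);
}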

Link: https://lkml.kernel.org/r/20250329171030.3942298-1-kirill.shutemov@linux.intel.com
Fixes: 55ad43e8ba0f ("mm: add a helper to accept page")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Srikanth Aithal <sraithal@amd.com>
Tested-by: Srikanth Aithal <sraithal@amd.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Edgecombe, Rick P" <rick.p.edgecombe@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: Thomas Lendacky <thomas.lendacky@amd.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mmzone.h
mm/internal.h
mm/mm_init.c
mm/page_alloc.c

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 25e80b2ca7f41aa26d2c625590b4e4272fc6000e..4c95fcc9e9df0cafafa3970ff6231b3f20a82bb9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -967,6 +967,9 @@ struct zone {
 #ifdef CONFIG_UNACCEPTED_MEMORY
        /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */
        struct list_head        unaccepted_pages;
+
+       /* To be called once the last page in the zone is accepted */
+       struct work_struct      unaccepted_cleanup;
 #endif
 
        /* zone flags, see below */
diff --git a/mm/internal.h b/mm/internal.h
index 50c2f590b2d04bc8c06e07894e63a908989b4a88..e9695baa592266f117e8e1a132872ae352d9f4a7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1595,6 +1595,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 void accept_page(struct page *page);
+void unaccepted_cleanup_work(struct work_struct *work);
 #else /* CONFIG_UNACCEPTED_MEMORY */
 static inline void accept_page(struct page *page)
 {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 84f14fa12d0ddb62331b7d830c96d33be682654d..9659689b8ace01a14934be0919e5bf74546cff54 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,6 +1441,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
        INIT_LIST_HEAD(&zone->unaccepted_pages);
+       INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
 #endif
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1715e34b91af4d4ddd77f550ddbf6c1c58014541..e506e365d6f18b865915e5c809eae100374580ad 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7191,6 +7191,11 @@ static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
 
 static bool lazy_accept = true;
 
+void unaccepted_cleanup_work(struct work_struct *work)
+{
+       static_branch_dec(&zones_with_unaccepted_pages);
+}
+
 static int __init accept_memory_parse(char *p)
 {
        if (!strcmp(p, "lazy")) {
@@ -7229,8 +7234,27 @@ static void __accept_page(struct zone *zone, unsigned long *flags,
 
        __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
 
-       if (last)
-               static_branch_dec(&zones_with_unaccepted_pages);
+       if (last) {
+               /*
+                * There are two corner cases:
+                *
+                * - If allocation occurs during the CPU bring up,
+                *   static_branch_dec() cannot be used directly as
+                *   it causes a deadlock on cpu_hotplug_lock.
+                *
+                *   Instead, use schedule_work() to prevent deadlock.
+                *
+                * - If allocation occurs before workqueues are initialized,
+                *   static_branch_dec() should be called directly.
+                *
+                *   Workqueues are initialized before CPU bring up, so this
+                *   will not conflict with the first scenario.
+                */
+               if (system_wq)
+                       schedule_work(&zone->unaccepted_cleanup);
+               else
+                       unaccepted_cleanup_work(&zone->unaccepted_cleanup);
+       }
 }
 
 void accept_page(struct page *page)