mm/contig_alloc: support __GFP_COMP
author     Yu Zhao <yuzhao@google.com>
           Wed, 14 Aug 2024 03:54:49 +0000 (21:54 -0600)
committer  Andrew Morton <akpm@linux-foundation.org>
           Wed, 4 Sep 2024 04:15:36 +0000 (21:15 -0700)
Patch series "mm/hugetlb: alloc/free gigantic folios", v2.

Using __GFP_COMP for gigantic folios can greatly reduce not only the amount
of code but also the allocation and free time.

Approximate LOC change to mm/hugetlb.c: +60, -240

Allocate and free 500 1GB hugeTLB folios without HVO by:
  time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
  time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages

       Before  After
Alloc  ~13s    ~10s
Free   ~15s    <1s

Improvements of this magnitude generally hold across multiple x86 and arm64
CPU models.

Perf profile before:
  Alloc
    - 99.99% alloc_pool_huge_folio
       - __alloc_fresh_hugetlb_folio
          - 83.23% alloc_contig_pages_noprof
             - 47.46% alloc_contig_range_noprof
                - 20.96% isolate_freepages_range
                     16.10% split_page
                - 14.10% start_isolate_page_range
                - 12.02% undo_isolate_page_range

  Free
    - update_and_free_pages_bulk
       - 87.71% free_contig_range
          - 76.02% free_unref_page
             - 41.30% free_unref_page_commit
                - 32.58% free_pcppages_bulk
                   - 24.75% __free_one_page
               13.96% _raw_spin_trylock
         12.27% __update_and_free_hugetlb_folio

Perf profile after:
  Alloc
    - 99.99% alloc_pool_huge_folio
         alloc_gigantic_folio
       - alloc_contig_pages_noprof
          - 59.15% alloc_contig_range_noprof
             - 20.72% start_isolate_page_range
               20.64% prep_new_page
             - 17.13% undo_isolate_page_range

  Free
    - update_and_free_pages_bulk
       - __folio_put
       - __free_pages_ok
            7.46% free_tail_page_prepare
          - 1.97% free_one_page
               1.86% __free_one_page

This patch (of 3):

Support __GFP_COMP in alloc_contig_range().  When the flag is set, upon
success the function returns a large folio prepared by prep_new_page(),
rather than a range of order-0 pages prepared by split_free_pages() (which
is renamed from split_map_pages()).

alloc_contig_range() can be used to allocate folios larger than
MAX_PAGE_ORDER, e.g., gigantic hugeTLB folios.  So on the free path,
free_one_page() needs to handle such folios via split_large_buddy().
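
For illustration, a minimal caller sketch of the new contract (hypothetical
code, not part of this patch; the __GFP_COMP path additionally requires the
requested range to be power-of-2 sized, as the mm/page_alloc.c hunk below
checks):

  if (!alloc_contig_range(start, start + (1 << order),
                          MIGRATE_MOVABLE, GFP_KERNEL | __GFP_COMP)) {
          /* On success, the whole range is one large folio. */
          struct folio *folio = pfn_folio(start);

          /* ... use the folio ... */
          folio_put(folio);  /* frees via free_one_page()/split_large_buddy() */
  }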

[akpm@linux-foundation.org: fix folio_alloc_gigantic_noprof() WARN expression, per Yu Liao]
Link: https://lkml.kernel.org/r/20240814035451.773331-1-yuzhao@google.com
Link: https://lkml.kernel.org/r/20240814035451.773331-2-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Frank van der Linden <fvdl@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/gfp.h
mm/compaction.c
mm/page_alloc.c

index f53f76e0b17e4904e1fdb3a1f075193f43aa33b8..03ba9563c6db9080f9838120db67c5743c148f2b 100644 (file)
@@ -446,4 +446,27 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
 #endif
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 
+#ifdef CONFIG_CONTIG_ALLOC
+static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
+                                                       int nid, nodemask_t *node)
+{
+       struct page *page;
+
+       if (WARN_ON(!order || !(gfp & __GFP_COMP)))
+               return NULL;
+
+       page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
+
+       return page ? page_folio(page) : NULL;
+}
+#else
+static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
+                                                       int nid, nodemask_t *node)
+{
+       return NULL;
+}
+#endif
+/* This should be paired with folio_put() rather than free_contig_range(). */
+#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+
 #endif /* __LINUX_GFP_H */
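
A hedged usage sketch of the new helper (hypothetical caller, not part of
the patch; nid 0 and order 18 are illustrative):

  /* Order 18 is 1GB with 4KB base pages; __GFP_COMP is mandatory, and
   * the result is released with folio_put(), not free_contig_range(). */
  struct folio *folio;

  folio = folio_alloc_gigantic(18, GFP_KERNEL | __GFP_COMP,
                               0, &node_states[N_MEMORY]);
  if (folio)
          folio_put(folio);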
index eb95e9b435d0fc6bcb3595ddd98155591e53a0b7..d1041fbce6798e1718cf417256a502f5a182be2d 100644 (file)
@@ -86,33 +86,6 @@ static struct page *mark_allocated_noprof(struct page *page, unsigned int order,
 }
 #define mark_allocated(...)    alloc_hooks(mark_allocated_noprof(__VA_ARGS__))
 
-static void split_map_pages(struct list_head *freepages)
-{
-       unsigned int i, order;
-       struct page *page, *next;
-       LIST_HEAD(tmp_list);
-
-       for (order = 0; order < NR_PAGE_ORDERS; order++) {
-               list_for_each_entry_safe(page, next, &freepages[order], lru) {
-                       unsigned int nr_pages;
-
-                       list_del(&page->lru);
-
-                       nr_pages = 1 << order;
-
-                       mark_allocated(page, order, __GFP_MOVABLE);
-                       if (order)
-                               split_page(page, order);
-
-                       for (i = 0; i < nr_pages; i++) {
-                               list_add(&page->lru, &tmp_list);
-                               page++;
-                       }
-               }
-               list_splice_init(&tmp_list, &freepages[0]);
-       }
-}
-
 static unsigned long release_free_list(struct list_head *freepages)
 {
        int order;
@@ -742,11 +715,11 @@ isolate_fail:
  *
  * Non-free pages, invalid PFNs, or zone boundaries within the
  * [start_pfn, end_pfn) range are considered errors, cause function to
- * undo its actions and return zero.
+ * undo its actions and return zero. cc->freepages[] are empty.
  *
  * Otherwise, function returns one-past-the-last PFN of isolated page
 * (which may be greater than end_pfn if end fell in the middle of
- * a free page).
+ * a free page). cc->freepages[] contain the isolated free pages.
  */
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
@@ -754,10 +727,9 @@ isolate_freepages_range(struct compact_control *cc,
 {
        unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
        int order;
-       struct list_head tmp_freepages[NR_PAGE_ORDERS];
 
        for (order = 0; order < NR_PAGE_ORDERS; order++)
-               INIT_LIST_HEAD(&tmp_freepages[order]);
+               INIT_LIST_HEAD(&cc->freepages[order]);
 
        pfn = start_pfn;
        block_start_pfn = pageblock_start_pfn(pfn);
@@ -788,7 +760,7 @@ isolate_freepages_range(struct compact_control *cc,
                        break;
 
                isolated = isolate_freepages_block(cc, &isolate_start_pfn,
-                                       block_end_pfn, tmp_freepages, 0, true);
+                                       block_end_pfn, cc->freepages, 0, true);
 
                /*
                 * In strict mode, isolate_freepages_block() returns 0 if
@@ -807,13 +779,10 @@ isolate_freepages_range(struct compact_control *cc,
 
        if (pfn < end_pfn) {
                /* Loop terminated early, cleanup. */
-               release_free_list(tmp_freepages);
+               release_free_list(cc->freepages);
                return 0;
        }
 
-       /* __isolate_free_page() does not map the pages */
-       split_map_pages(tmp_freepages);
-
        /* We don't use freelists for anything. */
        return pfn;
 }
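
With split_map_pages() gone, isolate_freepages_range() now returns the
isolated pages order-by-order in cc->freepages[]; splitting them down to
order-0 is deferred to the caller. A hypothetical caller sketch, mirroring
the alloc_contig_range_noprof() change below:

  if (isolate_freepages_range(&cc, outer_start, end)) {
          /* Only non-__GFP_COMP callers need order-0 pages. */
          if (!(gfp_mask & __GFP_COMP))
                  split_free_pages(cc.freepages);
  }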
index 56a93805561aa382dda9374f2b2a76e92d5736db..da603080214c6adb41d52c1e79bfd6474975dbc6 100644 (file)
@@ -1196,16 +1196,36 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        spin_unlock_irqrestore(&zone->lock, flags);
 }
 
+/* Split a multi-block free page into its individual pageblocks. */
+static void split_large_buddy(struct zone *zone, struct page *page,
+                             unsigned long pfn, int order, fpi_t fpi)
+{
+       unsigned long end = pfn + (1 << order);
+
+       VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
+       /* Caller removed page from freelist, buddy info cleared! */
+       VM_WARN_ON_ONCE(PageBuddy(page));
+
+       if (order > pageblock_order)
+               order = pageblock_order;
+
+       while (pfn != end) {
+               int mt = get_pfnblock_migratetype(page, pfn);
+
+               __free_one_page(page, pfn, zone, order, mt, fpi);
+               pfn += 1 << order;
+               page = pfn_to_page(pfn);
+       }
+}
+
 static void free_one_page(struct zone *zone, struct page *page,
                          unsigned long pfn, unsigned int order,
                          fpi_t fpi_flags)
 {
        unsigned long flags;
-       int migratetype;
 
        spin_lock_irqsave(&zone->lock, flags);
-       migratetype = get_pfnblock_migratetype(page, pfn);
-       __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
+       split_large_buddy(zone, page, pfn, order, fpi_flags);
        spin_unlock_irqrestore(&zone->lock, flags);
 }
 
@@ -1697,27 +1717,6 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
        return start_pfn;
 }
 
-/* Split a multi-block free page into its individual pageblocks */
-static void split_large_buddy(struct zone *zone, struct page *page,
-                             unsigned long pfn, int order)
-{
-       unsigned long end_pfn = pfn + (1 << order);
-
-       VM_WARN_ON_ONCE(order <= pageblock_order);
-       VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
-
-       /* Caller removed page from freelist, buddy info cleared! */
-       VM_WARN_ON_ONCE(PageBuddy(page));
-
-       while (pfn != end_pfn) {
-               int mt = get_pfnblock_migratetype(page, pfn);
-
-               __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
-               pfn += pageblock_nr_pages;
-               page = pfn_to_page(pfn);
-       }
-}
-
 /**
  * move_freepages_block_isolate - move free pages in block for page isolation
  * @zone: the zone
@@ -1758,7 +1757,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
                del_page_from_free_list(buddy, zone, order,
                                        get_pfnblock_migratetype(buddy, pfn));
                set_pageblock_migratetype(page, migratetype);
-               split_large_buddy(zone, buddy, pfn, order);
+               split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
                return true;
        }
 
@@ -1769,7 +1768,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
                del_page_from_free_list(page, zone, order,
                                        get_pfnblock_migratetype(page, pfn));
                set_pageblock_migratetype(page, migratetype);
-               split_large_buddy(zone, page, pfn, order);
+               split_large_buddy(zone, page, pfn, order, FPI_NONE);
                return true;
        }
 move:
@@ -6437,6 +6436,31 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
        return (ret < 0) ? ret : 0;
 }
 
+static void split_free_pages(struct list_head *list)
+{
+       int order;
+
+       for (order = 0; order < NR_PAGE_ORDERS; order++) {
+               struct page *page, *next;
+               int nr_pages = 1 << order;
+
+               list_for_each_entry_safe(page, next, &list[order], lru) {
+                       int i;
+
+                       post_alloc_hook(page, order, __GFP_MOVABLE);
+                       if (!order)
+                               continue;
+
+                       split_page(page, order);
+
+                       /* Add all subpages to the order-0 head, in sequence. */
+                       list_del(&page->lru);
+                       for (i = 0; i < nr_pages; i++)
+                               list_add_tail(&page[i].lru, &list[0]);
+               }
+       }
+}
+
 /**
  * alloc_contig_range() -- tries to allocate given range of pages
  * @start:     start PFN to allocate
@@ -6549,12 +6573,25 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
                goto done;
        }
 
-       /* Free head and tail (if any) */
-       if (start != outer_start)
-               free_contig_range(outer_start, start - outer_start);
-       if (end != outer_end)
-               free_contig_range(end, outer_end - end);
+       if (!(gfp_mask & __GFP_COMP)) {
+               split_free_pages(cc.freepages);
 
+               /* Free head and tail (if any) */
+               if (start != outer_start)
+                       free_contig_range(outer_start, start - outer_start);
+               if (end != outer_end)
+                       free_contig_range(end, outer_end - end);
+       } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
+               struct page *head = pfn_to_page(start);
+               int order = ilog2(end - start);
+
+               check_new_pages(head, order);
+               prep_new_page(head, order, gfp_mask, 0);
+       } else {
+               ret = -EINVAL;
+               WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
+                    start, end, outer_start, outer_end);
+       }
 done:
        undo_isolate_page_range(start, end, migratetype);
        return ret;
@@ -6663,6 +6700,18 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 {
        unsigned long count = 0;
+       struct folio *folio = pfn_folio(pfn);
+
+       if (folio_test_large(folio)) {
+               int expected = folio_nr_pages(folio);
+
+               if (nr_pages == expected)
+                       folio_put(folio);
+               else
+                       WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
+                            pfn, nr_pages, expected);
+               return;
+       }
 
        for (; nr_pages--; pfn++) {
                struct page *page = pfn_to_page(pfn);