mm: shmem: fix the strategy for the tmpfs 'huge=' options

author Baolin Wang <baolin.wang@linux.alibaba.com>

Wed, 3 Sep 2025 08:54:24 +0000 (16:54 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Fri, 12 Sep 2025 00:25:50 +0000 (17:25 -0700)
author Baolin Wang <baolin.wang@linux.alibaba.com>
Wed, 3 Sep 2025 08:54:24 +0000 (16:54 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 12 Sep 2025 00:25:50 +0000 (17:25 -0700)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst

index a16a04841b960ed875a33b0fe2f7a8748fa60243..1654211cc6cf207ac37ddff07e4b76782aa14d78 100644 (file)
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -419,6 +419,8 @@ option: ``huge=``. It can have following values:
  
  always
      Attempt to allocate huge pages every time we need a new page;
+    Always try PMD-sized huge pages first, and fall back to smaller-sized
+    huge pages if the PMD-sized huge page allocation fails;
  
  never
      Do not allocate huge pages. Note that ``madvise(..., MADV_COLLAPSE)``
@@ -426,7 +428,9 @@ never
      is specified everywhere;
  
  within_size
-    Only allocate huge page if it will be fully within i_size.
+    Only allocate huge page if it will be fully within i_size;
+    Always try PMD-sized huge pages first, and fall back to smaller-sized
+    huge pages if the PMD-sized huge page allocation fails;
      Also respect madvise() hints;
  
  advise
diff --git a/mm/shmem.c b/mm/shmem.c

index 2df26f4d6e60386d9bba57d9bb37917fd02a9c55..29e1eb6901259b8291348d810a4aa291487e8d63 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -573,42 +573,6 @@ static int shmem_confirm_swap(struct address_space *mapping, pgoff_t index,
  static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
  static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER;
  
-/**
- * shmem_mapping_size_orders - Get allowable folio orders for the given file size.
- * @mapping: Target address_space.
- * @index: The page index.
- * @write_end: end of a write, could extend inode size.
- *
- * This returns huge orders for folios (when supported) based on the file size
- * which the mapping currently allows at the given index. The index is relevant
- * due to alignment considerations the mapping might have. The returned order
- * may be less than the size passed.
- *
- * Return: The orders.
- */
-static inline unsigned int
-shmem_mapping_size_orders(struct address_space *mapping, pgoff_t index, loff_t write_end)
-{
-       unsigned int order;
-       size_t size;
-
-       if (!mapping_large_folio_support(mapping) || !write_end)
-               return 0;
-
-       /* Calculate the write size based on the write_end */
-       size = write_end - (index << PAGE_SHIFT);
-       order = filemap_get_order(size);
-       if (!order)
-               return 0;
-
-       /* If we're not aligned, allocate a smaller folio */
-       if (index & ((1UL << order) - 1))
-               order = __ffs(index);
-
-       order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
-       return order > 0 ? BIT(order + 1) - 1 : 0;
-}
-
  static unsigned int shmem_get_orders_within_size(struct inode *inode,
                 unsigned long within_size_orders, pgoff_t index,
                 loff_t write_end)
@@ -655,22 +619,21 @@ static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index
          * For tmpfs mmap()'s huge order, we still use PMD-sized order to
          * allocate huge pages due to lack of a write size hint.
          *
-        * Otherwise, tmpfs will allow getting a highest order hint based on
-        * the size of write and fallocate paths, then will try each allowable
-        * huge orders.
+        * For tmpfs with 'huge=always' or 'huge=within_size' mount option,
+        * we will always try PMD-sized order first. If that fails, we will
+        * fall back to smaller large folios.
          */
         switch (SHMEM_SB(inode->i_sb)->huge) {
         case SHMEM_HUGE_ALWAYS:
                 if (vma)
                         return maybe_pmd_order;
  
-               return shmem_mapping_size_orders(inode->i_mapping, index, write_end);
+               return THP_ORDERS_ALL_FILE_DEFAULT;
         case SHMEM_HUGE_WITHIN_SIZE:
                 if (vma)
                         within_size_orders = maybe_pmd_order;
                 else
-                       within_size_orders = shmem_mapping_size_orders(inode->i_mapping,
-                                                                      index, write_end);
+                       within_size_orders = THP_ORDERS_ALL_FILE_DEFAULT;
  
                 within_size_orders = shmem_get_orders_within_size(inode, within_size_orders,
                                                                   index, write_end);
author	Baolin Wang <baolin.wang@linux.alibaba.com>
	Wed, 3 Sep 2025 08:54:24 +0000 (16:54 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 12 Sep 2025 00:25:50 +0000 (17:25 -0700)
Documentation/admin-guide/mm/transhuge.rst		patch | blob | history
mm/shmem.c		patch | blob | history