* @reuse_addr:                the virtual address of the @reuse_page page.
  * @vmemmap_pages:     the list head of the vmemmap pages that can be freed
  *                     or that the range is remapped from.
+ * @flags:             used to modify behavior in vmemmap page table walking
+ *                     operations.
  */
 struct vmemmap_remap_walk {
        void                    (*remap_pte)(pte_t *pte, unsigned long addr,
        struct page             *reuse_page;
        unsigned long           reuse_addr;
        struct list_head        *vmemmap_pages;
+
+/* Skip the TLB flush when we split the PMD; the caller flushes later. */
+#define VMEMMAP_SPLIT_NO_TLB_FLUSH     BIT(0)
+       unsigned long           flags;
 };
 
-static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
+static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, bool flush)
 {
        pmd_t __pmd;
        int i;
                /* Make pte visible before pmd. See comment in pmd_install(). */
                smp_wmb();
                pmd_populate_kernel(&init_mm, pmd, pgtable);
-               flush_tlb_kernel_range(start, start + PMD_SIZE);
+               if (flush)
+                       flush_tlb_kernel_range(start, start + PMD_SIZE);
        } else {
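+               /*
+                * The PMD is not a leaf any more (e.g. it was already split
+                * under us), so free the unused preallocated page table.
+                */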
                pte_free_kernel(&init_mm, pgtable);
        }
        do {
                int ret;
 
-               ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
+               ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK,
+                               !(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH));
                if (ret)
                        return ret;
 
                next = pmd_addr_end(addr, end);
+
+               /*
+                * We are only splitting, not remapping the hugetlb vmemmap
+                * pages.
+                */
+               if (!walk->remap_pte)
+                       continue;
+
                vmemmap_pte_range(pmd, addr, next, walk);
        } while (pmd++, addr = next, addr != end);
 
                        return ret;
        } while (pgd++, addr = next, addr != end);
 
-       flush_tlb_kernel_range(start, end);
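+       /*
+        * A split-only walk (remap_pte == NULL) defers the TLB flush to the
+        * caller, so only flush here when PTEs were actually remapped.
+        */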
+       if (walk->remap_pte)
+               flush_tlb_kernel_range(start, end);
 
        return 0;
 }
        set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
 }
 
+/**
+ * vmemmap_remap_split - split the huge PMDs backing the vmemmap virtual
+ *                      address range [@start, @end) into PTEs
+ * @start:     start address of the vmemmap virtual address range that we want
+ *             to remap.
+ * @end:       end address of the vmemmap virtual address range that we want to
+ *             remap.
+ * @reuse:     reuse address.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+static int vmemmap_remap_split(unsigned long start, unsigned long end,
+                               unsigned long reuse)
+{
+       int ret;
+       struct vmemmap_remap_walk walk = {
+               .remap_pte      = NULL,
+               .flags          = VMEMMAP_SPLIT_NO_TLB_FLUSH,
+       };
+
+       /* See the comment in the vmemmap_remap_free(). */
+       BUG_ON(start - reuse != PAGE_SIZE);
+
+       mmap_read_lock(&init_mm);
+       ret = vmemmap_remap_range(reuse, end, &walk);
+       mmap_read_unlock(&init_mm);
+
+       return ret;
+}
+
 /**
  * vmemmap_remap_free - remap the vmemmap virtual address range [@start, @end)
  *                     to the page which @reuse is mapped to, then free vmemmap
                .remap_pte      = vmemmap_remap_pte,
                .reuse_addr     = reuse,
                .vmemmap_pages  = vmemmap_pages,
+               .flags          = 0,
        };
        int nid = page_to_nid((struct page *)reuse);
        gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
                        .remap_pte      = vmemmap_restore_pte,
                        .reuse_addr     = reuse,
                        .vmemmap_pages  = vmemmap_pages,
+                       .flags          = 0,
                };
 
                vmemmap_remap_range(reuse, end, &walk);
                .remap_pte      = vmemmap_restore_pte,
                .reuse_addr     = reuse,
                .vmemmap_pages  = &vmemmap_pages,
+               .flags          = 0,
        };
 
        /* See the comment in the vmemmap_remap_free(). */
        free_vmemmap_page_list(&vmemmap_pages);
 }
 
+static int hugetlb_vmemmap_split(const struct hstate *h, struct page *head)
+{
+       unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
+       unsigned long vmemmap_reuse;
+
+       if (!vmemmap_should_optimize(h, head))
+               return 0;
+
+       vmemmap_end     = vmemmap_start + hugetlb_vmemmap_size(h);
+       vmemmap_reuse   = vmemmap_start;
+       vmemmap_start   += HUGETLB_VMEMMAP_RESERVE_SIZE;
+
+       /*
+        * Split PMDs on the vmemmap virtual address range [@vmemmap_start,
+        * @vmemmap_end).
+        */
+       return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse);
+}
+
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
 {
        struct folio *folio;
        LIST_HEAD(vmemmap_pages);
 
+       list_for_each_entry(folio, folio_list, lru) {
+               int ret = hugetlb_vmemmap_split(h, &folio->page);
+
+               /*
+                * Splitting the PMD requires allocating a page, so fail early
+                * once we encounter the first OOM. There is no point in
+                * retrying, as the split can still be done on remap with the
+                * memory we get back from the vmemmap deduplication.
+                */
+               if (ret == -ENOMEM)
+                       break;
+       }
+
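+       /*
+        * The splits above skipped the per-PMD TLB flushes
+        * (VMEMMAP_SPLIT_NO_TLB_FLUSH), so do one flush for all of them here.
+        */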
+       flush_tlb_all();
+
        list_for_each_entry(folio, folio_list, lru) {
                int ret = __hugetlb_vmemmap_optimize(h, &folio->page,
                                                                &vmemmap_pages);