We already have a generic implementation of alloc/free up to P4D level, as
well as pgd_free(). Let's finish the work and add a generic PGD-level
alloc helper as well.
Unlike at lower levels, almost all architectures need some specific magic
at PGD level (typically initialising PGD entries), so introducing a
generic pgd_alloc() isn't worth it. Instead we introduce two new helpers,
__pgd_alloc() and __pgd_free(), and make use of them in the arch-specific
pgd_alloc() and pgd_free() wherever possible. To accommodate as many
architectures as possible, __pgd_alloc() takes a page allocation order.
Because pagetable_alloc() allocates zeroed pages, explicit zeroing in
pgd_alloc() becomes redundant and we can get rid of it. Some trivial
implementations of pgd_free() also become unnecessary once __pgd_alloc()
is used; remove them.
Another small improvement is consistent accounting of PGD pages by using
GFP_PGTABLE_{USER,KERNEL} as appropriate.
Not all PGD allocations can be handled by the generic helpers. In
particular, multiple architectures allocate PGDs from a kmem_cache, and
those PGDs may not be page-sized.
Link: https://lkml.kernel.org/r/20250103184415.2744423-6-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
{
pgd_t *ret, *init;
- ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ ret = __pgd_alloc(mm, 0);
init = pgd_offset(&init_mm, 0UL);
if (ret) {
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
+ pgd_t *ret = __pgd_alloc(mm, 0);
if (ret) {
int num, num2;
- num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
- memzero(ret, num * sizeof(pgd_t));
+ num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
num2 = VMALLOC_SIZE / PGDIR_SIZE;
memcpy(ret + num, swapper_pg_dir + num, num2 * sizeof(pgd_t));
-
- memzero(ret + num + num2,
- (PTRS_PER_PGD - num - num2) * sizeof(pgd_t));
-
}
return ret;
}
#include "mm.h"
#ifdef CONFIG_ARM_LPAE
-#define _pgd_alloc(mm) kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL)
+#define _pgd_alloc(mm) kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL | __GFP_ZERO)
#define _pgd_free(mm, pgd) kfree(pgd)
#else
-#define _pgd_alloc(mm) (pgd_t *)__get_free_pages(GFP_KERNEL, 2)
-#define _pgd_free(mm, pgd) free_pages((unsigned long)pgd, 2)
+#define _pgd_alloc(mm) __pgd_alloc(mm, 2)
+#define _pgd_free(mm, pgd) __pgd_free(mm, pgd)
#endif
/*
if (!new_pgd)
goto no_pgd;
- memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
-
/*
* Copy over the kernel and IO PGD entries
*/
gfp_t gfp = GFP_PGTABLE_USER;
if (pgdir_is_page_size())
- return (pgd_t *)__get_free_page(gfp);
+ return __pgd_alloc(mm, 0);
else
return kmem_cache_alloc(pgd_cache, gfp);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
if (pgdir_is_page_size())
- free_page((unsigned long)pgd);
+ __pgd_free(mm, pgd);
else
kmem_cache_free(pgd_cache, pgd);
}
pgd_t *ret;
pgd_t *init;
- ret = (pgd_t *) __get_free_page(GFP_KERNEL);
+ ret = __pgd_alloc(mm, 0);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init((unsigned long *)ret);
{
pgd_t *pgd;
- pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ pgd = __pgd_alloc(mm, 0);
/*
* There may be better ways to do this, but to ensure
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *init, *ret = NULL;
- struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
+ pgd_t *init, *ret;
- if (ptdesc) {
- ret = (pgd_t *)ptdesc_address(ptdesc);
+ ret = __pgd_alloc(mm, 0);
+ if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
{
pgd_t *new_pgd;
- new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+ new_pgd = __pgd_alloc(mm, 0);
memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
return new_pgd;
extern void __bad_pte(pmd_t *pmd);
-static inline pgd_t *get_pgd(void)
-{
- return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
-}
-
-#define pgd_alloc(mm) get_pgd()
+#define pgd_alloc(mm) __pgd_alloc(mm, 0)
extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
#define __HAVE_ARCH_PMD_ALLOC_ONE
#define __HAVE_ARCH_PUD_ALLOC_ONE
-#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
extern void pgd_init(void *addr);
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- pagetable_free(virt_to_ptdesc(pgd));
-}
-
#define __pte_free_tlb(tlb, pte, address) \
do { \
pagetable_dtor(page_ptdesc(pte)); \
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *init, *ret = NULL;
- struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM,
- PGD_TABLE_ORDER);
+ pgd_t *init, *ret;
- if (ptdesc) {
- ret = ptdesc_address(ptdesc);
+ ret = __pgd_alloc(mm, PGD_TABLE_ORDER);
+ if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init(ret);
memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
#include <linux/sched.h>
#include <asm/cpuinfo.h>
+#include <asm/pgalloc.h>
/* pteaddr:
* ptbase | vpn* | zero
{
pgd_t *ret, *init;
- ret = (pgd_t *) __get_free_page(GFP_KERNEL);
+ ret = __pgd_alloc(mm, 0);
if (ret) {
init = pgd_offset(&init_mm, 0UL);
pgd_init(ret);
*/
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL);
+ pgd_t *ret = __pgd_alloc(mm, 0);
- if (ret) {
- memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ if (ret)
memcpy(ret + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- }
return ret;
}
#include <asm/cache.h>
#define __HAVE_ARCH_PMD_ALLOC_ONE
-#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h>
/* Allocate the top level pgd (page directory) */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd;
-
- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
- if (unlikely(pgd == NULL))
- return NULL;
-
- memset(pgd, 0, PAGE_SIZE << PGD_TABLE_ORDER);
-
- return pgd;
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- free_pages((unsigned long)pgd, PGD_TABLE_ORDER);
+ return __pgd_alloc(mm, PGD_TABLE_ORDER);
}
#if CONFIG_PGTABLE_LEVELS == 3
{
pgd_t *pgd;
- pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+ pgd = __pgd_alloc(mm, 0);
if (likely(pgd != NULL)) {
- memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/* Copy kernel mappings */
sync_kernel_mappings(pgd);
}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+ pgd_t *pgd = __pgd_alloc(mm, 0);
- if (pgd) {
- memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+ if (pgd)
memcpy(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
- }
+
return pgd;
}
SLAB_PANIC, NULL);
}
-static inline pgd_t *_pgd_alloc(void)
+static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
/*
* If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain.
* We allocate one page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
- PGD_ALLOCATION_ORDER);
+ return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
/*
* Now PAE kernel is not running as a Xen domain. We can allocate
return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
}
-static inline void _pgd_free(pgd_t *pgd)
+static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
if (!SHARED_KERNEL_PMD)
- free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ __pgd_free(mm, pgd);
else
kmem_cache_free(pgd_cache, pgd);
}
#else
-static inline pgd_t *_pgd_alloc(void)
+static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
{
- return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
- PGD_ALLOCATION_ORDER);
+ return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
}
-static inline void _pgd_free(pgd_t *pgd)
+static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
+ __pgd_free(mm, pgd);
}
#endif /* CONFIG_X86_PAE */
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
pmd_t *pmds[MAX_PREALLOCATED_PMDS];
- pgd = _pgd_alloc();
+ pgd = _pgd_alloc(mm);
if (pgd == NULL)
goto out;
if (sizeof(pmds) != 0)
free_pmds(mm, pmds, PREALLOCATED_PMDS);
out_free_pgd:
- _pgd_free(pgd);
+ _pgd_free(mm, pgd);
out:
return NULL;
}
pgd_mop_up_pmds(mm, pgd);
pgd_dtor(pgd);
paravirt_pgd_free(mm, pgd);
- _pgd_free(pgd);
+ _pgd_free(mm, pgd);
}
/*
static inline pgd_t*
pgd_alloc(struct mm_struct *mm)
{
- return (pgd_t*) __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ return __pgd_alloc(mm, 0);
}
static inline void ptes_clear(pte_t *ptep)
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+ struct ptdesc *ptdesc;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ gfp &= ~__GFP_HIGHMEM;
+
+ ptdesc = pagetable_alloc_noprof(gfp, order);
+ if (!ptdesc)
+ return NULL;
+
+ return ptdesc_address(ptdesc);
+}
+#define __pgd_alloc(...) alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__))
+
+static inline void __pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgd);
+
+ BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+ pagetable_free(ptdesc);
+}
+
#ifndef __HAVE_ARCH_PGD_FREE
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- pagetable_free(virt_to_ptdesc(pgd));
+ __pgd_free(mm, pgd);
}
#endif