From: Nitin Gupta
Date: Fri, 27 May 2016 21:58:13 +0000 (-0700)
Subject: sparc64: Trim page tables at PMD for hugepages
X-Git-Tag: v4.1.12-92~74^2~4
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=748730dbb9e6b9e883eae1d525803308d8272876;p=users%2Fjedix%2Flinux-maple.git

sparc64: Trim page tables at PMD for hugepages

For PMD-aligned (8M) hugepages, we currently allocate all four page
table levels, which is wasteful. We now allocate only up to the PMD
level, which saves page table memory.

Orabug: 22630259

Signed-off-by: Nitin Gupta
(cherry picked from commit 5d2c7930a4d3bf3ca560048052d638d7efa67e36)
(cherry picked from commit abefebd73e204979661a818ac31cf455d110a672)
Signed-off-by: Allen Pais
---

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 4b8e2cea21d9..6670f9111d63 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -11,6 +11,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
 {
 }
@@ -37,14 +41,6 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index ed7a88bcd7fd..e0af5d3dc36b 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -411,6 +411,11 @@ static inline bool is_default_hugetlb_pte(pte_t pte)
 	return (pte_val(pte) & mask) == mask;
 }
 
+static inline bool is_hugetlb_pmd(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index ecb49cfa3be9..131503e36fb0 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -203,7 +203,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * We have to propagate the 4MB bit of the virtual address
 	 * because we are fabricating 8MB pages using 4MB hw pages.
 	 */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
 	brz,pn		REG1, FAIL_LABEL;		\
 	sethi		%uhi(_PAGE_PMD_HUGE), REG2;	\
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 13a645eac73f..c911762ae675 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -113,8 +113,8 @@ static unsigned int get_user_insn(unsigned long tpc)
 	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
 		goto out_irq_enable;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (pmd_trans_huge(*pmdp)) {
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	if (is_hugetlb_pmd(*pmdp)) {
 		if (pmd_trans_splitting(*pmdp))
 			goto out_irq_enable;
 
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 298fa1e6a7b7..16a3b1b3e7ba 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include
 
 /* Slightly simplified from the non-hugepage variant because by
  * definition we don't have to worry about any page coloring stuff
@@ -172,6 +173,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
 		if (!pmd)
 			goto fail;
 
+		if (size == HPAGE_SIZE) {
+			rpte = (pte_t *)pmd;
+			break;
+		}
 		pte = pte_alloc_map(mm, NULL, pmd, addr);
@@ -254,7 +259,7 @@ static unsigned long tte_to_hugepage_mask(pte_t pte)
 }
 
 /* This should also be moved and a noop for sun4u.
- * Only include xl hugepage sizes we plan to support.
+ * Only include hugepage sizes we plan to support.
  */
 static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift)
 {
@@ -271,6 +276,10 @@ static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift)
 	case XLHPAGE_2GB_SHIFT:
 		sun4v_hugepage_size = _PAGE_SZ2GB_4V;
 		break;
+	/* 8Mb */
+	case HPAGE_SHIFT:
+		pte_val(entry) |= _PAGE_PMD_HUGE;
+		break;
 	default:
 		WARN_ONCE(hugepage_shift,
 			  "hugepage_shift_to_tte: unsupported "
@@ -286,11 +295,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 {
 	unsigned int hugepage_shift = huge_page_shift(hstate_vma(vma));
 
-	if (hugepage_shift == HPAGE_SHIFT)
-		goto out;
-	entry = hugepage_shift_to_tte(entry, hugepage_shift);
-out:
-	return entry;
+	return hugepage_shift_to_tte(entry, hugepage_shift);
 }
 
 static void huge_pte_at_flush_update(struct mm_struct *mm, unsigned long addr,
@@ -407,6 +412,57 @@ static bool set_huge_pte_range_at(struct mm_struct *mm, pmd_t *pmd,
 	return rc;
 }
 
+static bool __set_huge_pmd_at(struct mm_struct *mm, pmd_t *pmd,
+		unsigned long addr, unsigned long end, pte_t *pentry,
+		pte_t *sentinel_pte, bool set_at)
+{
+	bool rc;
+	pte_t orig;
+	pte_t entry;
+	unsigned long next;
+	unsigned long hugepage_shift;
+
+	rc = true;
+	orig = *(pte_t *)pmd;
+	entry = *pentry;
+
+	if (set_at) {
+		hugepage_shift = tte_to_shift(entry);
+		if (hugepage_shift == REAL_HPAGE_SHIFT) {
+			*pmd = __pmd(pte_val(entry));
+		} else {
+			do {
+				next = pmd_addr_end(addr, end);
+				rc = __set_huge_pte_at(mm, addr, (pte_t *)pmd,
+					entry, sentinel_pte, hugepage_shift);
+				if (!rc)
+					break;
+			} while (pmd++, addr = next, addr != end);
+		}
+		*pentry = entry;
+	} else {
+		hugepage_shift = tte_to_shift(orig);
+		if (hugepage_shift == REAL_HPAGE_SHIFT) {
+			*pmd = __pmd(0);
+		} else {
+			do {
+				next = pmd_addr_end(addr, end);
+				__clear_huge_pte_at(mm, addr, (pte_t *)pmd,
+					sentinel_pte, hugepage_shift);
+			} while (pmd++, addr = next, addr != end);
+		}
+	}
+
+	if (hugepage_shift == REAL_HPAGE_SHIFT) {
+		/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
+		maybe_tlb_batch_add(mm, addr, (pte_t *)pmd, orig, 0);
+		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE,
+				    (pte_t *)pmd, orig, 0);
+	}
+
+	return rc;
+}
+
 static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud,
 		unsigned long addr, unsigned long end, pte_t *pentry,
 		pte_t *sentinel_pte, bool set_at)
@@ -414,11 +470,21 @@ static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud,
 	pmd_t *pmd = pmd_offset(pud, addr);
 	unsigned long next;
 	bool rc;
+	unsigned int is_huge_pmd;
+
+	if (set_at)
+		is_huge_pmd = is_hugetlb_pmd(__pmd(pte_val(*pentry)));
+	else
+		is_huge_pmd = is_hugetlb_pmd(*pmd);
+
+	if (is_huge_pmd)
+		return __set_huge_pmd_at(mm, pmd, addr, end, pentry,
+					 sentinel_pte, set_at);
 
 	do {
 		next = pmd_addr_end(addr, end);
 		rc = set_huge_pte_range_at(mm, pmd, addr, pentry,
-				sentinel_pte, set_at);
+					   sentinel_pte, set_at);
 	} while (pmd++, addr = next, ((addr != end) && rc));
 	return rc;
 }
@@ -512,7 +578,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pud = pud_offset(pgd, addr);
 	if (!pud_none(*pud)) {
 		pmd = pmd_offset(pud, addr);
-		if (!pmd_none(*pmd))
+		if (xl_hugepage_shift == HPAGE_SHIFT)
+			pte = (pte_t *)pmd;
+		else if (!pmd_none(*pmd))
 			pte = pte_offset_map(pmd, addr);
 	}
 }
@@ -529,3 +597,100 @@ int pud_huge(pud_t pud)
 {
 	return 0;
 }
+
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+				   unsigned long addr)
+{
+	pgtable_t token = pmd_pgtable(*pmd);
+
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	atomic_long_dec(&tlb->mm->nr_ptes);
+}
+
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd))
+			continue;
+		if (is_hugetlb_pmd(*pmd))
+			pmd_clear(pmd);
+		else
+			hugetlb_free_pte_range(tlb, pmd, addr);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
+}
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+				       ceiling);
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud, start);
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(tlb->mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
+}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index a98835a31501..8b20819ae99e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -439,7 +439,7 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-unsigned int xl_hugepage_shift;
+unsigned int xl_hugepage_shift = HPAGE_SHIFT;
 static unsigned long xl_hugepage_pte;
 
 static bool is_xl_hugetlb_pte(pte_t pte)
@@ -608,10 +608,12 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (mm->context.huge_pte_count[MM_PTES_HUGE] &&
-	    is_default_hugetlb_pte(pte))
+	    is_default_hugetlb_pte(pte)) {
+		/* We are fabricating 8MB pages using 4MB real hw pages */
+		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
 		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
 					address, pte_val(pte));
-	else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] &&
+	} else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] &&
 		   is_xl_hugetlb_pte(pte))
 		__update_mmu_tsb_insert(mm, MM_TSB_XLHUGE, xl_hugepage_shift,
 					address, pte_val(pte));
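
To make the saving concrete, here is a rough sketch (not part of the patch; the
function name is illustrative) of what the huge_pte_alloc() hunk above does for
an 8M mapping. It assumes the sparc64 layout this series targets: 8 KB base
pages, so PMD_SIZE == HPAGE_SIZE == 8 MB and a full PTE table occupies one
8 KB page; the loop and error handling of the real function are omitted.

static pte_t *huge_pte_alloc_sketch(struct mm_struct *mm, unsigned long addr,
				    unsigned long size)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud = pud_alloc(mm, pgd, addr);
	pmd_t *pmd = pud ? pmd_alloc(mm, pud, addr) : NULL;

	if (!pmd)
		return NULL;

	/* 8M hugepage: the huge PTE is stored in the PMD entry itself,
	 * so no PTE table is allocated for this range.
	 */
	if (size == HPAGE_SIZE)
		return (pte_t *)pmd;

	/* Other (xl) hugepage sizes still descend to the PTE level. */
	return pte_alloc_map(mm, NULL, pmd, addr);
}

Skipping the PTE level this way saves roughly one 8 KB PTE page per 8 MB
hugepage (about 0.1% of the mapped size), and the new hugetlb_free_pgd_range()
unwinds the same trimmed layout at teardown by clearing huge PMD entries
directly instead of freeing a PTE table.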