sparc64: Trim page tables at PMD for hugepages
author	Nitin Gupta <nitin.m.gupta@oracle.com>
	Fri, 27 May 2016 21:58:13 +0000 (14:58 -0700)
committer	Allen Pais <allen.pais@oracle.com>
	Thu, 15 Sep 2016 06:57:44 +0000 (12:27 +0530)
For PMD-aligned (8M) hugepages, we currently allocate all four
page table levels, which is wasteful. We now allocate only down
to the PMD level, which saves the memory otherwise spent on the
lower page table levels.

Orabug: 22630259

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
(cherry picked from commit 5d2c7930a4d3bf3ca560048052d638d7efa67e36)
(cherry picked from commit abefebd73e204979661a818ac31cf455d110a672)
Signed-off-by: Allen Pais <allen.pais@oracle.com>
arch/sparc/include/asm/hugetlb.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/tsb.h
arch/sparc/mm/fault_64.c
arch/sparc/mm/hugetlbpage.c
arch/sparc/mm/init_64.c
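
The idea, in brief: an 8M hugepage is exactly PMD-sized, so its translation
entry can be written directly into the PMD slot and no PTE page needs to be
allocated beneath it. Below is a minimal sketch of that allocation path, not
taken from the patch: the function name huge_pte_alloc_sketch is made up, the
XL-hugepage loop and locking are omitted, and only the helpers the patched
huge_pte_alloc() itself uses (pgd_offset, pud_alloc, pmd_alloc, pte_alloc_map,
HPAGE_SIZE) are assumed.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/* Hypothetical sketch, not part of the patch: for an 8M (PMD-sized)
 * hugepage we stop at the PMD and hand back the PMD slot itself as the
 * "huge PTE"; only the larger (XL) hugepage sizes still descend to a
 * separate PTE page.
 */
static pte_t *huge_pte_alloc_sketch(struct mm_struct *mm,
				    unsigned long addr, unsigned long size)
{
	pgd_t *pgd = pgd_offset(mm, addr);	/* top level, always present */
	pud_t *pud = pud_alloc(mm, pgd, addr);	/* allocate PUD if missing */
	pmd_t *pmd;

	if (!pud)
		return NULL;
	pmd = pmd_alloc(mm, pud, addr);		/* allocate PMD if missing */
	if (!pmd)
		return NULL;
	if (size == HPAGE_SIZE)
		return (pte_t *)pmd;		/* 8M: the PMD entry is the huge PTE */
	return pte_alloc_map(mm, NULL, pmd, addr); /* other sizes: allocate a PTE page */
}

Because nothing exists below the PMD for such mappings, teardown must also
stop at the PMD; that is what the new hugetlb_free_pgd_range() and
hugetlb_free_pmd_range() added in the hugetlbpage.c hunk below take care of.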

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 4b8e2cea21d91d24305c73b33127aa45cf10850c..6670f9111d63a82030a52da1a3e73e82e70baa97 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -11,6 +11,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep);
 
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+                           unsigned long end, unsigned long floor,
+                           unsigned long ceiling);
+
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
 {
 }
@@ -37,14 +41,6 @@ static inline int prepare_hugepage_range(struct file *file,
        return 0;
 }
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-                                         unsigned long addr, unsigned long end,
-                                         unsigned long floor,
-                                         unsigned long ceiling)
-{
-       free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
                                         unsigned long addr, pte_t *ptep)
 {
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index ed7a88bcd7fd5c16312ab809e86e4b5542c753d6..e0af5d3dc36b50e402661f227eb9e87654fc0bad 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -411,6 +411,11 @@ static inline bool is_default_hugetlb_pte(pte_t pte)
        return (pte_val(pte) & mask) == mask;
 }
 
+static inline bool is_hugetlb_pmd(pmd_t pmd)
+{
+       return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index ecb49cfa3be9fa274053fb15bc2d5f3c38c76e08..131503e36fb0864602f7028cd8c21a89197ebe50 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -203,7 +203,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
         * We have to propagate the 4MB bit of the virtual address
         * because we are fabricating 8MB pages using 4MB hw pages.
         */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
        brz,pn          REG1, FAIL_LABEL;               \
         sethi          %uhi(_PAGE_PMD_HUGE), REG2;     \
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 13a645eac73fd590cbbc5e70ab6b050afc55a684..c911762ae6759f2d40d67b147d4685d701434c5d 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -113,8 +113,8 @@ static unsigned int get_user_insn(unsigned long tpc)
        if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
                goto out_irq_enable;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (pmd_trans_huge(*pmdp)) {
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+       if (is_hugetlb_pmd(*pmdp)) {
                if (pmd_trans_splitting(*pmdp))
                        goto out_irq_enable;
 
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 298fa1e6a7b72d26e1a84074aaf66421e8227c31..16a3b1b3e7ba4049641eb6b1db78243ceb304c74 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
+#include <asm/pgtable.h>
 
 /* Slightly simplified from the non-hugepage variant because by
  * definition we don't have to worry about any page coloring stuff
@@ -172,6 +173,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
 
                if (!pmd)
                        goto fail;
+               if (size == HPAGE_SIZE) {
+                       rpte = (pte_t *)pmd;
+                       break;
+               }
 
                pte = pte_alloc_map(mm, NULL, pmd, addr);
 
@@ -254,7 +259,7 @@ static unsigned long tte_to_hugepage_mask(pte_t pte)
 }
 
 /* This should also be moved and a noop for sun4u.
- * Only include xl hugepage sizes we plan to support.
+ * Only include hugepage sizes we plan to support.
  */
 static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift)
 {
@@ -271,6 +276,10 @@ static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift)
        case XLHPAGE_2GB_SHIFT:
                sun4v_hugepage_size = _PAGE_SZ2GB_4V;
                break;
+       /* 8Mb */
+       case HPAGE_SHIFT:
+               pte_val(entry) |= _PAGE_PMD_HUGE;
+               break;
        default:
                WARN_ONCE(hugepage_shift,
                        "hugepage_shift_to_tte: unsupported "
@@ -286,11 +295,7 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 {
        unsigned int hugepage_shift = huge_page_shift(hstate_vma(vma));
 
-       if (hugepage_shift == HPAGE_SHIFT)
-               goto out;
-       entry = hugepage_shift_to_tte(entry, hugepage_shift);
-out:
-       return entry;
+       return hugepage_shift_to_tte(entry, hugepage_shift);
 }
 
 static void huge_pte_at_flush_update(struct mm_struct *mm, unsigned long addr,
@@ -407,6 +412,57 @@ static bool set_huge_pte_range_at(struct mm_struct *mm, pmd_t *pmd,
        return rc;
 }
 
+static bool __set_huge_pmd_at(struct mm_struct *mm, pmd_t *pmd,
+               unsigned long addr, unsigned long end, pte_t *pentry,
+               pte_t *sentinel_pte, bool set_at)
+{
+       bool rc;
+       pte_t orig;
+       pte_t entry;
+       unsigned long next;
+       unsigned long hugepage_shift;
+
+       rc = true;
+       orig = *(pte_t *)pmd;
+       entry = *pentry;
+
+       if (set_at) {
+               hugepage_shift = tte_to_shift(entry);
+               if (hugepage_shift == REAL_HPAGE_SHIFT) {
+                       *pmd = __pmd(pte_val(entry));
+               } else {
+                       do {
+                               next = pmd_addr_end(addr, end);
+                               rc = __set_huge_pte_at(mm, addr, (pte_t *)pmd,
+                                       entry, sentinel_pte, hugepage_shift);
+                               if (!rc)
+                                       break;
+                       } while (pmd++, addr = next, addr != end);
+               }
+               *pentry = entry;
+       } else {
+               hugepage_shift = tte_to_shift(orig);
+               if (hugepage_shift == REAL_HPAGE_SHIFT) {
+                       *pmd = __pmd(0);
+               } else {
+                       do {
+                               next = pmd_addr_end(addr, end);
+                               __clear_huge_pte_at(mm, addr, (pte_t *)pmd,
+                                       sentinel_pte, hugepage_shift);
+                       } while (pmd++, addr = next, addr != end);
+               }
+       }
+
+       if (hugepage_shift == REAL_HPAGE_SHIFT) {
+               /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
+               maybe_tlb_batch_add(mm, addr, (pte_t *)pmd, orig, 0);
+               maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE,
+                                       (pte_t *)pmd, orig, 0);
+       }
+
+       return rc;
+}
+
 static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud,
                            unsigned long addr, unsigned long end,
                            pte_t *pentry, pte_t *sentinel_pte, bool set_at)
@@ -414,11 +470,21 @@ static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud,
        pmd_t *pmd = pmd_offset(pud, addr);
        unsigned long next;
        bool rc;
+       unsigned int is_huge_pmd;
+
+       if (set_at)
+               is_huge_pmd = is_hugetlb_pmd(__pmd(pte_val(*pentry)));
+       else
+               is_huge_pmd = is_hugetlb_pmd(*pmd);
+
+       if (is_huge_pmd)
+               return __set_huge_pmd_at(mm, pmd, addr, end, pentry,
+                                       sentinel_pte, set_at);
 
        do {
                next = pmd_addr_end(addr, end);
                rc = set_huge_pte_range_at(mm, pmd, addr, pentry,
-                               sentinel_pte, set_at);
+                                       sentinel_pte, set_at);
        } while (pmd++, addr = next, ((addr != end) && rc));
        return rc;
 }
@@ -512,7 +578,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
                        pmd = pmd_offset(pud, addr);
-                       if (!pmd_none(*pmd))
+                       if (xl_hugepage_shift == HPAGE_SHIFT)
+                               pte = (pte_t *)pmd;
+                       else if (!pmd_none(*pmd))
                                pte = pte_offset_map(pmd, addr);
                }
        }
@@ -529,3 +597,100 @@ int pud_huge(pud_t pud)
 {
        return 0;
 }
+
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+                          unsigned long addr)
+{
+       pgtable_t token = pmd_pgtable(*pmd);
+
+       pmd_clear(pmd);
+       pte_free_tlb(tlb, token, addr);
+       atomic_long_dec(&tlb->mm->nr_ptes);
+}
+
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                                  unsigned long addr, unsigned long end,
+                                  unsigned long floor, unsigned long ceiling)
+{
+       pmd_t *pmd;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none(*pmd))
+                       continue;
+               if (is_hugetlb_pmd(*pmd))
+                       pmd_clear(pmd);
+               else
+                       hugetlb_free_pte_range(tlb, pmd, addr);
+       } while (pmd++, addr = next, addr != end);
+
+       start &= PUD_MASK;
+       if (start < floor)
+               return;
+       if (ceiling) {
+               ceiling &= PUD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               return;
+
+       pmd = pmd_offset(pud, start);
+       pud_clear(pud);
+       pmd_free_tlb(tlb, pmd, start);
+       mm_dec_nr_pmds(tlb->mm);
+}
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                                  unsigned long addr, unsigned long end,
+                                  unsigned long floor, unsigned long ceiling)
+{
+       pud_t *pud;
+       unsigned long next;
+       unsigned long start;
+
+       start = addr;
+       pud = pud_offset(pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+                                      ceiling);
+       } while (pud++, addr = next, addr != end);
+
+       start &= PGDIR_MASK;
+       if (start < floor)
+               return;
+       if (ceiling) {
+               ceiling &= PGDIR_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               return;
+
+       pud = pud_offset(pgd, start);
+       pgd_clear(pgd);
+       pud_free_tlb(tlb, pud, start);
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+                           unsigned long addr, unsigned long end,
+                           unsigned long floor, unsigned long ceiling)
+{
+       pgd_t *pgd;
+       unsigned long next;
+
+       pgd = pgd_offset(tlb->mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+       } while (pgd++, addr = next, addr != end);
+}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index a98835a31501d6f834b39a1fb69e7b09f2f1d2ae..8b20819ae99ee62498320c65d1eff67d38827286 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -439,7 +439,7 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-unsigned int xl_hugepage_shift;
+unsigned int xl_hugepage_shift = HPAGE_SHIFT;
 static unsigned long xl_hugepage_pte;
 
 static bool is_xl_hugetlb_pte(pte_t pte)
@@ -608,10 +608,12 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.huge_pte_count[MM_PTES_HUGE] &&
-                       is_default_hugetlb_pte(pte))
+                       is_default_hugetlb_pte(pte)) {
+               /* We are fabricating 8MB pages using 4MB real hw pages */
+               pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
                __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        address, pte_val(pte));
-       else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] &&
+       } else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] &&
                        is_xl_hugetlb_pte(pte))
                __update_mmu_tsb_insert(mm, MM_TSB_XLHUGE, xl_hugepage_shift,
                        address, pte_val(pte));