sparc64: Fix accounting issues used to size TSBs
author Mike Kravetz <mike.kravetz@oracle.com>
Thu, 22 Sep 2016 16:44:27 +0000 (09:44 -0700)
committer Allen Pais <allen.pais@oracle.com>
Tue, 8 Nov 2016 10:06:25 +0000 (15:36 +0530)
Orabug: 24478985

As pages are allocated by a task, counters in the mm and mm_context
structures are used to track these allocations.  These counters are
then used to size the task's TSBs.  This patch addresses cases where
the counts are not maintained properly and incorrectly sized TSBs
are created for the task.
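
For reference, the base TSB sizing in the fault path looks roughly
like this (a simplified sketch of the do_sparc64_fault() logic, not
the exact code; see the fault_64.c hunk below):

    mm_rss = get_mm_rss(mm);
    if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
            tsb_grow(mm, MM_TSB_BASE, mm_rss);

Any error in the value fed to tsb_grow() therefore translates directly
into a wrongly sized TSB.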

- hugetlb pages are not included in a task's RSS calculations.  However,
  the routine do_sparc64_fault() calculates the size of the base TSB
  block by subtracting the total size of hugetlb pages from RSS.  Since
  the hugetlb size is often larger than RSS, the subtraction underflows:
  a 'negative' value is passed as an unsigned value to the routine that
  allocates the TSB block, where it appears as a very large count and
  results in a maximum-sized base TSB being allocated.  This is the
  case for almost all tasks using hugetlb pages (see the worked example
  at the end of this item).

  THP pages are also counted in huge_pte_count[MM_PTES_HUGE], and
  unlike hugetlb pages, THP pages are included in a task's RSS.
  Therefore, hugetlb and THP pages cannot both be accounted for in
  huge_pte_count[MM_PTES_HUGE].

  Add a new counter thp_pte_count for THP pages, and use this value for
  adjusting RSS to size the base TSB.
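
  To see the underflow concretely (a worked example, not code from the
  patch): with, say, 4G of hugetlb mappings and 8M of ordinary pages
  on an 8K-page kernel, the old calculation did

      mm_rss = get_mm_rss(mm);                          /*   1024 pages */
      mm_rss -= hugepage_pte_counts_to_pages(&mm->context);
                                                        /* 524288 pages */

  Because mm_rss is an unsigned long, 1024 - 524288 wraps to a value
  near 2^64, and tsb_grow() allocates a maximum-sized base TSB.  With
  the fix, only THP pages, which really are in RSS, are subtracted:

      mm_rss -= mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);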

- In order to save memory, THP makes use of a huge zero page.  This huge
  zero page does not count against a task's RSS, but it does consume TSB
  entries.  Therefore, count huge zero page entries in
  huge_pte_count[MM_PTES_HUGE].

- Accounting of THP pages is done in the routine set_pmd_at().
  Unfortunately, this does not catch the case where a THP page is split.
  To handle this case, decrement the count in pmdp_invalidate().
  pmdp_invalidate() is only called when splitting a THP; however, sanity
  checks are added in case it is ever called for other purposes.

- huge_pte_count[MM_PTES_HUGE] tracks the number of HPAGE_SIZE (8M) pages
  used by the task.  This value is used to size the TSB for HPAGE_SIZE
  pages.  However, each HPAGE_SIZE (8M) page consists of two
  REAL_HPAGE_SIZE (4M) pages, and the TSB contains an entry for each
  REAL_HPAGE_SIZE page.  Therefore, the number of REAL_HPAGE_SIZE pages
  used by the task should be used to size the MM_TSB_HUGE TSB.  A new
  compile-time constant REAL_HPAGE_PER_HPAGE is used to multiply
  huge_pte_count[MM_PTES_HUGE] before sizing the TSB.
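
  To check the arithmetic (a worked example using the sparc64 values
  HPAGE_SHIFT == 23 and REAL_HPAGE_SHIFT == 22):

      REAL_HPAGE_PER_HPAGE = 1UL << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)
                           = 1UL << (23 - 22)
                           = 2

  so a task touching N 8M huge pages needs 2*N TSB entries, and
  huge_pte_count[MM_PTES_HUGE] * REAL_HPAGE_PER_HPAGE is the value
  passed to tsb_grow() for the MM_TSB_HUGE TSB.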

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Tested-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
(cherry picked from commit 417fc85e759b6d4c4602fbdbdd5375ec5ddf2cb0)
Signed-off-by: Allen Pais <allen.pais@oracle.com>
arch/sparc/include/asm/mmu_64.h
arch/sparc/include/asm/page_64.h
arch/sparc/mm/fault_64.c
arch/sparc/mm/init_64.c
arch/sparc/mm/tlb.c
arch/sparc/mm/tsb.c

diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index b7e86bd2d01c0aa058455d82f2a532ecf390b83f..4db2fa11e346b559be1987db0d31ce2a5aa39127 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -108,6 +108,7 @@ typedef struct {
        struct hv_tsb_descr     tsb_descr[MM_NUM_TSBS];
        void                    *vdso;
        unsigned long           huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
+       unsigned long           thp_pte_count;
 } mm_context_t;
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index a0c5f05562537de06240c137c68dd45591ecd180..0992dbb6c12752839064f2194becedd647894481 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -28,6 +28,7 @@
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #define HUGE_MAX_HSTATE                2
+#define REAL_HPAGE_PER_HPAGE   (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index f1a78f7aa650010b0cb278244fb4526b1b9cc7cf..41583015d6b5082965fad9292ac34c6d963ef489 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -287,19 +287,6 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 /* Put this here until there are more consumers.*/
-static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
-{
-       unsigned long hugepages_to_pages = 0UL;
-
-       if (xl_hugepage_shift)
-               hugepages_to_pages = xl_hugepage_pte_count(mm_context) <<
-                       (xl_hugepage_shift - PAGE_SHIFT);
-       hugepages_to_pages = hugepages_to_pages +
-               (hugepage_pte_count(mm_context) << (HPAGE_SHIFT - PAGE_SHIFT));
-
-       return hugepages_to_pages;
-}
-
 static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
                                      struct mm_struct *mm,
                                      unsigned int hugepage_shift)
@@ -316,6 +303,13 @@ static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
                hugepage_size_to_pte_count_idx(1UL << hugepage_shift);
 
        mm_rss = mm->context.huge_pte_count[hugepage_pte_idx];
+       if (hugepage_idx == MM_TSB_HUGE) {
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+               mm_rss += mm->context.thp_pte_count;
+#endif
+               mm_rss *= REAL_HPAGE_PER_HPAGE;
+       }
+
        if (unlikely(mm_rss >
             mm->context.tsb_block[hugepage_idx].tsb_rss_limit)) {
                if (mm->context.tsb_block[hugepage_idx].tsb)
@@ -326,10 +320,6 @@ static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
        }
 }
 #else
-static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
-{
-       return 0UL;
-}
 static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
                                      struct mm_struct *mm,
                                      unsigned int hugepage_shift)
@@ -542,7 +532,9 @@ good_area:
        up_read(&mm->mmap_sem);
 
        mm_rss = get_mm_rss(mm);
-       mm_rss =  mm_rss - hugepage_pte_counts_to_pages(&mm->context);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+       mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
+#endif
        if (unlikely(mm_rss >
                     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
                tsb_grow(mm, MM_TSB_BASE, mm_rss);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 556ee725a17c7393823a542cded4229d7f64c3c3..572a25f4b0a9029c175e0ce581e5050738db8fde 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -607,8 +607,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
        spin_lock_irqsave(&mm->context.lock, flags);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (mm->context.huge_pte_count[MM_PTES_HUGE] &&
-                       is_default_hugetlb_pte(pte)) {
+       if ((mm->context.huge_pte_count[MM_PTES_HUGE] ||
+            mm->context.thp_pte_count) && is_default_hugetlb_pte(pte)) {
                /* We are fabricating 8MB pages using 4MB real hw pages */
                pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
                __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 5b0c4738296ece308db701da5f5dfed02f2b1762..91ab380fbc5002e836c702cb7a69ae66f31a1675 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                return;
 
        if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-               if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-                       mm->context.huge_pte_count[MM_PTES_HUGE]++;
-               else
-                       mm->context.huge_pte_count[MM_PTES_HUGE]--;
+               /*
+                * Note that this routine only sets pmds for THP pages.
+                * Hugetlb pages are handled elsewhere.  We need to check
+                * for huge zero page.  Huge zero pages are like hugetlb
+                * pages in that there is no RSS, but there is the need
+                * for TSB entries.  So, huge zero page counts go into
+                * huge_pte_count[MM_PTES_HUGE].
+                */
+               if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+                       if (is_huge_zero_page(pmd_page(pmd)))
+                               mm->context.huge_pte_count[MM_PTES_HUGE]++;
+                       else
+                               mm->context.thp_pte_count++;
+               } else {
+                       if (is_huge_zero_page(pmd_page(orig)))
+                               mm->context.huge_pte_count[MM_PTES_HUGE]--;
+                       else
+                               mm->context.thp_pte_count--;
+               }
 
                /* Do not try to allocate the TSB hash table if we
                 * don't have one already.  We have various locks held
@@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        }
 }
 
+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                     pmd_t *pmdp)
 {
@@ -213,6 +231,16 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 
        set_pmd_at(vma->vm_mm, address, pmdp, entry);
        flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+       /*
+        * set_pmd_at() will not be called in a way to decrement the
+        * context.thp_pte_count when splitting a THP, so do it now.
+        * Sanity check pmd before doing the actual decrement.
+        */
+       if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+           !is_huge_zero_page(pmd_page(entry)))
+               (vma->vm_mm)->context.thp_pte_count--;
+
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 19087ffed9177e0a5298a545381df77155349267..7c5ea6c783a48d0c5f0a1b5feb7f2670f023539f 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -502,10 +502,14 @@ retry_tsb_alloc:
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+                                             unsigned long *thp_pte_count,
                                              unsigned long *capture_array)
 {
        unsigned int hugepage_idx;
 
+       *thp_pte_count = mm_context->thp_pte_count;
+       mm_context->thp_pte_count = 0UL;
+
        for (hugepage_idx = 0UL; hugepage_idx != MM_NUM_HUGEPAGE_SIZES;
                hugepage_idx++) {
                capture_array[hugepage_idx] =
@@ -516,11 +520,13 @@ static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
 
 static void
 captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+                                    unsigned long *thp_pte_count,
                                     unsigned long *capture_huge_pte_count)
 {
-       if (unlikely(capture_huge_pte_count[MM_PTES_HUGE]))
+       if (unlikely(capture_huge_pte_count[MM_PTES_HUGE]) || *thp_pte_count)
                tsb_grow(mm, MM_TSB_HUGE,
-                       capture_huge_pte_count[MM_PTES_HUGE]);
+                       (capture_huge_pte_count[MM_PTES_HUGE] +
+                        *thp_pte_count) * REAL_HPAGE_PER_HPAGE);
 
        if (unlikely(capture_huge_pte_count[MM_PTES_XLHUGE]))
                tsb_grow(mm, MM_TSB_XLHUGE,
@@ -528,15 +534,18 @@
 }
 #else
 static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+                                             unsigned long *thp_pte_count,
                                              unsigned long *capture_array) {}
 static void
 captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+                                    unsigned long *thp_pte_count,
                                     unsigned long *capture_huge_pte_count) {}
 #endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
        unsigned long capture_huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
+       unsigned long saved_thp_pte_count;
        unsigned int i;
 
        spin_lock_init(&mm->context.lock);
@@ -547,7 +556,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
-       capture_and_clear_huge_pte_counts(&mm->context, capture_huge_pte_count);
+       capture_and_clear_huge_pte_counts(&mm->context, &saved_thp_pte_count,
+                                          capture_huge_pte_count);
 
        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
@@ -559,9 +569,11 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyways.
         */
-       tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+       tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm) -
+                saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
 
-       captured_hugepage_pte_count_grow_tsb(mm, capture_huge_pte_count);
+       captured_hugepage_pte_count_grow_tsb(mm, &saved_thp_pte_count,
+                                            capture_huge_pte_count);
 
        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;