Orabug: 24478985
As pages are allocated by a task, counters in the mm and mm_context
structures are used to track these allocations. These counters are
then used to size the task's TSBs. This patch addresses issues where
the counts are not maintained properly, causing incorrectly sized TSBs
to be created for the task.
- hugetlb pages are not included in a task's RSS calculations. However,
the routine do_sparc64_fault() calculates the size of the base TSB block
by subtracting the total size of hugetlb pages from RSS. Since the
hugetlb total is often larger than RSS, the unsigned subtraction wraps
around, and the resulting 'negative unsigned' value appears as a very
large count to the routine which allocates the TSB block. The result is
a maximum sized base TSB for almost all tasks using hugetlb pages (see
the sketch following this list).
THP pages are also counted in huge_pte_count[MM_PTES_HUGE]. Unlike
hugetlb pages, however, THP pages are included in a task's RSS.
Therefore, hugetlb and THP pages cannot both be counted in
huge_pte_count[MM_PTES_HUGE].
Add a new counter thp_pte_count for THP pages, and use this value for
adjusting RSS to size the base TSB.
- In order to save memory, THP makes use of a huge zero page. This huge
zero page does not count against a task's RSS, but it does consume TSB
entries. Therefore, count huge zero page entries in
huge_pte_count[MM_PTES_HUGE].
- Accounting of THP pages is done in the routine set_pmd_at().
Unfortunately, this does not catch the case where a THP page is split.
To handle this case, decrement the count in pmdp_invalidate().
pmdp_invalidate() is only called when splitting a THP. However, 'sanity
checks' are added in case it is ever called for other purposes.
- huge_pte_count[MM_PTES_HUGE] tracks the number of HPAGE_SIZE (8M) pages
used by the task. This value is used to size the TSB for HPAGE_SIZE
pages. However, for each HPAGE_SIZE (8M) page there are two REAL_HPAGE_SIZE
(4M) pages. The TSB contains an entry for each REAL_HPAGE_SIZE page.
Therefore, the number of REAL_HPAGE_SIZE pages used by the task should
be used to size the MM_PTES_HUGE TSB. A new compile time constant
REAL_HPAGE_PER_HPAGE is used to multiply huge_pte_count[MM_PTES_HUGE]
before sizing the TSB.
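For illustration, here is a minimal user-space sketch of the unsigned
wraparound described in the first item above (the variable names and
values are hypothetical, chosen only to demonstrate the effect):

    #include <stdio.h>

    int main(void)
    {
            unsigned long rss = 512;            /* pages counted in RSS */
            unsigned long hugetlb_pages = 4096; /* hugetlb pages, not in RSS */

            /* The old sizing code performed this subtraction with
             * unsigned arithmetic.
             */
            unsigned long tsb_pages = rss - hugetlb_pages;

            /* Wraparound: tsb_pages is now a value near 2^64, so the
             * base TSB was always grown to its maximum size.
             */
            printf("tsb_pages = %lu\n", tsb_pages);
            return 0;
    }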
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Tested-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
(cherry picked from commit 417fc85e759b6d4c4602fbdbdd5375ec5ddf2cb0)
Signed-off-by: Allen Pais <allen.pais@oracle.com>
struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
void *vdso;
unsigned long huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
+ unsigned long thp_pte_count;
} mm_context_t;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define HUGE_MAX_HSTATE 2
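+/* With HPAGE_SHIFT = 23 (8M pages) and REAL_HPAGE_SHIFT = 22 (4M real
+ * hw pages), this evaluates to 2: each 8M huge page occupies two
+ * REAL_HPAGE_SIZE TSB entries.
+ */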
+#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
#endif
#ifndef __ASSEMBLY__
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
/* Put this here until there are more consumers.*/
-static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
-{
- unsigned long hugepages_to_pages = 0UL;
-
- if (xl_hugepage_shift)
- hugepages_to_pages = xl_hugepage_pte_count(mm_context) <<
- (xl_hugepage_shift - PAGE_SHIFT);
- hugepages_to_pages = hugepages_to_pages +
- (hugepage_pte_count(mm_context) << (HPAGE_SHIFT - PAGE_SHIFT));
-
- return hugepages_to_pages;
-}
-
static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
struct mm_struct *mm,
unsigned int hugepage_shift)
hugepage_size_to_pte_count_idx(1UL << hugepage_shift);
mm_rss = mm->context.huge_pte_count[hugepage_pte_idx];
+ if (hugepage_idx == MM_TSB_HUGE) {
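+ /* Both THP mappings and huge zero page mappings land in the 8M
+  * TSB, and each 8M page needs two REAL_HPAGE_SIZE (4M) entries.
+  */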
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+ mm_rss += mm->context.thp_pte_count;
+#endif
+ mm_rss *= REAL_HPAGE_PER_HPAGE;
+ }
+
if (unlikely(mm_rss >
mm->context.tsb_block[hugepage_idx].tsb_rss_limit)) {
if (mm->context.tsb_block[hugepage_idx].tsb)
}
}
#else
-static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
-{
- return 0UL;
-}
static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
struct mm_struct *mm,
unsigned int hugepage_shift)
up_read(&mm->mmap_sem);
mm_rss = get_mm_rss(mm);
- mm_rss = mm_rss - hugepage_pte_counts_to_pages(&mm->context);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
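+ /* THP pages are counted in RSS but are mapped via the huge TSB,
+  * so exclude them when sizing the base TSB.
+  */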
+ mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
+#endif
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
tsb_grow(mm, MM_TSB_BASE, mm_rss);
spin_lock_irqsave(&mm->context.lock, flags);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if (mm->context.huge_pte_count[MM_PTES_HUGE] &&
- is_default_hugetlb_pte(pte)) {
+ if ((mm->context.huge_pte_count[MM_PTES_HUGE] ||
+ mm->context.thp_pte_count) && is_default_hugetlb_pte(pte)) {
/* We are fabricating 8MB pages using 4MB real hw pages */
pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
return;
if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
- if (pmd_val(pmd) & _PAGE_PMD_HUGE)
- mm->context.huge_pte_count[MM_PTES_HUGE]++;
- else
- mm->context.huge_pte_count[MM_PTES_HUGE]--;
+ /*
+ * Note that this routine only sets pmds for THP pages.
+ * Hugetlb pages are handled elsewhere. We need to check
+ * for huge zero page. Huge zero pages are like hugetlb
+ * pages in that there is no RSS, but there is the need
+ * for TSB entries. So, huge zero page counts go into
+ * huge_pte_count[MM_PTES_HUGE].
+ */
+ if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+ if (is_huge_zero_page(pmd_page(pmd)))
+ mm->context.huge_pte_count[MM_PTES_HUGE]++;
+ else
+ mm->context.thp_pte_count++;
+ } else {
+ if (is_huge_zero_page(pmd_page(orig)))
+ mm->context.huge_pte_count[MM_PTES_HUGE]--;
+ else
+ mm->context.thp_pte_count--;
+ }
/* Do not try to allocate the TSB hash table if we
* don't have one already. We have various locks held
}
}
+/*
+ * This routine is only called when splitting a THP
+ */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
set_pmd_at(vma->vm_mm, address, pmdp, entry);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+ /*
+ * set_pmd_at() will not be called in a way to decrement the
+ * context.thp_pte_count when splitting a THP, so do it now.
+ * Sanity check pmd before doing the actual decrement.
+ */
+ if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+ !is_huge_zero_page(pmd_page(entry)))
+ (vma->vm_mm)->context.thp_pte_count--;
}
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+ unsigned long *thp_pte_count,
unsigned long *capture_array)
{
unsigned int hugepage_idx;
+ *thp_pte_count = mm_context->thp_pte_count;
+ mm_context->thp_pte_count = 0UL;
+
for (hugepage_idx = 0UL; hugepage_idx != MM_NUM_HUGEPAGE_SIZES;
hugepage_idx++) {
capture_array[hugepage_idx] =
static void
captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+ unsigned long *thp_pte_count,
unsigned long *capture_huge_pte_count)
{
- if (unlikely(capture_huge_pte_count[MM_PTES_HUGE]))
+ if (unlikely(capture_huge_pte_count[MM_PTES_HUGE] || *thp_pte_count))
tsb_grow(mm, MM_TSB_HUGE,
- capture_huge_pte_count[MM_PTES_HUGE]);
+ (capture_huge_pte_count[MM_PTES_HUGE] +
+ *thp_pte_count) * REAL_HPAGE_PER_HPAGE);
if (unlikely(capture_huge_pte_count[MM_PTES_XLHUGE]))
tsb_grow(mm, MM_TSB_XLHUGE,
}
#else
static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+ unsigned long *thp_pte_count,
unsigned long *capture_array) {}
static void
captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+ unsigned long *thp_pte_count,
unsigned long *capture_huge_pte_count) {}
#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
unsigned long capture_huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
+ unsigned long saved_thp_pte_count;
unsigned int i;
spin_lock_init(&mm->context.lock);
* will re-increment the counters as the parent PTEs are
* copied into the child address space.
*/
- capture_and_clear_huge_pte_counts(&mm->context, capture_huge_pte_count);
+ capture_and_clear_huge_pte_counts(&mm->context, &saved_thp_pte_count,
+ capture_huge_pte_count);
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
/* If this is fork, inherit the parent's TSB size. We would
* grow it to that size on the first page fault anyways.
*/
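+ /* THP pages are mapped via the huge TSB; exclude them from the
+  * RSS value used to size the base TSB, as in the fault path.
+  */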
- tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+ tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm) -
+ saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
- captured_hugepage_pte_count_grow_tsb(mm, capture_huge_pte_count);
+ captured_hugepage_pte_count_grow_tsb(mm, &saved_thp_pte_count,
+ capture_huge_pte_count);
if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
return -ENOMEM;