#define HPAGE_SHIFT 23
#define REAL_HPAGE_SHIFT 22
+#define HPAGE_16GB_SHIFT 34
#define HPAGE_2GB_SHIFT 31
#define HPAGE_256MB_SHIFT 28
#define HPAGE_64K_SHIFT 16
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
-#define HUGE_MAX_HSTATE 4
+#define HUGE_MAX_HSTATE 5
#endif
#ifndef __ASSEMBLY__
return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
}
+static inline bool is_hugetlb_pud(pud_t pud)
+{
+ return !!(pud_val(pud) & _PAGE_PUD_HUGE);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
return pte_write(pte);
}
+#define pud_write(pud) pte_write(__pte(pud_val(pud)))
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline unsigned long pmd_dirty(pmd_t pmd)
{
return ((unsigned long) __va(pfn << PAGE_SHIFT));
}
+static inline unsigned long pud_page_vaddr(pud_t pud)
+{
+ pte_t pte = __pte(pud_val(pud));
+ unsigned long pfn;
+
+ pfn = pte_pfn(pte);
+
+ return ((unsigned long) __va(pfn << PAGE_SHIFT));
+}
+
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
-#define pud_page_vaddr(pud) \
- ((unsigned long) __va(pud_val(pud)))
+
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
nop; \
699:
+ /* PUD has been loaded into REG1, interpret the value, seeing
+ * if it is a HUGE PUD or a normal one. If it is not valid
+ * then jump to FAIL_LABEL. If it is a HUGE PUD, and it
+ * translates to a valid PTE, branch to PTE_LABEL.
+ *
+ * We have to propagate bits [32:22] from the virtual address
+ * to resolve at 4M granularity.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ sethi %uhi(_PAGE_PUD_HUGE), REG2; \
+ sllx REG2, 32, REG2; \
+ andcc REG1, REG2, %g0; \
+ be,pt %xcc, 700f; \
+ sethi %hi(0x1ffc0000), REG2; \
+ sllx REG2, 1, REG2; \
+ brgez,pn REG1, FAIL_LABEL; \
+ andn REG1, REG2, REG1; \
+ and VADDR, REG2, REG2; \
+ brlz,pt REG1, PTE_LABEL; \
+ or REG1, REG2, REG1; \
+700:
+#else
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ nop;
+#endif
+
/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
* then jump to FAIL_LABEL. If it is a HUGE PMD, and it
srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
/* Valid PTE is now in %g5. */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- sethi %uhi(_PAGE_PMD_HUGE), %g7
+ sethi %uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
sllx %g7, 32, %g7
andcc %g5, %g7, %g0
refs = 0;
head = pmd_page(pmd);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ if (PageTail(head))
+ head = compound_head(head);
tail = page;
do {
VM_BUG_ON(compound_head(page) != head);
return 1;
}
+static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
+ unsigned long end, int write, struct page **pages,
+ int *nr)
+{
+ struct page *head, *page, *tail;
+ int refs;
+
+ if (!(pud_val(pud) & _PAGE_VALID))
+ return 0;
+
+ if (write && !pud_write(pud))
+ return 0;
+
+ refs = 0;
+ head = pud_page(pud);
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ if (PageTail(head))
+ head = compound_head(head);
+ tail = page;
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pud_val(pud) != pud_val(*pudp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ /* Any tail page need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
- if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ if (unlikely(pud_large(pud))) {
+ if (!gup_huge_pud(pudp, pud, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
switch (shift) {
+ case HPAGE_16GB_SHIFT:
+ hugepage_size = _PAGE_SZ16GB_4V;
+ pte_val(entry) |= _PAGE_PUD_HUGE;
+ break;
case HPAGE_2GB_SHIFT:
hugepage_size = _PAGE_SZ2GB_4V;
pte_val(entry) |= _PAGE_PMD_HUGE;
unsigned int shift;
switch (tte_szbits) {
+ case _PAGE_SZ16GB_4V:
+ shift = HPAGE_16GB_SHIFT;
+ break;
case _PAGE_SZ2GB_4V:
shift = HPAGE_2GB_SHIFT;
break;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
- if (p4d) {
- pud = pud_alloc(mm, p4d, addr);
- if (pud) {
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return NULL;
-
- if (sz >= PMD_SIZE)
- pte = (pte_t *)pmd;
- else
- pte = pte_alloc_map(mm, NULL, pmd, addr);
- }
- }
-
- return pte;
+ if (!p4d)
+ return NULL;
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+ if (sz >= PUD_SIZE)
+ return (pte_t *)pud;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+ if (sz >= PMD_SIZE)
+ return (pte_t *)pmd;
+ return pte_alloc_map(mm, NULL, pmd, addr);
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- if (!pgd_none(*pgd)) {
- p4d = p4d_offset(pgd, addr);
- if (!p4d_none(*p4d)) {
- pud = pud_offset(p4d, addr);
- if (!pud_none(*pud)) {
- pmd = pmd_offset(pud, addr);
- if (!pmd_none(*pmd)) {
- if (is_hugetlb_pmd(*pmd))
- pte = (pte_t *)pmd;
- else
- pte = pte_offset_map(pmd, addr);
- }
- }
- }
- }
-
- return pte;
+ if (pgd_none(*pgd))
+ return NULL;
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return NULL;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return NULL;
+ if (is_hugetlb_pud(*pud))
+ return (pte_t *)pud;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return NULL;
+ if (is_hugetlb_pmd(*pmd))
+ return (pte_t *)pmd;
+ return pte_offset_map(pmd, addr);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
- unsigned int i, nptes, orig_shift, shift;
- unsigned long size;
+ unsigned int nptes, orig_shift, shift;
+ unsigned long i, size;
pte_t orig;
size = huge_tte_to_size(entry);
- shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
+ else
+ shift = PAGE_SHIFT;
+
nptes = size >> shift;
if (!pte_present(*ptep) && pte_present(entry))
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- unsigned int i, nptes, hugepage_shift;
+ unsigned int i, nptes, orig_shift, shift;
unsigned long size;
pte_t entry;
entry = *ptep;
size = huge_tte_to_size(entry);
- if (size >= HPAGE_SIZE)
- nptes = size >> PMD_SHIFT;
+
+ shift = PAGE_SHIFT;
+ if (size >= PUD_SIZE)
+ shift = PUD_SHIFT;
+ else if (size >= PMD_SIZE)
+ shift = PMD_SHIFT;
else
- nptes = size >> PAGE_SHIFT;
+ shift = PAGE_SHIFT;
- hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
- huge_tte_to_shift(entry);
+ nptes = size >> shift;
+ orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
if (pte_present(entry))
mm->context.hugetlb_pte_count -= nptes;
for (i = 0; i < nptes; i++)
ptep[i] = __pte(0UL);
- maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+ maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
if (size == HPAGE_SIZE)
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
- hugepage_shift);
+ orig_shift);
return entry;
}
int pud_huge(pud_t pud)
{
- return 0;
+ return !pud_none(pud) &&
+ (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
+ if (is_hugetlb_pud(*pud))
+ pud_clear(pud);
+ else
+ hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+ ceiling);
} while (pud++, addr = next, addr != end);
start &= P4D_MASK;
hugepage_shift = ilog2(hugepage_size);
switch (hugepage_shift) {
+ case HPAGE_16GB_SHIFT:
+ hv_pgsz_mask = HV_PGSZ_MASK_16GB;
+ hv_pgsz_idx = HV_PGSZ_IDX_16GB;
+ break;
case HPAGE_2GB_SHIFT:
hv_pgsz_mask = HV_PGSZ_MASK_2GB;
hv_pgsz_idx = HV_PGSZ_IDX_2GB;
{
struct mm_struct *mm;
unsigned long flags;
+ bool is_huge_tsb;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
spin_lock_irqsave(&mm->context.lock, flags);
+ is_huge_tsb = false;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
- if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
- is_hugetlb_pmd(__pmd(pte_val(pte)))) {
- /* We are fabricating 8MB pages using 4MB real hw pages. */
- pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
- __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
- address, pte_val(pte));
- } else
+ if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
+ unsigned long hugepage_size = PAGE_SIZE;
+
+ if (is_vm_hugetlb_page(vma))
+ hugepage_size = huge_page_size(hstate_vma(vma));
+
+ if (hugepage_size >= PUD_SIZE) {
+ unsigned long mask = 0x1ffc00000UL;
+
+ /* Transfer bits [32:22] from address to resolve
+ * at 4M granularity.
+ */
+ pte_val(pte) &= ~mask;
+ pte_val(pte) |= (address & mask);
+ } else if (hugepage_size >= PMD_SIZE) {
+ /* We are fabricating 8MB pages using 4MB
+ * real hw pages.
+ */
+ pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
+ }
+
+ if (hugepage_size >= PMD_SIZE) {
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE,
+ REAL_HPAGE_SHIFT, address, pte_val(pte));
+ is_huge_tsb = true;
+ }
+ }
#endif
+ if (!is_huge_tsb)
__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
address, pte_val(pte));