From: Vijay Kumar
Date: Thu, 28 Apr 2016 17:27:43 +0000 (-0700)
Subject: sparc64: xl-hugepages
X-Git-Tag: v4.1.12-92~147^2~5
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=476daf0d2cb0527bf997df470bb8cb9c5fc1f642;p=users%2Fjedix%2Flinux-maple.git

sparc64: xl-hugepages

Note: Resending this patch. There is no change in this patch since v1.

Jalapeño was verified as repaired. Now to find performance issues.

One performance issue is subordinate page table state (SPTS). The SPTS
will be tricky because of protection changes for COW and other
operations. For example, a 2Gb hugepage will have 1UL << (31 - 23) = 256
PMD entries. Do we want 256 IPIs for a hugepage TTE (pte) change?

Signed-off-by: Bob Picco
(cherry picked from commit ece059b2e2581a2dcda3fb1ca35cd31258f6ed03)

Conflicts:
	arch/sparc/include/asm/mmu_64.h
	arch/sparc/mm/fault_64.c

Signed-off-by: Vijay Kumar
Acked-by: Nitin Gupta
Orabug: 22729791
Signed-off-by: Allen Pais
---

diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index e4cab465b81f..4b8e2cea21d9 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -28,9 +28,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 {
-	if (len & ~HPAGE_MASK)
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
+	if (addr & ~huge_page_mask(h))
 		return -EINVAL;
 	return 0;
 }
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index fd52dc2d23d9..67b32556bc79 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -58,6 +58,11 @@
 #define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
 #define CTX_NRBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
 
+/* This identifies the three possible tsbs and indices into tsb array. */
+#define MM_TSB_BASE	0
+#define MM_TSB_HUGE	1
+#define MM_TSB_XLHUGE	2
+
 #ifndef __ASSEMBLY__
 
 #define TSB_ENTRY_ALIGNMENT	16
@@ -80,24 +85,69 @@ struct tsb_config {
 	unsigned long		tsb_map_pte;
 };
 
-#define MM_TSB_BASE	0
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-#define MM_TSB_HUGE	1
-#define MM_NUM_TSBS	2
+/* This is for the tsbs.*/
+#define MM_NUM_TSBS	3
+/* This is the count of huge_pte_count array.
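+ * MM_PTES_HUGE indexes the pte count for the default hugepage size
+ * (HPAGE_SIZE); MM_PTES_XLHUGE indexes the count for the XL size.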
*/ +#define MM_NUM_HUGEPAGE_SIZES 2 +#define MM_PTES_HUGE 0 +#define MM_PTES_XLHUGE 1 + #else #define MM_NUM_TSBS 1 +#define MM_NUM_HUGEPAGE_SIZES 0 #endif typedef struct { spinlock_t lock; unsigned long sparc64_ctx_val; - unsigned long huge_pte_count; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; void *vdso; + unsigned long huge_pte_count[MM_NUM_HUGEPAGE_SIZES]; } mm_context_t; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + +static inline unsigned long xl_hugepage_pte_count(mm_context_t *mm_context) +{ + return mm_context->huge_pte_count[MM_PTES_XLHUGE]; +} + +static inline unsigned long hugepage_pte_count(mm_context_t *mm_context) +{ + return mm_context->huge_pte_count[MM_PTES_HUGE]; +} + +static inline unsigned int hugepage_size_to_pte_count_idx( + unsigned long hugepage_size) +{ + unsigned int pte_count_index = MM_PTES_HUGE; + + if (hugepage_size != HPAGE_SIZE) + pte_count_index = MM_PTES_XLHUGE; + + return pte_count_index; +} + +static inline unsigned int real_hugepage_size_to_pte_count_idx( + unsigned long real_hugepage_size) +{ + unsigned int pte_count_index = MM_PTES_HUGE; + + if (real_hugepage_size != REAL_HPAGE_SIZE) + pte_count_index = MM_PTES_XLHUGE; + + return pte_count_index; +} + +void __init hv_establish_xl_hugepage_tsb_descriptor(unsigned short pgsz_idx, + unsigned int pgsz_mask); + +#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */ + + #endif /* !__ASSEMBLY__ */ #define TSB_CONFIG_TSB 0x00 diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index b84be675e507..fc66ba82923c 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -30,6 +30,7 @@ void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, + struct tsb_config *tsb_xl_huge, unsigned long tsb_descr_pa); static inline void tsb_context_switch(struct mm_struct *mm) @@ -37,11 +38,14 @@ static inline void tsb_context_switch(struct mm_struct *mm) __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[0], #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - (mm->context.tsb_block[1].tsb ? - &mm->context.tsb_block[1] : + (mm->context.tsb_block[MM_TSB_HUGE].tsb ? + &mm->context.tsb_block[MM_TSB_HUGE] : + NULL), + (mm->context.tsb_block[MM_TSB_XLHUGE].tsb ? 
+ &mm->context.tsb_block[MM_TSB_XLHUGE] : NULL) #else - NULL + NULL, NULL #endif , __pa(&mm->context.tsb_descr[0])); } diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e2b779644714..d254e2e92016 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,13 +25,15 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define HUGE_MAX_HSTATE 2 #endif #ifndef __ASSEMBLY__ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) struct pt_regs; -void hugetlb_setup(struct pt_regs *regs); +void hugetlb_setup(struct pt_regs *regs, unsigned int tsb_index); +extern unsigned int xl_hugepage_shift; #endif #define WANT_PAGE_VIRTUAL diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2a52c91d2c8a..3d7d3ac612ec 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -375,6 +375,9 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) #define pgprot_noncached pgprot_noncached #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte static inline pte_t pte_mkhuge(pte_t pte) { unsigned long mask; diff --git a/arch/sparc/include/asm/scratchpad.h b/arch/sparc/include/asm/scratchpad.h index 5e8b01fb3343..2df727e4c102 100644 --- a/arch/sparc/include/asm/scratchpad.h +++ b/arch/sparc/include/asm/scratchpad.h @@ -8,7 +8,7 @@ #define SCRATCHPAD_UTSBREG1 0x10 #define SCRATCHPAD_UTSBREG2 0x18 /* 0x20 and 0x28, hypervisor only... */ -#define SCRATCHPAD_UNUSED1 0x30 +#define SCRATCHPAD_XLHUGPAGES 0x30 /* Reserved for XL hugepages TSB */ #define SCRATCHPAD_UNUSED2 0x38 /* Reserved for OBP */ #endif /* !(_SPARC64_SCRATCHPAD_H) */ diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 664ba687deb5..54697a18b33a 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -884,7 +884,6 @@ sparc64_boot_end: #include "misctrap.S" #include "syscalls.S" #include "helpers.S" -#include "hvcalls.S" #include "sun4v_tlb_miss.S" #include "sun4v_ivec.S" #include "ktlb.S" @@ -931,6 +930,7 @@ swapper_4m_tsb: ! 
0x0000000000428000 #include "systbls_64.S" +#include "hvcalls.S" .data .align 8 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19bc9b9..1d9607832a59 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -32,17 +32,33 @@ * tsb_index = ((vaddr >> HASH_SHIFT) & tsb_mask); * tsb_ptr = tsb_base + (tsb_index * 16); */ -#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \ +#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, \ + PATCH_PROLOGUE, PATCH_EPILOGUE, \ + HASH_SHIFT, TMP1, TMP2) \ and TSB_PTR, 0x7, TMP1; \ mov 512, TMP2; \ andn TSB_PTR, 0x7, TSB_PTR; \ sllx TMP2, TMP1, TMP2; \ + PATCH_PROLOGUE; \ srlx VADDR, HASH_SHIFT, TMP1; \ + PATCH_EPILOGUE; \ sub TMP2, 1, TMP2; \ and TMP1, TMP2, TMP1; \ sllx TMP1, 4, TMP1; \ add TSB_PTR, TMP1, TSB_PTR; + /* This is for xl_hugepages.*/ +#define PATCH_HASH_SHIFT_PROLOGUE \ + 661:; +#define PATCH_HASH_SHIFT_EPILOGUE \ + .section .sun4v_xl_hugepage_hash_patch, "ax"; \ + .word 661b; \ + .previous; + + /* This is the normal tsb miss case.*/ +#define PATCH_HASH_SHIFT_NOPROLOGUE +#define PATCH_HASH_SHIFT_NOEPILOGUE + sun4v_itlb_miss: /* Load MMU Miss base into %g2. */ ldxa [%g0] ASI_SCRATCHPAD, %g2 @@ -53,7 +69,8 @@ sun4v_itlb_miss: LOAD_ITLB_INFO(%g2, %g4, %g5) COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v) - COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7) + COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE, \ + PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g3, %g7) /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 @@ -99,7 +116,8 @@ sun4v_dtlb_miss: LOAD_DTLB_INFO(%g2, %g4, %g5) COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v) - COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7) + COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE, \ + PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g3, %g7) /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ ldda [%g1] ASI_QUAD_LDD_PHYS_4V, %g2 @@ -172,7 +190,8 @@ sun4v_dtsb_miss: /* fallthrough */ sun4v_tsb_miss_common: - COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7) + COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE, \ + PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g5, %g7) sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2 @@ -182,7 +201,8 @@ sun4v_tsb_miss_common: cmp %g5, -1 be,pt %xcc, 80f nop - COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7) + COMPUTE_TSB_PTR(%g5, %g4, PATCH_HASH_SHIFT_NOPROLOGUE, \ + PATCH_HASH_SHIFT_NOEPILOGUE, REAL_HPAGE_SHIFT, %g2, %g7) /* That clobbered %g2, reload it. 
*/ ldxa [%g0] ASI_SCRATCHPAD, %g2 @@ -194,6 +214,45 @@ sun4v_tsb_miss_common: ba,pt %xcc, tsb_miss_page_table_walk_sun4v_fastpath ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 +#ifdef CONFIG_HUGETLB_PAGE + /* + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- virtual address + * %g5 -- pte + * %g6 -- tag + */ + .global sun4v_xl_hugepages +sun4v_xl_hugepages: + andcc %g5, _PAGE_E_4V, %g0 + be,pt %xcc, 10f + sethi %uhi(_PAGE_VALID), %g1; + sllx %g1, 32, %g1 + or %g1, _PAGE_SZALL_4V | _PAGE_E_4V, %g1 + andn %g5, %g1, %g1 + ldxa [%g1 + %g0] ASI_PHYS_USE_EC, %g5 + brgez,pn %g5, tsb_do_fault +10: mov SCRATCHPAD_XLHUGPAGES, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 + cmp %g1, -1 + beq,pn %xcc, 10f + COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_PROLOGUE, \ + PATCH_HASH_SHIFT_EPILOGUE, 0, %g2, %g7) + ba,pt %xcc, tsb_reload + nop +10: SET_GL(1) + rdpr %tl, %g7 + cmp %g7, 1 + bne,pn %xcc, winfix_trampoline + mov %g3, %g4 + ba,pt %xcc, etrap + rd %pc, %g7 + or %g0, MM_TSB_XLHUGE, %o1 + call hugetlb_setup + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop +#endif /* CONFIG_HUGETLB_PAGE */ + sun4v_itlb_error: rdpr %tl, %g1 cmp %g1, 1 diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index be98685c14c6..e8474f8c2c47 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -123,14 +123,35 @@ tsb_miss_page_table_walk_sun4v_fastpath: and %g5, %g7, %g2 + /* This deserves a comment. Should xl_hugepages be selected for + * sun4v the patch order must be for correctness/safety reasons: + * sun4v_xl_hugepage_hash_patch + * sun4v_xl_hugepage_pte_size_patch + * sun4v_xl_hugepage_pte_branch_patch + * . Doing otherwise could result in hangs and thus boot failures. + */ 661: sethi %uhi(_PAGE_SZHUGE_4U), %g7 - sllx %g7, 32, %g7 +662: sllx %g7, 32, %g7 .section .sun4v_2insn_patch, "ax" .word 661b mov _PAGE_SZHUGE_4V, %g7 nop .previous + .section .sun4v_xl_hugepage_pte_size_patch, "ax" + .word 662b + subcc %g2, 0, %g0 + .previous + /* Should it be patched for xl_hugepages, then we need to fix up + * the disp19 target to sun4v_xl_hugepages. + */ +661: + nop + .section .sun4v_xl_hugepage_pte_branch_patch, "ax" + .word 661b + beq,pn %xcc, 662f +662: + .previous cmp %g2, %g7 bne,pt %xcc, 60f nop @@ -168,13 +189,14 @@ tsb_miss_page_table_walk_sun4v_fastpath: mov %g3, %g4 ba,pt %xcc, etrap rd %pc, %g7 + or %g0, MM_TSB_HUGE, %o1 call hugetlb_setup add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap nop 60: -#endif +#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */ /* At this point we have: * %g1 -- TSB entry address @@ -366,7 +388,8 @@ tsb_flush: * %o0: page table physical address * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none - * %o3: Hypervisor TSB descriptor physical address + * %o3 TSB XL huge config pointer or NULL if none + * %o4: Hypervisor TSB descriptor physical address * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -384,6 +407,7 @@ __tsb_context_switch: stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] ldx [%o1 + TSB_CONFIG_REG_VAL], %o0 + /* Check hugepage tsb */ brz,pt %o2, 1f mov -1, %g3 @@ -403,16 +427,34 @@ __tsb_context_switch: mov SCRATCHPAD_UTSBREG2, %o5 stxa %g3, [%o5] ASI_SCRATCHPAD - mov 2, %o0 + /* Start counting HV tsb descriptors. 
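+	 * One descriptor always exists for the base TSB; the huge and
+	 * xl_hugepage checks below each add one more when that tsb is
+	 * present.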
*/ + mov 1, %o0 cmp %g3, -1 - move %xcc, 1, %o0 + beq %xcc, 2f + nop + add %o0, 1, %o0 +2: + + /* check xl_hugepage tsb */ + brz,pt %o3, 3f + mov -1, %g3 + ldx [%o3 + TSB_CONFIG_REG_VAL], %g3 +3: + mov SCRATCHPAD_XLHUGPAGES, %o5 + stxa %g3, [%o5] ASI_SCRATCHPAD + + cmp %g3, -1 + beq %xcc, 4f + nop + add %o0, 1, %o0 +4: mov HV_FAST_MMU_TSB_CTXNON0, %o5 - mov %o3, %o1 + mov %o4, %o1 ta HV_FAST_TRAP /* Finish up. */ - ba,pt %xcc, 9f + ba,pt %xcc, 60f nop /* SUN4U TSB switch. */ @@ -422,8 +464,8 @@ __tsb_context_switch: stxa %o0, [%o5] ASI_IMMU membar #Sync -2: ldx [%o1 + TSB_CONFIG_MAP_VADDR], %o4 - brz %o4, 9f + ldx [%o1 + TSB_CONFIG_MAP_VADDR], %o4 + brz %o4, 60f ldx [%o1 + TSB_CONFIG_MAP_PTE], %o5 sethi %hi(sparc64_highest_unlocked_tlb_ent), %g2 @@ -435,7 +477,7 @@ __tsb_context_switch: stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS membar #Sync - brz,pt %o2, 9f + brz,pt %o2, 60f nop ldx [%o2 + TSB_CONFIG_MAP_VADDR], %o4 @@ -447,7 +489,7 @@ __tsb_context_switch: stxa %o5, [%g2] ASI_DTLB_DATA_ACCESS membar #Sync -9: +60: wrpr %g1, %pstate retl diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index f1a2f688b28a..5508ec7de9a0 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -123,6 +123,21 @@ SECTIONS *(.swapper_4m_tsb_phys_patch) __swapper_4m_tsb_phys_patch_end = .; } + .sun4v_xl_hugepage_pte_size_patch : { + __sun4v_xl_hugepage_pte_size_patch = .; + *(.sun4v_xl_hugepage_pte_size_patch) + __sun4v_xl_hugepage_pte_size_patch_end = .; + } + .sun4v_xl_hugepage_hash_patch : { + __sun4v_xl_hugepage_hash_patch = .; + *(.sun4v_xl_hugepage_hash_patch) + __sun4v_xl_hugepage_hash_patch_end = .; + } + .sun4v_xl_hugepage_pte_branch_patch : { + __sun4v_xl_hugepage_pte_branch_patch = .; + *(.sun4v_xl_hugepage_pte_branch_patch) + __sun4v_xl_hugepage_pte_branch_patch_end = .; + } .popc_3insn_patch : { __popc_3insn_patch = .; *(.popc_3insn_patch) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 2302e639c04a..63592cffbc59 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -279,10 +281,63 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* Put this here until there are more consumers.*/ +static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context) +{ + unsigned long hugepages_to_pages = 0UL; + + if (xl_hugepage_shift) + hugepages_to_pages = xl_hugepage_pte_count(mm_context) << + (xl_hugepage_shift - PAGE_SHIFT); + hugepages_to_pages = hugepages_to_pages + + (hugepage_pte_count(mm_context) << (HPAGE_SHIFT - PAGE_SHIFT)); + + return hugepages_to_pages; +} + +static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs, + struct mm_struct *mm, + unsigned int hugepage_shift) +{ + unsigned int hugepage_pte_idx, hugepage_idx; + unsigned long mm_rss; + + if (hugepage_shift == xl_hugepage_shift) + hugepage_idx = MM_TSB_XLHUGE; + else + hugepage_idx = MM_TSB_HUGE; + + hugepage_pte_idx = + hugepage_size_to_pte_count_idx(1UL << hugepage_shift); + + mm_rss = mm->context.huge_pte_count[hugepage_pte_idx]; + if (unlikely(mm_rss > + mm->context.tsb_block[hugepage_idx].tsb_rss_limit)) { + if (mm->context.tsb_block[hugepage_idx].tsb) + tsb_grow(mm, hugepage_idx, mm_rss); + else + hugetlb_setup(regs, hugepage_idx); + + } +} +#else +static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context) +{ + return 
0UL; +} +static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs, + struct mm_struct *mm, + unsigned int hugepage_shift) +{ +} +#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */ + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); struct mm_struct *mm = current->mm; + unsigned int hugepage_shift; struct vm_area_struct *vma; unsigned int insn = 0; int si_code, fault_code, fault; @@ -476,26 +531,18 @@ good_area: goto retry; } } + if (is_vm_hugetlb_page(vma)) + hugepage_shift = huge_page_shift(hstate_vma(vma)); + else + hugepage_shift = HPAGE_SHIFT; up_read(&mm->mmap_sem); mm_rss = get_mm_rss(mm); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); -#endif + mm_rss = mm_rss - hugepage_pte_counts_to_pages(&mm->context); if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss = mm->context.huge_pte_count; - if (unlikely(mm_rss > - mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { - if (mm->context.tsb_block[MM_TSB_HUGE].tsb) - tsb_grow(mm, MM_TSB_HUGE, mm_rss); - else - hugetlb_setup(regs); - - } -#endif + sparc64_hugetlb_tsb_fault(regs, mm, hugepage_shift); exit_exception: exception_exit(prev_state); return; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 4242eab12e10..9a1b2a5d71a0 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -9,7 +9,6 @@ #include #include #include - #include #include #include @@ -20,13 +19,13 @@ /* Slightly simplified from the non-hugepage variant because by * definition we don't have to worry about any page coloring stuff */ - -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + struct hstate *h = hstate_file(file); unsigned long task_size = TASK_SIZE; struct vm_unmapped_area_info info; @@ -37,7 +36,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, info.length = len; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = min(task_size, VA_EXCLUDE_START); - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -52,11 +51,13 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, } static unsigned long -hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, +hugetlb_get_unmapped_area_topdown(struct file *file, + const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { + struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; @@ -68,7 +69,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -89,18 +90,19 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } -unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned 
long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { + struct hstate *h = hstate_file(file); + unsigned long task_size = TASK_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long task_size = TASK_SIZE; if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; - if (len & ~HPAGE_MASK) + if (len & ~huge_page_mask(h)) return -EINVAL; if (len > task_size) return -ENOMEM; @@ -112,7 +114,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); + addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -126,50 +128,62 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, pgoff, flags); } -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) +/* Since the hugepage could cover more than one pmd entry and more + * than one pgd entry we must cover all possible conditions. + */ +static pmd_t *huge_pmd_alloc(struct mm_struct *mm, unsigned long addr) { - pgd_t *pgd; + pgd_t *pgd = pgd_offset(mm, addr); + pmd_t *pmd = NULL; pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; - /* We must align the address, because our caller will run - * set_huge_pte_at() on whatever we return, which writes out - * all of the sub-ptes for the hugepage range. So we have - * to give it the first such sub-pte. - */ - addr &= HPAGE_MASK; + if (pgd_none(*pgd)) { + pud_t *pud = pud_alloc(mm, pgd, addr); - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { + if (pud == NULL) + goto out; + } + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { pmd = pmd_alloc(mm, pud, addr); - if (pmd) - pte = pte_alloc_map(mm, NULL, pmd, addr); + + if (pmd == NULL) + goto out; } - return pte; + pmd = pmd_offset(pud, addr); +out: + return pmd; } -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +/* Note, should we fail leave behind the mm state + * which will be cleaned up on exit. + */ +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; + unsigned long start = addr & ~(size - 1); + unsigned long end = start + size; + pte_t *rpte = NULL; - addr &= HPAGE_MASK; + /* Our caller operates on start's pte which is rpte should we succeed.*/ + for (addr = start; addr < end; addr = addr + PMD_SIZE) { + pmd_t *pmd = huge_pmd_alloc(mm, addr); + pte_t *pte; - pgd = pgd_offset(mm, addr); - if (!pgd_none(*pgd)) { - pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) - pte = pte_offset_map(pmd, addr); - } + if (!pmd) + goto fail; + + pte = pte_alloc_map(mm, NULL, pmd, addr); + + if (!pte) + goto fail; + else if (!rpte) + rpte = pte; } - return pte; + + return rpte; +fail: + return NULL; } int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) @@ -177,42 +191,320 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +/* This function possibly needs to be moved. It will be different + * for sun4u and even possibly for sun4v future cores. Though we have + * no plans to support sun4u at this point. 
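+ * It decodes the TTE size field (_PAGE_SZALL_4V) into the page
+ * shift for the hugepage sizes recognized by this patch.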
+ */ +static unsigned int sun4v_tte_to_shift(pte_t entry) +{ + unsigned long hugepage_tte = pte_val(entry) & _PAGE_SZALL_4V; + unsigned int hugepage_shift; + + switch (hugepage_tte) { + case _PAGE_SZ16GB_4V: + hugepage_shift = 34U; + break; + case _PAGE_SZ2GB_4V: + hugepage_shift = 31U; + break; + case _PAGE_SZ256MB_4V: + hugepage_shift = 28U; + break; + case _PAGE_SZ4MB_4V: + hugepage_shift = 22U; + break; + default: + WARN_ONCE(1, "hugepage_shift: hugepage_tte=0x%lx\n", + hugepage_tte); + hugepage_shift = PAGE_SHIFT; + break; + } + return hugepage_shift; +} + +static unsigned int tte_to_shift(pte_t entry) +{ + unsigned int hugepage_shift; + + if (tlb_type == hypervisor) + hugepage_shift = sun4v_tte_to_shift(entry); + else + hugepage_shift = REAL_HPAGE_SHIFT; + + return hugepage_shift; +} + +static unsigned long tte_to_hugepage_size(pte_t pte) +{ + unsigned long hugepage_size = 1UL << tte_to_shift(pte); + + if (hugepage_size == REAL_HPAGE_SIZE) + hugepage_size = HPAGE_SIZE; + return hugepage_size; +} + +static unsigned long tte_to_hugepage_mask(pte_t pte) { - int i; + unsigned int hugepage_shift = tte_to_shift(pte); + unsigned long hugepage_mask; - if (!pte_present(*ptep) && pte_present(entry)) - mm->context.huge_pte_count++; + if (hugepage_shift == REAL_HPAGE_SHIFT) + hugepage_shift = HPAGE_SHIFT; - addr &= HPAGE_MASK; - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte_at(mm, addr, ptep, entry); - ptep++; - addr += PAGE_SIZE; - pte_val(entry) += PAGE_SIZE; + hugepage_mask = ~((1UL << hugepage_shift) - 1); + + return hugepage_mask; +} + +/* This should also be moved and a noop for sun4u. + * Only include xl hugepage sizes we plan to support. + */ +static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift) +{ + unsigned long sun4v_hugepage_size = _PAGE_SZ4MB_4V; + + pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; + + switch (hugepage_shift) { + /* 16Gb */ + case 34U: + sun4v_hugepage_size = _PAGE_SZ16GB_4V; + break; + /* 2Gb */ + case 31U: + sun4v_hugepage_size = _PAGE_SZ2GB_4V; + break; + default: + WARN_ONCE(hugepage_shift, + "hugepage_shift_to_tte: unsupported " + "hugepage_shift=%u.\n", hugepage_shift); } + + pte_val(entry) = pte_val(entry) | sun4v_hugepage_size; + return entry; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writeable) +{ + unsigned int hugepage_shift = huge_page_shift(hstate_vma(vma)); + + if (hugepage_shift == HPAGE_SHIFT) + goto out; + entry = hugepage_shift_to_tte(entry, hugepage_shift); +out: + return entry; +} + +static void huge_pte_at_flush_update(struct mm_struct *mm, unsigned long addr, + pte_t *pte, pte_t orig, + pte_t *sentinel_pte) +{ + if (pte_val(orig) & _PAGE_VALID) { + if (!(pte_val(*sentinel_pte) & _PAGE_VALID)) { + *sentinel_pte = orig; + tlb_batch_add(mm, addr, pte, orig, false); + } + } +} + +static void form_sentinel(pte_t *sentinel_pte, pte_t entry, pte_t *pte) +{ + pte_t sentinel = __pte(_PAGE_VALID | _PAGE_E_4V | + (pte_val(entry) & _PAGE_SZALL_4V) | __pa(pte)); + + *sentinel_pte = sentinel; +} + +static bool huge_pte_at_handle_sentinel(pte_t *sentinel_pte, pte_t *pte, + pte_t orig, pte_t entry) +{ + bool rc = true; + + /* Should the original pte be marked valid then + * only update the sentinel. 
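+	 * Returning false tells the caller to stop walking the
+	 * remaining sub-ptes of this hugepage.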
+ */ + if (pte_val(orig) & _PAGE_VALID) { + if ((pte_val(orig) & _PAGE_E_4V) == 0UL) + *pte = entry; + rc = false; + } else if (pte_val(*sentinel_pte) & _PAGE_VALID) { + *pte = *sentinel_pte; + } else { + form_sentinel(sentinel_pte, entry, pte); + *pte = entry; + } + + return rc; +} + +static bool __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pte, pte_t entry, pte_t *sentinel_pte) +{ + unsigned int hugepage_shift = tte_to_shift(entry); + bool rc = true; + + if (hugepage_shift != REAL_HPAGE_SHIFT) { + pte_t orig = *pte; + + rc = huge_pte_at_handle_sentinel(sentinel_pte, pte, orig, + entry); + huge_pte_at_flush_update(mm, addr, pte, orig, sentinel_pte); + } else + set_pte_at(mm, addr, pte, entry); + + return rc; +} + +static void __clear_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pte, pte_t *sentinel_pte) +{ + unsigned int hugepage_shift = tte_to_shift(*pte); + + if (hugepage_shift != REAL_HPAGE_SHIFT) { + pte_t orig = *pte; + + *pte = __pte(0UL); + huge_pte_at_flush_update(mm, addr, pte, orig, sentinel_pte); + } else + pte_clear(mm, addr, pte); +} + +static bool set_huge_pte_range_at(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, pte_t *pentry, + pte_t *sentinel_pte, bool set_at) +{ + pte_t *pte = pte_offset_map(pmd, addr); + pte_t *lpte = pte + PTRS_PER_PTE; + pte_t entry = *pentry; + bool rc = true; + + for (; pte < lpte; pte++, addr = addr + PAGE_SIZE) { + if (set_at) { + rc = __set_huge_pte_at(mm, addr, pte, entry, + sentinel_pte); + if (!rc) + break; + pte_val(entry) = pte_val(entry) + PAGE_SIZE; + } else + __clear_huge_pte_at(mm, addr, pte, sentinel_pte); + } + if (set_at) + *pentry = entry; + return rc; +} + +static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + pte_t *pentry, pte_t *sentinel_pte, bool set_at) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + bool rc; + + do { + next = pmd_addr_end(addr, end); + rc = set_huge_pte_range_at(mm, pmd, addr, pentry, + sentinel_pte, set_at); + } while (pmd++, addr = next, ((addr != end) && rc)); + return rc; +} + +static bool set_huge_pud_at(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_t *pentry, pte_t *sentinel_pte, bool set_at) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + bool rc; + + do { + next = pud_addr_end(addr, end); + rc = set_huge_pmd_at(mm, pud, addr, next, pentry, + sentinel_pte, set_at); + } while (pud++, addr = next, ((addr != end) && rc)); + return rc; +} + +/* entry must be the first pte of the hugepage. Otherwise entry + * must be adjusted before we enter the loop for set_pte_at and + * aligned physically to match the hugepage_size. This is equally + * true of other locations where HUGETLB_PAGE_ORDER is used within + * this module for mainline as of 7/20/2014. 
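+ * The walk below proceeds one pgd entry at a time because, as noted
+ * above huge_pmd_alloc(), a single hugepage may cover more than one
+ * pmd entry and more than one pgd entry.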
+ */ +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pte_t sentinel_pte = __pte(0UL); + unsigned long hugepage_size = tte_to_hugepage_size(entry); + unsigned long hugepage_mask = tte_to_hugepage_mask(entry); + unsigned long start = addr & hugepage_mask; + unsigned long end = start + hugepage_size; + pgd_t *pgd = pgd_offset(mm, start); + unsigned long next; + bool rc; + + if (!pte_present(*ptep) && pte_present(entry)) { + unsigned int pte_count_idx = + real_hugepage_size_to_pte_count_idx(hugepage_size); + + mm->context.huge_pte_count[pte_count_idx]++; + } + + do { + next = pgd_addr_end(start, end); + rc = set_huge_pud_at(mm, pgd, start, next, &entry, + &sentinel_pte, true); + } while (pgd++, start = next, ((start != end) && rc)); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t entry; - int i; + pte_t sentinel_pte = __pte(0UL); + pte_t entry = *ptep; + unsigned long hugepage_size = tte_to_hugepage_size(entry); + unsigned long hugepage_mask = tte_to_hugepage_mask(entry); + unsigned long start = addr & hugepage_mask; + unsigned long end = start + hugepage_size; + pgd_t *pgd = pgd_offset(mm, start); + unsigned long next; + bool rc; + + if (pte_present(entry)) { + unsigned int pte_count_idx = + real_hugepage_size_to_pte_count_idx(hugepage_size); + + mm->context.huge_pte_count[pte_count_idx]--; + } - entry = *ptep; - if (pte_present(entry)) - mm->context.huge_pte_count--; + do { + next = pgd_addr_end(start, end); + rc = set_huge_pud_at(mm, pgd, start, next, &entry, + &sentinel_pte, false); + } while (pgd++, start = next, ((start != end) && rc)); - addr &= HPAGE_MASK; + return entry; +} - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(mm, addr, ptep); - addr += PAGE_SIZE; - ptep++; +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pte_t *pte = NULL; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } } - return entry; + return pte; } int pmd_huge(pmd_t pmd) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 54791edd53f5..d265dacf461b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -336,6 +336,127 @@ static inline bool is_hugetlb_pte(pte_t pte) } #endif +#ifdef CONFIG_HUGETLB_PAGE +unsigned int xl_hugepage_shift; +static unsigned long xl_hugepage_pte; + +static bool is_xl_hugetlb_pte(pte_t pte) +{ + bool rc = false; + + if (!xl_hugepage_pte) + goto out; + else if ((pte_val(pte) & _PAGE_SZALL_4V) == xl_hugepage_pte) + rc = true; +out: + return rc; +} + +static void __init sun4v_xl_hugepage_hash_patch(void) +{ + extern unsigned int __sun4v_xl_hugepage_hash_patch; + unsigned *insn, *p; + + p = &__sun4v_xl_hugepage_hash_patch; + insn = (unsigned int *)(unsigned long)*p; + *insn = *insn | xl_hugepage_shift; + __asm__ __volatile__("flush %0\n\t" + : /* no outputs */ + : "r" (insn)); +} + +static void __init sun4v_xl_hugepage_pte_size_patch(void) +{ + extern unsigned int __sun4v_xl_hugepage_pte_size_patch; + unsigned *insn, *p; + + p = &__sun4v_xl_hugepage_pte_size_patch; + insn = (unsigned int *)(unsigned long)*p; + p++; + /* It is a simm13 in subcc instruction.*/ + *insn = *p | (unsigned int) xl_hugepage_pte; + __asm__ __volatile__("flush %0\n\t" + : /* no outputs */ + : "r" (insn)); +} + +static void __init 
sun4v_xl_hugepage_pte_branch_patch(void) +{ + extern unsigned int __sun4v_xl_hugepage_pte_branch_patch; + extern unsigned int sun4v_xl_hugepages; + unsigned int btarget = (unsigned int) + (unsigned long) &sun4v_xl_hugepages; + unsigned int *insn, *p, disp19; + + p = &__sun4v_xl_hugepage_pte_branch_patch; + insn = (unsigned int *)(unsigned long)*p; + /* Instruction which needs to be a bne,pt to sun4v_xl_hugepages.*/ + p++; + disp19 = (btarget - (unsigned int) (unsigned long) insn); + disp19 = disp19 >> 2; + disp19 = disp19 & (0x7ffff); + *insn = *p & ~0x7ffff; + *insn = *insn | disp19; + __asm__ __volatile__("flush %0\n\t" + : /* no outputs */ + : "r" (insn)); +} + +static int __init setup_hugepagesz(char *string) +{ + unsigned int hugepage_shift, hv_pgsz_mask; + unsigned long long xl_hugepage_size; + unsigned short hv_pgsz_idx; + + if (tlb_type != hypervisor) + goto out; + + xl_hugepage_size = memparse(string, &string); + /* Validate the xl_hugepage_size.*/ + if (!is_power_of_2(xl_hugepage_size)) + goto bad; + + /* Now determine whether the size is good for xl_hugepage_size. + * One the chip must support it and two, for now, it must be >= 2Gb. + */ + hugepage_shift = ilog2(xl_hugepage_size); + switch (hugepage_shift) { + case 34U: + hv_pgsz_mask = HV_PGSZ_MASK_16GB; + hv_pgsz_idx = HV_PGSZ_IDX_16GB; + xl_hugepage_pte = _PAGE_SZ16GB_4V; + break; + case 31U: + hv_pgsz_mask = HV_PGSZ_MASK_2GB; + hv_pgsz_idx = HV_PGSZ_IDX_2GB; + xl_hugepage_pte = _PAGE_SZ2GB_4V; + break; + default: + hv_pgsz_mask = 0U; + hv_pgsz_idx = HV_PGSZ_IDX_8K; + break; + } + + if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) + goto bad; + + xl_hugepage_shift = hugepage_shift; + xl_hugepage_size = 1UL << hugepage_shift; + sun4v_xl_hugepage_hash_patch(); + sun4v_xl_hugepage_pte_size_patch(); + sun4v_xl_hugepage_pte_branch_patch(); + hv_establish_xl_hugepage_tsb_descriptor(hv_pgsz_idx, hv_pgsz_mask); + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); + hugetlb_add_hstate(xl_hugepage_shift - PAGE_SHIFT); + goto out; +bad: + pr_warn("Invalid xl_hugepage_size=0x%llx.\n", xl_hugepage_size); +out: + return 0; +} +__setup("hugepagesz=", setup_hugepagesz); +#endif /* CONFIG_HUGETLB_PAGE */ + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm; @@ -358,9 +479,13 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + if (mm->context.huge_pte_count[MM_PTES_HUGE] && is_hugetlb_pte(pte)) __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); + else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] && + is_xl_hugetlb_pte(pte)) + __update_mmu_tsb_insert(mm, MM_TSB_XLHUGE, xl_hugepage_shift, + address, pte_val(pte)); else #endif __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, @@ -2892,7 +3017,7 @@ static void context_reload(void *__data) load_secondary_context(mm); } -void hugetlb_setup(struct pt_regs *regs) +void hugetlb_setup(struct pt_regs *regs, unsigned int tsb_index) { struct mm_struct *mm = current->mm; struct tsb_config *tp; @@ -2910,9 +3035,9 @@ void hugetlb_setup(struct pt_regs *regs) die_if_kernel("HugeTSB in atomic", regs); } - tp = &mm->context.tsb_block[MM_TSB_HUGE]; + tp = &mm->context.tsb_block[tsb_index]; if (likely(tp->tsb == NULL)) - tsb_grow(mm, MM_TSB_HUGE, 0); + tsb_grow(mm, tsb_index, 0); tsb_context_switch(mm); smp_tsb_sync(mm); diff --git 
a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 9df2190c097e..6e6633094557 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -165,9 +165,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.huge_pte_count++; + mm->context.huge_pte_count[MM_PTES_HUGE]++; else - mm->context.huge_pte_count--; + mm->context.huge_pte_count[MM_PTES_HUGE]--; /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index a06576683c38..0bbf88445b8d 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -91,6 +91,14 @@ void flush_tsb_user(struct tlb_batch *tb) __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); } #endif +#ifdef CONFIG_HUGETLB_PAGE + if (mm->context.tsb_block[MM_TSB_XLHUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_XLHUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_XLHUGE].tsb_nentries; + base = __pa(base); + __flush_tsb_one(tb, xl_hugepage_shift, base, nentries); + } +#endif /* CONFIG_HUGETLB_PAGE */ spin_unlock_irqrestore(&mm->context.lock, flags); } @@ -115,6 +123,14 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); } #endif +#ifdef CONFIG_HUGETLB_PAGE + if (mm->context.tsb_block[MM_TSB_XLHUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_XLHUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_XLHUGE].tsb_nentries; + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, xl_hugepage_shift, nentries); + } +#endif /* CONFIG_HUGETLB_PAGE */ spin_unlock_irqrestore(&mm->context.lock, flags); } @@ -124,9 +140,56 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB +static unsigned short hv_pgsz_idx_xlhuge; +static unsigned int hv_pgsz_mask_xlhuge; + +void __init hv_establish_xl_hugepage_tsb_descriptor(unsigned short pgsz_idx, + unsigned int pgsz_mask) +{ + hv_pgsz_idx_xlhuge = pgsz_idx; + hv_pgsz_mask_xlhuge = pgsz_mask; +} #endif -static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes) +static void sun4v_fill_tsb_descriptor(struct mm_struct *mm) +{ + struct hv_tsb_descr *htd = &mm->context.tsb_descr[0]; + unsigned int tsb_idx; + + for (tsb_idx = 0U; tsb_idx < MM_NUM_TSBS; tsb_idx++) { + /* Should there not be a tsb then skip it.*/ + if (!mm->context.tsb_block[tsb_idx].tsb) + continue; + + switch (tsb_idx) { + case MM_TSB_BASE: + htd->pgsz_mask = HV_PGSZ_MASK_BASE; + htd->pgsz_idx = HV_PGSZ_IDX_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + htd->pgsz_mask = HV_PGSZ_MASK_HUGE; + htd->pgsz_idx = HV_PGSZ_IDX_HUGE; + break; + case MM_TSB_XLHUGE: + htd->pgsz_mask = hv_pgsz_mask_xlhuge; + htd->pgsz_idx = hv_pgsz_idx_xlhuge; + break; +#endif + default: + BUG(); + } + htd->assoc = 1; + htd->num_ttes = mm->context.tsb_block[tsb_idx].tsb_nentries; + htd->ctx_idx = 0; + htd->tsb_base = __pa(mm->context.tsb_block[tsb_idx].tsb); + htd->resv = 0; + htd++; + } +} + +static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, + unsigned long tsb_bytes) { unsigned long tsb_reg, base, tsb_paddr; unsigned long page_sz, tte; @@ -140,6 +203,7 @@ static void 
setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign break; #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) case MM_TSB_HUGE: + case MM_TSB_XLHUGE: base = TSBMAP_4M_BASE; break; #endif @@ -222,40 +286,8 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign mm->context.tsb_block[tsb_idx].tsb_map_pte = tte; } - /* Setup the Hypervisor TSB descriptor. */ - if (tlb_type == hypervisor) { - struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx]; - - switch (tsb_idx) { - case MM_TSB_BASE: - hp->pgsz_idx = HV_PGSZ_IDX_BASE; - break; -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - case MM_TSB_HUGE: - hp->pgsz_idx = HV_PGSZ_IDX_HUGE; - break; -#endif - default: - BUG(); - } - hp->assoc = 1; - hp->num_ttes = tsb_bytes / 16; - hp->ctx_idx = 0; - switch (tsb_idx) { - case MM_TSB_BASE: - hp->pgsz_mask = HV_PGSZ_MASK_BASE; - break; -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - case MM_TSB_HUGE: - hp->pgsz_mask = HV_PGSZ_MASK_HUGE; - break; -#endif - default: - BUG(); - } - hp->tsb_base = tsb_paddr; - hp->resv = 0; - } + if (tlb_type == hypervisor) + sun4v_fill_tsb_descriptor(mm); } struct kmem_cache *pgtable_cache __read_mostly; @@ -465,25 +497,54 @@ retry_tsb_alloc: } } +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context, + unsigned long *capture_array) +{ + unsigned int hugepage_idx; + + for (hugepage_idx = 0UL; hugepage_idx != MM_NUM_HUGEPAGE_SIZES; + hugepage_idx++) { + capture_array[hugepage_idx] = + mm_context->huge_pte_count[hugepage_idx]; + mm_context->huge_pte_count[hugepage_idx] = 0UL; + } +} + +static void +captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm, + unsigned long *capture_huge_pte_count) +{ + if (unlikely(capture_huge_pte_count[MM_PTES_HUGE])) + tsb_grow(mm, MM_TSB_HUGE, + capture_huge_pte_count[MM_PTES_HUGE]); + + if (unlikely(capture_huge_pte_count[MM_PTES_XLHUGE])) + tsb_grow(mm, MM_TSB_XLHUGE, + capture_huge_pte_count[MM_PTES_XLHUGE]); +} +#else +static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context, + unsigned long *capture_array) {} +static void +captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm, + unsigned long *capture_huge_pte_count) {} +#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */ + int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long huge_pte_count; -#endif + unsigned long capture_huge_pte_count[MM_NUM_HUGEPAGE_SIZES]; unsigned int i; spin_lock_init(&mm->context.lock); mm->context.sparc64_ctx_val = 0UL; -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) /* We reset it to zero because the fork() page copying * will re-increment the counters as the parent PTEs are * copied into the child address space. 
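	 * capture_and_clear_huge_pte_counts() saves those counts first so
	 * that the huge and xl_hugepage TSBs can be pre-sized further down.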
*/ - huge_pte_count = mm->context.huge_pte_count; - mm->context.huge_pte_count = 0; -#endif + capture_and_clear_huge_pte_counts(&mm->context, capture_huge_pte_count); /* copy_mm() copies over the parent's mm_struct before calling * us, so we need to zero out the TSB pointer or else tsb_grow() @@ -497,10 +558,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); -#endif + captured_hugepage_pte_count_grow_tsb(mm, capture_huge_pte_count); if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) return -ENOMEM;
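
A back-of-the-envelope check of the IPI concern in the note above:
with the 8M pmd span implied by the 1UL << (31 - 23) arithmetic
(PMD_SHIFT = 23), a protection change on one xl hugepage touches one
run of sub-ptes per pmd entry. The standalone userspace sketch below
only illustrates that arithmetic; PMD_SHIFT and the xl size list
(2Gb and 16Gb, the sizes setup_hugepagesz() accepts) are restated
here as assumptions, not pulled from kernel headers.

	#include <stdio.h>

	#define PMD_SHIFT	23	/* assumed 8M span per pmd entry */

	int main(void)
	{
		/* xl hugepage shifts accepted by setup_hugepagesz() */
		unsigned int shift[2] = { 31U, 34U };	/* 2Gb, 16Gb */
		unsigned int i;

		for (i = 0; i < 2; i++) {
			unsigned long pmd_entries =
				1UL << (shift[i] - PMD_SHIFT);

			/* prints 256 for 2Gb and 2048 for 16Gb */
			printf("shift %u: %lu pmd entries\n",
			       shift[i], pmd_entries);
		}
		return 0;
	}

This is where the 256 figure in the note comes from: a TTE change on
a 2Gb hugepage spans 256 pmd entries' worth of sub-ptes, and a 16Gb
page eight times that, hence the question of whether per-entry IPIs
are acceptable.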