sparc64: xl-hugepages
author Vijay Kumar <vijay.ac.kumar@oracle.com>
    Thu, 28 Apr 2016 17:27:43 +0000 (10:27 -0700)
committer Allen Pais <allen.pais@oracle.com>
    Tue, 24 May 2016 05:11:54 +0000 (10:41 +0530)
Note: Resending this patch. There is no change in this patch since v1.

Jalapeño was verified as repaired.

Now to find performance issues.

One performance issue is subordinate page table state (SPTS). The SPTS will
be tricky because of protection changes for COW and the like. For example,
a 2GB hugepage has 1UL << (31 - 23) = 256 PMD entries. Do we want 256 IPIs
for a single hugepage TTE (pte) change?
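
As a back-of-the-envelope illustration (not part of the patch), the 256 figure
comes from tiling a 2GB (shift 31) hugepage with 8MB (shift 23) PMD-sized
mappings; a standalone sketch, assuming those shifts:

#include <stdio.h>

int main(void)
{
	/* Illustrative values: a 2GB XL hugepage and 8MB PMD-sized mappings. */
	unsigned int xl_hugepage_shift = 31;	/* 2GB */
	unsigned int pmd_shift = 23;		/* 8MB, HPAGE_SHIFT here */
	unsigned long entries = 1UL << (xl_hugepage_shift - pmd_shift);

	/* One TLB-shootdown IPI per subordinate entry would mean 256 IPIs. */
	printf("PMD entries per 2GB hugepage: %lu\n", entries);
	return 0;
}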

Signed-off-by: Bob Picco <bpicco@meloft.net>
(cherry picked from commit ece059b2e2581a2dcda3fb1ca35cd31258f6ed03)

Conflicts:
    arch/sparc/include/asm/mmu_64.h
    arch/sparc/mm/fault_64.c

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Acked-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Orabug: 22729791
Signed-off-by: Allen Pais <allen.pais@oracle.com>
15 files changed:
arch/sparc/include/asm/hugetlb.h
arch/sparc/include/asm/mmu_64.h
arch/sparc/include/asm/mmu_context_64.h
arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/scratchpad.h
arch/sparc/kernel/head_64.S
arch/sparc/kernel/sun4v_tlb_miss.S
arch/sparc/kernel/tsb.S
arch/sparc/kernel/vmlinux.lds.S
arch/sparc/mm/fault_64.c
arch/sparc/mm/hugetlbpage.c
arch/sparc/mm/init_64.c
arch/sparc/mm/tlb.c
arch/sparc/mm/tsb.c

index e4cab465b81f86eb4d7f2a3b03c5a62b67d28a8d..4b8e2cea21d91d24305c73b33127aa45cf10850c 100644 (file)
@@ -28,9 +28,11 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int prepare_hugepage_range(struct file *file,
                        unsigned long addr, unsigned long len)
 {
-       if (len & ~HPAGE_MASK)
+       struct hstate *h = hstate_file(file);
+
+       if (len & ~huge_page_mask(h))
                return -EINVAL;
-       if (addr & ~HPAGE_MASK)
+       if (addr & ~huge_page_mask(h))
                return -EINVAL;
        return 0;
 }
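
The switch above from the fixed HPAGE_MASK to huge_page_mask(h) matters once
two hugepage sizes coexist; a minimal user-space sketch of the same check,
assuming an illustrative 2GB (shift 31) XL size:

#include <stdio.h>

/* Stand-in for the per-hstate mask used by prepare_hugepage_range(). */
static int range_ok(unsigned long addr, unsigned long len,
		    unsigned int hugepage_shift)
{
	unsigned long mask = ~((1UL << hugepage_shift) - 1);

	if (len & ~mask)
		return -1;	/* -EINVAL in the kernel code */
	if (addr & ~mask)
		return -1;
	return 0;
}

int main(void)
{
	/* A 2GB-aligned, 2GB-long range passes; an 8MB length does not. */
	printf("%d\n", range_ok(1UL << 31, 1UL << 31, 31));
	printf("%d\n", range_ok(1UL << 31, 8UL << 20, 31));
	return 0;
}
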
index fd52dc2d23d90febe7613db57425afc95dd531da..67b32556bc790dbc4735be30de5b0a9c323fe3e5 100644 (file)
 #define CTX_HWBITS(__ctx)      ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
 #define CTX_NRBITS(__ctx)      ((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
 
+/* This identifies the three possible TSBs and their indices into the tsb array. */
+#define MM_TSB_BASE    0
+#define MM_TSB_HUGE    1
+#define        MM_TSB_XLHUGE   2
+
 #ifndef __ASSEMBLY__
 
 #define TSB_ENTRY_ALIGNMENT    16
@@ -80,24 +85,69 @@ struct tsb_config {
        unsigned long           tsb_map_pte;
 };
 
-#define MM_TSB_BASE    0
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-#define MM_TSB_HUGE    1
-#define MM_NUM_TSBS    2
+/* This is the number of TSBs. */
+#define MM_NUM_TSBS            3
+/* This is the number of entries in the huge_pte_count array. */
+#define MM_NUM_HUGEPAGE_SIZES  2
+#define MM_PTES_HUGE           0
+#define MM_PTES_XLHUGE         1
+
 #else
 #define MM_NUM_TSBS    1
+#define MM_NUM_HUGEPAGE_SIZES  0
 #endif
 
 typedef struct {
        spinlock_t              lock;
        unsigned long           sparc64_ctx_val;
-       unsigned long           huge_pte_count;
        struct tsb_config       tsb_block[MM_NUM_TSBS];
        struct hv_tsb_descr     tsb_descr[MM_NUM_TSBS];
        void                    *vdso;
+       unsigned long           huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
 } mm_context_t;
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+static inline unsigned long xl_hugepage_pte_count(mm_context_t *mm_context)
+{
+       return mm_context->huge_pte_count[MM_PTES_XLHUGE];
+}
+
+static inline unsigned long hugepage_pte_count(mm_context_t *mm_context)
+{
+       return mm_context->huge_pte_count[MM_PTES_HUGE];
+}
+
+static inline unsigned int hugepage_size_to_pte_count_idx(
+                               unsigned long hugepage_size)
+{
+       unsigned int pte_count_index = MM_PTES_HUGE;
+
+       if (hugepage_size != HPAGE_SIZE)
+               pte_count_index = MM_PTES_XLHUGE;
+
+       return pte_count_index;
+}
+
+static inline unsigned int real_hugepage_size_to_pte_count_idx(
+                               unsigned long real_hugepage_size)
+{
+       unsigned int pte_count_index = MM_PTES_HUGE;
+
+       if (real_hugepage_size != REAL_HPAGE_SIZE)
+               pte_count_index = MM_PTES_XLHUGE;
+
+       return pte_count_index;
+}
+
+void __init hv_establish_xl_hugepage_tsb_descriptor(unsigned short pgsz_idx,
+                                               unsigned int pgsz_mask);
+
+#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
+
+
 #endif /* !__ASSEMBLY__ */
 
 #define TSB_CONFIG_TSB         0x00
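
A hedged, user-space model of how the two counters in huge_pte_count[] are
selected by size (the helpers above key off HPAGE_SIZE versus the XL size; the
8MB default below is an assumption for illustration only):

#include <stdio.h>

#define MM_PTES_HUGE	0
#define MM_PTES_XLHUGE	1

/* Mirrors hugepage_size_to_pte_count_idx(): anything that is not the
 * default hugepage size is accounted to the XL counter.
 */
static unsigned int size_to_idx(unsigned long hugepage_size,
				unsigned long default_hpage_size)
{
	return hugepage_size == default_hpage_size ? MM_PTES_HUGE
						   : MM_PTES_XLHUGE;
}

int main(void)
{
	unsigned long huge_pte_count[2] = { 0UL, 0UL };
	unsigned long hpage_size = 8UL << 20;	/* assumed 8MB HPAGE_SIZE */

	huge_pte_count[size_to_idx(8UL << 20, hpage_size)]++;	/* default */
	huge_pte_count[size_to_idx(2UL << 30, hpage_size)]++;	/* 2GB XL  */
	printf("huge=%lu xl=%lu\n", huge_pte_count[MM_PTES_HUGE],
	       huge_pte_count[MM_PTES_XLHUGE]);
	return 0;
}
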
index b84be675e507857e27766b6339e438270aea0ebe..fc66ba82923caf48c5d8210b93609f36084f39b8 100644 (file)
@@ -30,6 +30,7 @@ void destroy_context(struct mm_struct *mm);
 void __tsb_context_switch(unsigned long pgd_pa,
                          struct tsb_config *tsb_base,
                          struct tsb_config *tsb_huge,
+                         struct tsb_config *tsb_xl_huge,
                          unsigned long tsb_descr_pa);
 
 static inline void tsb_context_switch(struct mm_struct *mm)
@@ -37,11 +38,14 @@ static inline void tsb_context_switch(struct mm_struct *mm)
        __tsb_context_switch(__pa(mm->pgd),
                             &mm->context.tsb_block[0],
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-                            (mm->context.tsb_block[1].tsb ?
-                             &mm->context.tsb_block[1] :
+                            (mm->context.tsb_block[MM_TSB_HUGE].tsb ?
+                             &mm->context.tsb_block[MM_TSB_HUGE] :
+                             NULL),
+                            (mm->context.tsb_block[MM_TSB_XLHUGE].tsb ?
+                             &mm->context.tsb_block[MM_TSB_XLHUGE] :
                              NULL)
 #else
-                            NULL
+                            NULL, NULL
 #endif
                             , __pa(&mm->context.tsb_descr[0]));
 }
index e2b779644714a22973fab8bcec294c8cad6b6700..d254e2e92016c3ffe6b34a61a95070ea6cc12e39 100644 (file)
 #define HPAGE_MASK             (~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER     (HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define HUGE_MAX_HSTATE                2
 #endif
 
 #ifndef __ASSEMBLY__
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 struct pt_regs;
-void hugetlb_setup(struct pt_regs *regs);
+void hugetlb_setup(struct pt_regs *regs, unsigned int tsb_index);
+extern unsigned int xl_hugepage_shift;
 #endif
 
 #define WANT_PAGE_VIRTUAL
index 2a52c91d2c8acbf5f904e082400ba782d7279947..3d7d3ac612ec13e186f7589f14ad825ca0831df1 100644 (file)
@@ -375,6 +375,9 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
 #define pgprot_noncached pgprot_noncached
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+                               struct page *page, int writable);
+#define        arch_make_huge_pte arch_make_huge_pte
 static inline pte_t pte_mkhuge(pte_t pte)
 {
        unsigned long mask;
index 5e8b01fb334353422534b410707bc631effc0fdf..2df727e4c10272414370d3efe4d28a158906c1d1 100644 (file)
@@ -8,7 +8,7 @@
 #define SCRATCHPAD_UTSBREG1    0x10
 #define SCRATCHPAD_UTSBREG2    0x18
        /* 0x20 and 0x28, hypervisor only... */
-#define SCRATCHPAD_UNUSED1     0x30
+#define        SCRATCHPAD_XLHUGPAGES   0x30 /* Reserved for XL hugepages TSB       */
 #define SCRATCHPAD_UNUSED2     0x38 /* Reserved for OBP                    */
 
 #endif /* !(_SPARC64_SCRATCHPAD_H) */
index 664ba687deb5771492902476a046495b0b843f1f..54697a18b33a32189e4226786f1fb25990164387 100644 (file)
@@ -884,7 +884,6 @@ sparc64_boot_end:
 #include "misctrap.S"
 #include "syscalls.S"
 #include "helpers.S"
-#include "hvcalls.S"
 #include "sun4v_tlb_miss.S"
 #include "sun4v_ivec.S"
 #include "ktlb.S"
@@ -931,6 +930,7 @@ swapper_4m_tsb:
 ! 0x0000000000428000
 
 #include "systbls_64.S"
+#include "hvcalls.S"
 
        .data
        .align  8
index 6179e19bc9b98ea4542b59bb4953c1f9f2718330..1d9607832a5966b20732b9a92602d01bb7def4a0 100644 (file)
         * tsb_index = ((vaddr >> HASH_SHIFT) & tsb_mask);
         * tsb_ptr = tsb_base + (tsb_index * 16);
         */
-#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \
+#define COMPUTE_TSB_PTR(TSB_PTR, VADDR,                        \
+        PATCH_PROLOGUE, PATCH_EPILOGUE,                \
+       HASH_SHIFT, TMP1, TMP2)                         \
        and     TSB_PTR, 0x7, TMP1;                     \
        mov     512, TMP2;                              \
        andn    TSB_PTR, 0x7, TSB_PTR;                  \
        sllx    TMP2, TMP1, TMP2;                       \
+       PATCH_PROLOGUE;                                 \
        srlx    VADDR, HASH_SHIFT, TMP1;                \
+       PATCH_EPILOGUE;                                 \
        sub     TMP2, 1, TMP2;                          \
        and     TMP1, TMP2, TMP1;                       \
        sllx    TMP1, 4, TMP1;                          \
        add     TSB_PTR, TMP1, TSB_PTR;
 
+       /* This is for xl_hugepages.*/
+#define        PATCH_HASH_SHIFT_PROLOGUE                       \
+       661:;
+#define        PATCH_HASH_SHIFT_EPILOGUE                               \
+       .section        .sun4v_xl_hugepage_hash_patch, "ax";    \
+       .word           661b;                                   \
+       .previous;
+
+       /* This is the normal tsb miss case.*/
+#define PATCH_HASH_SHIFT_NOPROLOGUE
+#define PATCH_HASH_SHIFT_NOEPILOGUE
+
 sun4v_itlb_miss:
        /* Load MMU Miss base into %g2.  */
        ldxa    [%g0] ASI_SCRATCHPAD, %g2
@@ -53,7 +69,8 @@ sun4v_itlb_miss:
 
        LOAD_ITLB_INFO(%g2, %g4, %g5)
        COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v)
-       COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
+       COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE,  \
+               PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g3, %g7)
 
        /* Load TSB tag/pte into %g2/%g3 and compare the tag.  */
        ldda    [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -99,7 +116,8 @@ sun4v_dtlb_miss:
 
        LOAD_DTLB_INFO(%g2, %g4, %g5)
        COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v)
-       COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
+       COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE,  \
+               PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g3, %g7)
 
        /* Load TSB tag/pte into %g2/%g3 and compare the tag.  */
        ldda    [%g1] ASI_QUAD_LDD_PHYS_4V, %g2
@@ -172,7 +190,8 @@ sun4v_dtsb_miss:
        /* fallthrough */
 
 sun4v_tsb_miss_common:
-       COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7)
+       COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_NOPROLOGUE,  \
+               PATCH_HASH_SHIFT_NOEPILOGUE, PAGE_SHIFT, %g5, %g7)
 
        sub     %g2, TRAP_PER_CPU_FAULT_INFO, %g2
 
@@ -182,7 +201,8 @@ sun4v_tsb_miss_common:
        cmp     %g5, -1
        be,pt   %xcc, 80f
         nop
-       COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7)
+       COMPUTE_TSB_PTR(%g5, %g4, PATCH_HASH_SHIFT_NOPROLOGUE,  \
+               PATCH_HASH_SHIFT_NOEPILOGUE, REAL_HPAGE_SHIFT, %g2, %g7)
 
        /* That clobbered %g2, reload it.  */
        ldxa    [%g0] ASI_SCRATCHPAD, %g2
@@ -194,6 +214,45 @@ sun4v_tsb_miss_common:
        ba,pt   %xcc, tsb_miss_page_table_walk_sun4v_fastpath
         ldx    [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
 
+#ifdef CONFIG_HUGETLB_PAGE
+       /*
+        * %g3 -- FAULT_CODE_{D,I}TLB
+        * %g4 -- virtual address
+        * %g5 -- pte
+        * %g6 -- tag
+        */
+       .global sun4v_xl_hugepages
+sun4v_xl_hugepages:
+       andcc   %g5, _PAGE_E_4V, %g0
+       be,pt   %xcc, 10f
+       sethi   %uhi(_PAGE_VALID), %g1;
+       sllx    %g1, 32, %g1
+       or      %g1, _PAGE_SZALL_4V | _PAGE_E_4V, %g1
+       andn    %g5, %g1, %g1
+       ldxa    [%g1 + %g0] ASI_PHYS_USE_EC, %g5
+       brgez,pn %g5, tsb_do_fault
+10:    mov     SCRATCHPAD_XLHUGPAGES, %g1
+       ldxa    [%g1] ASI_SCRATCHPAD, %g1
+       cmp     %g1, -1
+       beq,pn  %xcc, 10f
+       COMPUTE_TSB_PTR(%g1, %g4, PATCH_HASH_SHIFT_PROLOGUE,    \
+               PATCH_HASH_SHIFT_EPILOGUE, 0, %g2, %g7)
+       ba,pt   %xcc, tsb_reload
+        nop
+10:    SET_GL(1)
+       rdpr    %tl, %g7
+       cmp     %g7, 1
+       bne,pn  %xcc, winfix_trampoline
+        mov    %g3, %g4
+       ba,pt   %xcc, etrap
+        rd     %pc, %g7
+       or      %g0, MM_TSB_XLHUGE, %o1
+       call    hugetlb_setup
+        add    %sp, PTREGS_OFF, %o0
+       ba,pt   %xcc, rtrap
+        nop
+#endif /* CONFIG_HUGETLB_PAGE */
+
 sun4v_itlb_error:
        rdpr    %tl, %g1
        cmp     %g1, 1
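
The tsb_index/tsb_ptr formula quoted in the comment above COMPUTE_TSB_PTR can
be sketched in C; this only illustrates the arithmetic (the 512-entry TSB, 8K
base pages, and sample addresses are assumptions), not the patched assembly:

#include <stdio.h>

int main(void)
{
	unsigned long tsb_base = 0x100000UL;	/* assumed TSB base address */
	unsigned long nentries = 512UL;		/* assumed TSB entry count  */
	unsigned long vaddr = 0x7f0000400000UL;	/* example virtual address  */
	unsigned int hash_shift = 13;		/* PAGE_SHIFT for 8K pages  */

	unsigned long tsb_mask = nentries - 1;
	unsigned long tsb_index = (vaddr >> hash_shift) & tsb_mask;
	unsigned long tsb_ptr = tsb_base + (tsb_index * 16); /* 16-byte TTEs */

	printf("index=%lu ptr=0x%lx\n", tsb_index, tsb_ptr);
	return 0;
}
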
index be98685c14c62301250db79791269fb411ea9e0a..e8474f8c2c47e5986f43fb9837eaef859daf8013 100644 (file)
@@ -123,14 +123,35 @@ tsb_miss_page_table_walk_sun4v_fastpath:
 
        and             %g5, %g7, %g2
 
+       /* Should xl_hugepages be selected for sun4v, the patches must be
+        * applied in this order for correctness/safety reasons:
+        *      sun4v_xl_hugepage_hash_patch
+        *      sun4v_xl_hugepage_pte_size_patch
+        *      sun4v_xl_hugepage_pte_branch_patch
+        * Doing otherwise could result in hangs and thus boot failures.
+        */
 661:   sethi           %uhi(_PAGE_SZHUGE_4U), %g7
-       sllx            %g7, 32, %g7
+662:   sllx            %g7, 32, %g7
        .section        .sun4v_2insn_patch, "ax"
        .word           661b
        mov             _PAGE_SZHUGE_4V, %g7
        nop
        .previous
+       .section        .sun4v_xl_hugepage_pte_size_patch, "ax"
+       .word           662b
+       subcc           %g2, 0, %g0
+       .previous
 
+       /* Should this be patched for xl_hugepages, then we need to fix up
+        * the disp19 target to point at sun4v_xl_hugepages.
+        */
+661:
+        nop
+       .section        .sun4v_xl_hugepage_pte_branch_patch, "ax"
+       .word           661b
+       beq,pn          %xcc, 662f
+662:
+       .previous
        cmp             %g2, %g7
        bne,pt          %xcc, 60f
         nop
@@ -168,13 +189,14 @@ tsb_miss_page_table_walk_sun4v_fastpath:
         mov    %g3, %g4
        ba,pt   %xcc, etrap
         rd     %pc, %g7
+       or      %g0, MM_TSB_HUGE, %o1
        call    hugetlb_setup
         add    %sp, PTREGS_OFF, %o0
        ba,pt   %xcc, rtrap
         nop
 
 60:
-#endif
+#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
 
        /* At this point we have:
         * %g1 --       TSB entry address
@@ -366,7 +388,8 @@ tsb_flush:
         * %o0: page table physical address
         * %o1: TSB base config pointer
         * %o2: TSB huge config pointer, or NULL if none
-        * %o3: Hypervisor TSB descriptor physical address
+        * %o3: TSB XL huge config pointer, or NULL if none
+        * %o4: Hypervisor TSB descriptor physical address
         *
         * We have to run this whole thing with interrupts
         * disabled so that the current cpu doesn't change
@@ -384,6 +407,7 @@ __tsb_context_switch:
        stx     %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
 
        ldx     [%o1 + TSB_CONFIG_REG_VAL], %o0
+       /* Check hugepage tsb */
        brz,pt  %o2, 1f
         mov    -1, %g3
 
@@ -403,16 +427,34 @@ __tsb_context_switch:
        mov     SCRATCHPAD_UTSBREG2, %o5
        stxa    %g3, [%o5] ASI_SCRATCHPAD
 
-       mov     2, %o0
+       /* Start counting HV tsb descriptors. */
+       mov     1, %o0
        cmp     %g3, -1
-       move    %xcc, 1, %o0
+       beq     %xcc, 2f
+        nop
+       add     %o0, 1, %o0
+2:
+
+       /* check xl_hugepage tsb */
+       brz,pt  %o3, 3f
+        mov    -1, %g3
+       ldx     [%o3 + TSB_CONFIG_REG_VAL], %g3
+3:
+       mov     SCRATCHPAD_XLHUGPAGES, %o5
+       stxa    %g3, [%o5] ASI_SCRATCHPAD
+
+       cmp     %g3, -1
+       beq     %xcc, 4f
+        nop
+       add     %o0, 1, %o0
+4:
 
        mov     HV_FAST_MMU_TSB_CTXNON0, %o5
-       mov     %o3, %o1
+       mov     %o4, %o1
        ta      HV_FAST_TRAP
 
        /* Finish up.  */
-       ba,pt   %xcc, 9f
+       ba,pt   %xcc, 60f
         nop
 
        /* SUN4U TSB switch.  */
@@ -422,8 +464,8 @@ __tsb_context_switch:
        stxa    %o0, [%o5] ASI_IMMU
        membar  #Sync
 
-2:     ldx     [%o1 + TSB_CONFIG_MAP_VADDR], %o4
-       brz     %o4, 9f
+       ldx     [%o1 + TSB_CONFIG_MAP_VADDR], %o4
+       brz     %o4, 60f
         ldx    [%o1 + TSB_CONFIG_MAP_PTE], %o5
 
        sethi   %hi(sparc64_highest_unlocked_tlb_ent), %g2
@@ -435,7 +477,7 @@ __tsb_context_switch:
        stxa    %o5, [%g2] ASI_DTLB_DATA_ACCESS
        membar  #Sync
 
-       brz,pt  %o2, 9f
+       brz,pt  %o2, 60f
         nop
 
        ldx     [%o2 + TSB_CONFIG_MAP_VADDR], %o4
@@ -447,7 +489,7 @@ __tsb_context_switch:
        stxa    %o5, [%g2] ASI_DTLB_DATA_ACCESS
        membar  #Sync
 
-9:
+60:
        wrpr    %g1, %pstate
 
        retl
index f1a2f688b28a31fc47d2232f3ed10e9d95930223..5508ec7de9a060eb2fa7545b21dbe3350a544bc4 100644 (file)
@@ -123,6 +123,21 @@ SECTIONS
                *(.swapper_4m_tsb_phys_patch)
                __swapper_4m_tsb_phys_patch_end = .;
        }
+       .sun4v_xl_hugepage_pte_size_patch : {
+               __sun4v_xl_hugepage_pte_size_patch = .;
+               *(.sun4v_xl_hugepage_pte_size_patch)
+               __sun4v_xl_hugepage_pte_size_patch_end = .;
+       }
+       .sun4v_xl_hugepage_hash_patch : {
+               __sun4v_xl_hugepage_hash_patch = .;
+               *(.sun4v_xl_hugepage_hash_patch)
+               __sun4v_xl_hugepage_hash_patch_end = .;
+       }
+       .sun4v_xl_hugepage_pte_branch_patch : {
+               __sun4v_xl_hugepage_pte_branch_patch = .;
+               *(.sun4v_xl_hugepage_pte_branch_patch)
+               __sun4v_xl_hugepage_pte_branch_patch_end = .;
+       }
        .popc_3insn_patch : {
                __popc_3insn_patch = .;
                *(.popc_3insn_patch)
index 2302e639c04ac28ef4328bc3c3fc882efaa503a2..63592cffbc59d8b643975f2c6cde0c420b0d2ebc 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/kdebug.h>
 #include <linux/percpu.h>
 #include <linux/context_tracking.h>
+#include <linux/hugetlb.h>
+#include <linux/dtrace_os.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -279,10 +281,63 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
        show_regs(regs);
 }
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/* Put this here until there are more consumers.*/
+static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
+{
+       unsigned long hugepages_to_pages = 0UL;
+
+       if (xl_hugepage_shift)
+               hugepages_to_pages = xl_hugepage_pte_count(mm_context) <<
+                       (xl_hugepage_shift - PAGE_SHIFT);
+       hugepages_to_pages = hugepages_to_pages +
+               (hugepage_pte_count(mm_context) << (HPAGE_SHIFT - PAGE_SHIFT));
+
+       return hugepages_to_pages;
+}
+
+static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
+                                     struct mm_struct *mm,
+                                     unsigned int hugepage_shift)
+{
+       unsigned int hugepage_pte_idx, hugepage_idx;
+       unsigned long mm_rss;
+
+       if (hugepage_shift == xl_hugepage_shift)
+               hugepage_idx = MM_TSB_XLHUGE;
+       else
+               hugepage_idx = MM_TSB_HUGE;
+
+       hugepage_pte_idx =
+               hugepage_size_to_pte_count_idx(1UL << hugepage_shift);
+
+       mm_rss = mm->context.huge_pte_count[hugepage_pte_idx];
+       if (unlikely(mm_rss >
+            mm->context.tsb_block[hugepage_idx].tsb_rss_limit)) {
+               if (mm->context.tsb_block[hugepage_idx].tsb)
+                       tsb_grow(mm, hugepage_idx, mm_rss);
+               else
+                       hugetlb_setup(regs, hugepage_idx);
+
+       }
+}
+#else
+static unsigned long hugepage_pte_counts_to_pages(mm_context_t *mm_context)
+{
+       return 0UL;
+}
+static void sparc64_hugetlb_tsb_fault(struct pt_regs *regs,
+                                     struct mm_struct *mm,
+                                     unsigned int hugepage_shift)
+{
+}
+#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
+
 asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 {
        enum ctx_state prev_state = exception_enter();
        struct mm_struct *mm = current->mm;
+       unsigned int hugepage_shift;
        struct vm_area_struct *vma;
        unsigned int insn = 0;
        int si_code, fault_code, fault;
@@ -476,26 +531,18 @@ good_area:
                        goto retry;
                }
        }
+       if (is_vm_hugetlb_page(vma))
+               hugepage_shift = huge_page_shift(hstate_vma(vma));
+       else
+               hugepage_shift = HPAGE_SHIFT;
        up_read(&mm->mmap_sem);
 
        mm_rss = get_mm_rss(mm);
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
-#endif
+       mm_rss =  mm_rss - hugepage_pte_counts_to_pages(&mm->context);
        if (unlikely(mm_rss >
                     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
                tsb_grow(mm, MM_TSB_BASE, mm_rss);
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       mm_rss = mm->context.huge_pte_count;
-       if (unlikely(mm_rss >
-                    mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
-               if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
-                       tsb_grow(mm, MM_TSB_HUGE, mm_rss);
-               else
-                       hugetlb_setup(regs);
-
-       }
-#endif
+       sparc64_hugetlb_tsb_fault(regs, mm, hugepage_shift);
 exit_exception:
        exception_exit(prev_state);
        return;
index 4242eab12e10738e8bdbf6117f7be26d45528a48..9a1b2a5d71a05f8e10e4e68de3cc7140eb8a1079 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/sysctl.h>
-
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 /* Slightly simplified from the non-hugepage variant because by
  * definition we don't have to worry about any page coloring stuff
  */
-
-static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        unsigned long task_size = TASK_SIZE;
        struct vm_unmapped_area_info info;
 
@@ -37,7 +36,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = min(task_size, VA_EXCLUDE_START);
-       info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
 
@@ -52,11 +51,13 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
 }
 
 static unsigned long
-hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+hugetlb_get_unmapped_area_topdown(struct file *file,
+                                 const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;
@@ -68,7 +69,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
-       info.align_mask = PAGE_MASK & ~HPAGE_MASK;
+       info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
 
@@ -89,18 +90,19 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        return addr;
 }
 
-unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-               unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+                                       unsigned long len, unsigned long pgoff,
+                                       unsigned long flags)
 {
+       struct hstate *h = hstate_file(file);
+       unsigned long task_size = TASK_SIZE;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-       unsigned long task_size = TASK_SIZE;
 
        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;
 
-       if (len & ~HPAGE_MASK)
+       if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;
@@ -112,7 +114,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        }
 
        if (addr) {
-               addr = ALIGN(addr, HPAGE_SIZE);
+               addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
@@ -126,50 +128,62 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                pgoff, flags);
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-                       unsigned long addr, unsigned long sz)
+/* Since the hugepage could cover more than one pmd entry and more
+ * than one pgd entry, we must handle all possible conditions.
+ */
+static pmd_t *huge_pmd_alloc(struct mm_struct *mm, unsigned long addr)
 {
-       pgd_t *pgd;
+       pgd_t *pgd = pgd_offset(mm, addr);
+       pmd_t *pmd = NULL;
        pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte = NULL;
 
-       /* We must align the address, because our caller will run
-        * set_huge_pte_at() on whatever we return, which writes out
-        * all of the sub-ptes for the hugepage range.  So we have
-        * to give it the first such sub-pte.
-        */
-       addr &= HPAGE_MASK;
+       if (pgd_none(*pgd)) {
+               pud_t *pud = pud_alloc(mm, pgd, addr);
 
-       pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
-       if (pud) {
+               if (pud == NULL)
+                       goto out;
+       }
+       pud = pud_offset(pgd, addr);
+       if (pud_none(*pud)) {
                pmd = pmd_alloc(mm, pud, addr);
-               if (pmd)
-                       pte = pte_alloc_map(mm, NULL, pmd, addr);
+
+               if (pmd == NULL)
+                       goto out;
        }
-       return pte;
+       pmd = pmd_offset(pud, addr);
+out:
+       return pmd;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+/* Note: should we fail, we leave behind the mm state,
+ * which will be cleaned up on exit.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
+                     unsigned long size)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte = NULL;
+       unsigned long start = addr & ~(size - 1);
+       unsigned long end = start + size;
+       pte_t *rpte = NULL;
 
-       addr &= HPAGE_MASK;
+       /* Our caller operates on start's pte, which is rpte, should we succeed. */
+       for (addr = start; addr < end; addr = addr + PMD_SIZE) {
+               pmd_t *pmd = huge_pmd_alloc(mm, addr);
+               pte_t *pte;
 
-       pgd = pgd_offset(mm, addr);
-       if (!pgd_none(*pgd)) {
-               pud = pud_offset(pgd, addr);
-               if (!pud_none(*pud)) {
-                       pmd = pmd_offset(pud, addr);
-                       if (!pmd_none(*pmd))
-                               pte = pte_offset_map(pmd, addr);
-               }
+               if (!pmd)
+                       goto fail;
+
+               pte = pte_alloc_map(mm, NULL, pmd, addr);
+
+               if (!pte)
+                       goto fail;
+               else if (!rpte)
+                       rpte = pte;
        }
-       return pte;
+
+       return rpte;
+fail:
+       return NULL;
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
@@ -177,42 +191,320 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
        return 0;
 }
 
-void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-                    pte_t *ptep, pte_t entry)
+/* This function may need to be moved. It will be different for sun4u
+ * and possibly for future sun4v cores, though we have no plans to
+ * support sun4u at this point.
+ */
+static unsigned int sun4v_tte_to_shift(pte_t entry)
+{
+       unsigned long hugepage_tte = pte_val(entry) & _PAGE_SZALL_4V;
+       unsigned int hugepage_shift;
+
+       switch (hugepage_tte) {
+       case _PAGE_SZ16GB_4V:
+               hugepage_shift = 34U;
+               break;
+       case _PAGE_SZ2GB_4V:
+               hugepage_shift = 31U;
+               break;
+       case _PAGE_SZ256MB_4V:
+               hugepage_shift = 28U;
+               break;
+       case _PAGE_SZ4MB_4V:
+               hugepage_shift = 22U;
+               break;
+       default:
+               WARN_ONCE(1, "hugepage_shift: hugepage_tte=0x%lx\n",
+                       hugepage_tte);
+               hugepage_shift = PAGE_SHIFT;
+               break;
+       }
+       return hugepage_shift;
+}
+
+static unsigned int tte_to_shift(pte_t entry)
+{
+       unsigned int hugepage_shift;
+
+       if (tlb_type == hypervisor)
+               hugepage_shift = sun4v_tte_to_shift(entry);
+       else
+               hugepage_shift = REAL_HPAGE_SHIFT;
+
+       return hugepage_shift;
+}
+
+static unsigned long tte_to_hugepage_size(pte_t pte)
+{
+       unsigned long hugepage_size = 1UL << tte_to_shift(pte);
+
+       if (hugepage_size == REAL_HPAGE_SIZE)
+               hugepage_size = HPAGE_SIZE;
+       return hugepage_size;
+}
+
+static unsigned long tte_to_hugepage_mask(pte_t pte)
 {
-       int i;
+       unsigned int hugepage_shift = tte_to_shift(pte);
+       unsigned long hugepage_mask;
 
-       if (!pte_present(*ptep) && pte_present(entry))
-               mm->context.huge_pte_count++;
+       if (hugepage_shift == REAL_HPAGE_SHIFT)
+               hugepage_shift = HPAGE_SHIFT;
 
-       addr &= HPAGE_MASK;
-       for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-               set_pte_at(mm, addr, ptep, entry);
-               ptep++;
-               addr += PAGE_SIZE;
-               pte_val(entry) += PAGE_SIZE;
+       hugepage_mask = ~((1UL << hugepage_shift) - 1);
+
+       return hugepage_mask;
+}
+
+/* This should also be moved, and should be a noop for sun4u.
+ * Only the xl hugepage sizes we plan to support are included.
+ */
+static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int hugepage_shift)
+{
+       unsigned long sun4v_hugepage_size = _PAGE_SZ4MB_4V;
+
+       pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
+
+       switch (hugepage_shift) {
+       /* 16Gb */
+       case 34U:
+               sun4v_hugepage_size = _PAGE_SZ16GB_4V;
+               break;
+       /* 2Gb */
+       case 31U:
+               sun4v_hugepage_size = _PAGE_SZ2GB_4V;
+               break;
+       default:
+               WARN_ONCE(hugepage_shift,
+                       "hugepage_shift_to_tte: unsupported "
+                       "hugepage_shift=%u.\n", hugepage_shift);
        }
+
+       pte_val(entry) = pte_val(entry) | sun4v_hugepage_size;
+       return entry;
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+                        struct page *page, int writeable)
+{
+       unsigned int hugepage_shift = huge_page_shift(hstate_vma(vma));
+
+       if (hugepage_shift == HPAGE_SHIFT)
+               goto out;
+       entry = hugepage_shift_to_tte(entry, hugepage_shift);
+out:
+       return entry;
+}
+
+static void huge_pte_at_flush_update(struct mm_struct *mm, unsigned long addr,
+                                    pte_t *pte, pte_t orig,
+                                    pte_t *sentinel_pte)
+{
+       if (pte_val(orig) & _PAGE_VALID) {
+               if (!(pte_val(*sentinel_pte) & _PAGE_VALID)) {
+                       *sentinel_pte = orig;
+                       tlb_batch_add(mm, addr, pte, orig, false);
+               }
+       }
+}
+
+static void form_sentinel(pte_t *sentinel_pte, pte_t entry, pte_t *pte)
+{
+       pte_t sentinel = __pte(_PAGE_VALID | _PAGE_E_4V |
+               (pte_val(entry) & _PAGE_SZALL_4V) | __pa(pte));
+
+       *sentinel_pte = sentinel;
+}
+
+static bool huge_pte_at_handle_sentinel(pte_t *sentinel_pte, pte_t *pte,
+                                       pte_t orig, pte_t entry)
+{
+       bool rc = true;
+
+       /* Should the original pte be marked valid then
+        * only update the sentinel.
+        */
+       if (pte_val(orig) & _PAGE_VALID) {
+               if ((pte_val(orig) & _PAGE_E_4V) == 0UL)
+                       *pte = entry;
+               rc = false;
+       } else if (pte_val(*sentinel_pte) & _PAGE_VALID) {
+               *pte = *sentinel_pte;
+       } else {
+               form_sentinel(sentinel_pte, entry, pte);
+               *pte = entry;
+       }
+
+       return rc;
+}
+
+static bool __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                             pte_t *pte, pte_t entry, pte_t *sentinel_pte)
+{
+       unsigned int hugepage_shift = tte_to_shift(entry);
+       bool rc = true;
+
+       if (hugepage_shift != REAL_HPAGE_SHIFT) {
+               pte_t orig = *pte;
+
+               rc = huge_pte_at_handle_sentinel(sentinel_pte, pte, orig,
+                                       entry);
+               huge_pte_at_flush_update(mm, addr, pte, orig, sentinel_pte);
+       } else
+               set_pte_at(mm, addr, pte, entry);
+
+       return rc;
+}
+
+static void __clear_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                               pte_t *pte, pte_t *sentinel_pte)
+{
+       unsigned int hugepage_shift = tte_to_shift(*pte);
+
+       if (hugepage_shift != REAL_HPAGE_SHIFT) {
+               pte_t orig = *pte;
+
+               *pte = __pte(0UL);
+               huge_pte_at_flush_update(mm, addr, pte, orig, sentinel_pte);
+       } else
+               pte_clear(mm, addr, pte);
+}
+
+static bool set_huge_pte_range_at(struct mm_struct *mm, pmd_t *pmd,
+                                 unsigned long addr, pte_t *pentry,
+                                 pte_t *sentinel_pte, bool set_at)
+{
+       pte_t *pte = pte_offset_map(pmd, addr);
+       pte_t *lpte = pte + PTRS_PER_PTE;
+       pte_t entry = *pentry;
+       bool rc = true;
+
+       for (; pte < lpte; pte++, addr = addr + PAGE_SIZE) {
+               if (set_at) {
+                       rc = __set_huge_pte_at(mm, addr, pte, entry,
+                                                       sentinel_pte);
+                       if (!rc)
+                               break;
+                       pte_val(entry) = pte_val(entry) + PAGE_SIZE;
+               } else
+                       __clear_huge_pte_at(mm, addr, pte, sentinel_pte);
+       }
+       if (set_at)
+               *pentry = entry;
+       return rc;
+}
+
+static bool set_huge_pmd_at(struct mm_struct *mm, pud_t *pud,
+                           unsigned long addr, unsigned long end,
+                           pte_t *pentry, pte_t *sentinel_pte, bool set_at)
+{
+       pmd_t *pmd = pmd_offset(pud, addr);
+       unsigned long next;
+       bool rc;
+
+       do {
+               next = pmd_addr_end(addr, end);
+               rc = set_huge_pte_range_at(mm, pmd, addr, pentry,
+                               sentinel_pte, set_at);
+       } while (pmd++, addr = next, ((addr != end) && rc));
+       return rc;
+}
+
+static bool set_huge_pud_at(struct mm_struct *mm, pgd_t *pgd,
+                           unsigned long addr, unsigned long end,
+                           pte_t *pentry, pte_t *sentinel_pte, bool set_at)
+{
+       pud_t *pud = pud_offset(pgd, addr);
+       unsigned long next;
+       bool rc;
+
+       do {
+               next = pud_addr_end(addr, end);
+               rc = set_huge_pmd_at(mm, pud, addr, next, pentry,
+                               sentinel_pte, set_at);
+       } while (pud++, addr = next, ((addr != end) && rc));
+       return rc;
+}
+
+/* entry must be the first pte of the hugepage. Otherwise entry must
+ * be adjusted before we enter the set_pte_at loop, and it must be
+ * physically aligned to match the hugepage size. This is equally
+ * true of other locations where HUGETLB_PAGE_ORDER is used within
+ * this module in mainline as of 7/20/2014.
+ */
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                    pte_t *ptep, pte_t entry)
+{
+       pte_t sentinel_pte = __pte(0UL);
+       unsigned long hugepage_size = tte_to_hugepage_size(entry);
+       unsigned long hugepage_mask = tte_to_hugepage_mask(entry);
+       unsigned long start = addr & hugepage_mask;
+       unsigned long end = start + hugepage_size;
+       pgd_t *pgd = pgd_offset(mm, start);
+       unsigned long next;
+       bool rc;
+
+       if (!pte_present(*ptep) && pte_present(entry)) {
+               unsigned int pte_count_idx =
+                       real_hugepage_size_to_pte_count_idx(hugepage_size);
+
+               mm->context.huge_pte_count[pte_count_idx]++;
+       }
+
+       do {
+               next = pgd_addr_end(start, end);
+               rc = set_huge_pud_at(mm, pgd, start, next, &entry,
+                                               &sentinel_pte, true);
+       } while (pgd++, start = next, ((start != end) && rc));
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
 {
-       pte_t entry;
-       int i;
+       pte_t sentinel_pte = __pte(0UL);
+       pte_t entry = *ptep;
+       unsigned long hugepage_size = tte_to_hugepage_size(entry);
+       unsigned long hugepage_mask = tte_to_hugepage_mask(entry);
+       unsigned long start = addr & hugepage_mask;
+       unsigned long end = start + hugepage_size;
+       pgd_t *pgd = pgd_offset(mm, start);
+       unsigned long next;
+       bool rc;
+
+       if (pte_present(entry)) {
+               unsigned int pte_count_idx =
+                       real_hugepage_size_to_pte_count_idx(hugepage_size);
+
+               mm->context.huge_pte_count[pte_count_idx]--;
+       }
 
-       entry = *ptep;
-       if (pte_present(entry))
-               mm->context.huge_pte_count--;
+       do {
+               next = pgd_addr_end(start, end);
+               rc = set_huge_pud_at(mm, pgd, start, next, &entry,
+                                               &sentinel_pte, false);
+       } while (pgd++, start = next, ((start != end) && rc));
 
-       addr &= HPAGE_MASK;
+       return entry;
+}
 
-       for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-               pte_clear(mm, addr, ptep);
-               addr += PAGE_SIZE;
-               ptep++;
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+       pte_t *pte = NULL;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset(mm, addr);
+       if (!pgd_none(*pgd)) {
+               pud = pud_offset(pgd, addr);
+               if (!pud_none(*pud)) {
+                       pmd = pmd_offset(pud, addr);
+                       if (!pmd_none(*pmd))
+                               pte = pte_offset_map(pmd, addr);
+               }
        }
 
-       return entry;
+       return pte;
 }
 
 int pmd_huge(pmd_t pmd)
index 54791edd53f5da5f21c7e6fb87df71b5161f5e03..d265dacf461b40d521e4a3d64751330335f83607 100644 (file)
@@ -336,6 +336,127 @@ static inline bool is_hugetlb_pte(pte_t pte)
 }
 #endif
 
+#ifdef CONFIG_HUGETLB_PAGE
+unsigned int xl_hugepage_shift;
+static unsigned long xl_hugepage_pte;
+
+static bool is_xl_hugetlb_pte(pte_t pte)
+{
+       bool rc = false;
+
+       if (!xl_hugepage_pte)
+               goto out;
+       else if ((pte_val(pte) & _PAGE_SZALL_4V) == xl_hugepage_pte)
+               rc = true;
+out:
+       return rc;
+}
+
+static void __init sun4v_xl_hugepage_hash_patch(void)
+{
+       extern unsigned int __sun4v_xl_hugepage_hash_patch;
+       unsigned *insn, *p;
+
+       p = &__sun4v_xl_hugepage_hash_patch;
+       insn = (unsigned int *)(unsigned long)*p;
+       *insn = *insn | xl_hugepage_shift;
+       __asm__ __volatile__("flush %0\n\t"
+               : /* no outputs */
+               : "r" (insn));
+}
+
+static void __init sun4v_xl_hugepage_pte_size_patch(void)
+{
+       extern unsigned int __sun4v_xl_hugepage_pte_size_patch;
+       unsigned *insn, *p;
+
+       p = &__sun4v_xl_hugepage_pte_size_patch;
+       insn = (unsigned int *)(unsigned long)*p;
+       p++;
+       /* It is a simm13 in the subcc instruction. */
+       *insn = *p | (unsigned int) xl_hugepage_pte;
+       __asm__ __volatile__("flush %0\n\t"
+               : /* no outputs */
+               : "r" (insn));
+}
+
+static void __init sun4v_xl_hugepage_pte_branch_patch(void)
+{
+       extern unsigned int __sun4v_xl_hugepage_pte_branch_patch;
+       extern unsigned int sun4v_xl_hugepages;
+       unsigned int btarget = (unsigned int)
+               (unsigned long) &sun4v_xl_hugepages;
+       unsigned int *insn, *p, disp19;
+
+       p = &__sun4v_xl_hugepage_pte_branch_patch;
+       insn = (unsigned int *)(unsigned long)*p;
+       /* Instruction which needs to be a bne,pt to sun4v_xl_hugepages.*/
+       p++;
+       disp19 = (btarget - (unsigned int) (unsigned long) insn);
+       disp19 = disp19 >> 2;
+       disp19 = disp19 & (0x7ffff);
+       *insn = *p & ~0x7ffff;
+       *insn = *insn | disp19;
+       __asm__ __volatile__("flush %0\n\t"
+               : /* no outputs */
+               : "r" (insn));
+}
+
+static int __init setup_hugepagesz(char *string)
+{
+       unsigned int hugepage_shift, hv_pgsz_mask;
+       unsigned long long xl_hugepage_size;
+       unsigned short hv_pgsz_idx;
+
+       if (tlb_type != hypervisor)
+               goto out;
+
+       xl_hugepage_size = memparse(string, &string);
+       /* Validate the xl_hugepage_size.*/
+       if (!is_power_of_2(xl_hugepage_size))
+               goto bad;
+
+       /* Now determine whether the size is good for xl_hugepage_size.
+        * First, the chip must support it; second, for now, it must be >= 2GB.
+        */
+       hugepage_shift = ilog2(xl_hugepage_size);
+       switch (hugepage_shift) {
+       case 34U:
+               hv_pgsz_mask = HV_PGSZ_MASK_16GB;
+               hv_pgsz_idx = HV_PGSZ_IDX_16GB;
+               xl_hugepage_pte = _PAGE_SZ16GB_4V;
+               break;
+       case 31U:
+               hv_pgsz_mask = HV_PGSZ_MASK_2GB;
+               hv_pgsz_idx = HV_PGSZ_IDX_2GB;
+               xl_hugepage_pte = _PAGE_SZ2GB_4V;
+               break;
+       default:
+               hv_pgsz_mask = 0U;
+               hv_pgsz_idx = HV_PGSZ_IDX_8K;
+               break;
+       }
+
+       if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U)
+               goto bad;
+
+       xl_hugepage_shift = hugepage_shift;
+       xl_hugepage_size = 1UL << hugepage_shift;
+       sun4v_xl_hugepage_hash_patch();
+       sun4v_xl_hugepage_pte_size_patch();
+       sun4v_xl_hugepage_pte_branch_patch();
+       hv_establish_xl_hugepage_tsb_descriptor(hv_pgsz_idx, hv_pgsz_mask);
+       hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+       hugetlb_add_hstate(xl_hugepage_shift - PAGE_SHIFT);
+       goto out;
+bad:
+       pr_warn("Invalid xl_hugepage_size=0x%llx.\n", xl_hugepage_size);
+out:
+       return 0;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
        struct mm_struct *mm;
@@ -358,9 +479,13 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
        spin_lock_irqsave(&mm->context.lock, flags);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
+       if (mm->context.huge_pte_count[MM_PTES_HUGE] && is_hugetlb_pte(pte))
                __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        address, pte_val(pte));
+       else if (mm->context.huge_pte_count[MM_PTES_XLHUGE] &&
+                       is_xl_hugetlb_pte(pte))
+               __update_mmu_tsb_insert(mm, MM_TSB_XLHUGE, xl_hugepage_shift,
+                       address, pte_val(pte));
        else
 #endif
                __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
@@ -2892,7 +3017,7 @@ static void context_reload(void *__data)
                load_secondary_context(mm);
 }
 
-void hugetlb_setup(struct pt_regs *regs)
+void hugetlb_setup(struct pt_regs *regs, unsigned int tsb_index)
 {
        struct mm_struct *mm = current->mm;
        struct tsb_config *tp;
@@ -2910,9 +3035,9 @@ void hugetlb_setup(struct pt_regs *regs)
                die_if_kernel("HugeTSB in atomic", regs);
        }
 
-       tp = &mm->context.tsb_block[MM_TSB_HUGE];
+       tp = &mm->context.tsb_block[tsb_index];
        if (likely(tp->tsb == NULL))
-               tsb_grow(mm, MM_TSB_HUGE, 0);
+               tsb_grow(mm, tsb_index, 0);
 
        tsb_context_switch(mm);
        smp_tsb_sync(mm);
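
For context, setup_hugepagesz() above is the "hugepagesz=" early-parameter
handler, so on a sun4v machine whose cpu_pgsz_mask includes the 2GB or 16GB
page sizes the XL size would be selected at boot with something like the
following (an illustrative command-line fragment, not taken from this patch):

	hugepagesz=2G

Sizes that are not a power of two, or that the chip does not report support
for, fall through to the "Invalid xl_hugepage_size" warning.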
index 9df2190c097e1bb09de8378ca89b19e79c384917..6e6633094557185f3277137cc2ba419aebdc1f23 100644 (file)
@@ -165,9 +165,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 
        if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
                if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-                       mm->context.huge_pte_count++;
+                       mm->context.huge_pte_count[MM_PTES_HUGE]++;
                else
-                       mm->context.huge_pte_count--;
+                       mm->context.huge_pte_count[MM_PTES_HUGE]--;
 
                /* Do not try to allocate the TSB hash table if we
                 * don't have one already.  We have various locks held
index a06576683c38a0f6ff8ee061d1ea5efbfe2a33f1..0bbf88445b8da89faaf5e695cecf340cf9703689 100644 (file)
@@ -91,6 +91,14 @@ void flush_tsb_user(struct tlb_batch *tb)
                __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
        }
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+       if (mm->context.tsb_block[MM_TSB_XLHUGE].tsb) {
+               base = (unsigned long) mm->context.tsb_block[MM_TSB_XLHUGE].tsb;
+               nentries = mm->context.tsb_block[MM_TSB_XLHUGE].tsb_nentries;
+               base = __pa(base);
+               __flush_tsb_one(tb, xl_hugepage_shift, base, nentries);
+       }
+#endif /* CONFIG_HUGETLB_PAGE */
        spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
@@ -115,6 +123,14 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
                __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
        }
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+       if (mm->context.tsb_block[MM_TSB_XLHUGE].tsb) {
+               base = (unsigned long) mm->context.tsb_block[MM_TSB_XLHUGE].tsb;
+               nentries = mm->context.tsb_block[MM_TSB_XLHUGE].tsb_nentries;
+               base = __pa(base);
+               __flush_tsb_one_entry(base, vaddr, xl_hugepage_shift, nentries);
+       }
+#endif /* CONFIG_HUGETLB_PAGE */
        spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
@@ -124,9 +140,56 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr)
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define HV_PGSZ_IDX_HUGE       HV_PGSZ_IDX_4MB
 #define HV_PGSZ_MASK_HUGE      HV_PGSZ_MASK_4MB
+static unsigned short hv_pgsz_idx_xlhuge;
+static unsigned int hv_pgsz_mask_xlhuge;
+
+void __init hv_establish_xl_hugepage_tsb_descriptor(unsigned short pgsz_idx,
+                                                   unsigned int pgsz_mask)
+{
+       hv_pgsz_idx_xlhuge = pgsz_idx;
+       hv_pgsz_mask_xlhuge = pgsz_mask;
+}
 #endif
 
-static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
+static void sun4v_fill_tsb_descriptor(struct mm_struct *mm)
+{
+       struct hv_tsb_descr *htd = &mm->context.tsb_descr[0];
+       unsigned int tsb_idx;
+
+       for (tsb_idx = 0U; tsb_idx < MM_NUM_TSBS; tsb_idx++) {
+               /* Should there not be a tsb, then skip it. */
+               if (!mm->context.tsb_block[tsb_idx].tsb)
+                       continue;
+
+               switch (tsb_idx) {
+               case MM_TSB_BASE:
+                       htd->pgsz_mask = HV_PGSZ_MASK_BASE;
+                       htd->pgsz_idx = HV_PGSZ_IDX_BASE;
+                       break;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+               case MM_TSB_HUGE:
+                       htd->pgsz_mask = HV_PGSZ_MASK_HUGE;
+                       htd->pgsz_idx = HV_PGSZ_IDX_HUGE;
+                       break;
+               case MM_TSB_XLHUGE:
+                       htd->pgsz_mask = hv_pgsz_mask_xlhuge;
+                       htd->pgsz_idx = hv_pgsz_idx_xlhuge;
+                       break;
+#endif
+               default:
+                       BUG();
+               }
+               htd->assoc = 1;
+               htd->num_ttes = mm->context.tsb_block[tsb_idx].tsb_nentries;
+               htd->ctx_idx = 0;
+               htd->tsb_base = __pa(mm->context.tsb_block[tsb_idx].tsb);
+               htd->resv = 0;
+               htd++;
+       }
+}
+
+static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx,
+                            unsigned long tsb_bytes)
 {
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;
@@ -140,6 +203,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
                break;
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        case MM_TSB_HUGE:
+       case MM_TSB_XLHUGE:
                base = TSBMAP_4M_BASE;
                break;
 #endif
@@ -222,40 +286,8 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }
 
-       /* Setup the Hypervisor TSB descriptor.  */
-       if (tlb_type == hypervisor) {
-               struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
-
-               switch (tsb_idx) {
-               case MM_TSB_BASE:
-                       hp->pgsz_idx = HV_PGSZ_IDX_BASE;
-                       break;
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-               case MM_TSB_HUGE:
-                       hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
-                       break;
-#endif
-               default:
-                       BUG();
-               }
-               hp->assoc = 1;
-               hp->num_ttes = tsb_bytes / 16;
-               hp->ctx_idx = 0;
-               switch (tsb_idx) {
-               case MM_TSB_BASE:
-                       hp->pgsz_mask = HV_PGSZ_MASK_BASE;
-                       break;
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-               case MM_TSB_HUGE:
-                       hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
-                       break;
-#endif
-               default:
-                       BUG();
-               }
-               hp->tsb_base = tsb_paddr;
-               hp->resv = 0;
-       }
+       if (tlb_type == hypervisor)
+               sun4v_fill_tsb_descriptor(mm);
 }
 
 struct kmem_cache *pgtable_cache __read_mostly;
@@ -465,25 +497,54 @@ retry_tsb_alloc:
        }
 }
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+                                             unsigned long *capture_array)
+{
+       unsigned int hugepage_idx;
+
+       for (hugepage_idx = 0UL; hugepage_idx != MM_NUM_HUGEPAGE_SIZES;
+               hugepage_idx++) {
+               capture_array[hugepage_idx] =
+                       mm_context->huge_pte_count[hugepage_idx];
+               mm_context->huge_pte_count[hugepage_idx] = 0UL;
+       }
+}
+
+static void
+captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+                                    unsigned long *capture_huge_pte_count)
+{
+       if (unlikely(capture_huge_pte_count[MM_PTES_HUGE]))
+               tsb_grow(mm, MM_TSB_HUGE,
+                       capture_huge_pte_count[MM_PTES_HUGE]);
+
+       if (unlikely(capture_huge_pte_count[MM_PTES_XLHUGE]))
+               tsb_grow(mm, MM_TSB_XLHUGE,
+                       capture_huge_pte_count[MM_PTES_XLHUGE]);
+}
+#else
+static void capture_and_clear_huge_pte_counts(mm_context_t *mm_context,
+                                             unsigned long *capture_array) {}
+static void
+captured_hugepage_pte_count_grow_tsb(struct mm_struct *mm,
+                                    unsigned long *capture_huge_pte_count) {}
+#endif /* CONFIG_HUGETLB_PAGE || CONFIG_TRANSPARENT_HUGEPAGE */
+
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       unsigned long huge_pte_count;
-#endif
+       unsigned long capture_huge_pte_count[MM_NUM_HUGEPAGE_SIZES];
        unsigned int i;
 
        spin_lock_init(&mm->context.lock);
 
        mm->context.sparc64_ctx_val = 0UL;
 
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        /* We reset it to zero because the fork() page copying
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
-       huge_pte_count = mm->context.huge_pte_count;
-       mm->context.huge_pte_count = 0;
-#endif
+       capture_and_clear_huge_pte_counts(&mm->context, capture_huge_pte_count);
 
        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
@@ -497,10 +558,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
         */
        tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
 
-#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-       if (unlikely(huge_pte_count))
-               tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
-#endif
+       captured_hugepage_pte_count_grow_tsb(mm, capture_huge_pte_count);
 
        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;