        spinlock_t              lock;
        unsigned long           sparc64_ctx_val;
        unsigned long           huge_pte_count;
-       struct page             *pgtable_page;
        struct tsb_config       tsb_block[MM_NUM_TSBS];
        struct hv_tsb_descr     tsb_descr[MM_NUM_TSBS];
 } mm_context_t;
 
 #define DCACHE_ALIASING_POSSIBLE
 #endif
 
-#define HPAGE_SHIFT            22
+#define HPAGE_SHIFT            23
+#define REAL_HPAGE_SHIFT       22
+
+#define REAL_HPAGE_SIZE        (_AC(1,UL) << REAL_HPAGE_SHIFT)
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define HPAGE_SIZE             (_AC(1,UL) << HPAGE_SHIFT)
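
With sparc64's 8KB base pages (PAGE_SHIFT = 13), these constants describe an 8MB virtual huge page (HPAGE_SHIFT = 23) fabricated from two 4MB hardware pages (REAL_HPAGE_SHIFT = 22); the TLB itself never holds an 8MB entry. A minimal user-space sketch of the split, assuming those values (illustrative only, not kernel code):

    /* How an 8MB huge page decomposes into the two 4MB "real" pages
     * that actually back it in the TLB.
     */
    #include <stdio.h>

    #define REAL_HPAGE_SHIFT 22
    #define HPAGE_SHIFT      23
    #define REAL_HPAGE_SIZE  (1UL << REAL_HPAGE_SHIFT)
    #define HPAGE_SIZE       (1UL << HPAGE_SHIFT)
    #define HPAGE_MASK       (~(HPAGE_SIZE - 1))

    int main(void)
    {
            unsigned long addr = 0x40000000UL + REAL_HPAGE_SIZE + 0x123UL;
            unsigned long hpage = addr & HPAGE_MASK;

            printf("real page 0: %#lx\n", hpage);
            printf("real page 1: %#lx\n", hpage + REAL_HPAGE_SIZE);
            /* VA bit 22 selects which 4MB half this access hits. */
            printf("half: %lu\n", (addr >> REAL_HPAGE_SHIFT) & 1);
            return 0;
    }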
 
 /* PMD_SHIFT determines the size of the area a second-level page
  * table can map
  */
-#define PMD_SHIFT      (PAGE_SHIFT + (PAGE_SHIFT-4))
+#define PMD_SHIFT      (PAGE_SHIFT + (PAGE_SHIFT-3))
 #define PMD_SIZE       (_AC(1,UL) << PMD_SHIFT)
 #define PMD_MASK       (~(PMD_SIZE-1))
 #define PMD_BITS       (PAGE_SHIFT - 2)
 
 /* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT    (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
+#define PGDIR_SHIFT    (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
 #define PGDIR_SIZE     (_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 #define PGDIR_BITS     (PAGE_SHIFT - 2)
 
-#if (PGDIR_SHIFT + PGDIR_BITS) != 44
+#if (PGDIR_SHIFT + PGDIR_BITS) != 45
 #error Page table parameters do not cover virtual address space properly.
 #endif
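
The new value follows from the arithmetic above with PAGE_SHIFT = 13: PMD_SHIFT = 13 + 10 = 23 (now equal to HPAGE_SHIFT, so one huge page spans exactly one PMD entry), PMD_BITS = PGDIR_BITS = 11, hence PGDIR_SHIFT = 23 + 11 = 34 and PGDIR_SHIFT + PGDIR_BITS = 45; the old (PAGE_SHIFT-4) layout covered only 44 bits. A compile-time restatement of that arithmetic, assuming those values:

    /* Illustrative C11 restatement of the coverage check. */
    _Static_assert(13 + (13 - 3) == 23, "PMD_SHIFT, equal to HPAGE_SHIFT");
    _Static_assert(23 + (13 - 2) == 34, "PGDIR_SHIFT");
    _Static_assert(34 + (13 - 2) == 45, "45 bits of VA covered (was 44)");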
 
 #include <linux/sched.h>
 
 /* Entries per page directory level. */
-#define PTRS_PER_PTE   (1UL << (PAGE_SHIFT-4))
+#define PTRS_PER_PTE   (1UL << (PAGE_SHIFT-3))
 #define PTRS_PER_PMD   (1UL << PMD_BITS)
 #define PTRS_PER_PGD   (1UL << PGDIR_BITS)
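
Note the knock-on effect of (PAGE_SHIFT-3): PTRS_PER_PTE is now 1 << 10 = 1024 entries, and 1024 eight-byte PTEs fill an entire 8KB page. Under the old (PAGE_SHIFT-4) value a PTE table was 512 entries, only half a page, which is exactly what the get_from_cache()/__alloc_for_cache() machinery removed later in this patch existed to exploit.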
 
 #define _PAGE_SZBITS_4U        _PAGE_SZ8K_4U
 #define _PAGE_SZBITS_4V        _PAGE_SZ8K_4V
 
+#if REAL_HPAGE_SHIFT != 22
+#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up
+#endif
+
 #define _PAGE_SZHUGE_4U        _PAGE_SZ4MB_4U
 #define _PAGE_SZHUGE_4V        _PAGE_SZ4MB_4V
 
 
        lduwa           [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
        brz,pn          REG1, FAIL_LABEL; \
         sllx           VADDR, 64 - PMD_SHIFT, REG2; \
-       srlx            REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+       srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        sllx            REG1, PMD_PADDR_SHIFT, REG1; \
        andn            REG2, 0x7, REG2; \
        add             REG1, REG2, REG1;
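
The srlx change is the same table-size doubling seen in PTRS_PER_PTE: shifting left by 64 - PMD_SHIFT and back right by 64 - PAGE_SHIFT isolates VA bits [22:13], the index into the now page-sized PTE table, already scaled into a byte offset. A hedged C equivalent of the shift-and-mask sequence, assuming PMD_SHIFT = 23 and PAGE_SHIFT = 13 (the helper name is ours):

    /* Equivalent of: sllx VADDR, 64-PMD_SHIFT; srlx, 64-PAGE_SHIFT; andn 0x7.
     * Produces the same value as ((vaddr >> 13) & 0x3ff) << 3.
     */
    static inline unsigned long pte_table_offset(unsigned long vaddr)
    {
            unsigned long off = (vaddr << (64 - 23)) >> (64 - 13);

            return off & ~0x7UL;    /* drop the stray low VA bits */
    }
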
        or              REG, _PAGE_##NAME##_4V, REG;    \
        .previous;
 
-       /* Load into REG the PTE value for VALID, CACHE, and SZHUGE.  */
-#define BUILD_PTE_VALID_SZHUGE_CACHE(REG)                                 \
+       /* Load into REG the PTE value for VALID, CACHE, and SZHUGE.
+        *
+        * We are fabricating an 8MB page using 2 4MB HW pages here.
+        */
+#define BUILD_PTE_VALID_SZHUGE_CACHE(VADDR, PADDR_BITS, REG)              \
+       sethi           %hi(4 * 1024 * 1024), REG;                         \
+       andn            PADDR_BITS, REG, PADDR_BITS;                       \
+       and             VADDR, REG, REG;                                   \
+       or              PADDR_BITS, REG, PADDR_BITS;                       \
 661:   sethi           %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG;            \
        .section        .sun4v_1insn_patch, "ax";                          \
        .word           661b;                                              \

         nop;                                                                 \
        OR_PTE_BIT_2INSN(REG2, REG1, EXEC);                                   \
        /* REG1 can now be clobbered, build final PTE */                      \
-1:     BUILD_PTE_VALID_SZHUGE_CACHE(REG1);                                   \
+1:     BUILD_PTE_VALID_SZHUGE_CACHE(VADDR, REG2, REG1);                      \
        ba,pt           %xcc, PTE_LABEL;                                      \
         or             REG1, REG2, REG1;                                     \
 700:
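
The four instructions prepended to the macro implement the new comment: bit 22 of the virtual address is folded into the PTE's physical address bits, so the 4MB TLB entry loaded for the first half of an 8MB page maps the first 4MB of backing memory and the entry for the second half maps the memory REAL_HPAGE_SIZE further in. A hedged C rendering of just those four instructions (the function name is ours):

    /* sethi %hi(4MB), REG; andn PADDR_BITS; and VADDR; or PADDR_BITS. */
    static inline unsigned long fold_in_vaddr_bit22(unsigned long paddr_bits,
                                                    unsigned long vaddr)
    {
            const unsigned long mask = 4UL * 1024 * 1024;   /* bit 22 */

            paddr_bits &= ~mask;                    /* clear PA bit 22 */
            return paddr_bits | (vaddr & mask);     /* substitute VA bit 22 */
    }
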
        lduwa           [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
        USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
        sllx            VADDR, 64 - PMD_SHIFT, REG2; \
-       srlx            REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+       srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        sllx            REG1, PMD_PADDR_SHIFT, REG1; \
        andn            REG2, 0x7, REG2; \
        add             REG1, REG2, REG1; \
 
        cmp     %g5, -1
        be,pt   %xcc, 80f
         nop
-       COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7)
+       COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7)
 
        /* That clobbered %g2, reload it.  */
        ldxa    [%g0] ASI_SCRATCHPAD, %g2
 
        mov             512, %g7
        andn            %g5, 0x7, %g5
        sllx            %g7, %g6, %g7
-       srlx            %g4, HPAGE_SHIFT, %g6
+       srlx            %g4, REAL_HPAGE_SHIFT, %g6
        sub             %g7, 1, %g7
        and             %g6, %g7, %g6
        sllx            %g6, 4, %g6
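
This is the huge-page TSB index computation: nentries is 512 << tsb_size, each TSB entry is 16 bytes (the final sllx by 4), and the hash now shifts by REAL_HPAGE_SHIFT so the two 4MB halves of an 8MB page occupy separate TSB slots. A hedged C restatement, assuming %g4 holds the faulting address and %g6 the TSB size shift (names are ours):

    /* mov 512; sllx by size; srlx vaddr by 22; mask; sllx by 4. */
    static inline unsigned long huge_tsb_offset(unsigned long vaddr,
                                                unsigned int tsb_size)
    {
            unsigned long nentries = 512UL << tsb_size;
            unsigned long idx = (vaddr >> 22) & (nentries - 1);

            return idx * 16;        /* byte offset of the TSB entry */
    }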
 
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
-               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        address, pte_val(pte));
        else
 #endif

                             : : "r" (pstate));
 }
 
-static pte_t *get_from_cache(struct mm_struct *mm)
-{
-       struct page *page;
-       pte_t *ret;
-
-       spin_lock(&mm->page_table_lock);
-       page = mm->context.pgtable_page;
-       ret = NULL;
-       if (page) {
-               void *p = page_address(page);
-
-               mm->context.pgtable_page = NULL;
-
-               ret = (pte_t *) (p + (PAGE_SIZE / 2));
-       }
-       spin_unlock(&mm->page_table_lock);
-
-       return ret;
-}
-
-static struct page *__alloc_for_cache(struct mm_struct *mm)
-{
-       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-                                      __GFP_REPEAT | __GFP_ZERO);
-
-       if (page) {
-               spin_lock(&mm->page_table_lock);
-               if (!mm->context.pgtable_page) {
-                       atomic_set(&page->_count, 2);
-                       mm->context.pgtable_page = page;
-               }
-               spin_unlock(&mm->page_table_lock);
-       }
-       return page;
-}
-
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
                            unsigned long address)
 {
-       struct page *page;
-       pte_t *pte;
-
-       pte = get_from_cache(mm);
-       if (pte)
-               return pte;
+       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+                                      __GFP_REPEAT | __GFP_ZERO);
+       pte_t *pte = NULL;
 
-       page = __alloc_for_cache(mm);
        if (page)
                pte = (pte_t *) page_address(page);
 
 pgtable_t pte_alloc_one(struct mm_struct *mm,
                        unsigned long address)
 {
-       struct page *page;
-       pte_t *pte;
-
-       pte = get_from_cache(mm);
-       if (pte)
-               return pte;
+       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+                                      __GFP_REPEAT | __GFP_ZERO);
+       pte_t *pte = NULL;
 
-       page = __alloc_for_cache(mm);
        if (page) {
                pgtable_page_ctor(page);
                pte = (pte_t *) page_address(page);
 
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-       struct page *page = virt_to_page(pte);
-       if (put_page_testzero(page))
-               free_hot_cold_page(page, 0);
+       free_page((unsigned long)pte);
 }
 
 static void __pte_free(pgtable_t pte)
 {
        struct page *page = virt_to_page(pte);
-       if (put_page_testzero(page)) {
-               pgtable_page_dtor(page);
-               free_hot_cold_page(page, 0);
-       }
+
+       pgtable_page_dtor(page);
+       __free_page(page);
 }
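
With page-sized PTE tables the half-page sharing trick is gone, and so is its reference-count juggling: there is no atomic_set(&page->_count, 2) at allocation and no put_page_testzero() at free, because each table now owns its page outright and a plain free_page()/__free_page() suffices.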
 
 void pte_free(struct mm_struct *mm, pgtable_t pte)

        pte <<= PMD_PADDR_SHIFT;
        pte |= _PAGE_VALID;
 
+       /* We are fabricating 8MB pages using 4MB real hw pages.  */
+       pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
+
        prot = pmd_pgprot(entry);
 
        if (tlb_type == hypervisor)
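
This is the C-side twin of the BUILD_PTE_VALID_SZHUGE_CACHE change above: when a huge PMD is expanded to a PTE for TSB insertion, VA bit 22 is OR'd into the physical address so that each 4MB half of the mapping points at its own half of the 8MB region.
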
        spin_lock_irqsave(&mm->context.lock, flags);
 
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
-               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        addr, pte);
 
        spin_unlock_irqrestore(&mm->context.lock, flags);
 
                bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
 
                addr &= HPAGE_MASK;
-               if (pmd_val(orig) & PMD_ISHUGE)
+               if (pmd_val(orig) & PMD_ISHUGE) {
                        tlb_batch_add_one(mm, addr, exec);
-               else
+                       tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec);
+               } else {
                        tlb_batch_pmd_scan(mm, addr, orig, exec);
+               }
        }
 }
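
Because one 8MB huge page is really two 4MB TLB entries, demapping it must queue both halves, hence the second tlb_batch_add_one() call at addr + REAL_HPAGE_SIZE.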
 
 
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
+               __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
        }
 #endif
        spin_unlock_irqrestore(&mm->context.lock, flags);

                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
-               __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries);
+               __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
        }
 #endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
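
Both TSB flush paths must hash with the same REAL_HPAGE_SHIFT that the miss handlers and update_mmu_cache() now use for inserts; probing with the old HPAGE_SHIFT would walk the wrong slots and leave stale huge-page entries behind.
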
        mm->context.huge_pte_count = 0;
 #endif
 
-       mm->context.pgtable_page = NULL;
-
        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.

 void destroy_context(struct mm_struct *mm)
 {
        unsigned long flags, i;
-       struct page *page;
 
        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);
 
-       page = mm->context.pgtable_page;
-       if (page && put_page_testzero(page)) {
-               pgtable_page_dtor(page);
-               free_hot_cold_page(page, 0);
-       }
-
        spin_lock_irqsave(&ctx_alloc_lock, flags);
 
        if (CTX_VALID(mm->context)) {