return ret;
 }
 
+/*
+ * Withdraw the page table deposited for @pmd in @mm, free it, and drop it
+ * from the mm's nr_ptes count.
+ *
+ * The callers visible in this change only invoke this when
+ * arch_needs_pgtable_deposit() is true.
+ */
+static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
+{
+       pgtable_t pgtable;
+
+       pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+       pte_free(mm, pgtable);
+       atomic_long_dec(&mm->nr_ptes);
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 pmd_t *pmd, unsigned long addr)
 {
                        atomic_long_dec(&tlb->mm->nr_ptes);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                } else {
+                       if (arch_needs_pgtable_deposit())
+                               zap_deposited_table(tlb->mm, pmd);
                        add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
                }
                spin_unlock(ptl);
 
        if (!vma_is_anonymous(vma)) {
                _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               /*
+                * We are going to unmap this huge page. So
+                * just go ahead and zap it
+                */
+               if (arch_needs_pgtable_deposit())
+                       zap_deposited_table(mm, pmd);
                if (vma_is_dax(vma))
                        return;
                page = pmd_page(_pmd);
 
        struct vm_area_struct *vma;
        unsigned long addr;
        pmd_t *pmd, _pmd;
+       bool deposited = false;
 
        i_mmap_lock_write(mapping);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                        spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
                        /* assume page table is clear */
                        _pmd = pmdp_collapse_flush(vma, addr, pmd);
+                       /*
+                        * Now deposit the pgtable for archs that need it;
+                        * otherwise free it.
+                        */
+                       if (arch_needs_pgtable_deposit()) {
+                               /*
+                                * The deposit should be visible only after
+                                * collapse is seen by others.
+                                */
+                               smp_wmb();
+                               pgtable_trans_huge_deposit(vma->vm_mm, pmd,
+                                                          pmd_pgtable(_pmd));
+                               deposited = true;
+                       }
                        spin_unlock(ptl);
                        up_write(&vma->vm_mm->mmap_sem);
-                       atomic_long_dec(&vma->vm_mm->nr_ptes);
-                       pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+                       if (!deposited) {
+                               atomic_long_dec(&vma->vm_mm->nr_ptes);
+                               pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+                       }
                }
        }
        i_mmap_unlock_write(mapping);
 
        return true;
 }
 
+/*
+ * Deposit the preallocated page table fe->prealloc_pte for fe->pmd,
+ * count it in the mm's nr_ptes, and reset fe->prealloc_pte to 0.
+ *
+ * The caller visible in this change (do_set_pmd) invokes this between
+ * pmd_lock() and spin_unlock(), i.e. with the pmd lock held.
+ */
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+       struct vm_area_struct *vma = fe->vma;
+
+       pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+       /*
+        * We are going to consume the prealloc table,
+        * count that as nr_ptes.
+        */
+       atomic_long_inc(&vma->vm_mm->nr_ptes);
+       fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
        struct vm_area_struct *vma = fe->vma;
        ret = VM_FAULT_FALLBACK;
        page = compound_head(page);
 
+       /*
+        * Archs like ppc64 need additional space to store information
+        * related to pte entry. Use the preallocated table for that.
+        */
+       if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+               fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+               if (!fe->prealloc_pte)
+                       return VM_FAULT_OOM;
+               smp_wmb(); /* See comment in __pte_alloc() */
+       }
+
        fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
        if (unlikely(!pmd_none(*fe->pmd)))
                goto out;
 
        add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
        page_add_file_rmap(page, true);
+       /*
+        * deposit and withdraw with pmd lock held
+        */
+       if (arch_needs_pgtable_deposit())
+               deposit_prealloc_pte(fe);
 
        set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 
        ret = 0;
        count_vm_event(THP_FILE_MAPPED);
 out:
+       /*
+        * If we are going to fallback to pte mapping, do a
+        * withdraw with pmd lock held.
+        */
+       if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+               fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+                                                              fe->pmd);
        spin_unlock(fe->ptl);
        return ret;
 }
 
                ret = do_set_pmd(fe, page);
                if (ret != VM_FAULT_FALLBACK)
-                       return ret;
+                       goto fault_handled;
        }
 
        if (!fe->pte) {
                ret = pte_alloc_one_map(fe);
                if (ret)
-                       return ret;
+                       goto fault_handled;
        }
 
        /* Re-check under ptl */
-       if (unlikely(!pte_none(*fe->pte)))
-               return VM_FAULT_NOPAGE;
+       if (unlikely(!pte_none(*fe->pte))) {
+               ret = VM_FAULT_NOPAGE;
+               goto fault_handled;
+       }
 
        flush_icache_page(vma, page);
        entry = mk_pte(page, vma->vm_page_prot);
 
        /* no need to invalidate: a not-present page won't be cached */
        update_mmu_cache(vma, fe->address, fe->pte);
+       ret = 0;
 
-       return 0;
+fault_handled:
+       /* preallocated pagetable is unused: free it */
+       if (fe->prealloc_pte) {
+               pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+               fe->prealloc_pte = 0;
+       }
+       return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
 
        fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-       /* preallocated pagetable is unused: free it */
-       if (fe->prealloc_pte) {
-               pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-               fe->prealloc_pte = 0;
-       }
        /* Huge page is mapped? Page fault is solved */
        if (pmd_trans_huge(*fe->pmd)) {
                ret = VM_FAULT_NOPAGE;