__mmu_notifier_mm_destroy(mm);
 }
 
-/*
- * These two macros will sometime replace ptep_clear_flush.
- * ptep_clear_flush is implemented as macro itself, so this also is
- * implemented as a macro until ptep_clear_flush will converted to an
- * inline function, to diminish the risk of compilation failure. The
- * invalidate_page method over time can be moved outside the PT lock
- * and these two macros can be later removed.
- */
-#define ptep_clear_flush_notify(__vma, __address, __ptep)              \
-({                                                                     \
-       pte_t __pte;                                                    \
-       struct vm_area_struct *___vma = __vma;                          \
-       unsigned long ___address = __address;                           \
-       __pte = ptep_clear_flush(___vma, ___address, __ptep);           \
-       mmu_notifier_invalidate_page(___vma->vm_mm, ___address);        \
-       __pte;                                                          \
-})
-
-#define pmdp_clear_flush_notify(__vma, __address, __pmdp)              \
-({                                                                     \
-       pmd_t __pmd;                                                    \
-       struct vm_area_struct *___vma = __vma;                          \
-       unsigned long ___address = __address;                           \
-       VM_BUG_ON(__address & ~HPAGE_PMD_MASK);                         \
-       mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,  \
-                                           (__address)+HPAGE_PMD_SIZE);\
-       __pmd = pmdp_clear_flush(___vma, ___address, __pmdp);           \
-       mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,    \
-                                         (__address)+HPAGE_PMD_SIZE);  \
-       __pmd;                                                          \
-})
-
-#define pmdp_splitting_flush_notify(__vma, __address, __pmdp)          \
-({                                                                     \
-       struct vm_area_struct *___vma = __vma;                          \
-       unsigned long ___address = __address;                           \
-       VM_BUG_ON(__address & ~HPAGE_PMD_MASK);                         \
-       mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,  \
-                                           (__address)+HPAGE_PMD_SIZE);\
-       pmdp_splitting_flush(___vma, ___address, __pmdp);               \
-       mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,    \
-                                         (__address)+HPAGE_PMD_SIZE);  \
-})
-
 #define ptep_clear_flush_young_notify(__vma, __address, __ptep)                \
 ({                                                                     \
        int __young;                                                    \
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
-#define ptep_clear_flush_notify ptep_clear_flush
-#define pmdp_clear_flush_notify pmdp_clear_flush
-#define pmdp_splitting_flush_notify pmdp_splitting_flush
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
 
                if (pte) {
                        /* Nuke the page table entry. */
                        flush_cache_page(vma, address, pte_pfn(*pte));
-                       pteval = ptep_clear_flush_notify(vma, address, pte);
+                       pteval = ptep_clear_flush(vma, address, pte);
                        page_remove_rmap(page);
                        dec_mm_counter(mm, MM_FILEPAGES);
                        BUG_ON(pte_dirty(pteval));
                        pte_unmap_unlock(pte, ptl);
+                       /* must invalidate_page _before_ freeing the page */
+                       mmu_notifier_invalidate_page(mm, address);
                        page_cache_release(page);
                }
        }
 
        pmd_t _pmd;
        int ret = 0, i;
        struct page **pages;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
 
        pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
                        GFP_KERNEL);
                cond_resched();
        }
 
+       mmun_start = haddr;
+       mmun_end   = haddr + HPAGE_PMD_SIZE;
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        spin_lock(&mm->page_table_lock);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto out_free_pages;
        VM_BUG_ON(!PageHead(page));
 
-       pmdp_clear_flush_notify(vma, haddr, pmd);
+       pmdp_clear_flush(vma, haddr, pmd);
        /* leave pmd empty until pte is filled */
 
        pgtable = pgtable_trans_huge_withdraw(mm);
        page_remove_rmap(page);
        spin_unlock(&mm->page_table_lock);
 
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
        ret |= VM_FAULT_WRITE;
        put_page(page);
 
 
 out_free_pages:
        spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        mem_cgroup_uncharge_start();
        for (i = 0; i < HPAGE_PMD_NR; i++) {
                mem_cgroup_uncharge_page(pages[i]);
        int ret = 0;
        struct page *page, *new_page;
        unsigned long haddr;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
 
        VM_BUG_ON(!vma->anon_vma);
        spin_lock(&mm->page_table_lock);
        copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
        __SetPageUptodate(new_page);
 
+       mmun_start = haddr;
+       mmun_end   = haddr + HPAGE_PMD_SIZE;
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        spin_lock(&mm->page_table_lock);
        put_page(page);
        if (unlikely(!pmd_same(*pmd, orig_pmd))) {
                spin_unlock(&mm->page_table_lock);
                mem_cgroup_uncharge_page(new_page);
                put_page(new_page);
-               goto out;
+               goto out_mn;
        } else {
                pmd_t entry;
                VM_BUG_ON(!PageHead(page));
                entry = mk_pmd(new_page, vma->vm_page_prot);
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                entry = pmd_mkhuge(entry);
-               pmdp_clear_flush_notify(vma, haddr, pmd);
+               pmdp_clear_flush(vma, haddr, pmd);
                page_add_new_anon_rmap(new_page, vma, haddr);
                set_pmd_at(mm, haddr, pmd, entry);
                update_mmu_cache(vma, address, pmd);
                put_page(page);
                ret |= VM_FAULT_WRITE;
        }
-out_unlock:
        spin_unlock(&mm->page_table_lock);
+out_mn:
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
        return ret;
+out_unlock:
+       spin_unlock(&mm->page_table_lock);
+       return ret;
 }
 
 struct page *follow_trans_huge_pmd(struct mm_struct *mm,
        struct mm_struct *mm = vma->vm_mm;
        pmd_t *pmd;
        int ret = 0;
+       /* For mmu_notifiers */
+       const unsigned long mmun_start = address;
+       const unsigned long mmun_end   = address + HPAGE_PMD_SIZE;
 
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        spin_lock(&mm->page_table_lock);
        pmd = page_check_address_pmd(page, mm, address,
                                     PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG);
                 * and it won't wait on the anon_vma->root->mutex to
                 * serialize against split_huge_page*.
                 */
-               pmdp_splitting_flush_notify(vma, address, pmd);
+               pmdp_splitting_flush(vma, address, pmd);
                ret = 1;
        }
        spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
        return ret;
 }
        spinlock_t *ptl;
        int isolated;
        unsigned long hstart, hend;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
 
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
        pte = pte_offset_map(pmd, address);
        ptl = pte_lockptr(mm, pmd);
 
+       mmun_start = address;
+       mmun_end   = address + HPAGE_PMD_SIZE;
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        spin_lock(&mm->page_table_lock); /* probably unnecessary */
        /*
         * After this gup_fast can't run anymore. This also removes
         * huge and small TLB entries for the same virtual address
         * to avoid the risk of CPU bugs in that area.
         */
-       _pmd = pmdp_clear_flush_notify(vma, address, pmd);
+       _pmd = pmdp_clear_flush(vma, address, pmd);
        spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
        spin_lock(ptl);
        isolated = __collapse_huge_page_isolate(vma, address, pte);
 
        struct page *page;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       const unsigned long mmun_start = start; /* For mmu_notifiers */
+       const unsigned long mmun_end   = end;   /* For mmu_notifiers */
 
        WARN_ON(!is_vm_hugetlb_page(vma));
        BUG_ON(start & ~huge_page_mask(h));
        BUG_ON(end & ~huge_page_mask(h));
 
        tlb_start_vma(tlb, vma);
-       mmu_notifier_invalidate_range_start(mm, start, end);
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 again:
        spin_lock(&mm->page_table_lock);
        for (address = start; address < end; address += sz) {
                if (address < end && !ref_page)
                        goto again;
        }
-       mmu_notifier_invalidate_range_end(mm, start, end);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        tlb_end_vma(tlb, vma);
 }
 
        struct page *old_page, *new_page;
        int avoidcopy;
        int outside_reserve = 0;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
 
        old_page = pte_page(pte);
 
                            pages_per_huge_page(h));
        __SetPageUptodate(new_page);
 
+       mmun_start = address & huge_page_mask(h);
+       mmun_end = mmun_start + huge_page_size(h);
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        /*
         * Retake the page_table_lock to check for racing updates
         * before the page tables are altered
        ptep = huge_pte_offset(mm, address & huge_page_mask(h));
        if (likely(pte_same(huge_ptep_get(ptep), pte))) {
                /* Break COW */
-               mmu_notifier_invalidate_range_start(mm,
-                       address & huge_page_mask(h),
-                       (address & huge_page_mask(h)) + huge_page_size(h));
                huge_ptep_clear_flush(vma, address, ptep);
                set_huge_pte_at(mm, address, ptep,
                                make_huge_pte(vma, new_page, 1));
                hugepage_add_new_anon_rmap(new_page, vma, address);
                /* Make the old page be freed below */
                new_page = old_page;
-               mmu_notifier_invalidate_range_end(mm,
-                       address & huge_page_mask(h),
-                       (address & huge_page_mask(h)) + huge_page_size(h));
        }
+       spin_unlock(&mm->page_table_lock);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+       /* Caller expects lock to be held */
+       spin_lock(&mm->page_table_lock);
        page_cache_release(new_page);
        page_cache_release(old_page);
        return 0;
 
        add_taint(TAINT_BAD_PAGE);
 }
 
-static inline int is_cow_mapping(vm_flags_t flags)
+static inline bool is_cow_mapping(vm_flags_t flags)
 {
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
        unsigned long next;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
+       bool is_cow;
        int ret;
 
        /*
         * parent mm. And a permission downgrade will only happen if
         * is_cow_mapping() returns true.
         */
-       if (is_cow_mapping(vma->vm_flags))
-               mmu_notifier_invalidate_range_start(src_mm, addr, end);
+       is_cow = is_cow_mapping(vma->vm_flags);
+       mmun_start = addr;
+       mmun_end   = end;
+       if (is_cow)
+               mmu_notifier_invalidate_range_start(src_mm, mmun_start,
+                                                   mmun_end);
 
        ret = 0;
        dst_pgd = pgd_offset(dst_mm, addr);
                }
        } while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
-       if (is_cow_mapping(vma->vm_flags))
-               mmu_notifier_invalidate_range_end(src_mm,
-                                                 vma->vm_start, end);
+       if (is_cow)
+               mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end);
        return ret;
 }
 
                spinlock_t *ptl, pte_t orig_pte)
        __releases(ptl)
 {
-       struct page *old_page, *new_page;
+       struct page *old_page, *new_page = NULL;
        pte_t entry;
        int ret = 0;
        int page_mkwrite = 0;
        } else
                mem_cgroup_uncharge_page(new_page);
 
-       if (new_page)
-               page_cache_release(new_page);
 unlock:
        pte_unmap_unlock(page_table, ptl);
+       if (new_page) {
+               if (new_page == old_page)
+                       /* COW happened, notify before releasing old_page */
+                       mmu_notifier_invalidate_page(mm, address);
+               page_cache_release(new_page);
+       }
        if (old_page) {
                /*
                 * Don't let another task, with possibly unlocked vma,
 
        unsigned long extent, next, old_end;
        pmd_t *old_pmd, *new_pmd;
        bool need_flush = false;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
 
        old_end = old_addr + len;
        flush_cache_range(vma, old_addr, old_end);
 
-       mmu_notifier_invalidate_range_start(vma->vm_mm, old_addr, old_end);
+       mmun_start = old_addr;
+       mmun_end   = old_end;
+       mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
 
        for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
                cond_resched();
        if (likely(need_flush))
                flush_tlb_range(vma, old_end-len, old_addr);
 
-       mmu_notifier_invalidate_range_end(vma->vm_mm, old_end-len, old_end);
+       mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
        return len + old_addr - old_end;        /* how much done */
 }
 
                pte_t entry;
 
                flush_cache_page(vma, address, pte_pfn(*pte));
-               entry = ptep_clear_flush_notify(vma, address, pte);
+               entry = ptep_clear_flush(vma, address, pte);
                entry = pte_wrprotect(entry);
                entry = pte_mkclean(entry);
                set_pte_at(mm, address, pte, entry);
        }
 
        pte_unmap_unlock(pte, ptl);
+
+       if (ret)
+               mmu_notifier_invalidate_page(mm, address);
 out:
        return ret;
 }
 
        /* Nuke the page table entry. */
        flush_cache_page(vma, address, page_to_pfn(page));
-       pteval = ptep_clear_flush_notify(vma, address, pte);
+       pteval = ptep_clear_flush(vma, address, pte);
 
        /* Move the dirty bit to the physical page now the pte is gone. */
        if (pte_dirty(pteval))
 
 out_unmap:
        pte_unmap_unlock(pte, ptl);
+       if (ret != SWAP_FAIL)
+               mmu_notifier_invalidate_page(mm, address);
 out:
        return ret;
 
        spinlock_t *ptl;
        struct page *page;
        unsigned long address;
+       unsigned long mmun_start;       /* For mmu_notifiers */
+       unsigned long mmun_end;         /* For mmu_notifiers */
        unsigned long end;
        int ret = SWAP_AGAIN;
        int locked_vma = 0;
        if (!pmd_present(*pmd))
                return ret;
 
+       mmun_start = address;
+       mmun_end   = end;
+       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        /*
         * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
         * keep the sem while scanning the cluster for mlocking pages.
 
                /* Nuke the page table entry. */
                flush_cache_page(vma, address, pte_pfn(*pte));
-               pteval = ptep_clear_flush_notify(vma, address, pte);
+               pteval = ptep_clear_flush(vma, address, pte);
 
                /* If nonlinear, store the file page offset in the pte. */
                if (page->index != linear_page_index(vma, address))
                (*mapcount)--;
        }
        pte_unmap_unlock(pte - 1, ptl);
+       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        if (locked_vma)
                up_read(&vma->vm_mm->mmap_sem);
        return ret;