zap_page_range was originally designed to unmap pages within an address
range that could span multiple vmas.  While working on [1], it was
discovered that all callers of zap_page_range pass a range entirely within
a single vma.  In addition, the mmu notification call within
zap_page_range does not correctly handle ranges that span multiple vmas.
When crossing a vma boundary, a new mmu_notifier_range_init/end call pair
with the new vma should be made.

Instead of fixing zap_page_range, do the following:
- Create a new routine zap_vma_pages() that will remove all pages within
  the passed vma.  Most users of zap_page_range pass the entire vma and
  can use this new routine.
- For callers of zap_page_range not passing the entire vma, instead call
  zap_page_range_single().
- Remove zap_page_range.
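
As a sketch of the conversion pattern (addr and size below are
illustrative names; the actual hunks follow), a caller that previously
zapped an entire vma with

	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);

now simply calls

	zap_vma_pages(vma);

an inline wrapper around zap_page_range_single().  Callers that zap only
part of a vma call

	zap_page_range_single(vma, addr, size, NULL);

directly, passing NULL for the optional zap_details.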

[1] https://lore.kernel.org/linux-mm/20221114235507.294320-2-mike.kravetz@oracle.com/
Link: https://lkml.kernel.org/r/20230104002732.232573-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Peter Xu <peterx@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>	[s390]
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 
        mmap_read_lock(mm);
 
        for_each_vma(vmi, vma) {
-               unsigned long size = vma->vm_end - vma->vm_start;
-
                if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
 #ifdef CONFIG_COMPAT_VDSO
                if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
 #endif
        }
 
 
 
        mmap_read_lock(mm);
        for_each_vma(vmi, vma) {
-               unsigned long size = vma->vm_end - vma->vm_start;
-
                if (vma_is_special_mapping(vma, &vvar_spec))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
        }
        mmap_read_unlock(mm);
 
 
        /*
         * When the LPAR lost credits due to core removal or during
         * migration, invalidate the existing mapping for the current
-        * paste addresses and set windows in-active (zap_page_range in
+        * paste addresses and set windows in-active (zap_vma_pages in
         * reconfig_close_windows()).
         * New mapping will be done later after migration or new credits
         * available. So continue to receive faults if the user space
 
                 * is done before the original mmap() and after the ioctl.
                 */
                if (vma)
-                       zap_page_range(vma, vma->vm_start,
-                                       vma->vm_end - vma->vm_start);
+                       zap_vma_pages(vma);
 
                mmap_write_unlock(task_ref->mm);
                mutex_unlock(&task_ref->mmap_mutex);
 
        mmap_read_lock(mm);
 
        for_each_vma(vmi, vma) {
-               unsigned long size = vma->vm_end - vma->vm_start;
-
                if (vma_is_special_mapping(vma, vdso_info.dm))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
 #ifdef CONFIG_COMPAT
                if (vma_is_special_mapping(vma, compat_vdso_info.dm))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
 #endif
        }
 
 
 
        mmap_read_lock(mm);
        for_each_vma(vmi, vma) {
-               unsigned long size = vma->vm_end - vma->vm_start;
-
                if (!vma_is_special_mapping(vma, &vvar_mapping))
                        continue;
-               zap_page_range(vma, vma->vm_start, size);
+               zap_vma_pages(vma);
                break;
        }
        mmap_read_unlock(mm);
 
                if (is_vm_hugetlb_page(vma))
                        continue;
                size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-               zap_page_range(vma, vmaddr, size);
+               zap_page_range_single(vma, vmaddr, size, NULL);
        }
        mmap_read_unlock(gmap->mm);
 }
 
 
        mmap_read_lock(mm);
        for_each_vma(vmi, vma) {
-               unsigned long size = vma->vm_end - vma->vm_start;
-
                if (vma_is_special_mapping(vma, &vvar_mapping))
-                       zap_page_range(vma, vma->vm_start, size);
+                       zap_vma_pages(vma);
        }
        mmap_read_unlock(mm);
 
 
        if (vma) {
                trace_binder_unmap_user_start(alloc, index);
 
-               zap_page_range(vma, page_addr, PAGE_SIZE);
+               zap_page_range_single(vma, page_addr, PAGE_SIZE, NULL);
 
                trace_binder_unmap_user_end(alloc, index);
        }
 
 
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
                  unsigned long size);
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-                   unsigned long size);
 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
                           unsigned long size, struct zap_details *details);
+static inline void zap_vma_pages(struct vm_area_struct *vma)
+{
+       zap_page_range_single(vma, vma->vm_start,
+                             vma->vm_end - vma->vm_start, NULL);
+}
 void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
                struct vm_area_struct *start_vma, unsigned long start,
                unsigned long end);
 
        mmu_notifier_invalidate_range_end(&range);
 }
 
-/**
- * zap_page_range - remove user pages in a given range
- * @vma: vm_area_struct holding the applicable pages
- * @start: starting address of pages to zap
- * @size: number of bytes to zap
- *
- * Caller must protect the VMA list
- */
-void zap_page_range(struct vm_area_struct *vma, unsigned long start,
-               unsigned long size)
-{
-       struct maple_tree *mt = &vma->vm_mm->mm_mt;
-       unsigned long end = start + size;
-       struct mmu_notifier_range range;
-       struct mmu_gather tlb;
-       MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
-
-       lru_add_drain();
-       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               start, start + size);
-       tlb_gather_mmu(&tlb, vma->vm_mm);
-       update_hiwater_rss(vma->vm_mm);
-       mmu_notifier_invalidate_range_start(&range);
-       do {
-               unmap_single_vma(&tlb, vma, start, range.end, NULL);
-       } while ((vma = mas_find(&mas, end - 1)) != NULL);
-       mmu_notifier_invalidate_range_end(&range);
-       tlb_finish_mmu(&tlb);
-}
-
 /**
  * zap_page_range_single - remove user pages in a given range
  * @vma: vm_area_struct holding the applicable pages
 
  *
  * The caller must hold lock_page_memcg().  Most callers have the folio
  * locked.  A few have the folio blocked from truncation through other
- * means (eg zap_page_range() has it mapped and is holding the page table
+ * means (eg zap_vma_pages() has it mapped and is holding the page table
  * lock).  This can also be called from mark_buffer_dirty(), which I
  * cannot prove is always protected against truncate.
  */
 
                maybe_zap_len = total_bytes_to_map -  /* All bytes to map */
                                *length + /* Mapped or pending */
                                (pages_remaining * PAGE_SIZE); /* Failed map. */
-               zap_page_range(vma, *address, maybe_zap_len);
+               zap_page_range_single(vma, *address, maybe_zap_len, NULL);
                err = 0;
        }
 
                unsigned long leftover_pages = pages_remaining;
                int bytes_mapped;
 
-               /* We called zap_page_range, try to reinsert. */
+               /* We called zap_page_range_single, try to reinsert. */
                err = vm_insert_pages(vma, *address,
                                      pending_pages,
                                      &pages_remaining);
        total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
        if (total_bytes_to_map) {
                if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
-                       zap_page_range(vma, address, total_bytes_to_map);
+                       zap_page_range_single(vma, address, total_bytes_to_map,
+                                             NULL);
                zc->length = total_bytes_to_map;
                zc->recv_skip_hint = 0;
        } else {