* mlock is usually called at the end of page_add_*_rmap(), munlock at
  * the end of page_remove_rmap(); but new anon folios are managed by
  * folio_add_lru_vma() calling mlock_new_folio().
- *
- * @compound is used to include pmd mappings of THPs, but filter out
- * pte mappings of THPs, which cannot be consistently counted: a pte
- * mapping of the THP head cannot be distinguished by the page alone.
  */
 void mlock_folio(struct folio *folio);
 static inline void mlock_vma_folio(struct folio *folio,
-                       struct vm_area_struct *vma, bool compound)
+                               struct vm_area_struct *vma)
 {
        /*
         * The VM_SPECIAL check here serves two purposes.
         * 1) VM_IO check prevents migration from double-counting during mlock.
         * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
         *    is never left set on a VM_SPECIAL vma, there is an interval while
         *    file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
         *    still be set while VM_SPECIAL bits are added: so ignore it then.
         */
-       if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
-           (compound || !folio_test_large(folio)))
+       if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED))
                mlock_folio(folio);
 }
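
The flag test above depends on VM_SPECIAL being a mask of several bits: the
folio is mlocked only when VM_LOCKED is set and none of the special bits are.
Below is a standalone userspace sketch of that check (not kernel code; the
flag values are copied from include/linux/mm.h purely for illustration):

#include <stdio.h>

#define VM_PFNMAP       0x00000400UL
#define VM_LOCKED       0x00002000UL
#define VM_IO           0x00004000UL
#define VM_DONTEXPAND   0x00040000UL
#define VM_MIXEDMAP     0x10000000UL
#define VM_SPECIAL      (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

/* True only when VM_LOCKED is set and no VM_SPECIAL bit is set. */
static int want_mlock(unsigned long vm_flags)
{
        return (vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
}

int main(void)
{
        printf("%d\n", want_mlock(VM_LOCKED));          /* 1 */
        printf("%d\n", want_mlock(VM_LOCKED | VM_IO));  /* 0 */
        printf("%d\n", want_mlock(VM_DONTEXPAND));      /* 0 */
        return 0;
}
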
 
 void munlock_folio(struct folio *folio);
 static inline void munlock_vma_folio(struct folio *folio,
-                       struct vm_area_struct *vma, bool compound)
+                                       struct vm_area_struct *vma)
 {
-       if (unlikely(vma->vm_flags & VM_LOCKED) &&
-           (compound || !folio_test_large(folio)))
+       /*
+        * Always munlock when this function is called. Ideally, munlock
+        * would only be done when unmapping a page leaves the folio no
+        * longer fully mapped in the VMA.
+        *
+        * But that is not easy to confirm here, so always munlock the
+        * folio and let page reclaim correct it if that was wrong.
+        */
+       if (unlikely(vma->vm_flags & VM_LOCKED))
                munlock_folio(folio);
 }
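
The comment above relies on a correction loop: munlocking too eagerly is safe
because page reclaim, when it walks the rmap and still finds the folio mapped
into a VM_LOCKED VMA, will mlock it again rather than reclaim it. A toy
userspace model of that back-and-forth, with made-up structures and names
(nothing here is kernel API):

#include <stdbool.h>
#include <stdio.h>

struct toy_mapping { bool vm_locked; bool mapped; };

struct toy_folio {
        bool mlocked;
        struct toy_mapping maps[2];
};

/* One mapping of the folio goes away: drop the mlock unconditionally. */
static void toy_munlock_on_unmap(struct toy_folio *f, int idx)
{
        f->maps[idx].mapped = false;
        f->mlocked = false;             /* possibly too aggressive */
}

/* Simplified reclaim pass: a remaining VM_LOCKED mapping restores the mlock. */
static void toy_reclaim_scan(struct toy_folio *f)
{
        for (int i = 0; i < 2; i++)
                if (f->maps[i].mapped && f->maps[i].vm_locked)
                        f->mlocked = true;
}

int main(void)
{
        struct toy_folio f = {
                .mlocked = true,
                .maps = { { .vm_locked = true, .mapped = true },
                          { .vm_locked = true, .mapped = true } },
        };

        toy_munlock_on_unmap(&f, 1);    /* unmap one of two locked mappings */
        printf("after unmap:   mlocked=%d\n", f.mlocked);   /* 0: over-munlocked */
        toy_reclaim_scan(&f);
        printf("after reclaim: mlocked=%d\n", f.mlocked);   /* 1: corrected */
        return 0;
}
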
 
 
        unsigned long vm_flags;
        struct mem_cgroup *memcg;
 };
+
 /*
  * arg: folio_referenced_arg will be passed
  */
        struct folio_referenced_arg *pra = arg;
        DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
        int referenced = 0;
+       unsigned long start = address, ptes = 0;
 
        while (page_vma_mapped_walk(&pvmw)) {
                address = pvmw.address;
 
-               if ((vma->vm_flags & VM_LOCKED) &&
-                   (!folio_test_large(folio) || !pvmw.pte)) {
-                       /* Restore the mlock which got missed */
-                       mlock_vma_folio(folio, vma, !pvmw.pte);
-                       page_vma_mapped_walk_done(&pvmw);
-                       pra->vm_flags |= VM_LOCKED;
-                       return false; /* To break the loop */
+               if (vma->vm_flags & VM_LOCKED) {
+                       if (!folio_test_large(folio) || !pvmw.pte) {
+                               /* Restore the mlock which got missed */
+                               mlock_vma_folio(folio, vma);
+                               page_vma_mapped_walk_done(&pvmw);
+                               pra->vm_flags |= VM_LOCKED;
+                               return false; /* To break the loop */
+                       }
+                       /*
+                        * A large folio that is fully mapped to the VMA
+                        * is handled after the pvmw loop.
+                        *
+                        * A large folio that crosses VMA boundaries is
+                        * expected to be picked up by page reclaim, but
+                        * references from pages inside the VM_LOCKED
+                        * range must be skipped: page reclaim should
+                        * only count references from pages outside the
+                        * VM_LOCKED VMA range.
+                        */
+                       ptes++;
+                       pra->mapcount--;
+                       continue;
                }
 
                if (pvmw.pte) {
                pra->mapcount--;
        }
 
+       if ((vma->vm_flags & VM_LOCKED) &&
+                       folio_test_large(folio) &&
+                       folio_within_vma(folio, vma)) {
+               unsigned long s_align, e_align;
+
+               s_align = ALIGN_DOWN(start, PMD_SIZE);
+               e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
+
+               /* folio doesn't cross a page table boundary and is fully mapped */
+               if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
+                       /* Restore the mlock which got missed */
+                       mlock_vma_folio(folio, vma);
+                       pra->vm_flags |= VM_LOCKED;
+                       return false; /* To break the loop */
+               }
+       }
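
The s_align/e_align test above encodes "all of the folio's PTEs live in one
page table": the folio stays within a single PMD-sized region exactly when the
first and last byte of its mapping round down to the same PMD_SIZE boundary.
A standalone sketch of that arithmetic, assuming 4KiB pages, a 2MiB PMD_SIZE
(x86-64 values) and a simplified power-of-two ALIGN_DOWN:

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PMD_SIZE        (512UL * PAGE_SIZE)             /* 2MiB */
#define ALIGN_DOWN(x, a)        ((x) & ~((a) - 1))

/* True if [start, start + size) lies inside one PMD-sized region. */
static int within_one_pmd(unsigned long start, unsigned long size)
{
        return ALIGN_DOWN(start, PMD_SIZE) ==
               ALIGN_DOWN(start + size - 1, PMD_SIZE);
}

int main(void)
{
        unsigned long folio_size = 16 * PAGE_SIZE;      /* a 64KiB folio */

        /* Mapped inside one 2MiB region: all PTEs share a page table. */
        printf("%d\n", within_one_pmd(0x200000UL + 0x10000UL, folio_size)); /* 1 */

        /* Mapped so that it straddles the boundary at 0x400000: they don't. */
        printf("%d\n", within_one_pmd(0x400000UL - 0x8000UL, folio_size));  /* 0 */
        return 0;
}
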
+
        if (referenced)
                folio_clear_idle(folio);
        if (folio_test_clear_young(folio))
                          (folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) &&
                         PageAnonExclusive(page), folio);
 
-       mlock_vma_folio(folio, vma, compound);
+       /*
+        * A large folio should only be mlocked if it is fully mapped to the
+        * VMA, which is not easy to check here. So only mlock normal 4K
+        * folios and leave large folios to page reclaim.
+        */
+       if (!folio_test_large(folio))
+               mlock_vma_folio(folio, vma);
 }
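
What makes "fully mapped" hard to check at rmap-add time is that this path only
sees one page of the folio at one address; the most it could cheaply derive is
the VA range the whole folio would span, which only says the folio could fit in
the VMA, not that every PTE is actually present. A standalone sketch of that
range computation (4KiB pages assumed; the addresses and helper name are made
up). Actually counting the present PTEs is what the folio_referenced_one()
hunk above does with its ptes counter:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

/*
 * Given the VA of one mapped page and its index inside the folio, compute the
 * VA range the whole folio would occupy if all of its pages were mapped
 * contiguously around this one.
 */
static void folio_va_range(unsigned long page_addr, unsigned long page_idx,
                           unsigned long nr_pages,
                           unsigned long *start, unsigned long *end)
{
        *start = page_addr - (page_idx << PAGE_SHIFT);
        *end   = *start + (nr_pages << PAGE_SHIFT);
}

int main(void)
{
        unsigned long vm_start = 0x7f0000200000UL, vm_end = 0x7f0000240000UL;
        unsigned long start, end;

        /* Page 3 of a 16-page folio, mapped at vm_start + 3 pages: fits. */
        folio_va_range(vm_start + 3 * PAGE_SIZE, 3, 16, &start, &end);
        printf("fits: %d\n", start >= vm_start && end <= vm_end);       /* 1 */

        /* Same address, but as page 4 of the folio: the folio would start
         * before vm_start, so it cannot be fully mapped in this VMA. */
        folio_va_range(vm_start + 3 * PAGE_SIZE, 4, 16, &start, &end);
        printf("fits: %d\n", start >= vm_start && end <= vm_end);       /* 0 */
        return 0;
}
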
 
 /**
        if (nr)
                __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
 
-       mlock_vma_folio(folio, vma, compound);
+       /* See comments in page_add_anon_rmap() */
+       if (!folio_test_large(folio))
+               mlock_vma_folio(folio, vma);
 }
 
 /**
         * it's only reliable while mapped.
         */
 
-       munlock_vma_folio(folio, vma, compound);
+       munlock_vma_folio(folio, vma);
 }
 
 /*
                if (!(flags & TTU_IGNORE_MLOCK) &&
                    (vma->vm_flags & VM_LOCKED)) {
                        /* Restore the mlock which got missed */
-                       mlock_vma_folio(folio, vma, false);
+                       if (!folio_test_large(folio))
+                               mlock_vma_folio(folio, vma);
                        page_vma_mapped_walk_done(&pvmw);
                        ret = false;
                        break;