handle_mm_fault returning VM_FAULT_RETRY or VM_FAULT_COMPLETED means
mmap_lock has been released.  However with per-VMA locks behavior is
different and the caller should still release it.  To make the rules
consistent for the caller, drop the per-VMA lock when returning
VM_FAULT_RETRY or VM_FAULT_COMPLETED.  Currently the only path returning
VM_FAULT_RETRY under per-VMA locks is do_swap_page and no path returns
VM_FAULT_COMPLETED for now.
[willy@infradead.org: fix riscv]
Link: https://lkml.kernel.org/r/CAJuCfpE6GWEx1rPBmNpUfoD5o-gNFz9-UFywzCE2PbEGBiVz7g@mail.gmail.com
Link: https://lkml.kernel.org/r/20230630211957.1341547-4-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Tested-by: Conor Dooley <conor.dooley@microchip.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hillf Danton <hdanton@sina.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michel Lespinasse <michel@lespinasse.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Punit Agrawal <punit.agrawal@bytedance.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
                goto lock_mmap;
        }
        fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
-       vma_end_read(vma);
+       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+               vma_end_read(vma);
 
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 
        }
 
        fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-       vma_end_read(vma);
+       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+               vma_end_read(vma);
 
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 
        }
 
        fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
-       vma_end_read(vma);
+       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+               vma_end_read(vma);
 
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 
                goto lock_mmap;
        }
        fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-       vma_end_read(vma);
+       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+               vma_end_read(vma);
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
                if (likely(!(fault & VM_FAULT_ERROR)))
 
                goto lock_mmap;
        }
        fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-       vma_end_read(vma);
+       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+               vma_end_read(vma);
 
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 
 
        if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
                ret = VM_FAULT_RETRY;
+               vma_end_read(vma);
                goto out;
        }
 
                                 !is_cow_mapping(vma->vm_flags)))
                        return VM_FAULT_SIGSEGV;
        }
+#ifdef CONFIG_PER_VMA_LOCK
+       /*
+        * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of
+        * the assumption that lock is dropped on VM_FAULT_RETRY.
+        */
+       if (WARN_ON_ONCE((*flags &
+                       (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT)) ==
+                       (FAULT_FLAG_VMA_LOCK | FAULT_FLAG_RETRY_NOWAIT)))
+               return VM_FAULT_SIGSEGV;
+#endif
+
        return 0;
 }