#ifdef CONFIG_MMU
 #define MMAP_LOTSAMISS  (100)
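+
+/*
+ * maybe_unlock_mmap_for_io - pin the faulting file and drop the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @fpin - the file already pinned for this fault, or NULL.
+ *
+ * If the fault flags allow a retry, take a reference on vmf->vma->vm_file and
+ * drop the mmap_sem so IO can be done without holding it.  The returned file
+ * must be fput()'ed by the caller once the fault is finished.
+ */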
+static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+                                            struct file *fpin)
+{
+       int flags = vmf->flags;
+
+       if (fpin)
+               return fpin;
+
+       /*
+        * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+        * anything, so we only pin the file and drop the mmap_sem if
+        * FAULT_FLAG_ALLOW_RETRY is set and FAULT_FLAG_RETRY_NOWAIT is not.
+        */
+       if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
+           FAULT_FLAG_ALLOW_RETRY) {
+               fpin = get_file(vmf->vma->vm_file);
+               up_read(&vmf->vma->vm_mm->mmap_sem);
+       }
+       return fpin;
+}
+
+/*
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * @vmf - the vm_fault for this fault.
+ * @page - the page to lock.
+ * @fpin - the pointer to the file we may pin (or is already pinned).
+ *
+ * This works similarly to lock_page_or_retry in that it can drop the mmap_sem.
+ * It differs in that it actually returns the page locked if it returns 1, and 0
+ * if it couldn't lock the page.  If we did have to drop the mmap_sem then fpin
+ * will point to the pinned file and needs to be fput()'ed at a later point.
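+ *
+ * Once the mmap_sem has been dropped the fault can only complete with
+ * VM_FAULT_RETRY, so callers that come back with a non-NULL *fpin must drop
+ * their page reference, fput() the file and retry (see the out_retry label in
+ * filemap_fault() below).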
+ */
+static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+                                    struct file **fpin)
+{
+       if (trylock_page(page))
+               return 1;
+
+       if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+               return 0;
+
+       *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
+       if (vmf->flags & FAULT_FLAG_KILLABLE) {
+               if (__lock_page_killable(page)) {
+                       /*
+                        * We didn't have the right flags to drop the mmap_sem,
+                        * but all fault handlers only check for fatal signals
+                        * if we return VM_FAULT_RETRY, so we need to drop the
+                        * mmap_sem here and return 0 if we don't have an fpin.
+                        */
+                       if (*fpin == NULL)
+                               up_read(&vmf->vma->vm_mm->mmap_sem);
+                       return 0;
+               }
+       } else
+               __lock_page(page);
+       return 1;
+}
+
 
 /*
- * Synchronous readahead happens when we don't even find
- * a page in the page cache at all.
+ * Synchronous readahead happens when we don't even find a page in the page
+ * cache at all.  We don't want to perform IO under the mmap_sem, so if we have
+ * to drop the mmap_sem we pin the file to keep it from going away underneath
+ * us, and return it.  If we didn't pin a file then we return NULL.  The file
+ * that is returned needs to be fput()'ed when we're done with it.
  */
-static void do_sync_mmap_readahead(struct vm_fault *vmf)
+static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 {
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       struct file *fpin = NULL;
        pgoff_t offset = vmf->pgoff;
 
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ)
-               return;
+               return fpin;
        if (!ra->ra_pages)
-               return;
+               return fpin;
 
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
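+               /* Drop the mmap_sem (if we can) before doing the readahead IO. */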
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                page_cache_sync_readahead(mapping, ra, file, offset,
                                          ra->ra_pages);
-               return;
+               return fpin;
        }
 
        /* Avoid banging the cache line if not needed */
        if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
                ra->mmap_miss++;

        /*
         * Do we miss much more than hit in this file? If so,
         * stop bothering with read-ahead. It will only hurt.
         */
        if (ra->mmap_miss > MMAP_LOTSAMISS)
-               return;
+               return fpin;
 
        /*
         * mmap read-around
         */
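+       /* Drop the mmap_sem (if we can) before submitting the read-around IO. */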
+       fpin = maybe_unlock_mmap_for_io(vmf, fpin);
        ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
        ra_submit(ra, mapping, file);
+       return fpin;
 }
 
 /*
  * Asynchronous readahead happens when we find the page and PG_readahead,
- * so we want to possibly extend the readahead further..
+ * so we want to possibly extend the readahead further.  We return the file that
+ * was pinned if we have to drop the mmap_sem in order to do IO.
  */
-static void do_async_mmap_readahead(struct vm_fault *vmf,
-                                   struct page *page)
+static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
+                                           struct page *page)
 {
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       struct file *fpin = NULL;
        pgoff_t offset = vmf->pgoff;
 
        /* If we don't want any read-ahead, don't bother */
        if (vmf->vma->vm_flags & VM_RAND_READ)
-               return;
+               return fpin;
        if (ra->mmap_miss > 0)
                ra->mmap_miss--;
-       if (PageReadahead(page))
+       if (PageReadahead(page)) {
+               fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                page_cache_async_readahead(mapping, ra, file,
                                           page, offset, ra->ra_pages);
+       }
+       return fpin;
 }
 
 /**
 {
        int error;
        struct file *file = vmf->vma->vm_file;
+       struct file *fpin = NULL;
        struct address_space *mapping = file->f_mapping;
        struct file_ra_state *ra = &file->f_ra;
        struct inode *inode = mapping->host;
                 * We found the page, so try async readahead before
                 * waiting for the lock.
                 */
-               do_async_mmap_readahead(vmf, page);
+               fpin = do_async_mmap_readahead(vmf, page);
        } else if (!page) {
                /* No page in the page cache at all */
-               do_sync_mmap_readahead(vmf);
                count_vm_event(PGMAJFAULT);
                count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
+               fpin = do_sync_mmap_readahead(vmf);
 retry_find:
                page = pagecache_get_page(mapping, offset,
                                          FGP_CREAT|FGP_FOR_MMAP,
                                          vmf->gfp_mask);
-               if (!page)
+               if (!page) {
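+                       /*
+                        * We may have dropped the mmap_sem, in which case we
+                        * cannot return an error and must retry the fault.
+                        */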
+                       if (fpin)
+                               goto out_retry;
                        return vmf_error(-ENOMEM);
+               }
        }
 
-       if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
-               put_page(page);
-               return ret | VM_FAULT_RETRY;
-       }
+       if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+               goto out_retry;
 
        /* Did it get truncated? */
        if (unlikely(page->mapping != mapping)) {
        if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;
 
+       /*
+        * We've made it this far and we had to drop our mmap_sem; now is the
+        * time to return to the upper layer and have it re-find the vma and
+        * redo the fault.
+        */
+       if (fpin) {
+               unlock_page(page);
+               goto out_retry;
+       }
+
        /*
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
         * and we need to check for errors.
         */
        ClearPageError(page);
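+       /* Drop the mmap_sem (if we can) before the synchronous ->readpage() IO. */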
+       fpin = maybe_unlock_mmap_for_io(vmf, fpin);
        error = mapping->a_ops->readpage(file, page);
        if (!error) {
                wait_on_page_locked(page);
                if (!PageUptodate(page))
                        error = -EIO;
        }
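+       /* If we dropped the mmap_sem for the IO we have to retry the fault. */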
+       if (fpin)
+               goto out_retry;
        put_page(page);
 
        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;

        /* Things didn't work out. Return zero to tell the mm layer so. */
        shrink_readahead_size_eio(file, ra);
        return VM_FAULT_SIGBUS;
+
+out_retry:
+       /*
+        * We dropped the mmap_sem, so we need to return to the fault handler to
+        * re-find the vma and come back and find our hopefully still populated
+        * page.
+        */
+       if (page)
+               put_page(page);
+       if (fpin)
+               fput(fpin);
+       return ret | VM_FAULT_RETRY;
 }
 EXPORT_SYMBOL(filemap_fault);