gfs2: Fix mmap + page fault deadlocks (part 2)
author		Andreas Gruenbacher <agruenba@redhat.com>
		Thu, 13 May 2021 08:47:59 +0000 (10:47 +0200)
committer	Andreas Gruenbacher <agruenba@redhat.com>
		Wed, 2 Jun 2021 09:47:04 +0000 (11:47 +0200)
Now that we handle self-recursion on the inode glock in gfs2_fault and
gfs2_page_mkwrite, we need to take care of more complex deadlock
scenarios like the following (example by Jan Kara):

Consider two independent processes P1 and P2, two files F1 and F2, and
two mappings M1 and M2, where M1 is a mapping of F1 and M2 is a mapping
of F2.  P1 does DIO to F1 with M2 as the buffer, and P2 does DIO to F2
with M1 as the buffer.  The two can race like this:

P1                                      P2
read()                                  read()
  gfs2_file_read_iter()                   gfs2_file_read_iter()
    gfs2_file_direct_read()                 gfs2_file_direct_read()
      locks glock of F1                       locks glock of F2
      iomap_dio_rw()                          iomap_dio_rw()
        bio_iov_iter_get_pages()                bio_iov_iter_get_pages()
          <fault in M2>                           <fault in M1>
            gfs2_fault()                            gfs2_fault()
              tries to grab glock of F2               tries to grab glock of F1

Those kinds of scenarios are much harder to reproduce than
self-recursion.

We deal with such situations by using the LM_FLAG_OUTER flag to mark
"outer" glock acquisitions.  When taking an "inner" glock, we then use
the LM_FLAG_TRY flag so that locking attempts which don't immediately
succeed are aborted.  When such an attempt fails, we "unroll" to the
point where the "outer" glock was taken, drop the "outer" glock, and
fault in the first offending user page.  Faulting in the page
re-triggers the "inner" locking attempt, this time without the
LM_FLAG_TRY flag, so it can block and succeed.  Once that has happened,
we re-acquire the "outer" glock and retry the original operation.
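
In code, the two sides of this pattern look roughly as follows (a
condensed sketch of the gfs2_file_direct_read() and gfs2_fault()
changes in the diff below, with error handling trimmed;
current_holds_glock() and the needs-retry helpers come from an earlier
patch in this series):

	/* "Outer" side, e.g. gfs2_file_direct_read(): take the glock
	 * with LM_FLAG_OUTER and retry the whole operation if an
	 * "inner" locking attempt had to be aborted while we held it. */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, LM_FLAG_OUTER, gh);
retry:
	ret = gfs2_glock_nq(gh);
	if (ret)
		goto out_uninit;
	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0);
	gfs2_glock_dq(gh);
	if (unlikely(current_needs_retry())) {
		set_current_needs_retry(false);
		if (ret == -EFAULT &&
		    !iov_iter_fault_in_writeable(to, PAGE_SIZE))
			goto retry;	/* page is resident now; redo the I/O */
	}

	/* "Inner" side, e.g. gfs2_fault(): LM_FLAG_OUTER marked the
	 * task as holding an "outer" glock, so turn this acquisition
	 * into a trylock and unroll instead of blocking. */
	if (current_holds_glock())
		flags |= LM_FLAG_TRY;
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, flags, &gh);
	err = gfs2_glock_nq(&gh);
	if (err == GLR_TRYFAILED) {
		set_current_needs_retry(true);
		ret = VM_FAULT_SIGBUS;	/* surfaces as -EFAULT in the outer caller */
	}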

Reported-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
fs/gfs2/bmap.c
fs/gfs2/file.c

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 2ff501c413f436cb19eed40f77067403f88e0115..82e4506984e35aaa74800d5c66fdc37a56b5d604 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -967,7 +967,8 @@ static int gfs2_write_lock(struct inode *inode)
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int error;
 
-       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_OUTER,
+                        &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (error)
                goto out_uninit;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9153c6b292898f6ecfca8dd61491075e72dcb3df..1d59497796e9d1c19d2d8e77faf1b0c0a61a0bdd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -431,21 +431,30 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
        vm_fault_t ret = VM_FAULT_LOCKED;
        struct gfs2_holder gh;
        unsigned int length;
+       u16 flags = 0;
        loff_t size;
        int err;
 
        sb_start_pagefault(inode->i_sb);
 
-       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       if (current_holds_glock())
+               flags |= LM_FLAG_TRY;
+
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, flags, &gh);
        if (likely(!outer_gh)) {
                err = gfs2_glock_nq(&gh);
                if (err) {
                        ret = block_page_mkwrite_return(err);
+                       if (err == GLR_TRYFAILED) {
+                               set_current_needs_retry(true);
+                               ret = VM_FAULT_SIGBUS;
+                       }
                        goto out_uninit;
                }
        } else {
                if (!gfs2_holder_is_compatible(outer_gh, LM_ST_EXCLUSIVE)) {
                        /* We could try to upgrade outer_gh here. */
+                       set_current_needs_retry(true);
                        ret = VM_FAULT_SIGBUS;
                        goto out_uninit;
                }
@@ -568,18 +577,27 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
        struct gfs2_holder *outer_gh = gfs2_glock_is_locked_by_me(ip->i_gl);
        struct gfs2_holder gh;
        vm_fault_t ret;
+       u16 flags = 0;
        int err;
 
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+       if (current_holds_glock())
+               flags |= LM_FLAG_TRY;
+
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, flags, &gh);
        if (likely(!outer_gh)) {
                err = gfs2_glock_nq(&gh);
                if (err) {
                        ret = block_page_mkwrite_return(err);
+                       if (err == GLR_TRYFAILED) {
+                               set_current_needs_retry(true);
+                               ret = VM_FAULT_SIGBUS;
+                       }
                        goto out_uninit;
                }
        } else {
                if (!gfs2_holder_is_compatible(outer_gh, LM_ST_SHARED)) {
                        /* We could try to upgrade outer_gh here. */
+                       set_current_needs_retry(true);
                        ret = VM_FAULT_SIGBUS;
                        goto out_uninit;
                }
@@ -805,13 +823,20 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
        if (!count)
                return 0; /* skip atime */
 
-       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, LM_FLAG_OUTER, gh);
+retry:
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
 
        ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0);
        gfs2_glock_dq(gh);
+       if (unlikely(current_needs_retry())) {
+               set_current_needs_retry(false);
+               if (ret == -EFAULT &&
+                   !iov_iter_fault_in_writeable(to, PAGE_SIZE))
+                       goto retry;
+       }
 out_uninit:
        gfs2_holder_uninit(gh);
        return ret;
@@ -835,7 +860,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
         * unfortunately, have the option of only flushing a range like the
         * VFS does.
         */
-       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, LM_FLAG_OUTER, gh);
+retry:
        ret = gfs2_glock_nq(gh);
        if (ret)
                goto out_uninit;
@@ -849,6 +875,12 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
                ret = 0;
 out:
        gfs2_glock_dq(gh);
+       if (unlikely(current_needs_retry())) {
+               set_current_needs_retry(false);
+               if (ret == -EFAULT &&
+                   !iov_iter_fault_in_readable(from, PAGE_SIZE))
+                       goto retry;
+       }
 out_uninit:
        gfs2_holder_uninit(gh);
        return ret;
@@ -881,7 +913,8 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                        return ret;
        }
        ip = GFS2_I(iocb->ki_filp->f_mapping->host);
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_OUTER, &gh);
+retry:
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;
@@ -889,6 +922,12 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (ret > 0)
                written += ret;
        gfs2_glock_dq(&gh);
+       if (unlikely(current_needs_retry())) {
+               set_current_needs_retry(false);
+               if (ret == -EFAULT &&
+                   !iov_iter_fault_in_writeable(to, PAGE_SIZE))
+                       goto retry;
+       }
 out_uninit:
        gfs2_holder_uninit(&gh);
        return written ? written : ret;
@@ -900,9 +939,17 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, struct iov_iter *fro
        struct inode *inode = file_inode(file);
        ssize_t ret;
 
+retry:
        current->backing_dev_info = inode_to_bdi(inode);
        ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
        current->backing_dev_info = NULL;
+       if (unlikely(current_needs_retry())) {
+               set_current_needs_retry(false);
+               if (ret == -EFAULT &&
+                   !iov_iter_fault_in_readable(from, PAGE_SIZE))
+                       goto retry;
+       }
+
        return ret;
 }
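
Note: current_holds_glock(), current_needs_retry(), and
set_current_needs_retry() are introduced by an earlier patch in this
series and are not part of this diff.  Purely for illustration,
task-flag based helpers along these lines could look like the
following hypothetical sketch (the gfs2_flags field and the flag names
are invented here; they are not the series' actual implementation):

	/* Hypothetical sketch only -- not the actual helpers from this
	 * series.  Assumes a new gfs2_flags field in struct task_struct. */

	#define GFS2_TASK_HOLDS_GLOCK	0x1	/* inside an LM_FLAG_OUTER section */
	#define GFS2_TASK_NEEDS_RETRY	0x2	/* an "inner" trylock was aborted */

	static inline bool current_holds_glock(void)
	{
		return current->gfs2_flags & GFS2_TASK_HOLDS_GLOCK;
	}

	static inline bool current_needs_retry(void)
	{
		return current->gfs2_flags & GFS2_TASK_NEEDS_RETRY;
	}

	static inline void set_current_needs_retry(bool enable)
	{
		if (enable)
			current->gfs2_flags |= GFS2_TASK_NEEDS_RETRY;
		else
			current->gfs2_flags &= ~GFS2_TASK_NEEDS_RETRY;
	}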