www.infradead.org Git - users/hch/xfs.git/commitdiff
xfs: don't call remap_verify_area with sb write protection held
author: Christoph Hellwig <hch@lst.de>
Sun, 19 Jan 2025 06:02:36 +0000 (07:02 +0100)
committer: Christoph Hellwig <hch@lst.de>
Mon, 3 Feb 2025 04:49:01 +0000 (05:49 +0100)
The XFS_IOC_EXCHANGE_RANGE ioctl with the XFS_EXCHANGE_RANGE_TO_EOF flag
operates on a range bounded by the end of the file.  This means the
actual number of blocks exchanged is derived from the inode size, which
is only stable with the IOLOCK (i_rwsem) held.  To do that, it currently
calls remap_verify_area from inside the sb write protection, which nests
outside the IOLOCK.  But this makes fsnotify_file_area_perm, which is
called from remap_verify_area, unhappy when the kernel is built with
lockdep and the recently added CONFIG_FANOTIFY_ACCESS_PERMISSIONS
option.

Fix this by always calling remap_verify_area before taking the write
protection, and passing a 0 size to remap_verify_area similar to
the FICLONE/FICLONERANGE ioctls when they are asked to clone until
the file end.

(Note: the size argument gets passed to fsnotify_file_area_perm, but
then isn't actually used there).

Fixes: 9a64d9b3109d ("xfs: introduce new file range exchange ioctl")
Signed-off-by: Christoph Hellwig <hch@lst.de>
fs/xfs/xfs_exchrange.c

index f340a2015c4c71363aff6a8abacd05a0c8343d65..0b41bdfecdfbc9b6f3e3103d58b271d3965f2807 100644 (file)
@@ -329,22 +329,6 @@ out_trans_cancel:
  * successfully but before locks are dropped.
  */
 
-/* Verify that we have security clearance to perform this operation. */
-static int
-xfs_exchange_range_verify_area(
-       struct xfs_exchrange    *fxr)
-{
-       int                     ret;
-
-       ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
-                       true);
-       if (ret)
-               return ret;
-
-       return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
-                       true);
-}
-
 /*
  * Performs necessary checks before doing a range exchange, having stabilized
  * mutable inode attributes via i_rwsem.
@@ -355,11 +339,13 @@ xfs_exchange_range_checks(
        unsigned int            alloc_unit)
 {
        struct inode            *inode1 = file_inode(fxr->file1);
+       loff_t                  size1 = i_size_read(inode1);
        struct inode            *inode2 = file_inode(fxr->file2);
+       loff_t                  size2 = i_size_read(inode2);
        uint64_t                allocmask = alloc_unit - 1;
        int64_t                 test_len;
        uint64_t                blen;
-       loff_t                  size1, size2, tmp;
+       loff_t                  tmp;
        int                     error;
 
        /* Don't touch certain kinds of inodes */
@@ -368,24 +354,25 @@ xfs_exchange_range_checks(
        if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
                return -ETXTBSY;
 
-       size1 = i_size_read(inode1);
-       size2 = i_size_read(inode2);
-
        /* Ranges cannot start after EOF. */
        if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
                return -EINVAL;
 
-       /*
-        * If the caller said to exchange to EOF, we set the length of the
-        * request large enough to cover everything to the end of both files.
-        */
        if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
+               /*
+                * If the caller said to exchange to EOF, we set the length of
+                * the request large enough to cover everything to the end of
+                * both files.
+                */
                fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
                                             size2 - fxr->file2_offset);
-
-               error = xfs_exchange_range_verify_area(fxr);
-               if (error)
-                       return error;
+       } else {
+               /*
+                * Otherwise we require both ranges to end within EOF.
+                */
+               if (fxr->file1_offset + fxr->length > size1 ||
+                   fxr->file2_offset + fxr->length > size2)
+                       return -EINVAL;
        }
 
        /*
@@ -401,15 +388,6 @@ xfs_exchange_range_checks(
            check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
                return -EINVAL;
 
-       /*
-        * We require both ranges to end within EOF, unless we're exchanging
-        * to EOF.
-        */
-       if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
-           (fxr->file1_offset + fxr->length > size1 ||
-            fxr->file2_offset + fxr->length > size2))
-               return -EINVAL;
-
        /*
         * Make sure we don't hit any file size limits.  If we hit any size
         * limits such that test_length was adjusted, we abort the whole
@@ -747,6 +725,7 @@ xfs_exchange_range(
 {
        struct inode            *inode1 = file_inode(fxr->file1);
        struct inode            *inode2 = file_inode(fxr->file2);
+       loff_t                  check_len = fxr->length;
        int                     ret;
 
        BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
@@ -779,14 +758,18 @@ xfs_exchange_range(
                return -EBADF;
 
        /*
-        * If we're not exchanging to EOF, we can check the areas before
-        * stabilizing both files' i_size.
+        * If we're exchanging to EOF we can't calculate the length until taking
+        * the iolock.  Pass a 0 length to remap_verify_area similar to the
+        * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well.
         */
-       if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
-               ret = xfs_exchange_range_verify_area(fxr);
-               if (ret)
-                       return ret;
-       }
+       if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
+               check_len = 0;
+       ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
+       if (ret)
+               return ret;
+       ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
+       if (ret)
+               return ret;
 
        /* Update cmtime if the fd/inode don't forbid it. */
        if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))