xfs: DIO write completion size updates race

author Dave Chinner <dchinner@redhat.com>

Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)

committer Dave Chinner <david@fromorbit.com>

Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)
author Dave Chinner <dchinner@redhat.com>
Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)
committer Dave Chinner <david@fromorbit.com>
Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index c02a47453137cb13ed232f8c5bb06ab9d0cbc64c..598b259fda04567667fc874d90b220a9eb926def 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1582,9 +1582,16 @@ xfs_end_io_direct_write(
          * with the on-disk inode size being outside the in-core inode size. We
          * have no other method of updating EOF for AIO, so always do it here
          * if necessary.
+        *
+        * We need to lock the test/set EOF update as we can be racing with
+        * other IO completions here to update the EOF. Failing to serialise
+        * here can result in EOF moving backwards and Bad Things Happen when
+        * that occurs.
          */
+       spin_lock(&ip->i_flags_lock);
         if (offset + size > i_size_read(inode))
                 i_size_write(inode, offset + size);
+       spin_unlock(&ip->i_flags_lock);
  
         /*
          * If we are doing an append IO that needs to update the EOF on disk,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index ce615d12fb44cfae0d6bf344cbf0b3d2e4f43e1d..2323b8b63183d72689e67e027ec9c6c51225fb7d 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -565,8 +565,18 @@ restart:
          * write.  If zeroing is needed and we are currently holding the
          * iolock shared, we need to update it to exclusive which implies
          * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur in IO
+        * completions here. We want to make sure that nobody is changing the
+        * size while we do this check until we have placed an IO barrier (i.e.
+        * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+        * The spinlock effectively forms a memory barrier once we have the
+        * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+        * and hence be able to correctly determine if we need to run zeroing.
          */
+       spin_lock(&ip->i_flags_lock);
         if (*pos > i_size_read(inode)) {
+               spin_unlock(&ip->i_flags_lock);
                 if (*iolock == XFS_IOLOCK_SHARED) {
                         xfs_rw_iunlock(ip, *iolock);
                         *iolock = XFS_IOLOCK_EXCL;
@@ -576,7 +586,8 @@ restart:
                 error = xfs_zero_eof(ip, *pos, i_size_read(inode));
                 if (error)
                         return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
  
         /*
          * Updating the timestamps will grab the ilock again from
author	Dave Chinner <dchinner@redhat.com>
	Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Thu, 16 Apr 2015 12:03:07 +0000 (22:03 +1000)
fs/xfs/xfs_aops.c		patch | blob | history
fs/xfs/xfs_file.c		patch | blob | history