]> www.infradead.org Git - users/hch/xfs.git/commitdiff
xfs: only free posteof blocks on first close
authorDarrick J. Wong <djwong@kernel.org>
Tue, 13 Aug 2024 07:39:39 +0000 (09:39 +0200)
committerChandan Babu R <chandanbabu@kernel.org>
Tue, 3 Sep 2024 04:37:38 +0000 (10:07 +0530)
Certain workloads fragment files on XFS very badly, such as a software
package that creates a number of threads, each of which repeatedly run
the sequence: open a file, perform a synchronous write, and close the
file, which defeats the speculative preallocation mechanism.  We work
around this problem by only deleting posteof blocks the /first/ time a
file is closed to preserve the behavior that unpacking a tarball lays
out files one after the other with no gaps.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
[hch: rebased, updated comment, renamed the flag]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.h

index 60424e64230743c7dedee67eb1d6b008976c5697..30b553ac8f56bb026646b76ec6592d0410e97ba5 100644 (file)
@@ -1204,15 +1204,21 @@ xfs_file_release(
         * exposed to that problem.
         */
        if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) {
-               xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
+               xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED);
                if (ip->i_delayed_blks > 0)
                        filemap_flush(inode->i_mapping);
        }
 
        /*
         * XFS aggressively preallocates post-EOF space to generate contiguous
-        * allocations for writers that append to the end of the file and we
-        * try to free these when an open file context is released.
+        * allocations for writers that append to the end of the file.
+        *
+        * To support workloads that close and reopen the file frequently, these
+        * preallocations usually persist after a close unless it is the first
+        * close for the inode.  This is a tradeoff to generate tightly packed
+        * data layouts for unpacking tarballs or similar archives that write
+        * one file after another without going back to it while keeping the
+        * preallocation for files that have recurring open/write/close cycles.
         *
         * There is no point in freeing blocks here for open but unlinked files
         * as they will be taken care of by the inactivation path soon.
@@ -1230,25 +1236,9 @@ xfs_file_release(
            (file->f_mode & FMODE_WRITE) &&
            xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
                if (xfs_can_free_eofblocks(ip) &&
-                   !xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) {
-                       /*
-                        * Check if the inode is being opened, written and
-                        * closed frequently and we have delayed allocation
-                        * blocks outstanding (e.g. streaming writes from the
-                        * NFS server), truncating the blocks past EOF will
-                        * cause fragmentation to occur.
-                        *
-                        * In this case don't do the truncation, but we have to
-                        * be careful how we detect this case. Blocks beyond EOF
-                        * show up as i_delayed_blks even when the inode is
-                        * clean, so we need to truncate them away first before
-                        * checking for a dirty release. Hence on the first
-                        * dirty close we will still remove the speculative
-                        * allocation, but after that we will leave it in place.
-                        */
+                   !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED)) {
                        xfs_free_eofblocks(ip);
-                       if (ip->i_delayed_blks)
-                               xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
+                       xfs_iflags_set(ip, XFS_EOFBLOCKS_RELEASED);
                }
                xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        }
index 98fc67231eb8380cddb38ba616f0f4b28f3ad086..97ed912306fd066c461b53fec450f1b4727c8682 100644 (file)
@@ -336,7 +336,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
 #define XFS_INEW               (1 << 3) /* inode has just been allocated */
 #define XFS_IPRESERVE_DM_FIELDS        (1 << 4) /* has legacy DMAPI fields set */
 #define XFS_ITRUNCATED         (1 << 5) /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE     (1 << 6) /* dirty release already seen */
+#define XFS_EOFBLOCKS_RELEASED (1 << 6) /* eofblocks were freed in ->release */
 #define XFS_IFLUSHING          (1 << 7) /* inode is being flushed */
 #define __XFS_IPINNED_BIT      8        /* wakeup key for zero pin count */
 #define XFS_IPINNED            (1 << __XFS_IPINNED_BIT)
@@ -383,7 +383,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
  */
 #define XFS_IRECLAIM_RESET_FLAGS       \
        (XFS_IRECLAIMABLE | XFS_IRECLAIM | \
-        XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
+        XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
         XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)
 
 /*