]> www.infradead.org Git - users/hch/xfsprogs.git/commitdiff
xfs: speed up directory bestfree block scanning
authorDave Chinner <dchinner@redhat.com>
Fri, 15 Nov 2019 22:16:22 +0000 (17:16 -0500)
committerEric Sandeen <sandeen@redhat.com>
Fri, 15 Nov 2019 22:16:22 +0000 (17:16 -0500)
Source kernel commit: 610125ab1e4b1b48dcffe74d9d82b0606bf1b923

When running a "create millions inodes in a directory" test
recently, I noticed we were spending a huge amount of time
converting freespace block headers from disk format to in-memory
format:

31.47%  [kernel]  [k] xfs_dir2_node_addname
17.86%  [kernel]  [k] xfs_dir3_free_hdr_from_disk
3.55%  [kernel]  [k] xfs_dir3_free_bests_p

We shouldn't be hitting the best free block scanning code so hard
when doing sequential directory creates, and it turns out there's
a highly suboptimal loop searching the the best free array in
the freespace block - it decodes the block header before checking
each entry inside a loop, instead of decoding the header once before
running the entry search loop.

This makes a massive difference to create rates. Profile now looks
like this:

13.15%  [kernel]  [k] xfs_dir2_node_addname
3.52%  [kernel]  [k] xfs_dir3_leaf_check_int
3.11%  [kernel]  [k] xfs_log_commit_cil

And the wall time/average file create rate differences are
just as stark:

create time(sec) / rate (files/s)
File count           vanilla                patched
10k              0.41 / 24.3k            0.42 / 23.8k
20k              0.74 / 27.0k            0.76 / 26.3k
100k              3.81 / 26.4k            3.47 / 28.8k
200k              8.58 / 23.3k            7.19 / 27.8k
1M             85.69 / 11.7k           48.53 / 20.6k
2M            280.31 /  7.1k          130.14 / 15.3k

The larger the directory, the bigger the performance improvement.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net
libxfs/xfs_dir2_node.c

index af8434034cda43b84bd415c18486fa7ceee58f36..5a48deb91bbc9fe375ef77afe02e5005ac828db3 100644 (file)
@@ -1747,8 +1747,8 @@ xfs_dir2_node_find_freeblk(
        xfs_dir2_db_t           dbno = -1;
        xfs_dir2_db_t           fbno = -1;
        xfs_fileoff_t           fo;
-       __be16                  *bests;
-       int                     findex;
+       __be16                  *bests = NULL;
+       int                     findex = 0;
        int                     error;
 
        /*
@@ -1778,14 +1778,14 @@ xfs_dir2_node_find_freeblk(
                 */
                ifbno = fblk->blkno;
                fbno = ifbno;
+               xfs_trans_brelse(tp, fbp);
+               fbp = NULL;
+               fblk->bp = NULL;
        }
-       ASSERT(dbno == -1);
-       findex = 0;
 
        /*
         * If we don't have a data block yet, we're going to scan the freespace
-        * blocks looking for one.  Figure out what the highest freespace block
-        * number is.
+        * data for a data block with enough free space in it.
         */
        error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK);
        if (error)
@@ -1796,70 +1796,41 @@ xfs_dir2_node_find_freeblk(
        if (fbno == -1)
                fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
-       /*
-        * While we haven't identified a data block, search the freeblock
-        * data for a good data block.  If we find a null freeblock entry,
-        * indicating a hole in the data blocks, remember that.
-        */
-       while (dbno == -1) {
-               /*
-                * If we don't have a freeblock in hand, get the next one.
-                */
-               if (fbp == NULL) {
-                       /*
-                        * If it's ifbno we already looked at it.
-                        */
-                       if (++fbno == ifbno)
-                               fbno++;
-                       /*
-                        * If it's off the end we're done.
-                        */
-                       if (fbno >= lastfbno)
-                               break;
-                       /*
-                        * Read the block.  There can be holes in the
-                        * freespace blocks, so this might not succeed.
-                        * This should be really rare, so there's no reason
-                        * to avoid it.
-                        */
-                       error = xfs_dir2_free_try_read(tp, dp,
-                                       xfs_dir2_db_to_da(args->geo, fbno),
-                                       &fbp);
-                       if (error)
-                               return error;
-                       if (!fbp)
-                               continue;
-                       free = fbp->b_addr;
-                       findex = 0;
-               }
+       for ( ; fbno < lastfbno; fbno++) {
+               /* If it's ifbno we already looked at it. */
+               if (fbno == ifbno)
+                       continue;
+
                /*
-                * Look at the current free entry.  Is it good enough?
-                *
-                * The bests initialisation should be where the bufer is read in
-                * the above branch. But gcc is too stupid to realise that bests
-                * and the freehdr are actually initialised if they are placed
-                * there, so we have to do it here to avoid warnings. Blech.
+                * Read the block.  There can be holes in the freespace blocks,
+                * so this might not succeed.  This should be really rare, so
+                * there's no reason to avoid it.
                 */
+               error = xfs_dir2_free_try_read(tp, dp,
+                               xfs_dir2_db_to_da(args->geo, fbno),
+                               &fbp);
+               if (error)
+                       return error;
+               if (!fbp)
+                       continue;
+
+               free = fbp->b_addr;
                bests = dp->d_ops->free_bests_p(free);
                dp->d_ops->free_hdr_from_disk(&freehdr, free);
-               if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
-                   be16_to_cpu(bests[findex]) >= length)
-                       dbno = freehdr.firstdb + findex;
-               else {
-                       /*
-                        * Are we done with the freeblock?
-                        */
-                       if (++findex == freehdr.nvalid) {
-                               /*
-                                * Drop the block.
-                                */
-                               xfs_trans_brelse(tp, fbp);
-                               fbp = NULL;
-                               if (fblk && fblk->bp)
-                                       fblk->bp = NULL;
+
+               /* Scan the free entry array for a large enough free space. */
+               for (findex = 0; findex < freehdr.nvalid; findex++) {
+                       if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
+                           be16_to_cpu(bests[findex]) >= length) {
+                               dbno = freehdr.firstdb + findex;
+                               goto found_block;
                        }
                }
+
+               /* Didn't find free space, go on to next free block */
+               xfs_trans_brelse(tp, fbp);
        }
+
 found_block:
        *dbnop = dbno;
        *fbpp = fbp;