From: Filipe Manana
Date: Wed, 9 Jul 2025 15:34:20 +0000 (+0100)
Subject: btrfs: make btrfs_check_nocow_lock() check more than one extent
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=240fafaa4400f7c577a5e8e40c496663da0e0798;p=users%2Fwilly%2Fxarray.git

btrfs: make btrfs_check_nocow_lock() check more than one extent

Currently btrfs_check_nocow_lock() stops at the first extent it finds, and that extent may be smaller than the target range we want to NOCOW into. However, there can be multiple consecutive extents that we can NOCOW into. By stopping at the first one we find, we make the caller do more work by splitting the write into multiple ones. In the case of mmap writes with large folios, we fail with -ENOSPC if the folio's range is covered by more than one extent (the fallback to NOCOW for mmap writes in case there's no available data space to reserve/allocate was recently added by the patch "btrfs: fix -ENOSPC mmap write failure on NOCOW files/extents").

Improve on this by checking for multiple consecutive extents.

Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c2e83babdb8d..bc1e00db96c9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -984,8 +984,8 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, struct btrfs_root *root = inode->root; struct extent_state *cached_state = NULL; u64 lockstart, lockend; - u64 num_bytes; - int ret; + u64 cur_offset; + int ret = 0; if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) return 0; @@ -996,7 +996,6 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; - num_bytes = lockend - lockstart + 1; if (nowait) { if (!btrfs_try_lock_ordered_range(inode, lockstart, lockend, @@ -1008,14 +1007,36 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, &cached_state); } - ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, nowait); - if (ret <= 0) - btrfs_drew_write_unlock(&root->snapshot_lock); - else - *write_bytes = min_t(size_t, *write_bytes , - num_bytes - pos + lockstart); + + cur_offset = lockstart; + while (cur_offset < lockend) { + u64 num_bytes = lockend - cur_offset + 1; + + ret = can_nocow_extent(inode, cur_offset, &num_bytes, NULL, nowait); + if (ret <= 0) { + /* + * If cur_offset == lockstart it means we haven't found + * any extent against which we can NOCOW, so unlock the + * snapshot lock. + */ + if (cur_offset == lockstart) + btrfs_drew_write_unlock(&root->snapshot_lock); + break; + } + cur_offset += num_bytes; + } + btrfs_unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); + /* + * cur_offset > lockstart means there's at least a partial range we can + * NOCOW, and that range can cover one or more extents. 
+ */ + if (cur_offset > lockstart) { + *write_bytes = min_t(size_t, *write_bytes, cur_offset - pos); + return 1; + } + return ret; }