From: Filipe Manana Date: Mon, 21 Jul 2025 11:54:48 +0000 (+0100) Subject: btrfs: exit early when replaying hole file extent item from a log tree X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=88666b6df97ec3ac8e6522584bec4963cf7ce6f8;p=users%2Fhch%2Fmisc.git btrfs: exit early when replaying hole file extent item from a log tree At replay_one_extent(), we can jump to the code that updates the file extent range and updates the inode when processing a file extent item that represents a hole and we don't have the NO_HOLES feature enabled. This helps reduce the high indentation level by one in replay_one_extent() and avoid splitting some lines to make the code easier to read. Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 776f3dfdfa86..1e01ba43becc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -725,6 +725,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { + u64 csum_start; + u64 csum_end; + LIST_HEAD(ordered_sums); u64 offset; unsigned long dest_offset; struct btrfs_key ins; @@ -744,6 +747,17 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, copy_extent_buffer(path->nodes[0], eb, dest_offset, (unsigned long)item, sizeof(*item)); + /* + * We have an explicit hole and NO_HOLES is not enabled. We have + * added the hole file extent item to the subvolume tree, so we + * don't have anything else to do other than update the file + * extent item range and update the inode item. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) { + btrfs_release_path(path); + goto update_inode; + } + ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); ins.type = BTRFS_EXTENT_ITEM_KEY; ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); @@ -765,148 +779,125 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, goto out; } - if (ins.objectid > 0) { - u64 csum_start; - u64 csum_end; - LIST_HEAD(ordered_sums); - - /* - * is this extent already allocated in the extent - * allocation tree? If so, just add a reference - */ - ret = btrfs_lookup_data_extent(fs_info, ins.objectid, - ins.offset); - if (ret < 0) { + /* + * Is this extent already allocated in the extent tree? + * If so, just add a reference. + */ + ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + goto out; + } else if (ret == 0) { + struct btrfs_ref ref = { + .action = BTRFS_ADD_DELAYED_REF, + .bytenr = ins.objectid, + .num_bytes = ins.offset, + .owning_root = btrfs_root_id(root), + .ref_root = btrfs_root_id(root), + }; + + btrfs_init_data_ref(&ref, key->objectid, offset, 0, false); + ret = btrfs_inc_extent_ref(trans, &ref); + if (ret) { btrfs_abort_transaction(trans, ret); goto out; - } else if (ret == 0) { - struct btrfs_ref ref = { - .action = BTRFS_ADD_DELAYED_REF, - .bytenr = ins.objectid, - .num_bytes = ins.offset, - .owning_root = btrfs_root_id(root), - .ref_root = btrfs_root_id(root), - }; - btrfs_init_data_ref(&ref, key->objectid, offset, - 0, false); - ret = btrfs_inc_extent_ref(trans, &ref); - if (ret) { - btrfs_abort_transaction(trans, ret); - goto out; - } - } else { - /* - * insert the extent pointer in the extent - * allocation tree - */ - ret = btrfs_alloc_logged_file_extent(trans, - btrfs_root_id(root), - key->objectid, offset, &ins); - if (ret) { - btrfs_abort_transaction(trans, ret); - goto out; - } - } - btrfs_release_path(path); - - if (btrfs_file_extent_compression(eb, item)) { - csum_start = ins.objectid; - csum_end = csum_start + ins.offset; - } else { - csum_start = ins.objectid + - btrfs_file_extent_offset(eb, item); - csum_end = csum_start + - btrfs_file_extent_num_bytes(eb, item); } - - ret = btrfs_lookup_csums_list(root->log_root, - csum_start, csum_end - 1, - &ordered_sums, false); - if (ret < 0) { + } else { + /* Insert the extent pointer in the extent tree. */ + ret = btrfs_alloc_logged_file_extent(trans, btrfs_root_id(root), + key->objectid, offset, &ins); + if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - ret = 0; - /* - * Now delete all existing cums in the csum root that - * cover our range. We do this because we can have an - * extent that is completely referenced by one file - * extent item and partially referenced by another - * file extent item (like after using the clone or - * extent_same ioctls). In this case if we end up doing - * the replay of the one that partially references the - * extent first, and we do not do the csum deletion - * below, we can get 2 csum items in the csum tree that - * overlap each other. For example, imagine our log has - * the two following file extent items: - * - * key (257 EXTENT_DATA 409600) - * extent data disk byte 12845056 nr 102400 - * extent data offset 20480 nr 20480 ram 102400 - * - * key (257 EXTENT_DATA 819200) - * extent data disk byte 12845056 nr 102400 - * extent data offset 0 nr 102400 ram 102400 - * - * Where the second one fully references the 100K extent - * that starts at disk byte 12845056, and the log tree - * has a single csum item that covers the entire range - * of the extent: - * - * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 - * - * After the first file extent item is replayed, the - * csum tree gets the following csum item: - * - * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 - * - * Which covers the 20K sub-range starting at offset 20K - * of our extent. Now when we replay the second file - * extent item, if we do not delete existing csum items - * that cover any of its blocks, we end up getting two - * csum items in our csum tree that overlap each other: - * - * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 - * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 - * - * Which is a problem, because after this anyone trying - * to lookup up for the checksum of any block of our - * extent starting at an offset of 40K or higher, will - * end up looking at the second csum item only, which - * does not contain the checksum for any block starting - * at offset 40K or higher of our extent. - */ - while (!list_empty(&ordered_sums)) { - struct btrfs_ordered_sum *sums; - struct btrfs_root *csum_root; - - sums = list_first_entry(&ordered_sums, - struct btrfs_ordered_sum, - list); - csum_root = btrfs_csum_root(fs_info, - sums->logical); - if (!ret) { - ret = btrfs_del_csums(trans, csum_root, - sums->logical, - sums->len); - if (ret) - btrfs_abort_transaction(trans, ret); - } - if (!ret) { - ret = btrfs_csum_file_blocks(trans, - csum_root, - sums); - if (ret) - btrfs_abort_transaction(trans, ret); - } - list_del(&sums->list); - kfree(sums); - } - if (ret) - goto out; + } + + btrfs_release_path(path); + + if (btrfs_file_extent_compression(eb, item)) { + csum_start = ins.objectid; + csum_end = csum_start + ins.offset; } else { - btrfs_release_path(path); + csum_start = ins.objectid + btrfs_file_extent_offset(eb, item); + csum_end = csum_start + btrfs_file_extent_num_bytes(eb, item); + } + + ret = btrfs_lookup_csums_list(root->log_root, csum_start, csum_end - 1, + &ordered_sums, false); + if (ret < 0) { + btrfs_abort_transaction(trans, ret); + goto out; } + ret = 0; + /* + * Now delete all existing cums in the csum root that cover our + * range. We do this because we can have an extent that is + * completely referenced by one file extent item and partially + * referenced by another file extent item (like after using the + * clone or extent_same ioctls). In this case if we end up doing + * the replay of the one that partially references the extent + * first, and we do not do the csum deletion below, we can get 2 + * csum items in the csum tree that overlap each other. For + * example, imagine our log has the two following file extent items: + * + * key (257 EXTENT_DATA 409600) + * extent data disk byte 12845056 nr 102400 + * extent data offset 20480 nr 20480 ram 102400 + * + * key (257 EXTENT_DATA 819200) + * extent data disk byte 12845056 nr 102400 + * extent data offset 0 nr 102400 ram 102400 + * + * Where the second one fully references the 100K extent that + * starts at disk byte 12845056, and the log tree has a single + * csum item that covers the entire range of the extent: + * + * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 + * + * After the first file extent item is replayed, the csum tree + * gets the following csum item: + * + * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 + * + * Which covers the 20K sub-range starting at offset 20K of our + * extent. Now when we replay the second file extent item, if we + * do not delete existing csum items that cover any of its + * blocks, we end up getting two csum items in our csum tree + * that overlap each other: + * + * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 + * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 + * + * Which is a problem, because after this anyone trying to + * lookup up for the checksum of any block of our extent + * starting at an offset of 40K or higher, will end up looking + * at the second csum item only, which does not contain the + * checksum for any block starting at offset 40K or higher of + * our extent. + */ + while (!list_empty(&ordered_sums)) { + struct btrfs_ordered_sum *sums; + struct btrfs_root *csum_root; + + sums = list_first_entry(&ordered_sums, + struct btrfs_ordered_sum, list); + csum_root = btrfs_csum_root(fs_info, sums->logical); + if (!ret) { + ret = btrfs_del_csums(trans, csum_root, sums->logical, + sums->len); + if (ret) + btrfs_abort_transaction(trans, ret); + } + if (!ret) { + ret = btrfs_csum_file_blocks(trans, csum_root, sums); + if (ret) + btrfs_abort_transaction(trans, ret); + } + list_del(&sums->list); + kfree(sums); + } + if (ret) + goto out; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { /* inline extents are easy, we just overwrite them */ ret = overwrite_item(trans, root, path, eb, slot, key); @@ -914,13 +905,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, goto out; } +update_inode: ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } -update_inode: btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found); ret = btrfs_update_inode(trans, inode); if (ret)