From: Qu Wenruo Date: Tue, 2 Sep 2025 05:21:49 +0000 (+0930) Subject: btrfs: introduce btrfs_bio_for_each_block() helper X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=9afc617265383f591614e94b702d558dfb1519c0;p=users%2Fhch%2Fmisc.git btrfs: introduce btrfs_bio_for_each_block() helper Currently if we want to iterate a bio in block unit, we do something like this: while (iter->bi_size) { struct bio_vec bv = bio_iter_iovec(); /* Do something with using the bv */ bio_advance_iter_single(&bbio->bio, iter, sectorsize); } That's fine for now, but it will not handle future bs > ps, as bio_iter_iovec() returns a single-page bvec, meaning the bv_len will not exceed page size. This means the code using that bv can only handle a block if bs <= ps. To address this problem and handle future bs > ps cases better: - Introduce a helper btrfs_bio_for_each_block() Instead of bio_vec, which has single and multiple page version and multiple page version has quite some limits, use my favorite way to represent a block, phys_addr_t. For bs <= ps cases, nothing is changed, except we will do a very small overhead to convert phys_addr_t to a folio, then use the proper folio helpers to handle the possible highmem cases. For bs > ps cases, all blocks will be backed by large folios, meaning every folio will cover at least one block. And still use proper folio helpers to handle highmem cases. With phys_addr_t, we will handle both large folio and highmem properly. So there is no better single variable to present a btrfs block than phys_addr_t. - Extract the data block csum calculation into a helper The new helper, btrfs_calculate_block_csum() will be utilized by btrfs_csum_one_bio(). - Use btrfs_bio_for_each_block() to replace existing call sites Including: * index_one_bio() from raid56.c Very straight-forward. * btrfs_check_read_bio() Also update repair_one_sector() to grab the folio using phys_addr_t, and do extra checks to make sure the folio covers at least one block. We do not need to bother bv_len at all now. * btrfs_csum_one_bio() Now we can move the highmem handling into a dedicated helper, calculate_block_csum(), and use btrfs_bio_for_each_block() helper. There is one exception in btrfs_decompress_buf2page(), which is copying decompressed data into the original bio, which is not iterating using block size thus we don't need to bother. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 493135bfa518..909b208f9ef3 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -204,18 +204,21 @@ done: */ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio, u32 bio_offset, - struct bio_vec *bv, + phys_addr_t paddr, struct btrfs_failed_bio *fbio) { struct btrfs_inode *inode = failed_bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct folio *folio = page_folio(phys_to_page(paddr)); const u32 sectorsize = fs_info->sectorsize; + const u32 foff = offset_in_folio(folio, paddr); const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT); struct btrfs_bio *repair_bbio; struct bio *repair_bio; int num_copies; int mirror; + ASSERT(foff + sectorsize <= folio_size(folio)); btrfs_debug(fs_info, "repair read error: read error at %llu", failed_bbio->file_offset + bio_offset); @@ -238,7 +241,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio, repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &btrfs_repair_bioset); repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector; - __bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset); + bio_add_folio_nofail(repair_bio, folio, sectorsize, foff); repair_bbio = btrfs_bio(repair_bio); btrfs_bio_init(repair_bbio, fs_info, NULL, fbio); @@ -259,6 +262,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de struct bvec_iter *iter = &bbio->saved_iter; blk_status_t status = bbio->bio.bi_status; struct btrfs_failed_bio *fbio = NULL; + phys_addr_t paddr; u32 offset = 0; /* Read-repair requires the inode field to be set by the submitter. */ @@ -276,17 +280,11 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de /* Clear the I/O error. A failed repair will reset it. */ bbio->bio.bi_status = BLK_STS_OK; - while (iter->bi_size) { - struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter); - - bv.bv_len = min(bv.bv_len, sectorsize); - if (status || !btrfs_data_csum_ok(bbio, dev, offset, bvec_phys(&bv))) - fbio = repair_one_sector(bbio, offset, &bv, fbio); - - bio_advance_iter_single(&bbio->bio, iter, sectorsize); + btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) { + if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr)) + fbio = repair_one_sector(bbio, offset, paddr, fbio); offset += sectorsize; } - if (bbio->csum != bbio->csum_inline) kfree(bbio->csum); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 077b2f178816..c40e99ec13bf 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -542,6 +542,8 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode) #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes +void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, + u8 *dest); int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, const u8 * const csum_expected); bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4dd3d8a02519..7906aea75ee4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -775,12 +775,10 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio) SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct bio *bio = &bbio->bio; struct btrfs_ordered_sum *sums; - char *data; - struct bvec_iter iter; - struct bio_vec bvec; + struct bvec_iter iter = bio->bi_iter; + phys_addr_t paddr; + const u32 blocksize = fs_info->sectorsize; int index; - unsigned int blockcount; - int i; unsigned nofs_flag; nofs_flag = memalloc_nofs_save(); @@ -799,21 +797,9 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio) shash->tfm = fs_info->csum_shash; - bio_for_each_segment(bvec, bio, iter) { - blockcount = BTRFS_BYTES_TO_BLKS(fs_info, - bvec.bv_len + fs_info->sectorsize - - 1); - - for (i = 0; i < blockcount; i++) { - data = bvec_kmap_local(&bvec); - crypto_shash_digest(shash, - data + (i * fs_info->sectorsize), - fs_info->sectorsize, - sums->sums + index); - kunmap_local(data); - index += fs_info->csum_size; - } - + btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) { + btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index); + index += fs_info->csum_size; } bbio->sums = sums; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 98877535f213..5fad6af57944 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3328,14 +3328,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered) return btrfs_finish_one_ordered(ordered); } -/* - * Verify the checksum for a single sector without any extra action that depend - * on the type of I/O. - * - * @kaddr must be a properly kmapped address. - */ -int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, - const u8 * const csum_expected) +void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, + u8 *dest) { struct folio *folio = page_folio(phys_to_page(paddr)); const u32 blocksize = fs_info->sectorsize; @@ -3359,11 +3353,21 @@ int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 kunmap_local(kaddr); cur += len; } - crypto_shash_final(shash, csum); + crypto_shash_final(shash, dest); } else { - crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, csum); + crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest); } - +} +/* + * Verify the checksum for a single sector without any extra action that depend + * on the type of I/O. + * + * @kaddr must be a properly kmapped address. + */ +int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, + const u8 * const csum_expected) +{ + btrfs_calculate_block_csum(fs_info, paddr, csum); if (memcmp(csum, csum_expected, fs_info->csum_size)) return -EIO; return 0; diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h index ff5eac84d819..f8f055fdc551 100644 --- a/fs/btrfs/misc.h +++ b/fs/btrfs/misc.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * Enumerate bits using enum autoincrement. Define the @name as the n-th bit. @@ -20,6 +21,30 @@ name = (1U << __ ## name ## _BIT), \ __ ## name ## _SEQ = __ ## name ## _BIT +static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter) +{ + struct bio_vec bv = bio_iter_iovec(bio, *iter); + + return bvec_phys(&bv); +} + +/* + * Iterate bio using btrfs block size. + * + * This will handle large folio and highmem. + * + * @paddr: Physical memory address of each iteration + * @bio: The bio to iterate + * @iter: The bvec_iter (pointer) to use. + * @blocksize: The blocksize to iterate. + * + * This requires all folios in the bio to cover at least one block. + */ +#define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \ + for (; (iter)->bi_size && \ + (paddr = bio_iter_phys((bio), (iter)), 1); \ + bio_advance_iter_single((bio), (iter), (blocksize))) + static inline void cond_wake_up(struct wait_queue_head *wq) { /* diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index e88699460dda..389f1b617fe7 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1208,17 +1208,16 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) const u32 sectorsize = rbio->bioc->fs_info->sectorsize; const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits; struct bvec_iter iter = bio->bi_iter; + phys_addr_t paddr; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - rbio->bioc->full_stripe_logical; - while (iter.bi_size) { + btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) { unsigned int index = (offset >> sectorsize_bits); struct sector_ptr *sector = &rbio->bio_sectors[index]; - struct bio_vec bv = bio_iter_iovec(bio, iter); sector->has_paddr = true; - sector->paddr = bvec_phys(&bv); - bio_advance_iter_single(bio, &iter, sectorsize); + sector->paddr = paddr; offset += sectorsize; } }