From: Qu Wenruo Date: Tue, 2 Sep 2025 01:21:25 +0000 (+0930) Subject: btrfs: concentrate highmem handling for data verification X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=35aff706dccbc51d30df6fedba76a746a1322839;p=users%2Fhch%2Fmisc.git btrfs: concentrate highmem handling for data verification Currently for btrfs checksum verification, we do it in the following pattern: kaddr = kmap_local_*(); ret = btrfs_check_csum_csum(kaddr); kunmap_local(kaddr); It's OK for now, but it's still not following the patterns of helpers inside linux/highmem.h, which never requires a virt memory address. In those highmem helpers, they mostly accept a folio, some offset/length inside the folio, and in the implementation they check if the folio needs partial kmap, and do the handling. Inspired by those formal highmem helpers, enhance the highmem handling of data checksum verification by: - Rename btrfs_check_sector_csum() to btrfs_check_block_csum() To follow the more common term "block" used in all other major filesystems. - Pass a physical address into btrfs_check_block_csum() and btrfs_data_csum_ok() The physical address is always available even for a highmem page. Since it's page frame number << PAGE_SHIFT + offset in page. And with that physical address, we can grab the folio covering the page, and do extra checks to ensure it covers at least one block. This also allows us to do the kmap inside btrfs_check_block_csum(). This means all the extra HIGHMEM handling will be concentrated into btrfs_check_block_csum(), and no callers will need to bother highmem by themselves. - Properly zero out the block if csum mismatch Since btrfs_data_csum_ok() only got a paddr, we can not and should not use memzero_bvec(), which only accepts single page bvec. Instead use paddr to grab the folio and call folio_zero_range() Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index ea7f7a17a3d5..493135bfa518 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -167,7 +167,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, int mirror = repair_bbio->mirror_num; if (repair_bbio->bio.bi_status || - !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) { + !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) { bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ); repair_bbio->bio.bi_iter = repair_bbio->saved_iter; @@ -280,7 +280,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter); bv.bv_len = min(bv.bv_len, sectorsize); - if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv)) + if (status || !btrfs_data_csum_ok(bbio, dev, offset, bvec_phys(&bv))) fbio = repair_one_sector(bbio, offset, &bv, fbio); bio_advance_iter_single(&bbio->bio, iter, sectorsize); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index df3445448b7d..077b2f178816 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -542,10 +542,10 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode) #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes -int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum, - const u8 * const csum_expected); +int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, + const u8 * const csum_expected); bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, - u32 bio_offset, struct bio_vec *bv); + u32 bio_offset, phys_addr_t paddr); noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len, struct btrfs_file_extent *file_extent, bool nowait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd503dba33cf..98877535f213 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3334,13 +3334,35 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered) * * @kaddr must be a properly kmapped address. */ -int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum, - const u8 * const csum_expected) +int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, + const u8 * const csum_expected) { + struct folio *folio = page_folio(phys_to_page(paddr)); + const u32 blocksize = fs_info->sectorsize; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); shash->tfm = fs_info->csum_shash; - crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum); + /* The full block must be inside the folio. */ + ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); + + if (folio_test_partial_kmap(folio)) { + size_t cur = paddr; + + crypto_shash_init(shash); + while (cur < paddr + blocksize) { + void *kaddr; + size_t len = min(paddr + blocksize - cur, + PAGE_SIZE - offset_in_page(cur)); + + kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur)); + crypto_shash_update(shash, kaddr, len); + kunmap_local(kaddr); + cur += len; + } + crypto_shash_final(shash, csum); + } else { + crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, csum); + } if (memcmp(csum, csum_expected, fs_info->csum_size)) return -EIO; @@ -3361,17 +3383,16 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum * Return %true if the sector is ok or had no checksum to start with, else %false. */ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, - u32 bio_offset, struct bio_vec *bv) + u32 bio_offset, phys_addr_t paddr) { struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; + const u32 blocksize = fs_info->sectorsize; + struct folio *folio; u64 file_offset = bbio->file_offset + bio_offset; - u64 end = file_offset + bv->bv_len - 1; + u64 end = file_offset + blocksize - 1; u8 *csum_expected; u8 csum[BTRFS_CSUM_SIZE]; - void *kaddr; - - ASSERT(bv->bv_len == fs_info->sectorsize); if (!bbio->csum) return true; @@ -3387,12 +3408,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) * fs_info->csum_size; - kaddr = bvec_kmap_local(bv); - if (btrfs_check_sector_csum(fs_info, kaddr, csum, csum_expected)) { - kunmap_local(kaddr); + if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected)) goto zeroit; - } - kunmap_local(kaddr); return true; zeroit: @@ -3400,7 +3417,9 @@ zeroit: bbio->mirror_num); if (dev) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); - memzero_bvec(bv); + folio = page_folio(phys_to_page(paddr)); + ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); + folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize); return false; } diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 3ff2bedfb3a4..e88699460dda 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1585,9 +1585,6 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio, return; bio_for_each_segment_all(bvec, bio, iter_all) { - void *kaddr; - - kaddr = bvec_kmap_local(bvec); for (u32 off = 0; off < bvec->bv_len; off += fs_info->sectorsize, total_sector_nr++) { u8 csum_buf[BTRFS_CSUM_SIZE]; @@ -1599,12 +1596,11 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio, if (!test_bit(total_sector_nr, rbio->csum_bitmap)) continue; - ret = btrfs_check_sector_csum(fs_info, kaddr + off, - csum_buf, expected_csum); + ret = btrfs_check_block_csum(fs_info, bvec_phys(bvec) + off, + csum_buf, expected_csum); if (ret < 0) set_bit(total_sector_nr, rbio->error_bitmap); } - kunmap_local(kaddr); } } @@ -1802,7 +1798,6 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio, struct sector_ptr *sector; u8 csum_buf[BTRFS_CSUM_SIZE]; u8 *csum_expected; - void *kaddr; int ret; if (!rbio->csum_bitmap || !rbio->csum_buf) @@ -1824,9 +1819,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio, csum_expected = rbio->csum_buf + (stripe_nr * rbio->stripe_nsectors + sector_nr) * fs_info->csum_size; - kaddr = kmap_local_sector(sector); - ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected); - kunmap_local(kaddr); + ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected); return ret; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2f10c65929dc..cef260ed854c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -696,6 +696,20 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr) return page_address(page) + offset_in_page(offset); } +static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr) +{ + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + u32 offset = (sector_nr << fs_info->sectorsize_bits); + const struct page *page = stripe->pages[offset >> PAGE_SHIFT]; + + /* stripe->pages[] is allocated by us and no highmem is allowed. */ + ASSERT(page); + ASSERT(!PageHighMem(page)); + /* And the range must be contained inside the page. */ + ASSERT(offset_in_page(offset) + fs_info->sectorsize <= PAGE_SIZE); + return page_to_phys(page) + offset_in_page(offset); +} + static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; @@ -788,7 +802,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct scrub_sector_verification *sector = &stripe->sectors[sector_nr]; const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; - void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr); + phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr); u8 csum_buf[BTRFS_CSUM_SIZE]; int ret; @@ -833,7 +847,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) return; } - ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum); + ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum); if (ret < 0) { scrub_bitmap_set_bit_csum_error(stripe, sector_nr); scrub_bitmap_set_bit_error(stripe, sector_nr);