btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
 }
 
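+/*
+ * Route an already mapped bio to the underlying device(s): the single mirror
+ * fast path, the RAID56 machinery, or one clone per mirror for regular
+ * mirrored writes.
+ */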
+static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
+                              struct btrfs_io_stripe *smap, int mirror_num)
+{
+       /* Do not leak our private flag into the block layer. */
+       bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
+
+       if (!bioc) {
+               /* Single mirror read/write fast path. */
+               btrfs_bio(bio)->mirror_num = mirror_num;
+               bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+               bio->bi_private = smap->dev;
+               bio->bi_end_io = btrfs_simple_end_io;
+               btrfs_submit_dev_bio(smap->dev, bio);
+       } else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               /* Parity RAID write or read recovery. */
+               bio->bi_private = bioc;
+               bio->bi_end_io = btrfs_raid56_end_io;
+               if (bio_op(bio) == REQ_OP_READ)
+                       raid56_parity_recover(bio, bioc, mirror_num);
+               else
+                       raid56_parity_write(bio, bioc);
+       } else {
+               /* Write to multiple mirrors. */
+               int total_devs = bioc->num_stripes;
+
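+               /* The original bio completes once all mirror clones finish. */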
+               bioc->orig_bio = bio;
+               for (int dev_nr = 0; dev_nr < total_devs; dev_nr++)
+                       btrfs_submit_mirrored_bio(bioc, dev_nr);
+       }
+}
+
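+/* Generate checksums: btree block csums for metadata, data csums otherwise. */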
+static blk_status_t btrfs_bio_csum(struct btrfs_bio *bbio)
+{
+       if (bbio->bio.bi_opf & REQ_META)
+               return btree_csum_one_bio(&bbio->bio);
+       return btrfs_csum_one_bio(bbio);
+}
+
+/*
+ * Async submit bios are used to offload expensive checksumming onto the worker
+ * threads.
+ */
+struct async_submit_bio {
+       struct btrfs_bio *bbio;
+       struct btrfs_io_context *bioc;
+       struct btrfs_io_stripe smap;
+       int mirror_num;
+       struct btrfs_work work;
+};
+
+/*
+ * In order to insert checksums into the metadata in large chunks, we wait
+ * until bio submission time. All the pages in the bio are checksummed and
+ * sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the csums attached on the ordered extent record are
+ * inserted into the btree.
+ */
+static void run_one_async_start(struct btrfs_work *work)
+{
+       struct async_submit_bio *async =
+               container_of(work, struct async_submit_bio, work);
+       blk_status_t ret;
+
+       ret = btrfs_bio_csum(async->bbio);
+       if (ret)
+               async->bbio->bio.bi_status = ret;
+}
+
+/*
+ * Runs on the ordered work queue after run_one_async_start() has finished
+ * checksumming, and hands the bio to the device(s). Ordered work items are
+ * executed in the order they were queued.
+ */
+static void run_one_async_done(struct btrfs_work *work)
+{
+       struct async_submit_bio *async =
+               container_of(work, struct async_submit_bio, work);
+       struct bio *bio = &async->bbio->bio;
+
+       /* If an error occurred we just want to clean up the bio and move on. */
+       if (bio->bi_status) {
+               btrfs_bio_end_io(async->bbio, bio->bi_status);
+               return;
+       }
+
+       /*
+        * All of the bios that pass through here are from async helpers.
+        * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
+        * This changes nothing when cgroups aren't in use.
+        */
+       bio->bi_opf |= REQ_CGROUP_PUNT;
+       __btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
+}
+
+static void run_one_async_free(struct btrfs_work *work)
+{
+       kfree(container_of(work, struct async_submit_bio, work));
+}
+
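+/*
+ * Decide whether checksumming for a write should be offloaded to a worker
+ * thread instead of being done inline in the submitting context.
+ */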
+static bool should_async_write(struct btrfs_bio *bbio)
+{
+       /*
+        * If the I/O is not issued by fsync and friends (->sync_writers == 0),
+        * then try to defer the submission to a workqueue to parallelize the
+        * checksum calculation.
+        */
+       if (atomic_read(&bbio->inode->sync_writers))
+               return false;
+
+       /*
+        * Submit metadata writes synchronously if the checksum implementation
+        * is fast, or if we are on a zoned device that requires I/O to be
+        * submitted in order.
+        */
+       if (bbio->bio.bi_opf & REQ_META) {
+               struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+
+               if (btrfs_is_zoned(fs_info))
+                       return false;
+               if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Submit bio to an async queue.
+ *
+ * Return true if the work has been successfully submitted, else false.
+ */
+static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
+                               struct btrfs_io_context *bioc,
+                               struct btrfs_io_stripe *smap, int mirror_num)
+{
+       struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
+       struct async_submit_bio *async;
+
+       async = kmalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               return false;
+
+       async->bbio = bbio;
+       async->bioc = bioc;
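+       /* Copy smap by value, the caller's copy lives on its stack. */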
+       async->smap = *smap;
+       async->mirror_num = mirror_num;
+
+       btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
+                       run_one_async_free);
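+       /*
+        * Queue synchronous bios on the dedicated high priority workers so
+        * that they are not delayed behind regular background writes.
+        */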
+       if (op_is_sync(bbio->bio.bi_opf))
+               btrfs_queue_work(fs_info->hipri_workers, &async->work);
+       else
+               btrfs_queue_work(fs_info->workers, &async->work);
+       return true;
+}
+
 void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
 {
        struct btrfs_bio *bbio = btrfs_bio(bio);
                        goto fail;
        }
 
-       /* Do not leak our private flag into the block layer. */
-       bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
-
-       if (!bioc) {
-               /* Single mirror read/write fast path */
-               bbio->mirror_num = mirror_num;
-               bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
-               bio->bi_private = smap.dev;
-               bio->bi_end_io = btrfs_simple_end_io;
-               btrfs_submit_dev_bio(smap.dev, bio);
-       } else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               /* Parity RAID write or read recovery */
-               bio->bi_private = bioc;
-               bio->bi_end_io = btrfs_raid56_end_io;
-               if (bio_op(bio) == REQ_OP_READ)
-                       raid56_parity_recover(bio, bioc, mirror_num);
-               else
-                       raid56_parity_write(bio, bioc);
-       } else {
-               /* Write to multiple mirrors */
-               int total_devs = bioc->num_stripes;
-               int dev_nr;
-
-               bioc->orig_bio = bio;
-               for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
-                       btrfs_submit_mirrored_bio(bioc, dev_nr);
+       if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+               /*
+                * Csum items for reloc roots have already been cloned at this
+                * point, so they are handled as part of the no-checksum case.
+                */
+               if (!(bbio->inode->flags & BTRFS_INODE_NODATASUM) &&
+                   !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
+                   !btrfs_is_data_reloc_root(bbio->inode->root)) {
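+                       /*
+                        * Offload the checksumming to a workqueue when
+                        * possible, and fall back to checksumming inline
+                        * otherwise.
+                        */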
+                       if (should_async_write(bbio) &&
+                           btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
+                               return;
+
+                       ret = btrfs_bio_csum(bbio);
+                       if (ret)
+                               goto fail;
+               }
        }
+
+       __btrfs_submit_bio(bio, bioc, &smap, mirror_num);
        return;
 
 fail:
 
        u64 cur_disk_bytenr = disk_start;
        u64 next_stripe_start;
        blk_status_t ret = BLK_STS_OK;
-       int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
        const bool use_append = btrfs_use_zone_append(inode, disk_start);
        const enum req_op bio_op = REQ_BTRFS_ONE_ORDERED |
                                   (use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE);
                        submit = true;
 
                if (submit) {
-                       if (!skip_sum) {
-                               ret = btrfs_csum_one_bio(btrfs_bio(bio));
-                               if (ret) {
-                                       btrfs_bio_end_io(btrfs_bio(bio), ret);
-                                       break;
-                               }
-                       }
-
                        ASSERT(bio->bi_iter.bi_size);
                        btrfs_submit_bio(fs_info, bio, 0);
                        bio = NULL;
 
 #include "relocation.h"
 #include "scrub.h"
 #include "super.h"
-#include "file-item.h"
 
 #define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN |\
                                 BTRFS_HEADER_FLAG_RELOC |\
                crypto_free_shash(fs_info->csum_shash);
 }
 
-/*
- * async submit bios are used to offload expensive checksumming
- * onto the worker threads.  They checksum file and metadata bios
- * just before they are sent down the IO stack.
- */
-struct async_submit_bio {
-       struct btrfs_inode *inode;
-       struct bio *bio;
-       enum btrfs_wq_submit_cmd submit_cmd;
-       int mirror_num;
-
-       /* Optional parameter for used by direct io */
-       u64 dio_file_offset;
-       struct btrfs_work work;
-       blk_status_t status;
-};
-
 /*
  * Compute the csum of a btree block and store the result to provided buffer.
  */
        return csum_one_extent_buffer(eb);
 }
 
-static blk_status_t btree_csum_one_bio(struct bio *bio)
+blk_status_t btree_csum_one_bio(struct bio *bio)
 {
        struct bio_vec *bvec;
        struct btrfs_root *root;
        return ret;
 }
 
-static void run_one_async_start(struct btrfs_work *work)
-{
-       struct async_submit_bio *async;
-       blk_status_t ret;
-
-       async = container_of(work, struct  async_submit_bio, work);
-       switch (async->submit_cmd) {
-       case WQ_SUBMIT_METADATA:
-               ret = btree_csum_one_bio(async->bio);
-               break;
-       case WQ_SUBMIT_DATA:
-       case WQ_SUBMIT_DATA_DIO:
-               ret = btrfs_csum_one_bio(btrfs_bio(async->bio));
-               break;
-       default:
-               /* Can't happen so return something that would prevent the IO. */
-               ret = BLK_STS_IOERR;
-               ASSERT(0);
-       }
-       if (ret)
-               async->status = ret;
-}
-
-/*
- * In order to insert checksums into the metadata in large chunks, we wait
- * until bio submission time.   All the pages in the bio are checksummed and
- * sums are attached onto the ordered extent record.
- *
- * At IO completion time the csums attached on the ordered extent record are
- * inserted into the tree.
- */
-static void run_one_async_done(struct btrfs_work *work)
-{
-       struct async_submit_bio *async =
-               container_of(work, struct  async_submit_bio, work);
-       struct btrfs_inode *inode = async->inode;
-       struct btrfs_bio *bbio = btrfs_bio(async->bio);
-
-       /* If an error occurred we just want to clean up the bio and move on */
-       if (async->status) {
-               btrfs_bio_end_io(bbio, async->status);
-               return;
-       }
-
-       /*
-        * All of the bios that pass through here are from async helpers.
-        * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
-        * This changes nothing when cgroups aren't in use.
-        */
-       async->bio->bi_opf |= REQ_CGROUP_PUNT;
-       btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num);
-}
-
-static void run_one_async_free(struct btrfs_work *work)
-{
-       struct async_submit_bio *async;
-
-       async = container_of(work, struct  async_submit_bio, work);
-       kfree(async);
-}
-
-/*
- * Submit bio to an async queue.
- *
- * Return:
- * - true if the work has been successfully submitted
- * - false in case of error
- */
-bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
-                        u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd)
-{
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       struct async_submit_bio *async;
-
-       async = kmalloc(sizeof(*async), GFP_NOFS);
-       if (!async)
-               return false;
-
-       async->inode = inode;
-       async->bio = bio;
-       async->mirror_num = mirror_num;
-       async->submit_cmd = cmd;
-
-       btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
-                       run_one_async_free);
-
-       async->dio_file_offset = dio_file_offset;
-
-       async->status = 0;
-
-       if (op_is_sync(bio->bi_opf))
-               btrfs_queue_work(fs_info->hipri_workers, &async->work);
-       else
-               btrfs_queue_work(fs_info->workers, &async->work);
-       return true;
-}
-
-static bool should_async_write(struct btrfs_fs_info *fs_info,
-                            struct btrfs_inode *bi)
-{
-       if (btrfs_is_zoned(fs_info))
-               return false;
-       if (atomic_read(&bi->sync_writers))
-               return false;
-       if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
-               return false;
-       return true;
-}
-
 void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num)
 {
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       struct btrfs_bio *bbio = btrfs_bio(bio);
-       blk_status_t ret;
-
        bio->bi_opf |= REQ_META;
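+       /* REQ_META routes writes to btree_csum_one_bio() in btrfs_bio_csum(). */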
-
-       if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
-               btrfs_submit_bio(fs_info, bio, mirror_num);
-               return;
-       }
-
-       /*
-        * Kthread helpers are used to submit writes so that checksumming can
-        * happen in parallel across all CPUs.
-        */
-       if (should_async_write(fs_info, inode) &&
-           btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA))
-               return;
-
-       ret = btree_csum_one_bio(bio);
-       if (ret) {
-               btrfs_bio_end_io(bbio, ret);
-               return;
-       }
-
-       btrfs_submit_bio(fs_info, bio, mirror_num);
+       btrfs_submit_bio(inode->root->fs_info, bio, mirror_num);
 }
 
 #ifdef CONFIG_MIGRATION
 
 int btrfs_read_extent_buffer(struct extent_buffer *buf,
                             struct btrfs_tree_parent_check *check);
 
-enum btrfs_wq_submit_cmd {
-       WQ_SUBMIT_METADATA,
-       WQ_SUBMIT_DATA,
-       WQ_SUBMIT_DATA_DIO,
-};
-
-bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num,
-                        u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd);
+blk_status_t btree_csum_one_bio(struct bio *bio);
 int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
 
                }
        }
 
-       /*
-        * If we need to checksum, and the I/O is not issued by fsync and
-        * friends, that is ->sync_writers != 0, defer the submission to a
-        * workqueue to parallelize it.
-        *
-        * Csum items for reloc roots have already been cloned at this point,
-        * so they are handled as part of the no-checksum case.
-        */
-       if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
-           !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
-           !btrfs_is_data_reloc_root(inode->root)) {
-               if (!atomic_read(&inode->sync_writers) &&
-                   btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_DATA))
-                       return;
-
-               ret = btrfs_csum_one_bio(btrfs_bio(bio));
-               if (ret) {
-                       btrfs_bio_end_io(btrfs_bio(bio), ret);
-                       return;
-               }
-       }
        btrfs_submit_bio(fs_info, bio, mirror_num);
 }
 
        btrfs_dio_private_put(dip);
 }
 
-static void btrfs_submit_dio_bio(struct bio *bio, struct btrfs_inode *inode,
-                                u64 file_offset, int async_submit)
-{
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       blk_status_t ret;
-
-       if (inode->flags & BTRFS_INODE_NODATASUM)
-               goto map;
-
-       if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
-               /* Check btrfs_submit_data_write_bio() for async submit rules */
-               if (async_submit && !atomic_read(&inode->sync_writers) &&
-                   btrfs_wq_submit_bio(inode, bio, 0, file_offset,
-                                       WQ_SUBMIT_DATA_DIO))
-                       return;
-
-               /*
-                * If we aren't doing async submit, calculate the csum of the
-                * bio now.
-                */
-               ret = btrfs_csum_one_bio(btrfs_bio(bio));
-               if (ret) {
-                       btrfs_bio_end_io(btrfs_bio(bio), ret);
-                       return;
-               }
-       }
-map:
-       btrfs_submit_bio(fs_info, bio, 0);
-}
-
 static void btrfs_submit_direct(const struct iomap_iter *iter,
                struct bio *dio_bio, loff_t file_offset)
 {
                container_of(dio_bio, struct btrfs_dio_private, bio);
        struct inode *inode = iter->inode;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
-                            BTRFS_BLOCK_GROUP_RAID56_MASK);
        struct bio *bio;
        u64 start_sector;
-       int async_submit = 0;
        u64 submit_len;
        u64 clone_offset = 0;
        u64 clone_len;
                 * We transfer the initial reference to the last bio, so we
                 * don't need to increment the reference count for the last one.
                 */
-               if (submit_len > 0) {
+               if (submit_len > 0)
                        refcount_inc(&dip->refs);
-                       /*
-                        * If we are submitting more than one bio, submit them
-                        * all asynchronously. The exception is RAID 5 or 6, as
-                        * asynchronous checksums make it difficult to collect
-                        * full stripe writes.
-                        */
-                       if (!raid56)
-                               async_submit = 1;
-               }
 
-               btrfs_submit_dio_bio(bio, BTRFS_I(inode), file_offset, async_submit);
+               btrfs_submit_bio(fs_info, bio, 0);
 
                dio_data->submitted += clone_len;
                clone_offset += clone_len;