www.infradead.org Git - nvme.git/commitdiff
bcachefs: __bch2_read(): call trans_begin() on every loop iter
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 14 Jul 2024 20:32:11 +0000 (16:32 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 14 Jul 2024 23:00:16 +0000 (19:00 -0400)
Perusal of /sys/kernel/debug/bcachefs/*/btree_transaction_stats shows
that the read path has been accumulating unneeded paths on the reflink
btree, which we don't want.

The solution is to call bch2_trans_begin(), which drops paths not used
on the previous loop iteration.

bch2_readahead:
  Max mem used: 0
  Transaction duration:
    count:      194235
                           since mount        recent
    duration of events
      min:                      150 ns
      max:                        9 ms
      total:                    838 ms
      mean:                       4 us          6 us
      stddev:                    34 us          7 us
    time between events
      min:                       10 ns
      max:                       15 h
      mean:                       2 s          12 s
      stddev:                     2 s           3 ms
  Maximum allocated btree paths (193):
    path: idx  2 ref 0:0 P   btree=extents l=0 pos 270943112:392:U32_MAX locks 0
    path: idx  3 ref 1:0   S btree=extents l=0 pos 270943112:24578:U32_MAX locks 1
    path: idx  4 ref 0:0 P   btree=reflink l=0 pos 0:24773509:0 locks 0
    path: idx  5 ref 0:0 P S btree=reflink l=0 pos 0:24773631:0 locks 1
    path: idx  6 ref 0:0 P S btree=reflink l=0 pos 0:24773759:0 locks 1
    path: idx  7 ref 0:0 P S btree=reflink l=0 pos 0:24773887:0 locks 1
    path: idx  8 ref 0:0 P S btree=reflink l=0 pos 0:24774015:0 locks 1
    path: idx  9 ref 0:0 P S btree=reflink l=0 pos 0:24774143:0 locks 1
    path: idx 10 ref 0:0 P S btree=reflink l=0 pos 0:24774271:0 locks 1
<many more reflink paths>

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/io_read.c

index 8b484c75757c1d070def1fc65add599b2ba0691a..4531c9ab3e12675738bde6af7d08351b073ebb45 100644 (file)
@@ -1147,34 +1147,27 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
        struct btree_iter iter;
        struct bkey_buf sk;
        struct bkey_s_c k;
-       u32 snapshot;
        int ret;
 
        BUG_ON(flags & BCH_READ_NODECODE);
 
        bch2_bkey_buf_init(&sk);
-retry:
-       bch2_trans_begin(trans);
-       iter = (struct btree_iter) { NULL };
-
-       ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-       if (ret)
-               goto err;
-
        bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
-                            SPOS(inum.inum, bvec_iter.bi_sector, snapshot),
+                            POS(inum.inum, bvec_iter.bi_sector),
                             BTREE_ITER_slots);
+
        while (1) {
                unsigned bytes, sectors, offset_into_extent;
                enum btree_id data_btree = BTREE_ID_extents;
 
-               /*
-                * read_extent -> io_time_reset may cause a transaction restart
-                * without returning an error, we need to check for that here:
-                */
-               ret = bch2_trans_relock(trans);
+               bch2_trans_begin(trans);
+
+               u32 snapshot;
+               ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
                if (ret)
-                       break;
+                       goto err;
+
+               bch2_btree_iter_set_snapshot(&iter, snapshot);
 
                bch2_btree_iter_set_pos(&iter,
                                POS(inum.inum, bvec_iter.bi_sector));
@@ -1182,7 +1175,7 @@ retry:
                k = bch2_btree_iter_peek_slot(&iter);
                ret = bkey_err(k);
                if (ret)
-                       break;
+                       goto err;
 
                offset_into_extent = iter.pos.offset -
                        bkey_start_offset(k.k);
@@ -1193,7 +1186,7 @@ retry:
                ret = bch2_read_indirect_extent(trans, &data_btree,
                                        &offset_into_extent, &sk);
                if (ret)
-                       break;
+                       goto err;
 
                k = bkey_i_to_s_c(sk.k);
 
@@ -1213,7 +1206,7 @@ retry:
                                         data_btree, k,
                                         offset_into_extent, failed, flags);
                if (ret)
-                       break;
+                       goto err;
 
                if (flags & BCH_READ_LAST_FRAGMENT)
                        break;
@@ -1223,16 +1216,16 @@ retry:
 
                ret = btree_trans_too_many_iters(trans);
                if (ret)
+                       goto err;
+err:
+               if (ret &&
+                   !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+                   ret != READ_RETRY &&
+                   ret != READ_RETRY_AVOID)
                        break;
        }
-err:
-       bch2_trans_iter_exit(trans, &iter);
-
-       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-           ret == READ_RETRY ||
-           ret == READ_RETRY_AVOID)
-               goto retry;
 
+       bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&sk, c);