ext4: fix race in buffer_head read fault injection
authorLong Li <leo.lilong@huawei.com>
Fri, 6 Sep 2024 09:17:46 +0000 (17:17 +0800)
committerTheodore Ts'o <tytso@mit.edu>
Wed, 13 Nov 2024 04:54:14 +0000 (23:54 -0500)
When I enabled ext4 debug for fault injection testing, I encountered the
following warning:

  EXT4-fs error (device sda): ext4_read_inode_bitmap:201: comm fsstress:
         Cannot read inode bitmap - block_group = 8, inode_bitmap = 1051
  WARNING: CPU: 0 PID: 511 at fs/buffer.c:1181 mark_buffer_dirty+0x1b3/0x1d0

The root cause of the issue lies in the improper implementation of ext4's
buffer_head read fault injection. The actual completion of buffer_head
read and the buffer_head fault injection are not atomic, which can lead
to the uptodate flag being cleared on normally used buffer_heads in race
conditions.

[CPU0]           [CPU1]         [CPU2]
ext4_read_inode_bitmap
  ext4_read_bh()
  <bh read complete>
                 ext4_read_inode_bitmap
                   if (buffer_uptodate(bh))
                     return bh
                               jbd2_journal_commit_transaction
                                 __jbd2_journal_refile_buffer
                                   __jbd2_journal_unfile_buffer
                                     __jbd2_journal_temp_unlink_buffer
  ext4_simulate_fail_bh()
    clear_buffer_uptodate
                                      mark_buffer_dirty
                                        <report warning>
                                        WARN_ON_ONCE(!buffer_uptodate(bh))

The best approach would be to perform fault injection in the IO completion
callback function, rather than after IO completion. However, the IO
completion callback function cannot get the fault injection code in sb.

Fix it by passing the result of fault injection into the bh read function,
we simulate faults within the bh read function itself. This requires adding
an extra parameter to the bh read functions that need fault injection.

Fixes: 46f870d690fe ("ext4: simulate various I/O and checksum errors when reading metadata")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Link: https://patch.msgid.link/20240906091746.510163-1-leo.lilong@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inode.c
fs/ext4/mmp.c
fs/ext4/move_extent.c
fs/ext4/resize.c
fs/ext4/super.c

index 591fb3f710be72cbed6e0a630ae796860870642a..8042ad87380897138d589d448e9c6a9f4a6d474a 100644 (file)
@@ -550,7 +550,8 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
        trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
        ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
                            (ignore_locked ? REQ_RAHEAD : 0),
-                           ext4_end_bitmap_read);
+                           ext4_end_bitmap_read,
+                           ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_EIO));
        return bh;
 verify:
        err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -577,7 +578,6 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
        if (!desc)
                return -EFSCORRUPTED;
        wait_on_buffer(bh);
-       ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
        if (!buffer_uptodate(bh)) {
                ext4_error_err(sb, EIO, "Cannot read block bitmap - "
                               "block_group = %u, block_bitmap = %llu",
index 44b0d418143c2e36a55ba857be3f383643c31121..bbffb76d9a9049090698af5d2d8a0414fdfbb19f 100644 (file)
@@ -1865,14 +1865,6 @@ static inline bool ext4_simulate_fail(struct super_block *sb,
        return false;
 }
 
-static inline void ext4_simulate_fail_bh(struct super_block *sb,
-                                        struct buffer_head *bh,
-                                        unsigned long code)
-{
-       if (!IS_ERR(bh) && ext4_simulate_fail(sb, code))
-               clear_buffer_uptodate(bh);
-}
-
 /*
  * Error number codes for s_{first,last}_error_errno
  *
@@ -3100,9 +3092,9 @@ extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
 extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
                                                   sector_t block);
 extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
-                               bh_end_io_t *end_io);
+                               bh_end_io_t *end_io, bool simu_fail);
 extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
-                       bh_end_io_t *end_io);
+                       bh_end_io_t *end_io, bool simu_fail);
 extern int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
 extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
 extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
index c144fe43a29f265fb4ca1584b106d73518e728dc..1c8123866d81a140cd914a6f65af6ed866a21e77 100644 (file)
@@ -568,7 +568,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
 
        if (!bh_uptodate_or_lock(bh)) {
                trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
-               err = ext4_read_bh(bh, 0, NULL);
+               err = ext4_read_bh(bh, 0, NULL, false);
                if (err < 0)
                        goto errout;
        }
index 7f1a5f90dbbdfff76d4d5e95ced11de3f8e8bf4e..21d228073d7954fe419a50c2536a86c7f6e833e4 100644 (file)
@@ -193,8 +193,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
         * submit the buffer_head for reading
         */
        trace_ext4_load_inode_bitmap(sb, block_group);
-       ext4_read_bh(bh, REQ_META | REQ_PRIO, ext4_end_bitmap_read);
-       ext4_simulate_fail_bh(sb, bh, EXT4_SIM_IBITMAP_EIO);
+       ext4_read_bh(bh, REQ_META | REQ_PRIO,
+                    ext4_end_bitmap_read,
+                    ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_EIO));
        if (!buffer_uptodate(bh)) {
                put_bh(bh);
                ext4_error_err(sb, EIO, "Cannot read inode bitmap - "
index 7404f0935c9032b0cf77fd98cd6d0773847a34a0..7de327fa7b1c51990075d7988f5b184de98294ff 100644 (file)
@@ -170,7 +170,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
                }
 
                if (!bh_uptodate_or_lock(bh)) {
-                       if (ext4_read_bh(bh, 0, NULL) < 0) {
+                       if (ext4_read_bh(bh, 0, NULL, false) < 0) {
                                put_bh(bh);
                                goto failure;
                        }
index 9b06c9e0a7c5924aa467fcb8026585ecccfe6bda..569887741bf14642e269ccc36368980f58efe01e 100644 (file)
@@ -4504,10 +4504,10 @@ make_io:
         * Read the block from disk.
         */
        trace_ext4_load_inode(sb, ino);
-       ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
+       ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL,
+                           ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO));
        blk_finish_plug(&plug);
        wait_on_buffer(bh);
-       ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO);
        if (!buffer_uptodate(bh)) {
                if (ret_block)
                        *ret_block = block;
index bd946d0c71b700f1eb19e6eb567081d845c4c7ea..d64c04ed061ae9ab65f8878ec71ef189e683f53c 100644 (file)
@@ -94,7 +94,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
        }
 
        lock_buffer(*bh);
-       ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
+       ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL, false);
        if (ret)
                goto warn_exit;
 
index b64661ea6e0ed767803a64890c18f306c16b0f1e..898443e98efc9e81d9db67a7d000eea2533d0ad8 100644 (file)
@@ -213,7 +213,7 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
                        unlock_buffer(bh);
                        continue;
                }
-               ext4_read_bh_nowait(bh, 0, NULL);
+               ext4_read_bh_nowait(bh, 0, NULL, false);
                nr++;
        } while (block++, (bh = bh->b_this_page) != head);
 
index a2704f064361066bbf551aa911c71583a1dfe1a6..72f77f78ae8df37c844e47d8933bd9269ba53178 100644 (file)
@@ -1300,7 +1300,7 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
        if (unlikely(!bh))
                return NULL;
        if (!bh_uptodate_or_lock(bh)) {
-               if (ext4_read_bh(bh, 0, NULL) < 0) {
+               if (ext4_read_bh(bh, 0, NULL, false) < 0) {
                        brelse(bh);
                        return NULL;
                }
index dce37f784b59f97341e37149077c82f29022ed71..cb0ce57bd8968273a9b9a50e3379144a5f9f00cb 100644 (file)
@@ -161,8 +161,14 @@ MODULE_ALIAS("ext3");
 
 
 static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
-                                 bh_end_io_t *end_io)
+                                 bh_end_io_t *end_io, bool simu_fail)
 {
+       if (simu_fail) {
+               clear_buffer_uptodate(bh);
+               unlock_buffer(bh);
+               return;
+       }
+
        /*
         * buffer's verified bit is no longer valid after reading from
         * disk again due to write out error, clear it to make sure we
@@ -176,7 +182,7 @@ static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
 }
 
 void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
-                        bh_end_io_t *end_io)
+                        bh_end_io_t *end_io, bool simu_fail)
 {
        BUG_ON(!buffer_locked(bh));
 
@@ -184,10 +190,11 @@ void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
                unlock_buffer(bh);
                return;
        }
-       __ext4_read_bh(bh, op_flags, end_io);
+       __ext4_read_bh(bh, op_flags, end_io, simu_fail);
 }
 
-int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
+int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+                bh_end_io_t *end_io, bool simu_fail)
 {
        BUG_ON(!buffer_locked(bh));
 
@@ -196,7 +203,7 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io
                return 0;
        }
 
-       __ext4_read_bh(bh, op_flags, end_io);
+       __ext4_read_bh(bh, op_flags, end_io, simu_fail);
 
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
@@ -208,10 +215,10 @@ int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
 {
        lock_buffer(bh);
        if (!wait) {
-               ext4_read_bh_nowait(bh, op_flags, NULL);
+               ext4_read_bh_nowait(bh, op_flags, NULL, false);
                return 0;
        }
-       return ext4_read_bh(bh, op_flags, NULL);
+       return ext4_read_bh(bh, op_flags, NULL, false);
 }
 
 /*
@@ -266,7 +273,7 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
 
        if (likely(bh)) {
                if (trylock_buffer(bh))
-                       ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
+                       ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false);
                brelse(bh);
        }
 }