When I enabled ext4 debug for fault injection testing, I encountered the
following warning:
EXT4-fs error (device sda): ext4_read_inode_bitmap:201: comm fsstress:
Cannot read inode bitmap - block_group = 8, inode_bitmap = 1051
WARNING: CPU: 0 PID: 511 at fs/buffer.c:1181 mark_buffer_dirty+0x1b3/0x1d0
The root cause of the issue lies in the improper implementation of ext4's
buffer_head read fault injection. The actual completion of buffer_head
read and the buffer_head fault injection are not atomic, which can lead
to the uptodate flag being cleared on normally used buffer_heads in race
conditions.
[CPU0] [CPU1] [CPU2]
ext4_read_inode_bitmap
ext4_read_bh()
<bh read complete>
ext4_read_inode_bitmap
if (buffer_uptodate(bh))
return bh
jbd2_journal_commit_transaction
__jbd2_journal_refile_buffer
__jbd2_journal_unfile_buffer
__jbd2_journal_temp_unlink_buffer
ext4_simulate_fail_bh()
clear_buffer_uptodate
mark_buffer_dirty
<report warning>
WARN_ON_ONCE(!buffer_uptodate(bh))
The best approach would be to perform fault injection in the IO completion
callback function, rather than after IO completion. However, the IO
completion callback function cannot get the fault injection code in sb.
Fix it by passing the result of fault injection into the bh read function,
we simulate faults within the bh read function itself. This requires adding
an extra parameter to the bh read functions that need fault injection.
Fixes: 46f870d690fe ("ext4: simulate various I/O and checksum errors when reading metadata")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Link: https://patch.msgid.link/20240906091746.510163-1-leo.lilong@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
(ignore_locked ? REQ_RAHEAD : 0),
- ext4_end_bitmap_read);
+ ext4_end_bitmap_read,
+ ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_EIO));
return bh;
verify:
err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
if (!desc)
return -EFSCORRUPTED;
wait_on_buffer(bh);
- ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
if (!buffer_uptodate(bh)) {
ext4_error_err(sb, EIO, "Cannot read block bitmap - "
"block_group = %u, block_bitmap = %llu",
return false;
}
-static inline void ext4_simulate_fail_bh(struct super_block *sb,
- struct buffer_head *bh,
- unsigned long code)
-{
- if (!IS_ERR(bh) && ext4_simulate_fail(sb, code))
- clear_buffer_uptodate(bh);
-}
-
/*
* Error number codes for s_{first,last}_error_errno
*
extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block);
extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
- bh_end_io_t *end_io);
+ bh_end_io_t *end_io, bool simu_fail);
extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
- bh_end_io_t *end_io);
+ bh_end_io_t *end_io, bool simu_fail);
extern int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
if (!bh_uptodate_or_lock(bh)) {
trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
- err = ext4_read_bh(bh, 0, NULL);
+ err = ext4_read_bh(bh, 0, NULL, false);
if (err < 0)
goto errout;
}
* submit the buffer_head for reading
*/
trace_ext4_load_inode_bitmap(sb, block_group);
- ext4_read_bh(bh, REQ_META | REQ_PRIO, ext4_end_bitmap_read);
- ext4_simulate_fail_bh(sb, bh, EXT4_SIM_IBITMAP_EIO);
+ ext4_read_bh(bh, REQ_META | REQ_PRIO,
+ ext4_end_bitmap_read,
+ ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_EIO));
if (!buffer_uptodate(bh)) {
put_bh(bh);
ext4_error_err(sb, EIO, "Cannot read inode bitmap - "
}
if (!bh_uptodate_or_lock(bh)) {
- if (ext4_read_bh(bh, 0, NULL) < 0) {
+ if (ext4_read_bh(bh, 0, NULL, false) < 0) {
put_bh(bh);
goto failure;
}
* Read the block from disk.
*/
trace_ext4_load_inode(sb, ino);
- ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
+ ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL,
+ ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO));
blk_finish_plug(&plug);
wait_on_buffer(bh);
- ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO);
if (!buffer_uptodate(bh)) {
if (ret_block)
*ret_block = block;
}
lock_buffer(*bh);
- ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
+ ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL, false);
if (ret)
goto warn_exit;
unlock_buffer(bh);
continue;
}
- ext4_read_bh_nowait(bh, 0, NULL);
+ ext4_read_bh_nowait(bh, 0, NULL, false);
nr++;
} while (block++, (bh = bh->b_this_page) != head);
if (unlikely(!bh))
return NULL;
if (!bh_uptodate_or_lock(bh)) {
- if (ext4_read_bh(bh, 0, NULL) < 0) {
+ if (ext4_read_bh(bh, 0, NULL, false) < 0) {
brelse(bh);
return NULL;
}
static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
- bh_end_io_t *end_io)
+ bh_end_io_t *end_io, bool simu_fail)
{
+ if (simu_fail) {
+ clear_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ return;
+ }
+
/*
* buffer's verified bit is no longer valid after reading from
* disk again due to write out error, clear it to make sure we
}
void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
- bh_end_io_t *end_io)
+ bh_end_io_t *end_io, bool simu_fail)
{
BUG_ON(!buffer_locked(bh));
unlock_buffer(bh);
return;
}
- __ext4_read_bh(bh, op_flags, end_io);
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
}
-int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
+int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
{
BUG_ON(!buffer_locked(bh));
return 0;
}
- __ext4_read_bh(bh, op_flags, end_io);
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
{
lock_buffer(bh);
if (!wait) {
- ext4_read_bh_nowait(bh, op_flags, NULL);
+ ext4_read_bh_nowait(bh, op_flags, NULL, false);
return 0;
}
- return ext4_read_bh(bh, op_flags, NULL);
+ return ext4_read_bh(bh, op_flags, NULL, false);
}
/*
if (likely(bh)) {
if (trylock_buffer(bh))
- ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
+ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false);
brelse(bh);
}
}