From: Viacheslav Dubeyko Date: Wed, 16 Jul 2025 18:40:49 +0000 (-0700) Subject: ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=fbeafe782bd986bf75544526fb9c0284e045e0a4;p=users%2Fhch%2Fmisc.git ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag The Coverity Scan service has detected potential race conditions in ceph_block_o_direct(), ceph_start_io_read(), ceph_block_buffered(), and ceph_start_io_direct() [1 - 4]. The CID 1590942, 1590665, 1589664, 1590377 contain explanation: "The value of the shared data will be determined by the interleaving of thread execution. Thread shared data is accessed without holding an appropriate lock, possibly causing a race condition (CWE-366)". This patch reworks the pattern of accessing/modification of CEPH_I_ODIRECT flag by means of adding smp_mb__before_atomic() before reading the status of CEPH_I_ODIRECT flag and smp_mb__after_atomic() after clearing set/clear this flag. Also, it was reworked the pattern of using of ci->i_ceph_lock in ceph_block_o_direct(), ceph_start_io_read(), ceph_block_buffered(), and ceph_start_io_direct() methods. [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590942 [2] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590665 [3] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1589664 [4] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590377 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- diff --git a/fs/ceph/io.c b/fs/ceph/io.c index e10f44182a4c..2d10f49c93a9 100644 --- a/fs/ceph/io.c +++ b/fs/ceph/io.c @@ -21,14 +21,23 @@ /* Call with exclusively locked inode->i_rwsem */ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) { + bool is_odirect; + lockdep_assert_held_write(&inode->i_rwsem); - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ODIRECT; - spin_unlock(&ci->i_ceph_lock); - inode_dio_wait(inode); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + if (is_odirect) { + clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); + + if (is_odirect) + inode_dio_wait(inode); } /** @@ -50,6 +59,7 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) int ceph_start_io_read(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + bool is_odirect; int err; /* Be an optimist! */ @@ -57,7 +67,12 @@ int ceph_start_io_read(struct inode *inode) if (err) return err; - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + spin_unlock(&ci->i_ceph_lock); + if (!is_odirect) return 0; up_read(&inode->i_rwsem); @@ -116,12 +131,22 @@ ceph_end_io_write(struct inode *inode) /* Call with exclusively locked inode->i_rwsem */ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) { + bool is_odirect; + lockdep_assert_held_write(&inode->i_rwsem); - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_ODIRECT; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + if (!is_odirect) { + set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } + spin_unlock(&ci->i_ceph_lock); + + if (!is_odirect) { /* FIXME: unmap_mapping_range? */ filemap_write_and_wait(inode->i_mapping); } @@ -146,6 +171,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) int ceph_start_io_direct(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + bool is_odirect; int err; /* Be an optimist! */ @@ -153,7 +179,12 @@ int ceph_start_io_direct(struct inode *inode) if (err) return err; - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + spin_unlock(&ci->i_ceph_lock); + if (is_odirect) return 0; up_read(&inode->i_rwsem); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cf176aab0f82..d1e81e11661b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -638,7 +638,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ -#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ +#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ +#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */