www.infradead.org Git - users/hch/misc.git/commitdiff
ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag
author Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Wed, 16 Jul 2025 18:40:49 +0000 (11:40 -0700)
committer Ilya Dryomov <idryomov@gmail.com>
Wed, 8 Oct 2025 21:30:46 +0000 (23:30 +0200)
The Coverity Scan service has detected potential
race conditions in ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() [1 - 4].

CIDs 1590942, 1590665, 1589664, and 1590377 contain the explanation:
"The value of the shared data will be determined by
the interleaving of thread execution. Thread shared data is accessed
without holding an appropriate lock, possibly causing
a race condition (CWE-366)".

This patch reworks how the CEPH_I_ODIRECT flag is accessed and
modified: smp_mb__before_atomic() is added before reading the
state of the flag, and smp_mb__after_atomic() is added after
setting or clearing it. The use of ci->i_ceph_lock in
ceph_block_o_direct(), ceph_start_io_read(),
ceph_block_buffered(), and ceph_start_io_direct() is reworked
as well.

[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590942
[2] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590665
[3] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1589664
[4] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590377

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/io.c
fs/ceph/super.h

index e10f44182a4cbffe0c409063de73808a8d5cdc73..2d10f49c93a93741e342fa6b2d37f36dc8991283 100644 (file)
 /* Call with exclusively locked inode->i_rwsem */
 static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
 {
+       bool is_odirect;
+
        lockdep_assert_held_write(&inode->i_rwsem);
 
-       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
-               spin_lock(&ci->i_ceph_lock);
-               ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
-               spin_unlock(&ci->i_ceph_lock);
-               inode_dio_wait(inode);
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
+       if (is_odirect) {
+               clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
        }
+       spin_unlock(&ci->i_ceph_lock);
+
+       if (is_odirect)
+               inode_dio_wait(inode);
 }
 
 /**
@@ -50,6 +59,7 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
 int ceph_start_io_read(struct inode *inode)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       bool is_odirect;
        int err;
 
        /* Be an optimist! */
@@ -57,7 +67,12 @@ int ceph_start_io_read(struct inode *inode)
        if (err)
                return err;
 
-       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
+       spin_unlock(&ci->i_ceph_lock);
+       if (!is_odirect)
                return 0;
        up_read(&inode->i_rwsem);
 
@@ -116,12 +131,22 @@ ceph_end_io_write(struct inode *inode)
 /* Call with exclusively locked inode->i_rwsem */
 static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
 {
+       bool is_odirect;
+
        lockdep_assert_held_write(&inode->i_rwsem);
 
-       if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
-               spin_lock(&ci->i_ceph_lock);
-               ci->i_ceph_flags |= CEPH_I_ODIRECT;
-               spin_unlock(&ci->i_ceph_lock);
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
+       if (!is_odirect) {
+               set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
+               /* ensure modified bit is visible */
+               smp_mb__after_atomic();
+       }
+       spin_unlock(&ci->i_ceph_lock);
+
+       if (!is_odirect) {
                /* FIXME: unmap_mapping_range? */
                filemap_write_and_wait(inode->i_mapping);
        }
@@ -146,6 +171,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
 int ceph_start_io_direct(struct inode *inode)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       bool is_odirect;
        int err;
 
        /* Be an optimist! */
@@ -153,7 +179,12 @@ int ceph_start_io_direct(struct inode *inode)
        if (err)
                return err;
 
-       if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
+       spin_lock(&ci->i_ceph_lock);
+       /* ensure that bit state is consistent */
+       smp_mb__before_atomic();
+       is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
+       spin_unlock(&ci->i_ceph_lock);
+       if (is_odirect)
                return 0;
        up_read(&inode->i_rwsem);
 
index cf176aab0f82399aa4f8ea0fd16fb61f40c73030..d1e81e11661b130795dcabe100c9c96fe7236893 100644 (file)
@@ -638,7 +638,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_FLUSH_SNAPS     (1 << 8)  /* need flush snapss */
 #define CEPH_I_ERROR_WRITE     (1 << 9) /* have seen write errors */
 #define CEPH_I_ERROR_FILELOCK  (1 << 10) /* have seen file lock errors */
-#define CEPH_I_ODIRECT         (1 << 11) /* inode in direct I/O mode */
+#define CEPH_I_ODIRECT_BIT     (11) /* inode in direct I/O mode */
+#define CEPH_I_ODIRECT         (1 << CEPH_I_ODIRECT_BIT)
 #define CEPH_ASYNC_CREATE_BIT  (12)      /* async create in flight for this */
 #define CEPH_I_ASYNC_CREATE    (1 << CEPH_ASYNC_CREATE_BIT)
 #define CEPH_I_SHUTDOWN                (1 << 13) /* inode is no longer usable */