www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags
author: David Howells <dhowells@redhat.com>
Wed, 7 Aug 2024 18:38:46 +0000 (19:38 +0100)
committer: Christian Brauner <brauner@kernel.org>
Mon, 12 Aug 2024 20:03:27 +0000 (22:03 +0200)
The NETFS_RREQ_USE_PGPRIV2 and NETFS_RREQ_WRITE_TO_CACHE flags aren't used
correctly.  The problem is that we try to set them up in the request
initialisation, but the cache may still be in the process of being set up,
and so the state may not be correct.  Further, we secondarily sample the
cache state and make contradictory decisions later.

The issue arises because we set up the cache resources, which allows the
cache's ->prepare_read() to switch on NETFS_SREQ_COPY_TO_CACHE - which
triggers cache writing even if we didn't set the flags when allocating.

Fix this in the following way:

 (1) Drop NETFS_ICTX_USE_PGPRIV2 and instead set NETFS_RREQ_USE_PGPRIV2 in
     ->init_request() rather than trying to juggle that in
     netfs_alloc_request().

 (2) Repurpose NETFS_RREQ_USE_PGPRIV2 to merely indicate that if caching is
     to be done, then PG_private_2 is to be used rather than only setting
     it if we decide to cache and then having netfs_rreq_unlock_folios()
     set the non-PG_private_2 writeback-to-cache if it wasn't set.

 (3) Split netfs_rreq_unlock_folios() into two functions, one of which
     contains the deprecated code for using PG_private_2 to avoid
     accidentally doing the writeback path - and always use it if
     USE_PGPRIV2 is set.

 (4) As NETFS_ICTX_USE_PGPRIV2 is removed, make netfs_write_begin() always
     wait for PG_private_2.  This function is deprecated and only used by
     ceph anyway, and so label it so.

 (5) Drop the NETFS_RREQ_WRITE_TO_CACHE flag and use
     fscache_operation_valid() on the cache_resources instead.  This has
     the advantage of picking up the result of netfs_begin_cache_read() and
     fscache_begin_write_operation() - which are called after the object is
     initialised and will wait for the cache to come to a usable state.

Just reverting ae678317b95e[1] isn't a sufficient fix, so this needs to be
applied on top of that.  Without this as well, things like:

 rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: {

and:

 WARNING: CPU: 13 PID: 3621 at fs/ceph/caps.c:3386

may happen, along with some UAFs due to PG_private_2 not getting used to
wait on writeback completion.

Fixes: 2ff1e97587f4 ("netfs: Replace PG_fscache by setting folio->private and marking dirty")
Reported-by: Max Kellermann <max.kellermann@ionos.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Hristo Venev <hristo@venev.name>
cc: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/3575457.1722355300@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/1173209.1723152682@warthog.procyon.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/ceph/addr.c
fs/ceph/inode.c
fs/netfs/buffered_read.c
fs/netfs/objects.c
fs/netfs/write_issue.c
fs/nfs/fscache.c
fs/nfs/fscache.h
include/linux/netfs.h
include/trace/events/netfs.h

index 73b5a07bf94deed7402885d3d4b1bfd36877afdd..cc0a2240de98ee35937928a9e49e0ff87b249790 100644 (file)
@@ -424,6 +424,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
        struct ceph_netfs_request_data *priv;
        int ret = 0;
 
+       /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+       __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
        if (rreq->origin != NETFS_READAHEAD)
                return 0;
 
index 8f8de8f33abbfb5e1cc3f9114cfd7a74345e89a9..71cd70514efa5562bbee937b176f6431abd59363 100644 (file)
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        /* Set parameters for the netfs library */
        netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
-       /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-       __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
 
        spin_lock_init(&ci->i_ceph_lock);
 
index 424048f9ed1fdc664afbaef41c0816bb71abe0cd..27c750d39476260bc971591a4735748ba2c019e6 100644 (file)
@@ -9,6 +9,97 @@
 #include <linux/task_io_accounting_ops.h>
 #include "internal.h"
 
+/*
+ * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
+ * is using PG_private_2 and direct writing to the cache from here rather than
+ * marking the page for writeback.
+ *
+ * Note that we don't touch folio->private in this code.
+ */
+static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
+                                            size_t *account)
+{
+       struct netfs_io_subrequest *subreq;
+       struct folio *folio;
+       pgoff_t start_page = rreq->start / PAGE_SIZE;
+       pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+       bool subreq_failed = false;
+
+       XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+       /* Walk through the pagecache and the I/O request lists simultaneously.
+        * We may have a mixture of cached and uncached sections and we only
+        * really want to write out the uncached sections.  This is slightly
+        * complicated by the possibility that we might have huge pages with a
+        * mixture inside.
+        */
+       subreq = list_first_entry(&rreq->subrequests,
+                                 struct netfs_io_subrequest, rreq_link);
+       subreq_failed = (subreq->error < 0);
+
+       trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+
+       rcu_read_lock();
+       xas_for_each(&xas, folio, last_page) {
+               loff_t pg_end;
+               bool pg_failed = false;
+               bool folio_started = false;
+
+               if (xas_retry(&xas, folio))
+                       continue;
+
+               pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
+               for (;;) {
+                       loff_t sreq_end;
+
+                       if (!subreq) {
+                               pg_failed = true;
+                               break;
+                       }
+
+                       if (!folio_started &&
+                           test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
+                           fscache_operation_valid(&rreq->cache_resources)) {
+                               trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+                               folio_start_private_2(folio);
+                               folio_started = true;
+                       }
+
+                       pg_failed |= subreq_failed;
+                       sreq_end = subreq->start + subreq->len - 1;
+                       if (pg_end < sreq_end)
+                               break;
+
+                       *account += subreq->transferred;
+                       if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+                               subreq = list_next_entry(subreq, rreq_link);
+                               subreq_failed = (subreq->error < 0);
+                       } else {
+                               subreq = NULL;
+                               subreq_failed = false;
+                       }
+
+                       if (pg_end == sreq_end)
+                               break;
+               }
+
+               if (!pg_failed) {
+                       flush_dcache_folio(folio);
+                       folio_mark_uptodate(folio);
+               }
+
+               if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+                       if (folio->index == rreq->no_unlock_folio &&
+                           test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
+                               _debug("no unlock");
+                       else
+                               folio_unlock(folio);
+               }
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Unlock the folios in a read operation.  We need to set PG_writeback on any
  * folios we're going to write back before we unlock them.
@@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
                }
        }
 
+       /* Handle deprecated PG_private_2 case. */
+       if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+               netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
+               goto out;
+       }
+
        /* Walk through the pagecache and the I/O request lists simultaneously.
         * We may have a mixture of cached and uncached sections and we only
         * really want to write out the uncached sections.  This is slightly
@@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
                loff_t pg_end;
                bool pg_failed = false;
                bool wback_to_cache = false;
-               bool folio_started = false;
 
                if (xas_retry(&xas, folio))
                        continue;
@@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
                                pg_failed = true;
                                break;
                        }
-                       if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
-                               if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
-                                                              &subreq->flags)) {
-                                       trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
-                                       folio_start_private_2(folio);
-                                       folio_started = true;
-                               }
-                       } else {
-                               wback_to_cache |=
-                                       test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
-                       }
+
+                       wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
                        pg_failed |= subreq_failed;
                        sreq_end = subreq->start + subreq->len - 1;
                        if (pg_end < sreq_end)
@@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
        }
        rcu_read_unlock();
 
+out:
        task_io_account_read(account);
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
@@ -395,7 +483,7 @@ zero_out:
 }
 
 /**
- * netfs_write_begin - Helper to prepare for writing
+ * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
  * @ctx: The netfs context
  * @file: The file to read from
  * @mapping: The mapping to read from
@@ -426,6 +514,9 @@ zero_out:
  * inode before calling this.
  *
  * This is usable whether or not caching is enabled.
+ *
+ * Note that this should be considered deprecated and netfs_perform_write()
+ * used instead.
  */
 int netfs_write_begin(struct netfs_inode *ctx,
                      struct file *file, struct address_space *mapping,
@@ -507,11 +598,9 @@ retry:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
-       if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
-               ret = folio_wait_private_2_killable(folio);
-               if (ret < 0)
-                       goto error;
-       }
+       ret = folio_wait_private_2_killable(folio);
+       if (ret < 0)
+               goto error;
 have_folio_no_wait:
        *_folio = folio;
        _leave(" = 0");
index f4a642727479252de0212f80fe32ef23468e0ebf..0294df70c3ff4563e023be18421cb85a8cca4370 100644 (file)
@@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
        struct netfs_io_request *rreq;
        mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool;
        struct kmem_cache *cache = mempool->pool_data;
-       bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
-                             origin == NETFS_DIO_READ ||
-                             origin == NETFS_DIO_WRITE);
-       bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
        int ret;
 
        for (;;) {
@@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
        refcount_set(&rreq->ref, 1);
 
        __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
-       if (cached) {
-               __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
-               if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
-                       /* Filesystem uses deprecated PG_private_2 marking. */
-                       __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
-       }
        if (file && file->f_flags & O_NONBLOCK)
                __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
        if (rreq->netfs_ops->init_request) {
index 9258d30cffe3cf4275b20cd0944a817c238292b9..3f7e37e50c7d02456af24ae7cbdc9429d6875580 100644 (file)
@@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 {
        struct netfs_io_request *wreq;
        struct netfs_inode *ictx;
+       bool is_buffered = (origin == NETFS_WRITEBACK ||
+                           origin == NETFS_WRITETHROUGH);
 
        wreq = netfs_alloc_request(mapping, file, start, 0, origin);
        if (IS_ERR(wreq))
@@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
        _enter("R=%x", wreq->debug_id);
 
        ictx = netfs_inode(wreq->inode);
-       if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+       if (is_buffered && netfs_is_cache_enabled(ictx))
                fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
 
        wreq->contiguity = wreq->start;
index 7202ce84d0eb032784e241fa8a82e79486948d49..bf29a65c5027f47ab6f9732c6ab7c011086d38b2 100644 (file)
@@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
 {
        rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
        rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
+       /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+       __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
 
        return 0;
 }
index fbed0027996f8840ddbe7a16ebe457fa684c525c..e8adae1bc260a31fdab9c3ed91b7e1373609759e 100644 (file)
@@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
        netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
-       /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
-       __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
index 5d0288938cc2dc33579ae70bf462807340cf3a72..983816608f15d7b3c3dc0513bbcc0490f7c45c77 100644 (file)
@@ -73,8 +73,6 @@ struct netfs_inode {
 #define NETFS_ICTX_ODIRECT     0               /* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED  1               /* I/O should not use the pagecache */
 #define NETFS_ICTX_WRITETHROUGH        2               /* Write-through caching */
-#define NETFS_ICTX_USE_PGPRIV2 31              /* [DEPRECATED] Use PG_private_2 to mark
-                                                * write to cache on read */
 };
 
 /*
@@ -269,7 +267,6 @@ struct netfs_io_request {
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS  3       /* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED              4       /* The request failed */
 #define NETFS_RREQ_IN_PROGRESS         5       /* Unlocked when the request completes */
-#define NETFS_RREQ_WRITE_TO_CACHE      7       /* Need to write to the cache */
 #define NETFS_RREQ_UPLOAD_TO_SERVER    8       /* Need to write to the server */
 #define NETFS_RREQ_NONBLOCK            9       /* Don't block if possible (O_NONBLOCK) */
 #define NETFS_RREQ_BLOCKED             10      /* We blocked */
index 24ec3434d32ee9fe20896fd705e360e17bd34e0d..606b4a0f92dae2763ec9f389aa3ae96ced962f81 100644 (file)
@@ -51,6 +51,7 @@
        EM(netfs_rreq_trace_resubmit,           "RESUBMT")      \
        EM(netfs_rreq_trace_set_pause,          "PAUSE  ")      \
        EM(netfs_rreq_trace_unlock,             "UNLOCK ")      \
+       EM(netfs_rreq_trace_unlock_pgpriv2,     "UNLCK-2")      \
        EM(netfs_rreq_trace_unmark,             "UNMARK ")      \
        EM(netfs_rreq_trace_wait_ip,            "WAIT-IP")      \
        EM(netfs_rreq_trace_wait_pause,         "WT-PAUS")      \