block, mm: move PSI accounting out of the block layer
author     Christoph Hellwig <hch@lst.de>
           Thu, 31 Mar 2022 11:52:49 +0000 (13:52 +0200)
committer  Christoph Hellwig <hch@lst.de>
           Thu, 31 Mar 2022 12:05:24 +0000 (14:05 +0200)
The bio layer has no business doing PSI accounting.  Move it to the
callers of ->readpage and ->readahead in the generic page cache
handling code instead (the resulting call-site pattern is sketched
below).  This means some changes in coverage:

 - the accounting critical sections are generally longer, but this
   more accurately reflects the time actually spent refaulting
 - this now automatically also covers file systems that are not
   block based, such as the various network file systems
 - this does not automatically cover bio submissions for readahead
   windows enlarged in an ongoing readpage or readahead operation
   where file systems want to optimistically read more data (e.g.
   btrfs compressed reads)
 - it does not cover some cases where file systems read data into
   two address spaces in the same readpage/readpages request, which
   can happen in erofs
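
The new call-site pattern is essentially the following bracketing (a
condensed sketch of the filemap_read_folio() change in mm/filemap.c
below, with error handling elided):

	bool workingset = folio_test_workingset(folio);
	unsigned long pflags;

	if (unlikely(workingset))
		psi_memstall_enter(&pflags);
	error = mapping->a_ops->readpage(file, &folio->page);
	if (unlikely(workingset))
		psi_memstall_leave(&pflags);

For readahead, read_pages() applies the same bracketing around the
whole batch, with the workingset state OR-ed together over all folios
added to the batch, so a single workingset folio accounts the entire
submission.  Either way the stall time feeds the usual PSI interfaces
(/proc/pressure/memory and the cgroup memory.pressure files).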

Signed-off-by: Christoph Hellwig <hch@lst.de>
block/bio.c
block/blk-core.c
fs/direct-io.c
include/linux/blk_types.h
mm/filemap.c
mm/readahead.c

diff --git a/block/bio.c b/block/bio.c
index cdd7b2915c532c1590abafe0019594402b98cbc3..a9d2a8544cd8ccccec9639b38dcb0c6014433e1b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1034,9 +1034,6 @@ void __bio_add_page(struct bio *bio, struct page *page,
 
        bio->bi_iter.bi_size += len;
        bio->bi_vcnt++;
-
-       if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
-               bio_set_flag(bio, BIO_WORKINGSET);
 }
 EXPORT_SYMBOL_GPL(__bio_add_page);
 
@@ -1252,9 +1249,6 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
  * fit into the bio, or are requested in @iter, whatever is smaller. If
  * MM encounters an error pinning the requested pages, it stops. Error
  * is returned only if 0 pages could be pinned.
- *
- * It's intended for direct IO, so doesn't do PSI tracking, the caller is
- * responsible for setting BIO_WORKINGSET if necessary.
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
@@ -1273,8 +1267,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
                        ret = __bio_iov_iter_get_pages(bio, iter);
        } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
 
-       /* don't account direct I/O as memory stall */
-       bio_clear_flag(bio, BIO_WORKINGSET);
        return bio->bi_vcnt ? 0 : ret;
 }
 EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
diff --git a/block/blk-core.c b/block/blk-core.c
index 937bb6b863317a96908ae382c61e90fc1782b1b7..2b909060395eed2749c58a00f4d1bc31b13c93dc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -37,7 +37,6 @@
 #include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
-#include <linux/psi.h>
 #include <linux/part_stat.h>
 #include <linux/sched/sysctl.h>
 #include <linux/blk-crypto.h>
@@ -908,22 +907,6 @@ void submit_bio(struct bio *bio)
                }
        }
 
-       /*
-        * If we're reading data that is part of the userspace workingset, count
-        * submission time as memory stall.  When the device is congested, or
-        * the submitting cgroup IO-throttled, submission can be a significant
-        * part of overall IO time.
-        */
-       if (unlikely(bio_op(bio) == REQ_OP_READ &&
-           bio_flagged(bio, BIO_WORKINGSET))) {
-               unsigned long pflags;
-
-               psi_memstall_enter(&pflags);
-               submit_bio_noacct(bio);
-               psi_memstall_leave(&pflags);
-               return;
-       }
-
        submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index aef06e607b4054073f47b8cc4f8f274f903fd1a2..5cac8c8869c52af0d86496801a47b55361b0c04e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -419,8 +419,6 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
        unsigned long flags;
 
        bio->bi_private = dio;
-       /* don't account direct I/O as memory stall */
-       bio_clear_flag(bio, BIO_WORKINGSET);
 
        spin_lock_irqsave(&dio->bio_lock, flags);
        dio->refcount++;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index dd0763a1c6740f74b6a5fe4fb6d35d70ed610d6c..ec7771e83e176d6a80345f0a32c70e4fccf3eafb 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -314,7 +314,6 @@ enum {
        BIO_NO_PAGE_REF,        /* don't put release vec pages */
        BIO_CLONED,             /* doesn't own data */
        BIO_BOUNCED,            /* bio is a bounce bio */
-       BIO_WORKINGSET,         /* contains userspace workingset pages */
        BIO_QUIET,              /* Make BIO Quiet */
        BIO_CHAIN,              /* chained bio, ->bi_remaining in effect */
        BIO_REFFED,             /* bio has elevated ->bi_cnt */
diff --git a/mm/filemap.c b/mm/filemap.c
index 741c75d57977d1c69d0de1782bd9a8fcec0a88fa..5147727859989195f499e2dedbd32a0c752e313b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2416,6 +2416,8 @@ retry:
 static int filemap_read_folio(struct file *file, struct address_space *mapping,
                struct folio *folio)
 {
+       bool workingset = folio_test_workingset(folio);
+       unsigned long pflags;
        int error;
 
        /*
@@ -2424,8 +2426,13 @@ static int filemap_read_folio(struct file *file, struct address_space *mapping,
         * fails.
         */
        folio_clear_error(folio);
+
        /* Start the actual read. The read will unlock the page. */
+       if (unlikely(workingset))
+               psi_memstall_enter(&pflags);
        error = mapping->a_ops->readpage(file, &folio->page);
+       if (unlikely(workingset))
+               psi_memstall_leave(&pflags);
        if (error)
                return error;
 
@@ -3491,7 +3498,9 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 static struct folio *do_read_cache_folio(struct address_space *mapping,
                pgoff_t index, filler_t filler, void *data, gfp_t gfp)
 {
+       unsigned long pflags;
        struct folio *folio;
+       bool workingset;
        int err;
 
 repeat:
@@ -3541,10 +3550,15 @@ repeat:
                }
        }
 
+       workingset = folio_test_workingset(folio);
+       if (unlikely(workingset))
+               psi_memstall_enter(&pflags);
        if (filler)
                err = filler(data, &folio->page);
        else
                err = mapping->a_ops->readpage(data, &folio->page);
+       if (unlikely(workingset))
+               psi_memstall_leave(&pflags);
        if (err < 0) {
                folio_put(folio);
                return ERR_PTR(err);
diff --git a/mm/readahead.c b/mm/readahead.c
index 06f668108bdbe734435718ae480290231ea2a215..1c330be254c7154b9e591f65c873d5fc1389f663 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/psi.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
 #include <linux/mm_inline.h>
@@ -142,17 +143,19 @@ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 }
 EXPORT_SYMBOL_GPL(file_ra_state_init);
 
-static void read_pages(struct readahead_control *rac)
+static void read_pages(struct readahead_control *rac, bool workingset)
 {
        const struct address_space_operations *aops = rac->mapping->a_ops;
        struct page *page;
        struct blk_plug plug;
+       unsigned long pflags;
 
        if (!readahead_count(rac))
                return;
 
+       if (unlikely(workingset))
+               psi_memstall_enter(&pflags);
        blk_start_plug(&plug);
-
        if (aops->readahead) {
                aops->readahead(rac);
                /*
@@ -175,8 +178,9 @@ static void read_pages(struct readahead_control *rac)
                        put_page(page);
                }
        }
-
        blk_finish_plug(&plug);
+       if (unlikely(workingset))
+               psi_memstall_leave(&pflags);
 
        BUG_ON(readahead_count(rac));
 }
@@ -201,6 +205,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
        struct address_space *mapping = ractl->mapping;
        unsigned long index = readahead_index(ractl);
        gfp_t gfp_mask = readahead_gfp_mask(mapping);
+       bool workingset = false;
        unsigned long i;
 
        /*
@@ -231,9 +236,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
                         * have a stable reference to this page, and it's
                         * not worth getting one just for that.
                         */
-                       read_pages(ractl);
+                       read_pages(ractl, workingset);
                        ractl->_index++;
                        i = ractl->_index + ractl->_nr_pages - index - 1;
+                       workingset = false;
                        continue;
                }
 
@@ -243,14 +249,16 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
                if (filemap_add_folio(mapping, folio, index + i,
                                        gfp_mask) < 0) {
                        folio_put(folio);
-                       read_pages(ractl);
+                       read_pages(ractl, workingset);
                        ractl->_index++;
                        i = ractl->_index + ractl->_nr_pages - index - 1;
+                       workingset = false;
                        continue;
                }
                if (i == nr_to_read - lookahead_size)
                        folio_set_readahead(folio);
                ractl->_nr_pages++;
+               workingset |= folio_test_workingset(folio);
        }
 
        /*
@@ -258,7 +266,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
         * uptodate then the caller will launch readpage again, and
         * will then handle the error.
         */
-       read_pages(ractl);
+       read_pages(ractl, workingset);
        filemap_invalidate_unlock_shared(mapping);
        memalloc_nofs_restore(nofs);
 }
@@ -473,6 +481,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
        pgoff_t index = readahead_index(ractl);
        pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
        pgoff_t mark = index + ra->size - ra->async_size;
+       bool workingset = false;
        int err = 0;
        gfp_t gfp = readahead_gfp_mask(mapping);
 
@@ -518,6 +527,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
                        break;
                }
 
+               workingset |= folio_test_workingset(folio);
                ractl->_nr_pages += 1UL << order;
                index += 1UL << order;
        }
@@ -527,7 +537,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
                ra->async_size += index - limit - 1;
        }
 
-       read_pages(ractl);
+       read_pages(ractl, workingset);
 
        /*
         * If there were already pages in the page cache, then we may have