From 608c2501675cd37260312d35daa2fd6dc6d56e96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Dec 2024 22:46:24 +0000 Subject: [PATCH 01/16] squashfs: use a folio throughout squashfs_read_folio() Use modern folio APIs where they exist and convert back to struct page for the internal functions. Link: https://lkml.kernel.org/r/20241220224634.723899-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 21aaa96856c1..bc6598c3a48f 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -445,21 +445,19 @@ static int squashfs_readpage_sparse(struct page *page, int expected) static int squashfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_SHIFT); + int index = folio->index >> (msblk->block_log - PAGE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int expected = index == file_end ? (i_size_read(inode) & (msblk->block_size - 1)) : msblk->block_size; int res = 0; - void *pageaddr; TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); + folio->index, squashfs_i(inode)->start); - if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> + if (folio->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT)) goto out; @@ -472,23 +470,18 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) goto out; if (res == 0) - res = squashfs_readpage_sparse(page, expected); + res = squashfs_readpage_sparse(&folio->page, expected); else - res = squashfs_readpage_block(page, block, res, expected); + res = squashfs_readpage_block(&folio->page, block, res, expected); } else - res = squashfs_readpage_fragment(page, expected); + res = squashfs_readpage_fragment(&folio->page, expected); if (!res) return 0; out: - pageaddr = kmap_atomic(page); - memset(pageaddr, 0, PAGE_SIZE); - kunmap_atomic(pageaddr); - flush_dcache_page(page); - if (res == 0) - SetPageUptodate(page); - unlock_page(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_end_read(folio, res == 0); return res; } -- 2.51.0 From 8c1565fcf6392d2f962b7cbc9fde43bfcd175c2f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Dec 2024 22:46:25 +0000 Subject: [PATCH 02/16] squashfs: pass a folio to squashfs_readpage_fragment() Remove an access to page->mapping. Link: https://lkml.kernel.org/r/20241220224634.723899-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index bc6598c3a48f..6bd16e12493b 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -417,9 +417,9 @@ skip_page: } /* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page, int expected) +static int squashfs_readpage_fragment(struct folio *folio, int expected) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); @@ -430,7 +430,7 @@ static int squashfs_readpage_fragment(struct page *page, int expected) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(page, buffer, expected, + squashfs_copy_cache(&folio->page, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); @@ -474,7 +474,7 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) else res = squashfs_readpage_block(&folio->page, block, res, expected); } else - res = squashfs_readpage_fragment(&folio->page, expected); + res = squashfs_readpage_fragment(folio, expected); if (!res) return 0; -- 2.51.0 From 2a7aea59cf4dd4a070c4550fddaffc5a73312cad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Dec 2024 22:46:26 +0000 Subject: [PATCH 03/16] squashfs: convert squashfs_readpage_block() to take a folio Remove a few accesses to page->mapping. Link: https://lkml.kernel.org/r/20241220224634.723899-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 2 +- fs/squashfs/file_cache.c | 6 +++--- fs/squashfs/file_direct.c | 11 +++++------ fs/squashfs/squashfs.h | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 6bd16e12493b..5b81e26b1226 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -472,7 +472,7 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) if (res == 0) res = squashfs_readpage_sparse(&folio->page, expected); else - res = squashfs_readpage_block(&folio->page, block, res, expected); + res = squashfs_readpage_block(folio, block, res, expected); } else res = squashfs_readpage_fragment(folio, expected); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index 54c17b7c85fd..0360d22a77d4 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -18,9 +18,9 @@ #include "squashfs.h" /* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected) +int squashfs_readpage_block(struct folio *folio, u64 block, int bsize, int expected) { - struct inode *i = page->mapping->host; + struct inode *i = folio->mapping->host; struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, block, bsize); int res = buffer->error; @@ -29,7 +29,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expecte ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(page, buffer, expected, 0); + squashfs_copy_cache(&folio->page, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index d19d4db74af8..2c3e809d6891 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -19,12 +19,11 @@ #include "page_actor.h" /* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, - int expected) - +int squashfs_readpage_block(struct folio *folio, u64 block, int bsize, + int expected) { - struct folio *folio = page_folio(target_page); - struct inode *inode = target_page->mapping->host; + struct page *target_page = &folio->page; + struct inode *inode = folio->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT; int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; @@ -48,7 +47,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, /* Try to grab all the pages covered by the Squashfs block */ for (i = 0, index = start_index; index <= end_index; index++) { page[i] = (index == folio->index) ? target_page : - grab_cache_page_nowait(target_page->mapping, index); + grab_cache_page_nowait(folio->mapping, index); if (page[i] == NULL) continue; diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 2c45b9b938e9..9922a9460ce6 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -78,7 +78,7 @@ void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); /* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int, int); +int squashfs_readpage_block(struct folio *, u64 block, int bsize, int expected); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); -- 2.51.0 From 5641371fd0b3703d11809a0ae249aed270cb8add Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Dec 2024 22:46:27 +0000 Subject: [PATCH 04/16] squashfs; convert squashfs_copy_cache() to take a folio Remove accesses to page->index and page->mapping. Also use folio APIs where available. This code still assumes order 0 folios. [dan.carpenter@linaro.org: fix a NULL vs IS_ERR() bug] Link: https://lkml.kernel.org/r/7b7f44d6-9153-4d7c-b65b-2d78febe6c7a@stanley.mountain Link: https://lkml.kernel.org/r/20241220224634.723899-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Dan Carpenter Cc: Phillip Lougher Cc: Dan Carpenter Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 46 ++++++++++++++++++++++------------------ fs/squashfs/file_cache.c | 2 +- fs/squashfs/squashfs.h | 4 ++-- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 5b81e26b1226..74076c4823c3 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -378,13 +378,15 @@ void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, } /* Copy data into page cache */ -void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, - int bytes, int offset) +void squashfs_copy_cache(struct folio *folio, + struct squashfs_cache_entry *buffer, size_t bytes, + size_t offset) { - struct inode *inode = page->mapping->host; + struct address_space *mapping = folio->mapping; + struct inode *inode = mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; - int start_index = page->index & ~mask, end_index = start_index | mask; + int start_index = folio->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -394,25 +396,27 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, */ for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_SIZE, offset += PAGE_SIZE) { - struct page *push_page; - int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0; + struct folio *push_folio; + size_t avail = buffer ? min(bytes, PAGE_SIZE) : 0; - TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); + TRACE("bytes %zu, i %d, available_bytes %zu\n", bytes, i, avail); - push_page = (i == page->index) ? page : - grab_cache_page_nowait(page->mapping, i); + push_folio = (i == folio->index) ? folio : + __filemap_get_folio(mapping, i, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(mapping)); - if (!push_page) + if (IS_ERR(push_folio)) continue; - if (PageUptodate(push_page)) - goto skip_page; + if (folio_test_uptodate(push_folio)) + goto skip_folio; - squashfs_fill_page(push_page, buffer, offset, avail); -skip_page: - unlock_page(push_page); - if (i != page->index) - put_page(push_page); + squashfs_fill_page(&push_folio->page, buffer, offset, avail); +skip_folio: + folio_unlock(push_folio); + if (i != folio->index) + folio_put(push_folio); } } @@ -430,16 +434,16 @@ static int squashfs_readpage_fragment(struct folio *folio, int expected) squashfs_i(inode)->fragment_block, squashfs_i(inode)->fragment_size); else - squashfs_copy_cache(&folio->page, buffer, expected, + squashfs_copy_cache(folio, buffer, expected, squashfs_i(inode)->fragment_offset); squashfs_cache_put(buffer); return res; } -static int squashfs_readpage_sparse(struct page *page, int expected) +static int squashfs_readpage_sparse(struct folio *folio, int expected) { - squashfs_copy_cache(page, NULL, expected, 0); + squashfs_copy_cache(folio, NULL, expected, 0); return 0; } @@ -470,7 +474,7 @@ static int squashfs_read_folio(struct file *file, struct folio *folio) goto out; if (res == 0) - res = squashfs_readpage_sparse(&folio->page, expected); + res = squashfs_readpage_sparse(folio, expected); else res = squashfs_readpage_block(folio, block, res, expected); } else diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index 0360d22a77d4..40e59a43d098 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -29,7 +29,7 @@ int squashfs_readpage_block(struct folio *folio, u64 block, int bsize, int expec ERROR("Unable to read page, block %llx, size %x\n", block, bsize); else - squashfs_copy_cache(&folio->page, buffer, expected, 0); + squashfs_copy_cache(folio, buffer, expected, 0); squashfs_cache_put(buffer); return res; diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9922a9460ce6..8c6fbef022f4 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -74,8 +74,8 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, /* file.c */ void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); -void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, - int); +void squashfs_copy_cache(struct folio *, struct squashfs_cache_entry *, + size_t bytes, size_t offset); /* file_xxx.c */ int squashfs_readpage_block(struct folio *, u64 block, int bsize, int expected); -- 2.51.0 From 5748be3e9ef4659f9adf7e1ad454fafb0868cd88 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 20 Dec 2024 22:46:28 +0000 Subject: [PATCH 05/16] squashfs: convert squashfs_fill_page() to take a folio squashfs_fill_page is only used in this file, so make it static. Use kmap_local instead of kmap_atomic, and return a bool so that the caller can use folio_end_read() which saves an atomic operation over calling folio_mark_uptodate() followed by folio_unlock(). [willy@infradead.org: fix polarity of "uptodate" Thanks to Ryan for testing] Link: https://lkml.kernel.org/r/20250110163300.3346321-2-willy@infradead.org Link: https://lkml.kernel.org/r/20241220224634.723899-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Ryan Roberts Cc: Phillip Lougher Signed-off-by: Andrew Morton --- fs/squashfs/file.c | 21 ++++++++++++--------- fs/squashfs/squashfs.h | 1 - 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 74076c4823c3..5ca2baa16dc2 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -362,19 +362,21 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return squashfs_block_size(size); } -void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail) +static bool squashfs_fill_page(struct folio *folio, + struct squashfs_cache_entry *buffer, size_t offset, + size_t avail) { - int copied; + size_t copied; void *pageaddr; - pageaddr = kmap_atomic(page); + pageaddr = kmap_local_folio(folio, 0); copied = squashfs_copy_data(pageaddr, buffer, offset, avail); memset(pageaddr + copied, 0, PAGE_SIZE - copied); - kunmap_atomic(pageaddr); + kunmap_local(pageaddr); - flush_dcache_page(page); - if (copied == avail) - SetPageUptodate(page); + flush_dcache_folio(folio); + + return copied == avail; } /* Copy data into page cache */ @@ -398,6 +400,7 @@ void squashfs_copy_cache(struct folio *folio, bytes -= PAGE_SIZE, offset += PAGE_SIZE) { struct folio *push_folio; size_t avail = buffer ? min(bytes, PAGE_SIZE) : 0; + bool updated = false; TRACE("bytes %zu, i %d, available_bytes %zu\n", bytes, i, avail); @@ -412,9 +415,9 @@ void squashfs_copy_cache(struct folio *folio, if (folio_test_uptodate(push_folio)) goto skip_folio; - squashfs_fill_page(&push_folio->page, buffer, offset, avail); + updated = squashfs_fill_page(push_folio, buffer, offset, avail); skip_folio: - folio_unlock(push_folio); + folio_end_read(push_folio, updated); if (i != folio->index) folio_put(push_folio); } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 8c6fbef022f4..218868b20f16 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -73,7 +73,6 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); /* file.c */ -void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int); void squashfs_copy_cache(struct folio *, struct squashfs_cache_entry *, size_t bytes, size_t offset); -- 2.51.0 From 97549ce6848e5e30b667378b28c3cff937d167d2 Mon Sep 17 00:00:00 2001 From: Tio Zhang Date: Tue, 24 Dec 2024 17:53:44 +0800 Subject: [PATCH 06/16] kthread: correct comments before kthread_queue_work() s/kthread_worker_create/kthread_create_worker/ to avoid confusion when reading comments before kthread_queue_work(). Link: https://lkml.kernel.org/r/20241224095344.GA7587@didi-ThinkCentre-M930t-N000 Signed-off-by: Tio Zhang Signed-off-by: Andrew Morton --- kernel/kthread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index a5ac612b1609..2fd0daa6b3b6 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1015,7 +1015,7 @@ static void kthread_insert_work(struct kthread_worker *worker, * @work: kthread_work to queue * * Queue @work to work processor @task for async execution. @task - * must have been created with kthread_worker_create(). Returns %true + * must have been created with kthread_create_worker(). Returns %true * if @work was successfully queued, %false if it was already pending. * * Reinitialize the work if it needs to be used by another worker. -- 2.51.0 From bb2de9b04942fab82cad19a31f2612deab9751be Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Tue, 31 Dec 2024 09:54:14 -0600 Subject: [PATCH 07/16] MAINTAINERS: fix list entries with display names get_maintainers.pl doesn't expect list entries to have a display name. Entries with a display name are omitted and print just the description: (open list:PIN CONTROLLER - FREESCALE) These cases are pretty much aliases to a few people, not lists which are archived and can be subscribed to. Change these cases to be reviewers instead. Link: https://lkml.kernel.org/r/20241231155415.186244-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) Cc: Arnd Bergmann Cc: Joe Perches Signed-off-by: Andrew Morton --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 30cbc3d44cd5..953feba08959 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2829,7 +2829,7 @@ ARM/NXP S32G ARCHITECTURE R: Chester Lin R: Matthias Brugger R: Ghennadi Procopciuc -L: NXP S32 Linux Team +R: NXP S32 Linux Team L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm64/boot/dts/freescale/s32g*.dts* @@ -16604,8 +16604,8 @@ F: arch/nios2/ NITRO ENCLAVES (NE) M: Alexandru Ciobotaru +R: The AWS Nitro Enclaves Team L: linux-kernel@vger.kernel.org -L: The AWS Nitro Enclaves Team S: Supported W: https://aws.amazon.com/ec2/nitro/nitro-enclaves/ F: Documentation/virt/ne_overview.rst @@ -16616,8 +16616,8 @@ F: samples/nitro_enclaves/ NITRO SECURE MODULE (NSM) M: Alexander Graf +R: The AWS Nitro Enclaves Team L: linux-kernel@vger.kernel.org -L: The AWS Nitro Enclaves Team S: Supported W: https://aws.amazon.com/ec2/nitro/nitro-enclaves/ F: drivers/misc/nsm.c @@ -18429,8 +18429,8 @@ M: Fabio Estevam M: Shawn Guo M: Jacky Bai R: Pengutronix Kernel Team +R: NXP S32 Linux Team L: linux-gpio@vger.kernel.org -L: NXP S32 Linux Team S: Maintained F: Documentation/devicetree/bindings/pinctrl/fsl,* F: Documentation/devicetree/bindings/pinctrl/nxp,s32* @@ -19565,7 +19565,7 @@ F: drivers/ras/amd/fmpm.c RASPBERRY PI PISP BACK END M: Jacopo Mondi -L: Raspberry Pi Kernel Maintenance +R: Raspberry Pi Kernel Maintenance L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/raspberrypi,pispbe.yaml -- 2.51.0 From 4e0a15f8b4bd47548032acccdbeb5b9083b3675e Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 7 Jan 2025 01:01:03 +0800 Subject: [PATCH 08/16] lib/sort: clarify comparison function requirements in sort_r() Patch series "lib: clarify comparison function requirements", v2. Add a detailed explanation in the sort_r/list_sort kernel doc comment specifying that the comparison function must satisfy antisymmetry and transitivity. These properties are essential for the sorting algorithm to produce correct results. Issues have arisen in the past [1][2][3][4] where comparison functions violated the transitivity property, causing sorting algorithms to fail to correctly order elements. While these requirements may seem straightforward, they are commonly misunderstood or overlooked, leading to bugs. Highlighting these properties in the documentation will help prevent such mistakes in the future. Link: https://lore.kernel.org/lkml/20240701205639.117194-1-visitorckw@gmail.com [1] Link: https://lore.kernel.org/lkml/20241203202228.1274403-1-visitorckw@gmail.com [2] Link: https://lore.kernel.org/lkml/20241209134226.1939163-1-visitorckw@gmail.com [3] Link: https://lore.kernel.org/lkml/20241209145728.1975311-1-visitorckw@gmail.com [4] This patch (of 2): Add a detailed explanation in the sort_r() kernel doc comment specifying that the comparison function must satisfy antisymmetry and transitivity. These properties are essential for the sorting algorithm to produce correct results. Issues have arisen in the past [1][2][3][4] where comparison functions violated the transitivity property, causing sorting algorithms to fail to correctly order elements. While these requirements may seem straightforward, they are commonly misunderstood or overlooked, leading to bugs. Highlighting these properties in the documentation will help prevent such mistakes in the future. Link: https://lkml.kernel.org/r/20250106170104.3137845-1-visitorckw@gmail.com Link: https://lore.kernel.org/lkml/20240701205639.117194-1-visitorckw@gmail.com [1] Link: https://lore.kernel.org/lkml/20241203202228.1274403-1-visitorckw@gmail.com [2] Link: https://lore.kernel.org/lkml/20241209134226.1939163-1-visitorckw@gmail.com [3] Link: https://lore.kernel.org/lkml/20241209145728.1975311-1-visitorckw@gmail.com [4] Link: https://lkml.kernel.org/r/20250106170104.3137845-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: Signed-off-by: Andrew Morton --- lib/sort.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/sort.c b/lib/sort.c index 048b7a6ef967..8e73dc55476b 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -200,6 +200,13 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) * copy (e.g. fix up pointers or auxiliary data), but the built-in swap * avoids a slow retpoline and so is significantly faster. * + * The comparison function must adhere to specific mathematical + * properties to ensure correct and stable sorting: + * - Antisymmetry: cmp_func(a, b) must return the opposite sign of + * cmp_func(b, a). + * - Transitivity: if cmp_func(a, b) <= 0 and cmp_func(b, c) <= 0, then + * cmp_func(a, c) <= 0. + * * Sorting time is O(n log n) both on average and worst-case. While * quicksort is slightly faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make -- 2.51.0 From e420460ba443e8ad1cd71568c50b6e09d13fb106 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Tue, 7 Jan 2025 01:01:04 +0800 Subject: [PATCH 09/16] lib/list_sort: clarify comparison function requirements in list_sort() Add a detailed explanation in the list_sort() kernel doc comment specifying that the comparison function must satisfy antisymmetry and transitivity. These properties are essential for the sorting algorithm to produce correct results. Issues have arisen in the past [1][2][3][4] where comparison functions violated the transitivity property, causing sorting algorithms to fail to correctly order elements. While these requirements may seem straightforward, they are commonly misunderstood or overlooked, leading to bugs. Highlighting these properties in the documentation will help prevent such mistakes in the future. Link: https://lore.kernel.org/lkml/20240701205639.117194-1-visitorckw@gmail.com [1] Link: https://lore.kernel.org/lkml/20241203202228.1274403-1-visitorckw@gmail.com [2] Link: https://lore.kernel.org/lkml/20241209134226.1939163-1-visitorckw@gmail.com [3] Link: https://lore.kernel.org/lkml/20241209145728.1975311-1-visitorckw@gmail.com [4] Link: https://lkml.kernel.org/r/20250106170104.3137845-3-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: Signed-off-by: Andrew Morton --- lib/list_sort.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/list_sort.c b/lib/list_sort.c index 8d3f623536fe..a310ecb7ccc0 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -108,6 +108,13 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, * and list_sort is a stable sort, so it is not necessary to distinguish * the @a < @b and @a == @b cases. * + * The comparison function must adhere to specific mathematical properties + * to ensure correct and stable sorting: + * - Antisymmetry: cmp(@a, @b) must return the opposite sign of + * cmp(@b, @a). + * - Transitivity: if cmp(@a, @b) <= 0 and cmp(@b, @c) <= 0, then + * cmp(@a, @c) <= 0. + * * This is compatible with two styles of @cmp function: * - The traditional style which returns <0 / =0 / >0, or * - Returning a boolean 0/1. -- 2.51.0 From 34bb50c42335b9316cbdb9506c2c4126a66a52ee Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Mon, 6 Jan 2025 10:34:31 +0800 Subject: [PATCH 10/16] ocfs2: check el->l_next_free_rec in ocfs2_get_clusters_nocache Recently syzbot reported a use-after-free issue[1]. The root cause of the problem is that the journal inode recorded in this file system image is corrupted. The value of "di->id2.i_list.l_next_free_rec" is 8193, which is greater than the value of "di->id2.i_list.l_count" (19). To solve this problem, an additional check should be added within ocfs2_get_clusters_nocache(). If the check fails, an error will be returned and the file system will be set to read-only. [1]: https://lore.kernel.org/all/67577778.050a0220.a30f1.01bc.GAE@google.com/T/ Link: https://lkml.kernel.org/r/20250106023432.1320904-1-sunjunchao2870@gmail.com Signed-off-by: Julian Sun Reported-by: syzbot+2313dda4dc4885c93578@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2313dda4dc4885c93578 Tested-by: syzbot+2313dda4dc4885c93578@syzkaller.appspotmail.com Reviewed-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Signed-off-by: Andrew Morton --- fs/ocfs2/extent_map.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index f7672472fa82..930150ed5db1 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -435,6 +435,16 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, } } + if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { + ocfs2_error(inode->i_sb, + "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n", + inode->i_ino, + le16_to_cpu(el->l_next_free_rec), + le16_to_cpu(el->l_count)); + ret = -EROFS; + goto out; + } + i = ocfs2_search_extent_list(el, v_cluster); if (i == -1) { /* -- 2.51.0 From fdbb6cd96ed51da002bd9a5a142586a74a436784 Mon Sep 17 00:00:00 2001 From: Julian Sun Date: Mon, 6 Jan 2025 10:34:32 +0800 Subject: [PATCH 11/16] ocfs2: correct l_next_free_rec in online check Correct the value of l_next_free_rec to l_count during the online check, as done in the check_el() function in ocfs2_tools. Link: https://lkml.kernel.org/r/20250106023432.1320904-2-sunjunchao2870@gmail.com Signed-off-by: Julian Sun Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/inode.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index cd3173062ae3..12e5d1f73325 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -200,6 +200,20 @@ bail: return inode; } +static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di) +{ + /* inodes flagged with other stuff in id2 */ + if (di->i_flags & (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | + OCFS2_CHAIN_FL | OCFS2_DEALLOC_FL)) + return 0; + /* i_flags doesn't indicate when id2 is a fast symlink */ + if (S_ISLNK(di->i_mode) && di->i_size && di->i_clusters == 0) + return 0; + if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) + return 0; + + return 1; +} /* * here's how inodes get read from disk: @@ -1547,6 +1561,16 @@ static int ocfs2_filecheck_repair_inode_block(struct super_block *sb, le32_to_cpu(di->i_fs_generation)); } + if (ocfs2_dinode_has_extents(di) && + le16_to_cpu(di->id2.i_list.l_next_free_rec) > le16_to_cpu(di->id2.i_list.l_count)) { + di->id2.i_list.l_next_free_rec = di->id2.i_list.l_count; + changed = 1; + mlog(ML_ERROR, + "Filecheck: reset dinode #%llu: l_next_free_rec to %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(di->id2.i_list.l_next_free_rec)); + } + if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) { ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check); mark_buffer_dirty(bh); -- 2.51.0 From 982209f632c6850ef798411ad978f4ac15d8ba90 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Linus=20L=C3=BCssing?= Date: Wed, 8 Jan 2025 04:58:39 +0100 Subject: [PATCH 12/16] =?utf8?q?mailmap:=20update=20entry=20for=20Linus=20?= =?utf8?q?L=C3=BCssing?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Mapping another old, obsolete work email address to my primary one. Link: https://lkml.kernel.org/r/20250108035840.25194-1-linus.luessing@c0d3.blue Signed-off-by: Linus Lüssing Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index f5f97f947020..b85b4ca70124 100644 --- a/.mailmap +++ b/.mailmap @@ -405,6 +405,7 @@ Liam Mark Linas Vepstas Linus Lüssing Linus Lüssing +Linus Lüssing Li Yang Li Yang -- 2.51.0 From 01676ecd0b5c897ebb420c5a09a658e3514cc6d8 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Wed, 8 Jan 2025 10:41:19 +0800 Subject: [PATCH 13/16] ocfs2: check tl->count of truncate log inode in ocfs2_get_truncate_log_info syz reported: (syz-executor404,5313,0):ocfs2_truncate_log_append:5874 ERROR: bug expression: tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || tl_count == 0 (syz-executor404,5313,0):ocfs2_truncate_log_append:5874 ERROR: Truncate record count on #77 invalid wanted 39, actual 2087 ------------[ cut here ]------------ kernel BUG at fs/ocfs2/alloc.c:5874! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 0 UID: 0 PID: 5313 Comm: syz-executor404 Not tainted 6.12.0-rc5-syzkaller-00299-g11066801dd4b #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:ocfs2_truncate_log_append+0x9a8/0x9c0 fs/ocfs2/alloc.c:5868 RSP: 0018:ffffc9000cf16f40 EFLAGS: 00010292 RAX: b4b54f1d10640800 RBX: 0000000000000027 RCX: b4b54f1d10640800 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffffc9000cf17070 R08: ffffffff8174a14c R09: 1ffff11003f8519a R10: dffffc0000000000 R11: ffffed1003f8519b R12: 1ffff110085f5f58 R13: ffffff3800000000 R14: 000000000000004d R15: ffff8880438f0008 FS: 00005555722df380(0000) GS:ffff88801fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000002000f000 CR3: 000000004010e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ocfs2_remove_btree_range+0x1303/0x1860 fs/ocfs2/alloc.c:5789 ocfs2_remove_inode_range+0xff3/0x29f0 fs/ocfs2/file.c:1907 ocfs2_reflink_remap_extent fs/ocfs2/refcounttree.c:4537 [inline] ocfs2_reflink_remap_blocks+0xcd4/0x1f30 fs/ocfs2/refcounttree.c:4684 ocfs2_remap_file_range+0x5fa/0x8d0 fs/ocfs2/file.c:2736 vfs_copy_file_range+0xc07/0x1510 fs/read_write.c:1615 __do_sys_copy_file_range fs/read_write.c:1705 [inline] __se_sys_copy_file_range+0x3f2/0x5d0 fs/read_write.c:1668 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fd327167af9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 61 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe6b8e22e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000146 RAX: ffffffffffffffda RBX: 00007fd3271b005e RCX: 00007fd327167af9 RDX: 0000000000000006 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00007fd3271de610 R08: 000000000000d8c2 R09: 0000000000000000 R10: 0000000020000640 R11: 0000000000000246 R12: 0000000000000001 R13: 00007ffe6b8e24b8 R14: 0000000000000001 R15: 0000000000000001 The fuzz image has a truncate log inode whose tl_count is bigger than ocfs2_truncate_recs_per_inode() so it triggers the BUG in ocfs2_truncate_log_append(). As what the check in ocfs2_truncate_log_append() does, just do same check into ocfs2_get_truncate_log_info when truncate log inode is reading in so we can bail out earlier. Link: https://lkml.kernel.org/r/20250108024119.60313-1-glass.su@suse.com Signed-off-by: Su Yue Reported-by: Liebes Wang Link: https://lore.kernel.org/ocfs2-devel/CADCV8souQhdP0RdQF1U7KTWtuHDfpn+3LnTt-EEuMmB-pMRrgQ@mail.gmail.com/T/#u Reported-by: syzbot+a66542ca5ebb4233b563@syzkaller.appspotmail.com Tested-by: syzbot+a66542ca5ebb4233b563@syzkaller.appspotmail.com Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5cf698785fae..4414743b638e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6154,6 +6154,9 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, int status; struct inode *inode = NULL; struct buffer_head *bh = NULL; + struct ocfs2_dinode *di; + struct ocfs2_truncate_log *tl; + unsigned int tl_count; inode = ocfs2_get_system_file_inode(osb, TRUNCATE_LOG_SYSTEM_INODE, @@ -6171,6 +6174,18 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, goto bail; } + di = (struct ocfs2_dinode *)bh->b_data; + tl = &di->id2.i_dealloc; + tl_count = le16_to_cpu(tl->tl_count); + if (unlikely(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || + tl_count == 0)) { + status = -EFSCORRUPTED; + iput(inode); + brelse(bh); + mlog_errno(status); + goto bail; + } + *tl_inode = inode; *tl_bh = bh; bail: -- 2.51.0 From 276c61385f6bc3223a5ecd307cf4aba2dfbb9a31 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Mon, 6 Jan 2025 22:06:53 +0800 Subject: [PATCH 14/16] ocfs2: mark dquot as inactive if failed to start trans while releasing dquot While running fstests generic/329, the kernel workqueue quota_release_workfn is dead looping in calling ocfs2_release_dquot(). The ocfs2 state is already readonly but ocfs2_release_dquot wants to start a transaction but fails and returns. ===================================================================== [ 2918.123602 ][ T275 ] On-disk corruption discovered. Please run fsck.ocfs2 once the filesystem is unmounted. [ 2918.124034 ][ T275 ] (kworker/u135:1,275,11):ocfs2_release_dquot:765 ERROR: status = -30 [ 2918.124452 ][ T275 ] (kworker/u135:1,275,11):ocfs2_release_dquot:795 ERROR: status = -30 [ 2918.124883 ][ T275 ] (kworker/u135:1,275,11):ocfs2_start_trans:357 ERROR: status = -30 [ 2918.125276 ][ T275 ] OCFS2: abort (device dm-0): ocfs2_start_trans: Detected aborted journal [ 2918.125710 ][ T275 ] On-disk corruption discovered. Please run fsck.ocfs2 once the filesystem is unmounted. ===================================================================== ocfs2_release_dquot() is much like dquot_release(), which is called by ext4 to handle similar situation. So here fix it by marking the dquot as inactive like what dquot_release() does. Link: https://lkml.kernel.org/r/20250106140653.92292-1-glass.su@suse.com Fixes: 9e33d69f553a ("ocfs2: Implementation of local and global quota file handling") Signed-off-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/quota_global.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 3404e7a30c33..15d9acd456ec 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -761,6 +761,11 @@ static int ocfs2_release_dquot(struct dquot *dquot) handle = ocfs2_start_trans(osb, ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type)); if (IS_ERR(handle)) { + /* + * Mark dquot as inactive to avoid endless cycle in + * quota_release_workfn(). + */ + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; -- 2.51.0 From 840265f7b2795ec12a04ef948f52d12cd96f42c3 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Mon, 6 Jan 2025 22:06:34 +0800 Subject: [PATCH 15/16] ocfs2: remove parameter parent_fe_bh from __ocfs2_mknod_locked The parameter is not used in __ocfs2_mknod_locked(). So remove it. No functional change. Link: https://lkml.kernel.org/r/20250106140634.92241-1-glass.su@suse.com Signed-off-by: Su Yue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton --- fs/ocfs2/namei.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 5550f8afa438..0ec63a1a94b8 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -508,7 +508,6 @@ static int __ocfs2_mknod_locked(struct inode *dir, struct inode *inode, dev_t dev, struct buffer_head **new_fe_bh, - struct buffer_head *parent_fe_bh, handle_t *handle, struct ocfs2_alloc_context *inode_ac, u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) @@ -641,8 +640,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, - parent_fe_bh, handle, inode_ac, - fe_blkno, suballoc_loc, suballoc_bit); + handle, inode_ac, fe_blkno, + suballoc_loc, suballoc_bit); } static int ocfs2_mkdir(struct mnt_idmap *idmap, @@ -2576,7 +2575,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, clear_nlink(inode); /* do the real work now. */ status = __ocfs2_mknod_locked(dir, inode, - 0, &new_di_bh, parent_di_bh, handle, + 0, &new_di_bh, handle, inode_ac, di_blkno, suballoc_loc, suballoc_bit); if (status < 0) { -- 2.51.0 From ca76bb226bf47ff04c782cacbd299f12ddee1ec1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 8 Jan 2025 05:00:46 +0900 Subject: [PATCH 16/16] nilfs2: do not force clear folio if buffer is referenced Patch series "nilfs2: protect busy buffer heads from being force-cleared". This series fixes the buffer head state inconsistency issues reported by syzbot that occurs when the filesystem is corrupted and falls back to read-only, and the associated buffer head use-after-free issue. This patch (of 2): Syzbot has reported that after nilfs2 detects filesystem corruption and falls back to read-only, inconsistencies in the buffer state may occur. One of the inconsistencies is that when nilfs2 calls mark_buffer_dirty() to set a data or metadata buffer as dirty, but it detects that the buffer is not in the uptodate state: WARNING: CPU: 0 PID: 6049 at fs/buffer.c:1177 mark_buffer_dirty+0x2e5/0x520 fs/buffer.c:1177 ... Call Trace: nilfs_palloc_commit_alloc_entry+0x4b/0x160 fs/nilfs2/alloc.c:598 nilfs_ifile_create_inode+0x1dd/0x3a0 fs/nilfs2/ifile.c:73 nilfs_new_inode+0x254/0x830 fs/nilfs2/inode.c:344 nilfs_mkdir+0x10d/0x340 fs/nilfs2/namei.c:218 vfs_mkdir+0x2f9/0x4f0 fs/namei.c:4257 do_mkdirat+0x264/0x3a0 fs/namei.c:4280 __do_sys_mkdirat fs/namei.c:4295 [inline] __se_sys_mkdirat fs/namei.c:4293 [inline] __x64_sys_mkdirat+0x87/0xa0 fs/namei.c:4293 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The other is when nilfs_btree_propagate(), which propagates the dirty state to the ancestor nodes of a b-tree that point to a dirty buffer, detects that the origin buffer is not dirty, even though it should be: WARNING: CPU: 0 PID: 5245 at fs/nilfs2/btree.c:2089 nilfs_btree_propagate+0xc79/0xdf0 fs/nilfs2/btree.c:2089 ... Call Trace: nilfs_bmap_propagate+0x75/0x120 fs/nilfs2/bmap.c:345 nilfs_collect_file_data+0x4d/0xd0 fs/nilfs2/segment.c:587 nilfs_segctor_apply_buffers+0x184/0x340 fs/nilfs2/segment.c:1006 nilfs_segctor_scan_file+0x28c/0xa50 fs/nilfs2/segment.c:1045 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1216 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1540 [inline] nilfs_segctor_do_construct+0x1c28/0x6b90 fs/nilfs2/segment.c:2115 nilfs_segctor_construct+0x181/0x6b0 fs/nilfs2/segment.c:2479 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2587 [inline] nilfs_segctor_thread+0x69e/0xe80 fs/nilfs2/segment.c:2701 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Both of these issues are caused by the callbacks that handle the page/folio write requests, forcibly clear various states, including the working state of the buffers they hold, at unexpected times when they detect read-only fallback. Fix these issues by checking if the buffer is referenced before clearing the page/folio state, and skipping the clear if it is. Link: https://lkml.kernel.org/r/20250107200202.6432-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20250107200202.6432-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+b2b14916b77acf8626d7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b2b14916b77acf8626d7 Reported-by: syzbot+d98fd19acd08b36ff422@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=d98fd19acd08b36ff422 Fixes: 8c26c4e2694a ("nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption") Tested-by: syzbot+b2b14916b77acf8626d7@syzkaller.appspotmail.com Signed-off-by: Andrew Morton --- fs/nilfs2/page.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 9de2a494a069..899686d2e5f7 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -392,6 +392,11 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded + * + * nilfs_clear_folio_dirty() clears working states including dirty state for + * the folio and its buffers. If the folio has buffers, clear only if it is + * confirmed that none of the buffer heads are busy (none have valid + * references and none are locked). */ void nilfs_clear_folio_dirty(struct folio *folio) { @@ -399,10 +404,6 @@ void nilfs_clear_folio_dirty(struct folio *folio) BUG_ON(!folio_test_locked(folio)); - folio_clear_uptodate(folio); - folio_clear_mappedtodisk(folio); - folio_clear_checked(folio); - head = folio_buffers(folio); if (head) { const unsigned long clear_bits = @@ -410,6 +411,25 @@ void nilfs_clear_folio_dirty(struct folio *folio) BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | BIT(BH_Delay)); + bool busy, invalidated = false; + +recheck_buffers: + busy = false; + bh = head; + do { + if (atomic_read(&bh->b_count) | buffer_locked(bh)) { + busy = true; + break; + } + } while (bh = bh->b_this_page, bh != head); + + if (busy) { + if (invalidated) + return; + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } bh = head; do { @@ -419,6 +439,9 @@ void nilfs_clear_folio_dirty(struct folio *folio) } while (bh = bh->b_this_page, bh != head); } + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); __nilfs_clear_folio_dirty(folio); } -- 2.51.0