From 27773ce1776279ed3220a34d2e6bfcecaee7fc66 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 12 Mar 2025 08:38:49 +0100
Subject: [PATCH 01/16] Revert "xfs: add pre-content fsnotify hook for DAX
 faults"

This reverts commit 7f4796a46571ced5d3d5b0942e1bfea1eedaaecd.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250312073852.2123409-4-amir73il@gmail.com
---
 fs/xfs/xfs_file.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7a7d89c345e..9a435b1ff264 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1451,9 +1451,6 @@ xfs_dax_read_fault(
 
 	trace_xfs_read_fault(ip, order);
 
-	ret = filemap_fsnotify_fault(vmf);
-	if (unlikely(ret))
-		return ret;
 	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
 	ret = xfs_dax_fault_locked(vmf, order, false);
 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1482,16 +1479,6 @@ xfs_write_fault(
 	vm_fault_t		ret;
 
 	trace_xfs_write_fault(ip, order);
-	/*
-	 * Usually we get here from ->page_mkwrite callback but in case of DAX
-	 * we will get here also for ordinary write fault. Handle HSM
-	 * notifications for that case.
-	 */
-	if (IS_DAX(inode)) {
-		ret = filemap_fsnotify_fault(vmf);
-		if (unlikely(ret))
-			return ret;
-	}
 
 	sb_start_pagefault(inode->i_sb);
 	file_update_time(vmf->vma->vm_file);
-- 
2.51.0


From 955fbe0ef19df4197595a98d0906c94025c4beef Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 12 Mar 2025 08:38:50 +0100
Subject: [PATCH 02/16] Revert "fsnotify: generate pre-content permission event
 on page fault"

This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6.

In the use case of buffered write whose input buffer is mmapped file on a
filesystem with a pre-content mark, the prefaulting of the buffer can
happen under the filesystem freeze protection (obtained in vfs_write())
which breaks assumptions of pre-content hook and introduces potential
deadlock of HSM handler in userspace with filesystem freezing.

Now that we have pre-content hooks at file mmap() time, disable the
pre-content event hooks on page fault to avoid the potential deadlock.

Reported-by: syzbot+7229071b47908b19d5b7@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/
Fixes: 8392bc2ff8c8 ("fsnotify: generate pre-content permission event on page fault")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250312073852.2123409-5-amir73il@gmail.com
---
 include/linux/mm.h |  1 -
 mm/filemap.c       | 74 ----------------------------------------------
 mm/nommu.c         |  7 -----
 3 files changed, 82 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b1068ddcbb7..8483e09aeb2c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
 
 extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
diff --git a/mm/filemap.c b/mm/filemap.c
index 2974691fdfad..ff5fcdd96136 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,7 +47,6 @@
 #include <linux/splice.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -3336,48 +3335,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
 	return ret;
 }
 
-/**
- * filemap_fsnotify_fault - maybe emit a pre-content event.
- * @vmf:	struct vm_fault containing details of the fault.
- *
- * If we have a pre-content watch on this file we will emit an event for this
- * range.  If we return anything the fault caller should return immediately, we
- * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
- * fault again and then the fault handler will run the second time through.
- *
- * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
- */
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-	struct file *fpin = NULL;
-	int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
-	loff_t pos = vmf->pgoff >> PAGE_SHIFT;
-	size_t count = PAGE_SIZE;
-	int err;
-
-	/*
-	 * We already did this and now we're retrying with everything locked,
-	 * don't emit the event and continue.
-	 */
-	if (vmf->flags & FAULT_FLAG_TRIED)
-		return 0;
-
-	/* No watches, we're done. */
-	if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
-		return 0;
-
-	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-	if (!fpin)
-		return VM_FAULT_SIGBUS;
-
-	err = fsnotify_file_area_perm(fpin, mask, &pos, count);
-	fput(fpin);
-	if (err)
-		return VM_FAULT_SIGBUS;
-	return VM_FAULT_RETRY;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 /**
  * filemap_fault - read in file data for page fault handling
  * @vmf:	struct vm_fault containing details of the fault
@@ -3481,37 +3438,6 @@ retry_find:
 	 * or because readahead was otherwise unable to retrieve it.
 	 */
 	if (unlikely(!folio_test_uptodate(folio))) {
-		/*
-		 * If this is a precontent file we have can now emit an event to
-		 * try and populate the folio.
-		 */
-		if (!(vmf->flags & FAULT_FLAG_TRIED) &&
-		    unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
-			loff_t pos = folio_pos(folio);
-			size_t count = folio_size(folio);
-
-			/* We're NOWAIT, we have to retry. */
-			if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
-				folio_unlock(folio);
-				goto out_retry;
-			}
-
-			if (mapping_locked)
-				filemap_invalidate_unlock_shared(mapping);
-			mapping_locked = false;
-
-			folio_unlock(folio);
-			fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-			if (!fpin)
-				goto out_retry;
-
-			error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
-							count);
-			if (error)
-				ret = VM_FAULT_SIGBUS;
-			goto out_retry;
-		}
-
 		/*
 		 * If the invalidate lock is not held, the folio was in cache
 		 * and uptodate and now it is not. Strange but possible since we
diff --git a/mm/nommu.c b/mm/nommu.c
index baa79abdaf03..9cb6e99215e2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
 	BUG();
-- 
2.51.0


From 4f4dc3a9378bc2e6bcf331ee9e65a21abf67e7e0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 12 Mar 2025 08:38:51 +0100
Subject: [PATCH 03/16] Revert "mm: don't allow huge faults for files with pre
 content watches"

This reverts commit 20bf82a898b65c129af76deb96a1b415d3098a28.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250312073852.2123409-6-amir73il@gmail.com
---
 mm/memory.c | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index b9661ccfa64f..fb7b8dc75167 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -76,7 +76,6 @@
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/sysctl.h>
-#include <linux/fsnotify.h>
 
 #include <trace/events/kmem.h>
 
@@ -5750,17 +5749,8 @@ out_map:
 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-
 	if (vma_is_anonymous(vma))
 		return do_huge_pmd_anonymous_page(vmf);
-	/*
-	 * Currently we just emit PAGE_SIZE for our fault events, so don't allow
-	 * a huge fault if we have a pre content watch on this file.  This would
-	 * be trivial to support, but there would need to be tests to ensure
-	 * this works properly and those don't exist currently.
-	 */
-	if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-		return VM_FAULT_FALLBACK;
 	if (vma->vm_ops->huge_fault)
 		return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
 	return VM_FAULT_FALLBACK;
@@ -5784,9 +5774,6 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
 	}
 
 	if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
-		/* See comment in create_huge_pmd. */
-		if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-			goto split;
 		if (vma->vm_ops->huge_fault) {
 			ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
 			if (!(ret & VM_FAULT_FALLBACK))
@@ -5809,9 +5796,6 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 	/* No support for anonymous transparent PUD pages yet */
 	if (vma_is_anonymous(vma))
 		return VM_FAULT_FALLBACK;
-	/* See comment in create_huge_pmd. */
-	if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-		return VM_FAULT_FALLBACK;
 	if (vma->vm_ops->huge_fault)
 		return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -5829,9 +5813,6 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 	if (vma_is_anonymous(vma))
 		goto split;
 	if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
-		/* See comment in create_huge_pmd. */
-		if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
-			goto split;
 		if (vma->vm_ops->huge_fault) {
 			ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
 			if (!(ret & VM_FAULT_FALLBACK))
-- 
2.51.0


From 252256e416deb255607f0c4a69e7cfec079e5d61 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 12 Mar 2025 08:38:52 +0100
Subject: [PATCH 04/16] Revert "fanotify: disable readahead if we have
 pre-content watches"

This reverts commit fac84846a28c0950d4433118b3dffd44306df62d.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250312073852.2123409-7-amir73il@gmail.com
---
 mm/filemap.c   | 12 ------------
 mm/readahead.c | 14 --------------
 2 files changed, 26 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index ff5fcdd96136..6d616bb9001e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3197,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	unsigned long vm_flags = vmf->vma->vm_flags;
 	unsigned int mmap_miss;
 
-	/*
-	 * If we have pre-content watches we need to disable readahead to make
-	 * sure that we don't populate our mapping with 0 filled pages that we
-	 * never emitted an event for.
-	 */
-	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
-		return fpin;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* Use the readahead code, even if readahead is disabled */
 	if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
@@ -3273,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 	struct file *fpin = NULL;
 	unsigned int mmap_miss;
 
-	/* See comment in do_sync_mmap_readahead. */
-	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
-		return fpin;
-
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
 		return fpin;
diff --git a/mm/readahead.c b/mm/readahead.c
index 220155a5c964..6a4e96b69702 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -128,7 +128,6 @@
 #include <linux/blk-cgroup.h>
 #include <linux/fadvise.h>
 #include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
 
 #include "internal.h"
 
@@ -558,15 +557,6 @@ void page_cache_sync_ra(struct readahead_control *ractl,
 	unsigned long max_pages, contig_count;
 	pgoff_t prev_index, miss;
 
-	/*
-	 * If we have pre-content watches we need to disable readahead to make
-	 * sure that we don't find 0 filled pages in cache that we never emitted
-	 * events for. Filesystems supporting HSM must make sure to not call
-	 * this function with ractl->file unset for files handled by HSM.
-	 */
-	if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
-		return;
-
 	/*
 	 * Even if readahead is disabled, issue this request as readahead
 	 * as we'll need it to satisfy the requested range. The forced
@@ -645,10 +635,6 @@ void page_cache_async_ra(struct readahead_control *ractl,
 	if (!ra->ra_pages)
 		return;
 
-	/* See the comment in page_cache_sync_ra. */
-	if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
-		return;
-
 	/*
 	 * Same bit is used for PG_readahead and PG_reclaim.
 	 */
-- 
2.51.0


From 69a5a13a22b1def29dce62b5b7c86e6098c20c68 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 13 Mar 2025 09:56:07 -0400
Subject: [PATCH 05/16] bcachefs: target_congested -> get_random_u32_below()

get_random_u32_below() has a better algorithm than bch2_rand_range(),
it just didn't exist at the time.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_read.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 726da68073e2..aa91fcf51eec 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
 	}
 	rcu_read_unlock();
 
-	return bch2_rand_range(nr * CONGESTED_MAX) < total;
+	return get_random_u32_below(nr * CONGESTED_MAX) < total;
 }
 
 #else
-- 
2.51.0


From 9c18ea7ffee090b47afaa7dc41903fb1b436d7bd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 13 Mar 2025 11:16:28 -0400
Subject: [PATCH 06/16] bcachefs: bch2_get_random_u64_below()

steal the (clever) algorithm from get_random_u32_below()

this fixes a bug where we were passing roundup_pow_of_two() a 64 bit
number - we're squaring device latencies now:

[  +1.681698] ------------[ cut here ]------------
[  +0.000010] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13
[  +0.000011] shift exponent 64 is too large for 64-bit type 'long unsigned int'
[  +0.000011] CPU: 1 UID: 0 PID: 196 Comm: kworker/u32:13 Not tainted 6.14.0-rc6-dave+ #10
[  +0.000012] Hardware name: ASUS System Product Name/PRIME B460I-PLUS, BIOS 1301 07/13/2021
[  +0.000005] Workqueue: events_unbound __bch2_read_endio [bcachefs]
[  +0.000354] Call Trace:
[  +0.000005]  <TASK>
[  +0.000007]  dump_stack_lvl+0x5d/0x80
[  +0.000018]  ubsan_epilogue+0x5/0x30
[  +0.000008]  __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6
[  +0.000011]  bch2_rand_range.cold+0x17/0x20 [bcachefs]
[  +0.000231]  bch2_bkey_pick_read_device+0x547/0x920 [bcachefs]
[  +0.000229]  __bch2_read_extent+0x1e4/0x18e0 [bcachefs]
[  +0.000241]  ? bch2_btree_iter_peek_slot+0x3df/0x800 [bcachefs]
[  +0.000180]  ? bch2_read_retry_nodecode+0x270/0x330 [bcachefs]
[  +0.000230]  bch2_read_retry_nodecode+0x270/0x330 [bcachefs]
[  +0.000230]  bch2_rbio_retry+0x1fa/0x600 [bcachefs]
[  +0.000224]  ? bch2_printbuf_make_room+0x71/0xb0 [bcachefs]
[  +0.000243]  ? bch2_read_csum_err+0x4a4/0x610 [bcachefs]
[  +0.000278]  bch2_read_csum_err+0x4a4/0x610 [bcachefs]
[  +0.000227]  ? __bch2_read_endio+0x58b/0x870 [bcachefs]
[  +0.000220]  __bch2_read_endio+0x58b/0x870 [bcachefs]
[  +0.000268]  ? try_to_wake_up+0x31c/0x7f0
[  +0.000011]  ? process_one_work+0x176/0x330
[  +0.000008]  process_one_work+0x176/0x330
[  +0.000008]  worker_thread+0x252/0x390
[  +0.000008]  ? __pfx_worker_thread+0x10/0x10
[  +0.000006]  kthread+0xec/0x230
[  +0.000011]  ? __pfx_kthread+0x10/0x10
[  +0.000009]  ret_from_fork+0x31/0x50
[  +0.000009]  ? __pfx_kthread+0x10/0x10
[  +0.000008]  ret_from_fork_asm+0x1a/0x30
[  +0.000012]  </TASK>
[  +0.000046] ---[ end trace ]---

Reported-by: Roland Vet <vet.roland@protonmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/extents.c |  2 +-
 fs/bcachefs/util.c    | 23 ++++++++++++++---------
 fs/bcachefs/util.h    |  2 +-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 05d5f71a7ca9..2d8042f853dc 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c,
 
 		/* Pick at random, biased in favor of the faster device: */
 
-		return bch2_rand_range(l1 + l2) > l1;
+		return bch2_get_random_u64_below(l1 + l2) > l1;
 	}
 
 	if (bch2_force_reconstruct_read)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index e0a876cbaa6b..8e3ab4bf79a9 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -653,19 +653,24 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
 	return 0;
 }
 
-size_t bch2_rand_range(size_t max)
+u64 bch2_get_random_u64_below(u64 ceil)
 {
-	size_t rand;
+	if (ceil <= U32_MAX)
+		return __get_random_u32_below(ceil);
 
-	if (!max)
-		return 0;
+	/* this is the same (clever) algorithm as in __get_random_u32_below() */
+	u64 rand = get_random_u64();
+	u64 mult = ceil * rand;
 
-	do {
-		rand = get_random_long();
-		rand &= roundup_pow_of_two(max) - 1;
-	} while (rand >= max);
+	if (unlikely(mult < ceil)) {
+		u64 bound = -ceil % ceil;
+		while (unlikely(mult < bound)) {
+			rand = get_random_u64();
+			mult = ceil * rand;
+		}
+	}
 
-	return rand;
+	return mul_u64_u64_shr(ceil, rand, 64);
 }
 
 void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index e7c3541b38f3..f4a4783219d9 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -401,7 +401,7 @@ do {									\
 	_ret;								\
 })
 
-size_t bch2_rand_range(size_t);
+u64 bch2_get_random_u64_below(u64);
 
 void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
 void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
-- 
2.51.0


From 57e9417f69839cb10f7ffca684c38acd28ceb57b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 8 Mar 2025 10:50:08 -0500
Subject: [PATCH 07/16] dm-flakey: Fix memory corruption in optional
 corrupt_bio_byte feature

Fix memory corruption due to incorrect parameter being passed to bio_init

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org	# v6.5+
Fixes: 1d9a94389853 ("dm flakey: clone pages on write bio before corrupting them")
---
 drivers/md/dm-flakey.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 731467d4ed10..b690905ab89f 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -426,7 +426,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
 	if (!clone)
 		return NULL;
 
-	bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf);
+	bio_init(clone, fc->dev->bdev, clone->bi_inline_vecs, nr_iovecs, bio->bi_opf);
 
 	clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector);
 	clone->bi_private = bio;
-- 
2.51.0


From a2ab25529bbcea51b5e01dded79f45aeb94f644a Mon Sep 17 00:00:00 2001
From: Ajay Kaher <ajay.kaher@broadcom.com>
Date: Thu, 13 Mar 2025 17:31:11 +0000
Subject: [PATCH 08/16] x86/vmware: Parse MP tables for SEV-SNP enabled guests
 under VMware hypervisors

Under VMware hypervisors, SEV-SNP enabled VMs are fundamentally able to boot
without UEFI, but this regressed a year ago due to:

  0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests")

In this case, mpparse_find_mptable() has to be called to parse MP
tables which contains the necessary boot information.

[ mingo: Updated the changelog. ]

Fixes: 0f4a1e80989a ("x86/sev: Skip ROM range scans and validation for SEV-SNP guests")
Co-developed-by: Ye Li <ye.li@broadcom.com>
Signed-off-by: Ye Li <ye.li@broadcom.com>
Signed-off-by: Ajay Kaher <ajay.kaher@broadcom.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Ye Li <ye.li@broadcom.com>
Reviewed-by: Kevin Loughlin <kevinloughlin@google.com>
Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20250313173111.10918-1-ajay.kaher@broadcom.com
---
 arch/x86/kernel/cpu/vmware.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 00189cdeb775..cb3f900c46fc 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/clocksource.h>
 #include <linux/cpu.h>
+#include <linux/efi.h>
 #include <linux/reboot.h>
 #include <linux/static_call.h>
 #include <asm/div64.h>
@@ -429,6 +430,9 @@ static void __init vmware_platform_setup(void)
 		pr_warn("Failed to get TSC freq from the hypervisor\n");
 	}
 
+	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !efi_enabled(EFI_BOOT))
+		x86_init.mpparse.find_mptable = mpparse_find_mptable;
+
 	vmware_paravirt_ops_setup();
 
 #ifdef CONFIG_X86_IO_APIC
-- 
2.51.0


From f2865c6300d75a9f187dd7918d248e010970fd44 Mon Sep 17 00:00:00 2001
From: Amit Sunil Dhamne <amitsd@google.com>
Date: Mon, 10 Mar 2025 19:19:07 -0700
Subject: [PATCH 09/16] usb: typec: tcpm: fix state transition for
 SNK_WAIT_CAPABILITIES state in run_state_machine()

A subtle error got introduced while manually fixing merge conflict in
tcpm.c for commit 85c4efbe6088 ("Merge v6.12-rc6 into usb-next"). As a
result of this error, the next state is unconditionally set to
SNK_WAIT_CAPABILITIES_TIMEOUT while handling SNK_WAIT_CAPABILITIES state
in run_state_machine(...).

Fix this by setting new state of TCPM state machine to `upcoming_state`
(that is set to different values based on conditions).

Cc: stable@vger.kernel.org
Fixes: 85c4efbe60888 ("Merge v6.12-rc6 into usb-next")
Signed-off-by: Amit Sunil Dhamne <amitsd@google.com>
Reviewed-by: Badhri Jagan Sridharan <badhri@google.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20250310-fix-snk-wait-timeout-v6-14-rc6-v1-1-5db14475798f@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 6bf1a22c785a..62ca4a0ec55b 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5117,16 +5117,16 @@ static void run_state_machine(struct tcpm_port *port)
 		 */
 		if (port->vbus_never_low) {
 			port->vbus_never_low = false;
-			tcpm_set_state(port, SNK_SOFT_RESET,
-				       port->timings.sink_wait_cap_time);
+			upcoming_state = SNK_SOFT_RESET;
 		} else {
 			if (!port->self_powered)
 				upcoming_state = SNK_WAIT_CAPABILITIES_TIMEOUT;
 			else
 				upcoming_state = hard_reset_state(port);
-			tcpm_set_state(port, SNK_WAIT_CAPABILITIES_TIMEOUT,
-				       port->timings.sink_wait_cap_time);
 		}
+
+		tcpm_set_state(port, upcoming_state,
+			       port->timings.sink_wait_cap_time);
 		break;
 	case SNK_WAIT_CAPABILITIES_TIMEOUT:
 		/*
-- 
2.51.0


From 0b4ffbe4888a2c71185eaf5c1a02dd3586a9bc04 Mon Sep 17 00:00:00 2001
From: Tengda Wu <wutengda@huaweicloud.com>
Date: Fri, 14 Mar 2025 06:53:35 +0000
Subject: [PATCH 10/16] tracing: Correct the refcount if the hist/hist_debug
 file fails to open

The function event_{hist,hist_debug}_open() maintains the refcount of
'file->tr' and 'file' through tracing_open_file_tr(). However, it does
not roll back these counts on subsequent failure paths, resulting in a
refcount leak.

A very obvious case is that if the hist/hist_debug file belongs to a
specific instance, the refcount leak will prevent the deletion of that
instance, as it relies on the condition 'tr->ref == 1' within
__remove_instance().

Fix this by calling tracing_release_file_tr() on all failure paths in
event_{hist,hist_debug}_open() to correct the refcount.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Link: https://lore.kernel.org/20250314065335.1202817-1-wutengda@huaweicloud.com
Fixes: 1cc111b9cddc ("tracing: Fix uaf issue when open the hist or hist_debug file")
Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index ad7419e24055..53dc6719181e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5689,12 +5689,16 @@ static int event_hist_open(struct inode *inode, struct file *file)
 	guard(mutex)(&event_mutex);
 
 	event_file = event_file_data(file);
-	if (!event_file)
-		return -ENODEV;
+	if (!event_file) {
+		ret = -ENODEV;
+		goto err;
+	}
 
 	hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
-	if (!hist_file)
-		return -ENOMEM;
+	if (!hist_file) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	hist_file->file = file;
 	hist_file->last_act = get_hist_hit_count(event_file);
@@ -5702,9 +5706,14 @@ static int event_hist_open(struct inode *inode, struct file *file)
 	/* Clear private_data to avoid warning in single_open() */
 	file->private_data = NULL;
 	ret = single_open(file, hist_show, hist_file);
-	if (ret)
+	if (ret) {
 		kfree(hist_file);
+		goto err;
+	}
 
+	return 0;
+err:
+	tracing_release_file_tr(inode, file);
 	return ret;
 }
 
@@ -5979,7 +5988,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file)
 
 	/* Clear private_data to avoid warning in single_open() */
 	file->private_data = NULL;
-	return single_open(file, hist_debug_show, file);
+	ret = single_open(file, hist_debug_show, file);
+	if (ret)
+		tracing_release_file_tr(inode, file);
+	return ret;
 }
 
 const struct file_operations event_hist_debug_fops = {
-- 
2.51.0


From ca3ac4bf4dc307cea5781dccccf41c1d14c2f82f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 3 Mar 2025 18:02:32 +0000
Subject: [PATCH 11/16] xfs: Use abs_diff instead of XFS_ABSDIFF

We have a central definition for this function since 2023, used by
a number of different parts of the kernel.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
 fs/xfs/libxfs/xfs_alloc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3d33e17f2e5c..7839efe050bf 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -33,8 +33,6 @@ struct kmem_cache	*xfs_extfree_item_cache;
 
 struct workqueue_struct *xfs_alloc_wq;
 
-#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
-
 #define	XFSA_FIXUP_BNO_OK	1
 #define	XFSA_FIXUP_CNT_OK	2
 
@@ -410,8 +408,8 @@ xfs_alloc_compute_diff(
 		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 			if (newlen1 < newlen2 ||
 			    (newlen1 == newlen2 &&
-			     XFS_ABSDIFF(newbno1, wantbno) >
-			     XFS_ABSDIFF(newbno2, wantbno)))
+			     abs_diff(newbno1, wantbno) >
+			     abs_diff(newbno2, wantbno)))
 				newbno1 = newbno2;
 		} else if (newbno2 != NULLAGBLOCK)
 			newbno1 = newbno2;
@@ -427,7 +425,7 @@ xfs_alloc_compute_diff(
 	} else
 		newbno1 = freeend - wantlen;
 	*newbnop = newbno1;
-	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+	return newbno1 == NULLAGBLOCK ? 0 : abs_diff(newbno1, wantbno);
 }
 
 /*
-- 
2.51.0


From 03fc0a2dc9f8c292fad8a1bcfb6d1f0dec1824be Mon Sep 17 00:00:00 2001
From: Ike Panhc <ike.pan@canonical.com>
Date: Fri, 14 Mar 2025 12:57:32 +0800
Subject: [PATCH 12/16] MAINTAINERS: Update Ike Panhc's email address
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

I am no longer at Canonical and update with my personal email address.

Signed-off-by: Ike Panhc <ike.pan@canonical.com>
Link: https://lore.kernel.org/r/20250314045732.389973-1-ike.pan@canonical.com
Signed-off-by: Ilpo JÃ¤rvinen <ilpo.jarvinen@linux.intel.com>
---
 .mailmap    | 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index ae0adc499f4a..449c5e3a5d26 100644
--- a/.mailmap
+++ b/.mailmap
@@ -281,6 +281,7 @@ Henrik Rydberg <rydberg@bitmath.org>
 Herbert Xu <herbert@gondor.apana.org.au>
 Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
 Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+Ike Panhc <ikepanhc@gmail.com> <ike.pan@canonical.com>
 J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
 J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
 Jacob Shin <Jacob.Shin@amd.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 896a307fa065..bb4c069cc1e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11140,7 +11140,7 @@ S:	Maintained
 F:	drivers/i2c/busses/i2c-icy.c
 
 IDEAPAD LAPTOP EXTRAS DRIVER
-M:	Ike Panhc <ike.pan@canonical.com>
+M:	Ike Panhc <ikepanhc@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 W:	http://launchpad.net/ideapad-laptop
-- 
2.51.0


From 90fd9ad5b0c981693c8512d9da01f14fb6596e9d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 14 Mar 2025 09:54:43 -0400
Subject: [PATCH 13/16] bcachefs: Change btree wb assert to runtime error

We just had a report of the assert for "btree in write buffer for
non-write buffer btree" popping during the 6.14 upgrade.

- 150TB filesystem, after a reboot the upgrade was able to continue from
  where it left off, so no major damage.

But with 6.14 about to come out we want to get this tracked down asap,
and need more data if other users hit this.

Convert the BUG_ON() to an emergency read-only, and print out btree, the
key itself, and stack trace from the original write buffer update (which
did not have this check before).

Reported-by: Stijn Tintel <stijn@linux-ipv6.be>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_update.h       |  8 ++++++++
 fs/bcachefs/btree_write_buffer.c | 21 ++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 8f22ef9a7651..47d8690f01bf 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
 
 int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
 
+int bch2_btree_write_buffer_insert_err(struct btree_trans *,
+				       enum btree_id, struct bkey_i *);
+
 static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
 					    enum btree_id btree,
 					    struct bkey_i *k)
 {
+	if (unlikely(!btree_type_uses_write_buffer(btree))) {
+		int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+		dump_stack();
+		return ret;
+	}
 	/*
 	 * Most updates skip the btree write buffer until journal replay is
 	 * finished because synchronization with journal replay relies on having
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b56c4987b8c9..2c09d19dd621 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -264,6 +264,22 @@ out:
 	BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
 }
 
+int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+				       enum btree_id btree, struct bkey_i *k)
+{
+	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
+
+	prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
+	bch2_btree_id_to_text(&buf, btree);
+	prt_str(&buf, "\n");
+	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+	bch2_fs_inconsistent(c, "%s", buf.buf);
+	printbuf_exit(&buf);
+	return -EROFS;
+}
+
 static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 	darray_for_each(wb->sorted, i) {
 		struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
 
-		BUG_ON(!btree_type_uses_write_buffer(k->btree));
+		if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
+			ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+			goto err;
+		}
 
 		for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
 			prefetch(&wb->flushing.keys.data[n->idx]);
-- 
2.51.0


From 1a2b74d0a2a46c219b25fdb0efcf9cd7f55cfe5e Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 14 Mar 2025 18:20:20 -0400
Subject: [PATCH 14/16] bcachefs: fix build on 32 bit in get_random_u64_below()

bare 64 bit divides not allowed, whoops

arm-linux-gnueabi-ld: drivers/char/random.o: in function `__get_random_u64_below':
drivers/char/random.c:602:(.text+0xc70): undefined reference to `__aeabi_uldivmod'

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/util.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 8e3ab4bf79a9..da2cd11b3025 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -663,7 +663,8 @@ u64 bch2_get_random_u64_below(u64 ceil)
 	u64 mult = ceil * rand;
 
 	if (unlikely(mult < ceil)) {
-		u64 bound = -ceil % ceil;
+		u64 bound;
+		div64_u64_rem(-ceil, ceil, &bound);
 		while (unlikely(mult < bound)) {
 			rand = get_random_u64();
 			mult = ceil * rand;
-- 
2.51.0


From 4701f33a10702d5fc577c32434eb62adde0a1ae1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 Mar 2025 12:55:17 -1000
Subject: [PATCH 15/16] Linux 6.14-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1d6a9ec8a2ac..50694bbbf828 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
2.51.0


From c746ff4a67f4842e90fe232d2c9fc983f4034848 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 17 Mar 2025 09:06:42 +0100
Subject: [PATCH 16/16] pinctrl: spacemit: PINCTRL_SPACEMIT_K1 should not
 default to y unconditionally

Merely enabling compile-testing should not enable additional
functionality.

Fixes: 7ff4faba63571c51 ("pinctrl: spacemit: enable config option")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Yixun Lan <dlan@gentoo.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Tested-by: Alex Elder <elder@riscstar.com>
Reviewed-by: Alex Elder <elder@riscstar.com>
Link: https://lore.kernel.org/6881b8d1ad74ac780af8a974e604b5ef3f5d4aad.1742198691.git.geert+renesas@glider.be
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/spacemit/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/spacemit/Kconfig b/drivers/pinctrl/spacemit/Kconfig
index a2f98b3f8a75..d6f6017fd097 100644
--- a/drivers/pinctrl/spacemit/Kconfig
+++ b/drivers/pinctrl/spacemit/Kconfig
@@ -7,7 +7,7 @@ config PINCTRL_SPACEMIT_K1
 	bool "SpacemiT K1 SoC Pinctrl driver"
 	depends on ARCH_SPACEMIT || COMPILE_TEST
 	depends on OF
-	default y
+	default ARCH_SPACEMIT
 	select GENERIC_PINCTRL_GROUPS
 	select GENERIC_PINMUX_FUNCTIONS
 	select GENERIC_PINCONF
-- 
2.51.0