From 2a42754b3104d78a2bc7a2ad8844427411c76ca6 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 3 Feb 2025 23:32:04 +0100
Subject: [PATCH 01/16] fsnotify: disable notification by default for all
 pseudo files

Most pseudo files are not applicable for fsnotify events at all,
let alone to the new pre-content events.

Disable notifications to all files allocated with alloc_file_pseudo()
and enable legacy inotify events for the specific cases of pipe and
socket, which have known users of inotify events.

Pre-content events are also kept disabled for sockets and pipes.

Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches")
Reported-by: Alex Williamson <alex.williamson@redhat.com>
Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wi2pThSVY=zhO=ZKxViBj5QCRX-=AS2+rVknQgJnHXDFg@mail.gmail.com/
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20250203223205.861346-3-amir73il@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/file_table.c | 11 +++++++++++
 fs/open.c       |  4 ++--
 fs/pipe.c       |  6 ++++++
 net/socket.c    |  5 +++++
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index f0291a66f9db..35b93da6c5cb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -375,7 +375,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 	if (IS_ERR(file)) {
 		ihold(inode);
 		path_put(&path);
+		return file;
 	}
+	/*
+	 * Disable all fsnotify events for pseudo files by default.
+	 * They may be enabled by caller with file_set_fsnotify_mode().
+	 */
+	file_set_fsnotify_mode(file, FMODE_NONOTIFY);
 	return file;
 }
 EXPORT_SYMBOL(alloc_file_pseudo);
@@ -400,6 +406,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode,
 		return file;
 	}
 	file_init_path(file, &path, fops);
+	/*
+	 * Disable all fsnotify events for pseudo files by default.
+	 * They may be enabled by caller with file_set_fsnotify_mode().
+	 */
+	file_set_fsnotify_mode(file, FMODE_NONOTIFY);
 	return file;
 }
 EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
diff --git a/fs/open.c b/fs/open.c
index 3fcbfff8aede..1be20de9f283 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -936,8 +936,8 @@ static int do_dentry_open(struct file *f,
 
 	/*
 	 * Set FMODE_NONOTIFY_* bits according to existing permission watches.
-	 * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
-	 * change anything.
+	 * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a
+	 * pseudo file, this call will not change the mode.
 	 */
 	file_set_fsnotify_mode_from_watchers(f);
 	error = fsnotify_open_perm(f);
diff --git a/fs/pipe.c b/fs/pipe.c
index 94b59045ab44..ce1af7592780 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags)
 	res[1] = f;
 	stream_open(inode, res[0]);
 	stream_open(inode, res[1]);
+	/*
+	 * Disable permission and pre-content events, but enable legacy
+	 * inotify events for legacy users.
+	 */
+	file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM);
+	file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM);
 	return 0;
 }
 
diff --git a/net/socket.c b/net/socket.c
index 262a28b59c7f..28bae5a94234 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	sock->file = file;
 	file->private_data = sock;
 	stream_open(SOCK_INODE(sock), file);
+	/*
+	 * Disable permission and pre-content events, but enable legacy
+	 * inotify events for legacy users.
+	 */
+	file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM);
 	return file;
 }
 EXPORT_SYMBOL(sock_alloc_file);
-- 
2.51.0


From 2cc02059fbc79306b53a44b1f1a4444aa3c76598 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jan 2025 17:06:41 +0100
Subject: [PATCH 02/16] selftests: always check mask returned by statmount(2)

STATMOUNT_MNT_OPTS can actually be missing if there are no options.  This
is a change of behavior since 75ead69a7173 ("fs: don't let statmount return
empty strings").

The other checks shouldn't actually trigger, but add them for correctness
and for easier debugging if the test fails.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129160641.35485-1-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .../filesystems/statmount/statmount_test.c    | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 8eb6aa606a0d..46d289611ce8 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void)
 		return;
 	}
 
+	if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n");
+		return;
+	}
 	if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
 		ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
 				      sm->str + sm->mnt_point);
@@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void)
 				      strerror(errno));
 		return;
 	}
+	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n");
+		return;
+	}
 	mnt_root = sm->str + sm->mnt_root;
 	last_root = strrchr(mnt_root, '/');
 	if (last_root)
@@ -437,6 +445,10 @@ static void test_statmount_fs_type(void)
 				      strerror(errno));
 		return;
 	}
+	if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+		ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n");
+		return;
+	}
 	fs_type = sm->str + sm->fs_type;
 	for (s = known_fs; s != NULL; s++) {
 		if (strcmp(fs_type, *s) == 0)
@@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void)
 		return;
 	}
 
+	if (!(sm->mask & STATMOUNT_MNT_BASIC)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n");
+		return;
+	}
+
 	while (getline(&line, &len, f_mountinfo) != -1) {
 		int i;
 		char *p, *p2;
@@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void)
 		if (p2)
 			*p2 = '\0';
 
-		statmount_opts = sm->str + sm->mnt_opts;
+		if (sm->mask & STATMOUNT_MNT_OPTS)
+			statmount_opts = sm->str + sm->mnt_opts;
+		else
+			statmount_opts = "";
 		if (strcmp(statmount_opts, p) != 0)
 			ksft_test_result_fail(
 				"unexpected mount options: '%s' != '%s'\n",
-- 
2.51.0


From 711f9b8fbe4f4936302804e246e206f0829f628f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 3 Feb 2025 23:32:05 +0100
Subject: [PATCH 03/16] fsnotify: disable pre-content and permission events by
 default

After introducing pre-content events, we had a regression related to
disabling huge faults on files that should never have pre-content events
enabled.

This happened because the default f_mode of allocated files (0) does
not disable pre-content events.

Pre-content events are disabled in file_set_fsnotify_mode_by_watchers()
but internal files may not get to call this helper.

Initialize f_mode to disable permission and pre-content events for all
files and if needed they will be enabled for the callers of
file_set_fsnotify_mode_by_watchers().

Fixes: 20bf82a898b6 ("mm: don't allow huge faults for files with pre content watches")
Reported-by: Alex Williamson <alex.williamson@redhat.com>
Closes: https://lore.kernel.org/linux-fsdevel/20250131121703.1e4d00a7.alex.williamson@redhat.com/
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20250203223205.861346-4-amir73il@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/file_table.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/file_table.c b/fs/file_table.c
index 35b93da6c5cb..5c00dc38558d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
 	 * refcount bumps we should reinitialize the reused file first.
 	 */
 	file_ref_init(&f->f_ref, 1);
+	/*
+	 * Disable permission and pre-content events for all files by default.
+	 * They may be enabled later by file_set_fsnotify_mode_from_watchers().
+	 */
+	file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM);
 	return 0;
 }
 
-- 
2.51.0


From 091ee63e36e8289f9067f659a48d497911e49d6f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 4 Feb 2025 14:51:20 +0100
Subject: [PATCH 04/16] pidfs: improve ioctl handling

Pidfs supports extensible and non-extensible ioctls. The extensible
ioctls need to check for the ioctl number itself not just the ioctl
command otherwise both backward- and forward compatibility are broken.

The pidfs ioctl handler also needs to look at the type of the ioctl
command to guard against cases where "[...] a daemon receives some
random file descriptor from a (potentially less privileged) client and
expects the FD to be of some specific type, it might call ioctl() on
this FD with some type-specific command and expect the call to fail if
the FD is of the wrong type; but due to the missing type check, the
kernel instead performs some action that userspace didn't expect."
(cf. [1]]

Link: https://lore.kernel.org/r/20250204-work-pidfs-ioctl-v1-1-04987d239575@kernel.org
Link: https://lore.kernel.org/r/CAG48ez2K9A5GwtgqO31u9ZL292we8ZwAA=TJwwEv7wRuJ3j4Lw@mail.gmail.com [1]
Fixes: 8ce352818820 ("pidfs: check for valid ioctl commands")
Acked-by: Luca Boccassi <luca.boccassi@gmail.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: stable@vger.kernel.org # v6.13; please backport with 8ce352818820 ("pidfs: check for valid ioctl commands")
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/pidfs.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/pidfs.c b/fs/pidfs.c
index 049352f973de..63f9699ebac3 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
 	switch (cmd) {
 	case FS_IOC_GETVERSION:
 	case PIDFD_GET_CGROUP_NAMESPACE:
-	case PIDFD_GET_INFO:
 	case PIDFD_GET_IPC_NAMESPACE:
 	case PIDFD_GET_MNT_NAMESPACE:
 	case PIDFD_GET_NET_NAMESPACE:
@@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
 		return true;
 	}
 
+	/* Extensible ioctls require some more careful checks. */
+	switch (_IOC_NR(cmd)) {
+	case _IOC_NR(PIDFD_GET_INFO):
+		/*
+		 * Try to prevent performing a pidfd ioctl when someone
+		 * erronously mistook the file descriptor for a pidfd.
+		 * This is not perfect but will catch most cases.
+		 */
+		return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
+	}
+
 	return false;
 }
 
-- 
2.51.0


From 37d11cfc63604b3886308e2111d845d148ced8bc Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Tue, 4 Feb 2025 22:32:07 +0100
Subject: [PATCH 05/16] vfs: sanity check the length passed to
 inode_set_cached_link()

This costs a strlen() call when instatianating a symlink.

Preferably it would be hidden behind VFS_WARN_ON (or compatible), but
there is no such facility at the moment. With the facility in place the
call can be patched out in production kernels.

In the meantime, since the cost is being paid unconditionally, use the
result to a fixup the bad caller.

This is not expected to persist in the long run (tm).

Sample splat:
bad length passed for symlink [/tmp/syz-imagegen43743633/file0/file0] (got 131109, expected 37)
[rest of WARN blurp goes here]

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20250204213207.337980-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7620547432a8..2c3b2f8a621f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -790,6 +790,19 @@ struct inode {
 
 static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
 {
+	int testlen;
+
+	/*
+	 * TODO: patch it into a debug-only check if relevant macros show up.
+	 * In the meantime, since we are suffering strlen even on production kernels
+	 * to find the right length, do a fixup if the wrong value got passed.
+	 */
+	testlen = strlen(link);
+	if (testlen != linklen) {
+		WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)",
+			  link, linklen, testlen);
+		linklen = testlen;
+	}
 	inode->i_link = link;
 	inode->i_linklen = linklen;
 	inode->i_opflags |= IOP_CACHED_LINK;
-- 
2.51.0


From 511121a48bbd12df4ae50a099a8936e833df8c46 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 5 Feb 2025 19:42:01 +0100
Subject: [PATCH 06/16] MAINTAINERS: Move Pavel to kernel.org address

I need to filter my emails better, switch to pavel@kernel.org address
to help with that.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS     |  6 ++----
 MAINTAINERS | 10 +++++-----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/CREDITS b/CREDITS
index 1f9f0f078b4a..53d11a46fd69 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2515,11 +2515,9 @@ D: SLS distribution
 D: Initial implementation of VC's, pty's and select()
 
 N: Pavel Machek
-E: pavel@ucw.cz
+E: pavel@kernel.org
 P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585  B8C7 C060 2241 92DF CE96
-D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd,
-D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
-D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
+D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk,
 D: Altera SoCFPGA and Nokia N900 support.
 S: Czech Republic
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 873aa2cce4d7..157818de0b55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9418,7 +9418,7 @@ F:	fs/freevxfs/
 
 FREEZER
 M:	"Rafael J. Wysocki" <rafael@kernel.org>
-M:	Pavel Machek <pavel@ucw.cz>
+M:	Pavel Machek <pavel@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	Documentation/power/freezing-of-tasks.rst
@@ -10253,7 +10253,7 @@ F:	drivers/video/fbdev/hgafb.c
 
 HIBERNATION (aka Software Suspend, aka swsusp)
 M:	"Rafael J. Wysocki" <rafael@kernel.org>
-M:	Pavel Machek <pavel@ucw.cz>
+M:	Pavel Machek <pavel@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 B:	https://bugzilla.kernel.org
@@ -13124,8 +13124,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
 F:	scripts/leaking_addresses.pl
 
 LED SUBSYSTEM
-M:	Pavel Machek <pavel@ucw.cz>
 M:	Lee Jones <lee@kernel.org>
+M:	Pavel Machek <pavel@kernel.org>
 L:	linux-leds@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git
@@ -16823,7 +16823,7 @@ F:	include/linux/tick.h
 F:	kernel/time/tick*.*
 
 NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS)
-M:	Pavel Machek <pavel@ucw.cz>
+M:	Pavel Machek <pavel@kernel.org>
 M:	Sakari Ailus <sakari.ailus@iki.fi>
 L:	linux-media@vger.kernel.org
 S:	Maintained
@@ -22849,7 +22849,7 @@ F:	drivers/sh/
 SUSPEND TO RAM
 M:	"Rafael J. Wysocki" <rafael@kernel.org>
 M:	Len Brown <len.brown@intel.com>
-M:	Pavel Machek <pavel@ucw.cz>
+M:	Pavel Machek <pavel@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 B:	https://bugzilla.kernel.org
-- 
2.51.0


From 1b3291f00013c86a9bb349d6158a9a7a4f0334fe Mon Sep 17 00:00:00 2001
From: Hector Martin <marcan@marcan.st>
Date: Fri, 7 Feb 2025 03:21:46 +0900
Subject: [PATCH 07/16] MAINTAINERS: Remove myself

I no longer have any faith left in the kernel development process or
community management approach.

Apple/ARM platform development will continue downstream. If I feel like
sending some patches upstream in the future myself for whatever subtree
I may, or I may not. Anyone who feels like fighting the upstreaming
fight themselves is welcome to do so.

Signed-off-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 157818de0b55..20c9e0871215 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2209,7 +2209,6 @@ F:	sound/soc/codecs/cs42l84.*
 F:	sound/soc/codecs/ssm3515.c
 
 ARM/APPLE MACHINE SUPPORT
-M:	Hector Martin <marcan@marcan.st>
 M:	Sven Peter <sven@svenpeter.dev>
 R:	Alyssa Rosenzweig <alyssa@rosenzweig.io>
 L:	asahi@lists.linux.dev
-- 
2.51.0


From f354fc88a72ae83dacd68370f6fa040e5733bcfe Mon Sep 17 00:00:00 2001
From: WangYuli <wangyuli@uniontech.com>
Date: Fri, 7 Feb 2025 15:08:55 +0800
Subject: [PATCH 08/16] kbuild: install-extmod-build: add missing quotation
 marks for CC variable

While attempting to build a Debian packages with CC="ccache gcc", I
saw the following error as builddeb builds linux-headers-$KERNELVERSION:

  make HOSTCC=ccache gcc VPATH= srcroot=. -f ./scripts/Makefile.build obj=debian/linux-headers-6.14.0-rc1/usr/src/linux-headers-6.14.0-rc1/scripts
  make[6]: *** No rule to make target 'gcc'.  Stop.

Upon investigation, it seems that one instance of $(CC) variable reference
in ./scripts/package/install-extmod-build was missing quotation marks,
causing the above error.

Add the missing quotation marks around $(CC) to fix build.

Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package")
Co-developed-by: Mingcong Bai <jeffbai@aosc.io>
Signed-off-by: Mingcong Bai <jeffbai@aosc.io>
Tested-by: WangYuli <wangyuli@uniontech.com>
Signed-off-by: WangYuli <wangyuli@uniontech.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/install-extmod-build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
index bb6e23c1174e..b724626ea0ca 100755
--- a/scripts/package/install-extmod-build
+++ b/scripts/package/install-extmod-build
@@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then
 	# Clear VPATH and srcroot because the source files reside in the output
 	# directory.
 	# shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make
-	"${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts
+	"${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts
 
 	rm -f "${destdir}/scripts/Kbuild"
 fi
-- 
2.51.0


From 8f6629c004b193d23612641c3607e785819e97ab Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 17 Oct 2024 10:09:22 -0700
Subject: [PATCH 09/16] kbuild: Move -Wenum-enum-conversion to W=2

-Wenum-enum-conversion was strengthened in clang-19 to warn for C, which
caused the kernel to move it to W=1 in commit 75b5ab134bb5 ("kbuild:
Move -Wenum-{compare-conditional,enum-conversion} into W=1") because
there were numerous instances that would break builds with -Werror.
Unfortunately, this is not a full solution, as more and more developers,
subsystems, and distributors are building with W=1 as well, so they
continue to see the numerous instances of this warning.

Since the move to W=1, there have not been many new instances that have
appeared through various build reports and the ones that have appeared
seem to be following similar existing patterns, suggesting that most
instances of this warning will not be real issues. The only alternatives
for silencing this warning are adding casts (which is generally seen as
an ugly practice) or refactoring the enums to macro defines or a unified
enum (which may be undesirable because of type safety in other parts of
the code).

Move the warning to W=2, where warnings that occur frequently but may be
relevant should reside.

Cc: stable@vger.kernel.org
Fixes: 75b5ab134bb5 ("kbuild: Move -Wenum-{compare-conditional,enum-conversion} into W=1")
Link: https://lore.kernel.org/ZwRA9SOcOjjLJcpi@google.com/
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/Makefile.extrawarn | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index eb719f6d8d53..a7003c1e66c7 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
 KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
 KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
 KBUILD_CFLAGS += -Wno-enum-compare-conditional
-KBUILD_CFLAGS += -Wno-enum-enum-conversion
 endif
 
 endif
@@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers
 KBUILD_CFLAGS += -Wno-type-limits
 KBUILD_CFLAGS += -Wno-shift-negative-value
 
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Wno-enum-enum-conversion
+endif
+
 ifdef CONFIG_CC_IS_GCC
 KBUILD_CFLAGS += -Wno-maybe-uninitialized
 endif
-- 
2.51.0


From c8c9b1d2d5b4377c72a979f5a26e842a869aefc9 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Sat, 8 Feb 2025 00:15:11 -0500
Subject: [PATCH 10/16] fgraph: Fix set_graph_notrace with setting
 TRACE_GRAPH_NOTRACE_BIT

The code was restructured where the function graph notrace code, that
would not trace a function and all its children is done by setting a
NOTRACE flag when the function that is not to be traced is hit.

There's a TRACE_GRAPH_NOTRACE_BIT which defines the bit in the flags and a
TRACE_GRAPH_NOTRACE which is the mask with that bit set. But the
restructuring used TRACE_GRAPH_NOTRACE_BIT when it should have used
TRACE_GRAPH_NOTRACE.

For example:

 # cd /sys/kernel/tracing
 # echo set_track_prepare stack_trace_save  > set_graph_notrace
 # echo function_graph > current_tracer
 # cat trace
[..]
 0)               |                          __slab_free() {
 0)               |                            free_to_partial_list() {
 0)               |                                  arch_stack_walk() {
 0)               |                                    __unwind_start() {
 0)   0.501 us    |                                      get_stack_info();

Where a non filter trace looks like:

 # echo > set_graph_notrace
 # cat trace
 0)               |                            free_to_partial_list() {
 0)               |                              set_track_prepare() {
 0)               |                                stack_trace_save() {
 0)               |                                  arch_stack_walk() {
 0)               |                                    __unwind_start() {

Where the filter should look like:

 # cat trace
 0)               |                            free_to_partial_list() {
 0)               |                              _raw_spin_lock_irqsave() {
 0)   0.350 us    |                                preempt_count_add();
 0)   0.351 us    |                                do_raw_spin_lock();
 0)   2.440 us    |                              }

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/20250208001511.535be150@batman.local.home
Fixes: b84214890a9bc ("function_graph: Move graph notrace bit to shadow stack global var")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 54d850997c0a..136c750b0b4d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 	 * returning from the function.
 	 */
 	if (ftrace_graph_notrace_addr(trace->func)) {
-		*task_var |= TRACE_GRAPH_NOTRACE_BIT;
+		*task_var |= TRACE_GRAPH_NOTRACE;
 		/*
 		 * Need to return 1 to have the return called
 		 * that will clear the NOTRACE bit.
-- 
2.51.0


From 7585946243d614bd2cd4e13377be2c711c9539e0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 8 Feb 2025 18:54:28 +0100
Subject: [PATCH 11/16] PM: sleep: core: Restrict power.set_active propagation

Commit 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of
parents and children") exposed an issue related to simple_pm_bus_pm_ops
that uses pm_runtime_force_suspend() and pm_runtime_force_resume() as
bus type PM callbacks for the noirq phases of system-wide suspend and
resume.

The problem is that pm_runtime_force_suspend() does not distinguish
runtime-suspended devices from devices for which runtime PM has never
been enabled, so if it sees a device with runtime PM status set to
RPM_ACTIVE, it will assume that runtime PM is enabled for that device
and so it will attempt to suspend it with the help of its runtime PM
callbacks which may not be ready for that.  As it turns out, this
causes simple_pm_bus_runtime_suspend() to crash due to a NULL pointer
dereference.

Another problem related to the above commit and simple_pm_bus_pm_ops is
that setting runtime PM status of a device handled by the latter to
RPM_ACTIVE will actually prevent it from being resumed because
pm_runtime_force_resume() only resumes devices with runtime PM status
set to RPM_SUSPENDED.

To mitigate these issues, do not allow power.set_active to propagate
beyond the parent of the device with DPM_FLAG_SMART_SUSPEND set that
will need to be resumed, which should be a sufficient stop-gap for the
time being, but they will need to be properly addressed in the future
because in general during system-wide resume it is necessary to resume
all devices in a dependency chain in which at least one device is going
to be resumed.

Fixes: 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children")
Closes: https://lore.kernel.org/linux-pm/1c2433d4-7e0f-4395-b841-b8eac7c25651@nvidia.com/
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/6137505.lOV4Wx5bFT@rjwysocki.net
---
 drivers/base/power/main.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index d497d448e4b2..40e1d8d8a589 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state)
 	return PMSG_ON;
 }
 
-static void dpm_superior_set_must_resume(struct device *dev, bool set_active)
+static void dpm_superior_set_must_resume(struct device *dev)
 {
 	struct device_link *link;
 	int idx;
 
-	if (dev->parent) {
+	if (dev->parent)
 		dev->parent->power.must_resume = true;
-		if (set_active)
-			dev->parent->power.set_active = true;
-	}
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node)
 		link->supplier->power.must_resume = true;
-		if (set_active)
-			link->supplier->power.set_active = true;
-	}
 
 	device_links_read_unlock(idx);
 }
@@ -1287,9 +1281,12 @@ Skip:
 		dev->power.must_resume = true;
 
 	if (dev->power.must_resume) {
-		dev->power.set_active = dev->power.set_active ||
-			dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
-		dpm_superior_set_must_resume(dev, dev->power.set_active);
+		if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) {
+			dev->power.set_active = true;
+			if (dev->parent && !dev->parent->power.ignore_children)
+				dev->parent->power.set_active = true;
+		}
+		dpm_superior_set_must_resume(dev);
 	}
 
 Complete:
-- 
2.51.0


From a64dcfb451e254085a7daee5fe51bf22959d52d3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 Feb 2025 12:45:03 -0800
Subject: [PATCH 12/16] Linux 6.14-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9e0d63d9d94b..89628e354ca7 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
2.51.0


From 3617c0ee7decb3db3f230b1c844126575fab4d49 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 6 Feb 2025 19:14:19 +0000
Subject: [PATCH 13/16] KVM: x86/xen: Only write Xen hypercall page for guest
 writes to MSR

The Xen hypercall page MSR is write-only. When the guest writes an address
to the MSR, the hypervisor populates the referenced page with hypercall
functions.

There is no reason for the host ever to write to the MSR, and it isn't
even readable.

Allowing host writes to trigger the hypercall page allows userspace to
attack the kernel, as kvm_xen_write_hypercall_page() takes multiple
locks and writes to guest memory.  E.g. if userspace sets the MSR to
MSR_IA32_XSS, KVM's write to MSR_IA32_XSS during vCPU creation will
trigger an SRCU violation due to writing guest memory:

  =============================
  WARNING: suspicious RCU usage
  6.13.0-rc3
  -----------------------------
  include/linux/kvm_host.h:1046 suspicious rcu_dereference_check() usage!

  stack backtrace:
  CPU: 6 UID: 1000 PID: 1101 Comm: repro Not tainted 6.13.0-rc3
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  Call Trace:
   <TASK>
   dump_stack_lvl+0x7f/0x90
   lockdep_rcu_suspicious+0x176/0x1c0
   kvm_vcpu_gfn_to_memslot+0x259/0x280
   kvm_vcpu_write_guest+0x3a/0xa0
   kvm_xen_write_hypercall_page+0x268/0x300
   kvm_set_msr_common+0xc44/0x1940
   vmx_set_msr+0x9db/0x1fc0
   kvm_vcpu_reset+0x857/0xb50
   kvm_arch_vcpu_create+0x37e/0x4d0
   kvm_vm_ioctl+0x669/0x2100
   __x64_sys_ioctl+0xc1/0xf0
   do_syscall_64+0xc5/0x210
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7feda371b539

While the MSR index isn't strictly ABI, i.e. can theoretically float to
any value, in practice no known VMM sets the MSR index to anything other
than 0x40000000 or 0x40000200.

Reported-by: syzbot+cdeaeec70992eca2d920@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/679258d4.050a0220.2eae65.000a.GAE@google.com
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lore.kernel.org/r/de0437379dfab11e431a23c8ce41a29234c06cbf.camel@infradead.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8e77e61d4fbd..462a5cd6ac4a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3733,7 +3733,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
 
-	if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
+	/*
+	 * Do not allow host-initiated writes to trigger the Xen hypercall
+	 * page setup; it could incur locking paths which are not expected
+	 * if userspace sets the MSR in an unusual location.
+	 */
+	if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr &&
+	    !msr_info->host_initiated)
 		return kvm_xen_write_hypercall_page(vcpu, data);
 
 	switch (msr) {
-- 
2.51.0


From 5c17848134ab1ffb97c4a17a6e25b41390478152 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 14 Feb 2025 17:14:33 -0800
Subject: [PATCH 14/16] KVM: x86/xen: Restrict hypercall MSR to unofficial
 synthetic range

Reject userspace attempts to set the Xen hypercall page MSR to an index
outside of the "standard" virtualization range [0x40000000, 0x4fffffff],
as KVM is not equipped to handle collisions with real MSRs, e.g. KVM
doesn't update MSR interception, conflicts with VMCS/VMCB fields, special
case writes in KVM, etc.

While the MSR index isn't strictly ABI, i.e. can theoretically float to
any value, in practice no known VMM sets the MSR index to anything other
than 0x40000000 or 0x40000200.

Cc: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Link: https://lore.kernel.org/r/20250215011437.1203084-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 Documentation/virt/kvm/api.rst  | 4 ++++
 arch/x86/include/uapi/asm/kvm.h | 3 +++
 arch/x86/kvm/xen.c              | 9 +++++++++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 2b52eb77e29c..5fe84f2427b5 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1000,6 +1000,10 @@ blobs in userspace.  When the guest writes the MSR, kvm copies one
 page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
 memory.
 
+The MSR index must be in the range [0x40000000, 0x4fffffff], i.e. must reside
+in the range that is unofficially reserved for use by hypervisors.  The min/max
+values are enumerated via KVM_XEN_MSR_MIN_INDEX and KVM_XEN_MSR_MAX_INDEX.
+
 ::
 
   struct kvm_xen_hvm_config {
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 9e75da97bce0..460306b35a4b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -559,6 +559,9 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA	(1 << 8)
 
+#define KVM_XEN_MSR_MIN_INDEX			0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX			0x4fffffffu
+
 struct kvm_xen_hvm_config {
 	__u32 flags;
 	__u32 msr;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index a909b817b9c0..5b94825001a7 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1324,6 +1324,15 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 	     xhc->blob_size_32 || xhc->blob_size_64))
 		return -EINVAL;
 
+	/*
+	 * Restrict the MSR to the range that is unofficially reserved for
+	 * synthetic, virtualization-defined MSRs, e.g. to prevent confusing
+	 * KVM by colliding with a real MSR that requires special handling.
+	 */
+	if (xhc->msr &&
+	    (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX))
+		return -EINVAL;
+
 	mutex_lock(&kvm->arch.xen.xen_lock);
 
 	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
-- 
2.51.0


From bb0978d95a551b8181469c4442fd59567c9dd999 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 14 Feb 2025 17:14:34 -0800
Subject: [PATCH 15/16] KVM: x86/xen: Add an #ifdef'd helper to detect writes
 to Xen MSR

Add a helper to detect writes to the Xen hypercall page MSR, and provide a
stub for CONFIG_KVM_XEN=n to optimize out the check for kernels built
without Xen support.

Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lore.kernel.org/r/20250215011437.1203084-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c |  2 +-
 arch/x86/kvm/xen.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 462a5cd6ac4a..12c60adb7349 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3738,7 +3738,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	 * page setup; it could incur locking paths which are not expected
 	 * if userspace sets the MSR in an unusual location.
 	 */
-	if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr &&
+	if (kvm_xen_is_hypercall_page_msr(vcpu->kvm, msr) &&
 	    !msr_info->host_initiated)
 		return kvm_xen_write_hypercall_page(vcpu, data);
 
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index f5841d9000ae..e92e06926f76 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -56,6 +56,11 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 		kvm->arch.xen_hvm_config.msr;
 }
 
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+	return msr && msr == kvm->arch.xen_hvm_config.msr;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
 	return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -124,6 +129,11 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 	return false;
 }
 
+static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
+{
+	return false;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
 	return false;
-- 
2.51.0


From a5d7700af6b036d3b5c17fdf0be3ee38c653aee7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 14 Feb 2025 17:14:35 -0800
Subject: [PATCH 16/16] KVM: x86/xen: Consult kvm_xen_enabled when checking for
 Xen MSR writes

Query kvm_xen_enabled when detecting writes to the Xen hypercall page MSR
so that the check is optimized away in the likely scenario that Xen isn't
enabled for the VM.

Deliberately open code the check instead of using kvm_xen_msr_enabled() in
order to avoid a double load of xen_hvm_config.msr (which is admittedly
rather pointless given the widespread lack of READ_ONCE() usage on the
plethora of vCPU-scoped accesses to kvm->arch.xen state).

No functional change intended.

Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Link: https://lore.kernel.org/r/20250215011437.1203084-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/xen.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index e92e06926f76..1e3a913dfb94 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -58,6 +58,9 @@ static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 
 static inline bool kvm_xen_is_hypercall_page_msr(struct kvm *kvm, u32 msr)
 {
+	if (!static_branch_unlikely(&kvm_xen_enabled.key))
+		return false;
+
 	return msr && msr == kvm->arch.xen_hvm_config.msr;
 }
 
-- 
2.51.0