From b1b46751671be5a426982f037a47ae05f37ff80b Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Fri, 18 Oct 2024 09:50:05 -0700
Subject: [PATCH 01/16] mm: fix follow_pfnmap API lockdep assert

The lockdep assert for the new follow_pfnmap() API "knows" that a
pfnmap always has a vma->vm_file, since that's the only way to create
such a mapping.

And that's actually true for all the normal cases. But not for the
mmap failure case, where the incomplete mapping is torn down and we
have cleared vma->vm_file because the failure occurred before the file
was linked to the vma.

So this codepath does actually need to check for vm_file being NULL.

Reported-by: Jann Horn
Fixes: 6da8e9634bb7 ("mm: new follow_pfnmap API")
Cc: Peter Xu
Cc: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index 30feedabc932..3ccee51adfbb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6350,7 +6350,8 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
 static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
 {
 #ifdef CONFIG_LOCKDEP
-	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct file *file = vma->vm_file;
+	struct address_space *mapping = file ? file->f_mapping : NULL;
 
 	if (mapping)
 		lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
-- 
2.51.0

From f40998a8e6bbf0314b8416350183a537f9b59ca9 Mon Sep 17 00:00:00 2001
From: Luca Boccassi
Date: Fri, 27 Sep 2024 10:23:44 +0200
Subject: [PATCH 02/16] ipe: fallback to platform keyring also if key in
 trusted keyring is rejected

If enabled, we fall back to the platform keyring if the trusted
keyring doesn't have the key used to sign the ipe policy. But if
pkcs7_verify() rejects the key for other reasons, such as usage
restrictions, we do not fall back. Do so, following the same change in
dm-verity.

Signed-off-by: Luca Boccassi
Suggested-by: Serge Hallyn
[FW: fixed some line length issues and a typo in the commit message]
Signed-off-by: Fan Wu
---
 security/ipe/policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/ipe/policy.c b/security/ipe/policy.c
index 45f7d6a0ed23..b628f696e32b 100644
--- a/security/ipe/policy.c
+++ b/security/ipe/policy.c
@@ -178,7 +178,7 @@ struct ipe_policy *ipe_new_policy(const char *text, size_t textlen,
 				      VERIFYING_UNSPECIFIED_SIGNATURE,
 				      set_pkcs7_data, new);
 #ifdef CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING
-	if (rc == -ENOKEY)
+	if (rc == -ENOKEY || rc == -EKEYREJECTED)
 		rc = verify_pkcs7_signature(NULL, 0, new->pkcs7, pkcs7len,
 					    VERIFY_USE_PLATFORM_KEYRING,
 					    VERIFYING_UNSPECIFIED_SIGNATURE,
-- 
2.51.0

From 917a15c37d371bc40b5ad13df366e29bd49c04a1 Mon Sep 17 00:00:00 2001
From: Fan Wu
Date: Wed, 16 Oct 2024 16:43:05 -0700
Subject: [PATCH 03/16] MAINTAINERS: update IPE tree url and Fan Wu's email

Update the Integrity Policy Enforcement (IPE) LSM tree URL and the
maintainer's email to the newly issued kernel.org tree/email.

Signed-off-by: Fan Wu
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ad507f49324..33b158cf52b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11283,10 +11283,10 @@ F: security/integrity/
 F:	security/integrity/ima/
 
 INTEGRITY POLICY ENFORCEMENT (IPE)
-M:	Fan Wu
+M:	Fan Wu
 L:	linux-security-module@vger.kernel.org
 S:	Supported
-T:	git https://github.com/microsoft/ipe.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wufan/ipe.git
 F:	Documentation/admin-guide/LSM/ipe.rst
 F:	Documentation/security/ipe.rst
 F:	scripts/ipe/
-- 
2.51.0

From 22a18935d7d96bbb1a28076f843c1926d0ba189e Mon Sep 17 00:00:00 2001
From: John Edwards
Date: Thu, 10 Oct 2024 23:09:23 +0000
Subject: [PATCH 04/16] Input: xpad - add support for MSI Claw A1M

Add MSI Claw A1M controller to xpad_device match table when in xinput
mode. Add MSI VID as XPAD_XBOX360_VENDOR.

Signed-off-by: John Edwards
Reviewed-by: Derek J. Clark
Reviewed-by: Christopher Snowhill
Link: https://lore.kernel.org/r/20241010232020.3292284-4-uejji@uejji.net
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov
---
 drivers/input/joystick/xpad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 30b4cca8b69f..22ea58bf76cb 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -218,6 +218,7 @@ static const struct xpad_device {
 	{ 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
 	{ 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX },
 	{ 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+	{ 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 },
 	{ 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX },
 	{ 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX },
@@ -493,6 +494,7 @@ static const struct usb_device_id xpad_table[] = {
 	XPAD_XBOX360_VENDOR(0x07ff),		/* Mad Catz Gamepad */
 	XPAD_XBOXONE_VENDOR(0x0b05),		/* ASUS controllers */
 	XPAD_XBOX360_VENDOR(0x0c12),		/* Zeroplus X-Box 360 controllers */
+	XPAD_XBOX360_VENDOR(0x0db0),		/* Micro Star International X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x0e6f),		/* 0x0e6f Xbox 360 controllers */
 	XPAD_XBOXONE_VENDOR(0x0e6f),		/* 0x0e6f Xbox One controllers */
 	XPAD_XBOX360_VENDOR(0x0f0d),		/* Hori controllers */
-- 
2.51.0

From 2de01e0e57f3ebe7f90b08f6bca5ce0f3da3829f Mon Sep 17 00:00:00 2001
From: Nikita Travkin
Date: Fri, 4 Oct 2024 21:17:30 +0500
Subject: [PATCH 05/16] Input: zinitix - don't fail if linux,keycodes prop is
 absent

When initially adding the touchkey support, a mistake was made in the
property parsing code. The possible negative errno from
device_property_count_u32() was never checked, which was an oversight
left from converting to it from the of_property as part of the review
fixes.

Re-add the correct handling of the absent property, in which case zero
touchkeys should be assumed, which would disable the feature.

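For illustration, the resulting parsing logic reduces to the following
sketch (hypothetical names: "dev" stands in for the driver's device
pointer and MAX_KEYS for the size of the driver's keycode array):

	/* An absent optional property means "no touchkeys", not an error. */
	int nr_keys = 0;

	if (device_property_present(dev, "linux,keycodes")) {
		nr_keys = device_property_count_u32(dev, "linux,keycodes");
		if (nr_keys < 0)		/* present but malformed */
			return nr_keys;
		if (nr_keys > MAX_KEYS)		/* more keys than supported */
			return -EINVAL;
	}
	/* nr_keys == 0 leaves the touchkey feature disabled. */
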
Reported-by: Jakob Hauser
Tested-by: Jakob Hauser
Fixes: 075d9b22c8fe ("Input: zinitix - add touchkey support")
Reviewed-by: Linus Walleij
Signed-off-by: Nikita Travkin
Tested-by: Yassine Oudjana
Link: https://lore.kernel.org/r/20241004-zinitix-no-keycodes-v2-1-876dc9fea4b6@trvn.ru
Signed-off-by: Dmitry Torokhov
---
 drivers/input/touchscreen/zinitix.c | 34 +++++++++++++++++++----------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 52b3950460e2..716d6fa60f86 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -645,19 +645,29 @@ static int zinitix_ts_probe(struct i2c_client *client)
 		return error;
 	}
 
-	bt541->num_keycodes = device_property_count_u32(&client->dev, "linux,keycodes");
-	if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) {
-		dev_err(&client->dev, "too many keys defined (%d)\n", bt541->num_keycodes);
-		return -EINVAL;
-	}
+	if (device_property_present(&client->dev, "linux,keycodes")) {
+		bt541->num_keycodes = device_property_count_u32(&client->dev,
+								"linux,keycodes");
+		if (bt541->num_keycodes < 0) {
+			dev_err(&client->dev, "Failed to count keys (%d)\n",
+				bt541->num_keycodes);
+			return bt541->num_keycodes;
+		} else if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) {
+			dev_err(&client->dev, "Too many keys defined (%d)\n",
+				bt541->num_keycodes);
+			return -EINVAL;
+		}
 
-	error = device_property_read_u32_array(&client->dev, "linux,keycodes",
-					       bt541->keycodes,
-					       bt541->num_keycodes);
-	if (error) {
-		dev_err(&client->dev,
-			"Unable to parse \"linux,keycodes\" property: %d\n", error);
-		return error;
+		error = device_property_read_u32_array(&client->dev,
+						       "linux,keycodes",
+						       bt541->keycodes,
+						       bt541->num_keycodes);
+		if (error) {
+			dev_err(&client->dev,
+				"Unable to parse \"linux,keycodes\" property: %d\n",
+				error);
+			return error;
+		}
 	}
 
 	error = zinitix_init_input_dev(bt541);
-- 
2.51.0

From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 18 Oct 2024 21:43:00 -0400
Subject: [PATCH 06/16] fgraph: Use CPU hotplug mechanism to initialize idle
 shadow stacks

The function graph infrastructure allocates a shadow stack for every
task when enabled. This includes the idle tasks. The first time the
function graph is invoked, the shadow stacks are created and never
freed until the task exits.

Only the idle tasks that were for online CPUs had their shadow stacks
created when function graph tracing started. If function graph tracing
is enabled and a CPU comes online, the idle task representing that CPU
will not have its shadow stack created, and all function graph tracing
for that idle task will be silently dropped.

Instead, use the CPU hotplug mechanism to allocate the idle shadow
stacks. This will include idle tasks for CPUs that come online during
tracing.

This issue can be reproduced by:

 # cd /sys/kernel/tracing
 # echo 0 > /sys/devices/system/cpu/cpu1/online
 # echo 0 > set_ftrace_pid
 # echo function_graph > current_tracer
 # echo 1 > options/funcgraph-proc
 # echo 1 > /sys/devices/system/cpu/cpu1/online
 # grep '' per_cpu/cpu1/trace | head

Before, nothing would show up.

After:

 1)    <idle>-0    |   0.811 us    |  __enqueue_entity();
 1)    <idle>-0    |   5.626 us    |  } /* enqueue_entity */
 1)    <idle>-0    |               |  dl_server_update_idle_time() {
 1)    <idle>-0    |               |    dl_scaled_delta_exec() {
 1)    <idle>-0    |   0.450 us    |      arch_scale_cpu_capacity();
 1)    <idle>-0    |   1.242 us    |    }
 1)    <idle>-0    |   1.908 us    |  }
 1)    <idle>-0    |               |  dl_server_start() {
 1)    <idle>-0    |               |    enqueue_dl_entity() {
 1)    <idle>-0    |               |      task_contending() {

Note, if tracing stops and restarts, the old way would then initialize
the onlined CPUs.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Mark Rutland
Cc: Thomas Gleixner
Link: https://lore.kernel.org/20241018214300.6df82178@rorschach
Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/fgraph.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d7d4fb403f6f..43f4e3f57438 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void)
 static int start_graph_tracing(void)
 {
 	unsigned long **ret_stack_list;
-	int ret, cpu;
+	int ret;
 
 	ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
 
 	if (!ret_stack_list)
 		return -ENOMEM;
 
-	/* The cpu_boot init_task->ret_stack will never be freed */
-	for_each_online_cpu(cpu) {
-		if (!idle_task(cpu)->ret_stack)
-			ftrace_graph_init_idle_task(idle_task(cpu), cpu);
-	}
-
 	do {
 		ret = alloc_retstack_tasklist(ret_stack_list);
 	} while (ret == -EAGAIN);
@@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch)
 	fgraph_direct_gops = &fgraph_stub;
 }
 
+/* The cpu_boot init_task->ret_stack will never be freed */
+static int fgraph_cpu_init(unsigned int cpu)
+{
+	if (!idle_task(cpu)->ret_stack)
+		ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+	return 0;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+	static bool fgraph_initialized;
 	int command = 0;
 	int ret = 0;
 	int i = -1;
 
 	mutex_lock(&ftrace_lock);
 
+	if (!fgraph_initialized) {
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init",
+					fgraph_cpu_init, NULL);
+		if (ret < 0) {
+			pr_warn("fgraph: Error to init cpu hotplug support\n");
+			return ret;
+		}
+		fgraph_initialized = true;
+		ret = 0;
+	}
+
 	if (!fgraph_array[0]) {
 		/* The array must always have real data on it */
 		for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
-- 
2.51.0

From fae4078c289a2f24229c0de652249948b1cd6bdb Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 18 Oct 2024 21:52:12 -0400
Subject: [PATCH 07/16] fgraph: Allocate ret_stack_list with proper size

The ret_stack_list is an array of ret_stack shadow stacks for the
function graph usage. When the first function graph is enabled, all
tasks in the system get a shadow stack. The ret_stack_list is a 32
element array of pointers to these shadow stacks. It allocates the
shadow stack in batches (32 stacks at a time), assigns them to running
tasks, and continues until all tasks are covered.

When the function graph shadow stack changed from an array of
ftrace_ret_stack structures to an array of longs, the allocation of
ret_stack_list went from allocating an array of 32 elements to just a
block defined by SHADOW_STACK_SIZE. Luckily, that's defined as
PAGE_SIZE and is much more than enough to hold 32 pointers. But it is
way overkill for the amount needed to allocate.

Change the allocation of ret_stack_list back to a kcalloc() of
FTRACE_RETSTACK_ALLOC_SIZE pointers.

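For scale, a worked comparison of the two allocation sizes (assuming a
64-bit build where PAGE_SIZE is 4096 bytes and pointers are 8 bytes;
FTRACE_RETSTACK_ALLOC_SIZE is 32):

  kmalloc(SHADOW_STACK_SIZE)                 = PAGE_SIZE = 4096 bytes
  kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
          sizeof(unsigned long *))           =   32 * 8  =  256 bytes

Both are large enough for the 32 pointers the batching loop uses; the
former simply wastes the rest of the page.
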
Cc: Masami Hiramatsu
Cc: Mark Rutland
Cc: Mathieu Desnoyers
Link: https://lore.kernel.org/20241018215212.23f13f40@rorschach
Fixes: 42675b723b484 ("function_graph: Convert ret_stack to a series of longs")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/fgraph.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 43f4e3f57438..41e7a15dcb50 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1162,7 +1162,8 @@ static int start_graph_tracing(void)
 	unsigned long **ret_stack_list;
 	int ret;
 
-	ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
+	ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
+				 sizeof(*ret_stack_list), GFP_KERNEL);
 
 	if (!ret_stack_list)
 		return -ENOMEM;
 
-- 
2.51.0

From ae6a888a4357131c01d85f4c91fb32552dd0bf70 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 19 Oct 2024 09:16:51 -0600
Subject: [PATCH 08/16] io_uring/rw: fix wrong NOWAIT check in io_rw_init_file()

A previous commit improved how !FMODE_NOWAIT is dealt with, but
inadvertently negated a check whilst doing so. This caused -EAGAIN to
be returned from reading files with O_NONBLOCK set.

Fix up the check for REQ_F_SUPPORT_NOWAIT.

Reported-by: Julian Orth
Link: https://github.com/axboe/liburing/issues/1270
Fixes: f7c913438533 ("io_uring/rw: allow pollable non-blocking attempts for !FMODE_NOWAIT")
Signed-off-by: Jens Axboe
---
 io_uring/rw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 80ae3c2ebb70..354c4e175654 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -807,7 +807,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	 * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
 	 */
 	if (kiocb->ki_flags & IOCB_NOWAIT ||
-	    ((file->f_flags & O_NONBLOCK && (req->flags & REQ_F_SUPPORT_NOWAIT))))
+	    ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT))))
 		req->flags |= REQ_F_NOWAIT;
 
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-- 
2.51.0

From 42f7652d3eb527d03665b09edac47f85fb600924 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 20 Oct 2024 15:19:38 -0700
Subject: [PATCH 09/16] Linux 6.12-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8cf3cf528892..a9a7d9ffaa98 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
2.51.0

From 165126dc5e23979721122dc5c7cfb28b1ca234cc Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:47 -0600
Subject: [PATCH 10/16] io_uring/eventfd: abstract out ev_fd put helper

We call this in two spots, so add a helper for it. In preparation for
extending this part.

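For readers new to the idiom, a minimal sketch of the refcount-plus-RCU
release pattern the new helper wraps (struct obj and its functions are
hypothetical stand-ins, not the io_uring types):

	#include <linux/rcupdate.h>
	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t refs;
		struct rcu_head rcu;
	};

	static void obj_free(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct obj, rcu));
	}

	/* Drop a reference; free only once concurrent RCU readers are done. */
	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->refs))
			call_rcu(&o->rcu, obj_free);
	}
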
Link: https://lore.kernel.org/r/20240921080307.185186-2-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index e37fddd5d9ce..8b628ab6bbff 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -41,6 +41,12 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
 	io_eventfd_free(rcu);
 }
 
+static void io_eventfd_put(struct io_ev_fd *ev_fd)
+{
+	if (refcount_dec_and_test(&ev_fd->refs))
+		call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd = NULL;
@@ -77,8 +83,7 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 		}
 	}
 out:
-	if (refcount_dec_and_test(&ev_fd->refs))
-		call_rcu(&ev_fd->rcu, io_eventfd_free);
+	io_eventfd_put(ev_fd);
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
@@ -152,8 +157,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	if (ev_fd) {
 		ctx->has_evfd = false;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
-		if (refcount_dec_and_test(&ev_fd->refs))
-			call_rcu(&ev_fd->rcu, io_eventfd_free);
+		io_eventfd_put(ev_fd);
 		return 0;
 	}
 
-- 
2.51.0

From 3c90b80df5b574c2c61626fd40fa3b23be21fa26 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:48 -0600
Subject: [PATCH 11/16] io_uring/eventfd: check for the need to async notify
 earlier

It's not necessary to do this after grabbing a reference. With that,
we can drop the out goto path as well.

Link: https://lore.kernel.org/r/20240921080307.185186-3-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 8b628ab6bbff..829873806f9f 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -69,10 +69,10 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 	 */
 	if (unlikely(!ev_fd))
 		return;
+	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+		return;
 	if (!refcount_inc_not_zero(&ev_fd->refs))
 		return;
-	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
-		goto out;
 
 	if (likely(eventfd_signal_allowed())) {
 		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
@@ -82,7 +82,6 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 			return;
 		}
 	}
-out:
 	io_eventfd_put(ev_fd);
 }
 
-- 
2.51.0

From 60c5f15800f21883615689e2423217a9c8a1b502 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:49 -0600
Subject: [PATCH 12/16] io_uring/eventfd: move actual signaling part into
 separate helper

In preparation for using this from multiple spots, move the signaling
into a helper.

Link: https://lore.kernel.org/r/20240921080307.185186-4-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 829873806f9f..58e76f4d1e00 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -47,6 +47,22 @@ static void io_eventfd_put(struct io_ev_fd *ev_fd)
 	call_rcu(&ev_fd->rcu, io_eventfd_free);
 }
 
+/*
+ * Returns true if the caller should put the ev_fd reference, false if not.
+ */
+static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
+{
+	if (eventfd_signal_allowed()) {
+		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+		return true;
+	}
+	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+		return false;
+	}
+	return true;
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd = NULL;
@@ -73,16 +89,8 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 		return;
 	if (!refcount_inc_not_zero(&ev_fd->refs))
 		return;
-
-	if (likely(eventfd_signal_allowed())) {
-		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-	} else {
-		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
-			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
-			return;
-		}
-	}
-	io_eventfd_put(ev_fd);
+	if (__io_eventfd_signal(ev_fd))
+		io_eventfd_put(ev_fd);
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-- 
2.51.0

From 3ca5a356041438534ecbb74159df91736238c6b1 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:50 -0600
Subject: [PATCH 13/16] io_uring/eventfd: move trigger check into a helper

It's a bit hard to read what guards the triggering; move it into a
helper and add a comment explaining it too. This additionally moves
the ev_fd == NULL check in there as well.

Link: https://lore.kernel.org/r/20240921080307.185186-5-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 58e76f4d1e00..0946d3da88d3 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -63,6 +63,17 @@ static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
 	return true;
 }
 
+/*
+ * Trigger if eventfd_async isn't set, or if it's set and the caller is
+ * an async worker. If ev_fd isn't valid, obviously return false.
+ */
+static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
+{
+	if (ev_fd)
+		return !ev_fd->eventfd_async || io_wq_current_is_worker();
+	return false;
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd = NULL;
@@ -83,9 +94,7 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 	 * completed between the NULL check of ctx->io_ev_fd at the start of
 	 * the function and rcu_read_lock.
 	 */
-	if (unlikely(!ev_fd))
-		return;
-	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+	if (!io_eventfd_trigger(ev_fd))
 		return;
 	if (!refcount_inc_not_zero(&ev_fd->refs))
 		return;
-- 
2.51.0

From 83a4f865e273b83426eafdd3aa51334cc21ac0fd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:51 -0600
Subject: [PATCH 14/16] io_uring/eventfd: abstract out ev_fd grab + release
 helpers

In preparation for needing the ev_fd grabbing (and releasing) from
another path, abstract out two helpers for that.

Link: https://lore.kernel.org/r/20240921080307.185186-6-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 0946d3da88d3..d1fdecd0c458 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -47,6 +47,13 @@ static void io_eventfd_put(struct io_ev_fd *ev_fd)
 	call_rcu(&ev_fd->rcu, io_eventfd_free);
 }
 
+static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
+{
+	if (put_ref)
+		io_eventfd_put(ev_fd);
+	rcu_read_unlock();
+}
+
 /*
  * Returns true if the caller should put the ev_fd reference, false if not.
 */
@@ -74,14 +81,18 @@ static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
 	return false;
 }
 
-void io_eventfd_signal(struct io_ring_ctx *ctx)
+/*
+ * On success, returns with an ev_fd reference grabbed and the RCU read
+ * lock held.
+ */
+static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
 {
-	struct io_ev_fd *ev_fd = NULL;
+	struct io_ev_fd *ev_fd;
 
 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
-		return;
+		return NULL;
 
-	guard(rcu)();
+	rcu_read_lock();
 
 	/*
 	 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
@@ -90,16 +101,24 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 	ev_fd = rcu_dereference(ctx->io_ev_fd);
 
 	/*
-	 * Check again if ev_fd exists incase an io_eventfd_unregister call
+	 * Check again if ev_fd exists in case an io_eventfd_unregister call
 	 * completed between the NULL check of ctx->io_ev_fd at the start of
 	 * the function and rcu_read_lock.
 	 */
-	if (!io_eventfd_trigger(ev_fd))
-		return;
-	if (!refcount_inc_not_zero(&ev_fd->refs))
-		return;
-	if (__io_eventfd_signal(ev_fd))
-		io_eventfd_put(ev_fd);
+	if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
+		return ev_fd;
+
+	rcu_read_unlock();
+	return NULL;
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+	struct io_ev_fd *ev_fd;
+
+	ev_fd = io_eventfd_grab(ctx);
+	if (ev_fd)
+		io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-- 
2.51.0

From f4bb2f65bb8154c1a2c2d7e01db0c98dffb5918f Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:52 -0600
Subject: [PATCH 15/16] io_uring/eventfd: move ctx->evfd_last_cq_tail into
 io_ev_fd

Everything else about the io_uring eventfd support is nicely kept
private to that code, except the cached_cq_tail tracking. With
everything else in place, move io_eventfd_flush_signal() to using the
ev_fd grab+release helpers, which then enables the direct use of
io_ev_fd for this tracking too.

Link: https://lore.kernel.org/r/20240921080307.185186-7-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 50 +++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index d1fdecd0c458..fab936d31ba8 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -13,10 +13,12 @@
 struct io_ev_fd {
 	struct eventfd_ctx	*cq_ev_fd;
-	unsigned int		eventfd_async: 1;
-	struct rcu_head		rcu;
+	unsigned int		eventfd_async;
+	/* protected by ->completion_lock */
+	unsigned		last_cq_tail;
 	refcount_t		refs;
 	atomic_t		ops;
+	struct rcu_head		rcu;
 };
 
 enum {
@@ -123,25 +125,31 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
 {
-	bool skip;
-
-	spin_lock(&ctx->completion_lock);
-
-	/*
-	 * Eventfd should only get triggered when at least one event has been
-	 * posted. Some applications rely on the eventfd notification count
-	 * only changing IFF a new CQE has been added to the CQ ring. There's
-	 * no depedency on 1:1 relationship between how many times this
-	 * function is called (and hence the eventfd count) and number of CQEs
-	 * posted to the CQ ring.
-	 */
-	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
-	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
-	spin_unlock(&ctx->completion_lock);
-	if (skip)
-		return;
+	struct io_ev_fd *ev_fd;
 
-	io_eventfd_signal(ctx);
+	ev_fd = io_eventfd_grab(ctx);
+	if (ev_fd) {
+		bool skip, put_ref = true;
+
+		/*
+		 * Eventfd should only get triggered when at least one event
+		 * has been posted. Some applications rely on the eventfd
+		 * notification count only changing IFF a new CQE has been
+		 * added to the CQ ring. There's no dependency on 1:1
+		 * relationship between how many times this function is called
+		 * (and hence the eventfd count) and number of CQEs posted to
+		 * the CQ ring.
+		 */
+		spin_lock(&ctx->completion_lock);
+		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
+		ev_fd->last_cq_tail = ctx->cached_cq_tail;
+		spin_unlock(&ctx->completion_lock);
+
+		if (!skip)
+			put_ref = __io_eventfd_signal(ev_fd);
+
+		io_eventfd_release(ev_fd, put_ref);
+	}
 }
 
 int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
@@ -172,7 +180,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 	}
 
 	spin_lock(&ctx->completion_lock);
-	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+	ev_fd->last_cq_tail = ctx->cached_cq_tail;
 	spin_unlock(&ctx->completion_lock);
 
 	ev_fd->eventfd_async = eventfd_async;
-- 
2.51.0

From 95d6c9229a04cc12d39034cd6be6446a55a85d6d Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 24 Sep 2024 05:57:30 -0600
Subject: [PATCH 16/16] io_uring/msg_ring: refactor a few helper functions

Mostly just to skip them taking an io_kiocb, rather just pass in the
ctx and io_msg directly. In preparation for being able to issue a
MSG_RING request without having an io_kiocb. No functional changes in
this patch.

Link: https://lore.kernel.org/r/20240924115932.116167-2-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/msg_ring.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 7fd9badcfaf8..b8c527f08cd5 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -116,14 +116,13 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
 	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 }
 
-static int io_msg_data_remote(struct io_kiocb *req)
+static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
+			      struct io_msg *msg)
 {
-	struct io_ring_ctx *target_ctx = req->file->private_data;
-	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
 	struct io_kiocb *target;
 	u32 flags = 0;
 
-	target = io_msg_get_kiocb(req->ctx);
+	target = io_msg_get_kiocb(target_ctx);
 	if (unlikely(!target))
 		return -ENOMEM;
 
@@ -134,10 +133,9 @@ static int io_msg_data_remote(struct io_kiocb *req)
 			msg->user_data);
 }
 
-static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
+			      struct io_msg *msg, unsigned int issue_flags)
 {
-	struct io_ring_ctx *target_ctx = req->file->private_data;
-	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
 	u32 flags = 0;
 	int ret;
 
@@ -149,7 +147,7 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 		return -EBADFD;
 
 	if (io_msg_need_remote(target_ctx))
-		return io_msg_data_remote(req);
+		return io_msg_data_remote(target_ctx, msg);
 
 	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
 		flags = msg->cqe_flags;
@@ -166,6 +164,14 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 	return ret;
 }
 
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *target_ctx = req->file->private_data;
+	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+
+	return __io_msg_ring_data(target_ctx, msg, issue_flags);
+}
+
 static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -271,10 +277,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 	return io_msg_install_complete(req, issue_flags);
 }
 
-int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
 {
-	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-
 	if (unlikely(sqe->buf_index || sqe->personality))
 		return -EINVAL;
 
@@ -291,6 +295,11 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return 0;
 }
 
+int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
+}
+
 int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-- 
2.51.0
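
As closing context for this series, a minimal userspace sketch of the
MSG_RING operation these helpers back (assumes liburing 2.2 or newer;
error handling abbreviated): ring A posts a CQE directly into ring B's
completion queue.

	/* Build with: cc msg_ring_demo.c -luring */
	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring src, dst;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;

		if (io_uring_queue_init(8, &src, 0) || io_uring_queue_init(8, &dst, 0))
			return 1;

		sqe = io_uring_get_sqe(&src);
		/* target ring fd, 'res' for the remote CQE, its user_data, flags */
		io_uring_prep_msg_ring(sqe, dst.ring_fd, 0x10, 0xcafe, 0);
		io_uring_submit(&src);

		if (!io_uring_wait_cqe(&dst, &cqe)) {
			printf("remote CQE: user_data=0x%llx res=0x%x\n",
			       (unsigned long long)cqe->user_data, cqe->res);
			io_uring_cqe_seen(&dst, cqe);
		}

		io_uring_queue_exit(&src);
		io_uring_queue_exit(&dst);
		return 0;
	}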