From f40998a8e6bbf0314b8416350183a537f9b59ca9 Mon Sep 17 00:00:00 2001
From: Luca Boccassi
Date: Fri, 27 Sep 2024 10:23:44 +0200
Subject: [PATCH 01/16] ipe: fallback to platform keyring also if key in
 trusted keyring is rejected

If enabled, we fall back to the platform keyring if the trusted keyring
doesn't have the key used to sign the ipe policy. But if pkcs7_verify()
rejects the key for other reasons, such as usage restrictions, we do
not fall back. Do so, following the same change in dm-verity.

Signed-off-by: Luca Boccassi
Suggested-by: Serge Hallyn
[FW: fixed some line length issues and a typo in the commit message]
Signed-off-by: Fan Wu
---
 security/ipe/policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/ipe/policy.c b/security/ipe/policy.c
index 45f7d6a0ed23..b628f696e32b 100644
--- a/security/ipe/policy.c
+++ b/security/ipe/policy.c
@@ -178,7 +178,7 @@ struct ipe_policy *ipe_new_policy(const char *text, size_t textlen,
                                     VERIFYING_UNSPECIFIED_SIGNATURE,
                                     set_pkcs7_data, new);
 #ifdef CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING
-        if (rc == -ENOKEY)
+        if (rc == -ENOKEY || rc == -EKEYREJECTED)
                 rc = verify_pkcs7_signature(NULL, 0, new->pkcs7, pkcs7len,
                                             VERIFY_USE_PLATFORM_KEYRING,
                                             VERIFYING_UNSPECIFIED_SIGNATURE,
-- 
2.51.0

From 917a15c37d371bc40b5ad13df366e29bd49c04a1 Mon Sep 17 00:00:00 2001
From: Fan Wu
Date: Wed, 16 Oct 2024 16:43:05 -0700
Subject: [PATCH 02/16] MAINTAINERS: update IPE tree url and Fan Wu's email

Update the Integrity Policy Enforcement (IPE) LSM tree URL and the
maintainer's email to the newly issued kernel.org tree/email.

Signed-off-by: Fan Wu
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ad507f49324..33b158cf52b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11283,10 +11283,10 @@ F:      security/integrity/
 F:      security/integrity/ima/
 
 INTEGRITY POLICY ENFORCEMENT (IPE)
-M:      Fan Wu
+M:      Fan Wu
 L:      linux-security-module@vger.kernel.org
 S:      Supported
-T:      git https://github.com/microsoft/ipe.git
+T:      git git://git.kernel.org/pub/scm/linux/kernel/git/wufan/ipe.git
 F:      Documentation/admin-guide/LSM/ipe.rst
 F:      Documentation/security/ipe.rst
 F:      scripts/ipe/
-- 
2.51.0

From 22a18935d7d96bbb1a28076f843c1926d0ba189e Mon Sep 17 00:00:00 2001
From: John Edwards
Date: Thu, 10 Oct 2024 23:09:23 +0000
Subject: [PATCH 03/16] Input: xpad - add support for MSI Claw A1M

Add MSI Claw A1M controller to xpad_device match table when in xinput
mode. Add MSI VID as XPAD_XBOX360_VENDOR.

Signed-off-by: John Edwards
Reviewed-by: Derek J. Clark
Reviewed-by: Christopher Snowhill
Link: https://lore.kernel.org/r/20241010232020.3292284-4-uejji@uejji.net
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov
---
 drivers/input/joystick/xpad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 30b4cca8b69f..22ea58bf76cb 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -218,6 +218,7 @@ static const struct xpad_device {
         { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
         { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX },
         { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+        { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 },
         { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX },
         { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX },
         { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX },
@@ -493,6 +494,7 @@ static const struct usb_device_id xpad_table[] = {
         XPAD_XBOX360_VENDOR(0x07ff),            /* Mad Catz Gamepad */
         XPAD_XBOXONE_VENDOR(0x0b05),            /* ASUS controllers */
         XPAD_XBOX360_VENDOR(0x0c12),            /* Zeroplus X-Box 360 controllers */
+        XPAD_XBOX360_VENDOR(0x0db0),            /* Micro Star International X-Box 360 controllers */
         XPAD_XBOX360_VENDOR(0x0e6f),            /* 0x0e6f Xbox 360 controllers */
         XPAD_XBOXONE_VENDOR(0x0e6f),            /* 0x0e6f Xbox One controllers */
         XPAD_XBOX360_VENDOR(0x0f0d),            /* Hori controllers */
-- 
2.51.0

From 2de01e0e57f3ebe7f90b08f6bca5ce0f3da3829f Mon Sep 17 00:00:00 2001
From: Nikita Travkin
Date: Fri, 4 Oct 2024 21:17:30 +0500
Subject: [PATCH 04/16] Input: zinitix - don't fail if linux,keycodes prop is
 absent

When initially adding the touchkey support, a mistake was made in the
property parsing code. The possible negative errno from
device_property_count_u32() was never checked, an oversight left over
from converting to it from the of_property counterpart as part of the
review fixes.

Re-add the correct handling of the absent property, in which case zero
touchkeys should be assumed, which would disable the feature.
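
The failure mode is worth spelling out: the count is stored in a signed
int, and comparing it against ARRAY_SIZE() promotes it to size_t, so a
negative errno looked like "too many keys" and failed the probe. A
minimal stand-alone sketch of the same trap (plain C; count_keys() is a
hypothetical stand-in for device_property_count_u32() returning
-EINVAL, which is -22 on Linux):

  #include <stdio.h>

  /* Pretend the property is absent: return -EINVAL like the fwnode API */
  static int count_keys(void)
  {
          return -22;
  }

  int main(void)
  {
          int keycodes[8];
          int num_keycodes = count_keys();

          /*
           * The signed count is converted to size_t for the comparison,
           * so -22 becomes a huge unsigned value and this branch is taken.
           */
          if (num_keycodes > sizeof(keycodes) / sizeof(keycodes[0]))
                  printf("rejected: %d keys\n", num_keycodes);
          return 0;
  }
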
Reported-by: Jakob Hauser
Tested-by: Jakob Hauser
Fixes: 075d9b22c8fe ("Input: zinitix - add touchkey support")
Reviewed-by: Linus Walleij
Signed-off-by: Nikita Travkin
Tested-by: Yassine Oudjana
Link: https://lore.kernel.org/r/20241004-zinitix-no-keycodes-v2-1-876dc9fea4b6@trvn.ru
Signed-off-by: Dmitry Torokhov
---
 drivers/input/touchscreen/zinitix.c | 34 +++++++++++++++++++----------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c
index 52b3950460e2..716d6fa60f86 100644
--- a/drivers/input/touchscreen/zinitix.c
+++ b/drivers/input/touchscreen/zinitix.c
@@ -645,19 +645,29 @@ static int zinitix_ts_probe(struct i2c_client *client)
                 return error;
         }
 
-        bt541->num_keycodes = device_property_count_u32(&client->dev, "linux,keycodes");
-        if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) {
-                dev_err(&client->dev, "too many keys defined (%d)\n", bt541->num_keycodes);
-                return -EINVAL;
-        }
+        if (device_property_present(&client->dev, "linux,keycodes")) {
+                bt541->num_keycodes = device_property_count_u32(&client->dev,
+                                                                "linux,keycodes");
+                if (bt541->num_keycodes < 0) {
+                        dev_err(&client->dev, "Failed to count keys (%d)\n",
+                                bt541->num_keycodes);
+                        return bt541->num_keycodes;
+                } else if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) {
+                        dev_err(&client->dev, "Too many keys defined (%d)\n",
+                                bt541->num_keycodes);
+                        return -EINVAL;
+                }
 
-        error = device_property_read_u32_array(&client->dev, "linux,keycodes",
-                                               bt541->keycodes,
-                                               bt541->num_keycodes);
-        if (error) {
-                dev_err(&client->dev,
-                        "Unable to parse \"linux,keycodes\" property: %d\n", error);
-                return error;
+                error = device_property_read_u32_array(&client->dev,
+                                                       "linux,keycodes",
+                                                       bt541->keycodes,
+                                                       bt541->num_keycodes);
+                if (error) {
+                        dev_err(&client->dev,
+                                "Unable to parse \"linux,keycodes\" property: %d\n",
+                                error);
+                        return error;
+                }
         }
 
         error = zinitix_init_input_dev(bt541);
-- 
2.51.0

From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 18 Oct 2024 21:43:00 -0400
Subject: [PATCH 05/16] fgraph: Use CPU hotplug mechanism to initialize idle
 shadow stacks

The function graph infrastructure allocates a shadow stack for every
task when enabled. This includes the idle tasks. The first time the
function graph is invoked, the shadow stacks are created and never
freed until the task exits.

Only the idle tasks that were for online CPUs had their shadow stacks
created when function graph tracing started. If function graph tracing
is enabled and a CPU comes online, the idle task representing that CPU
will not have its shadow stack created, and all function graph tracing
for that idle task will be silently dropped.

Instead, use the CPU hotplug mechanism to allocate the idle shadow
stacks. This will include idle tasks for CPUs that come online during
tracing.

This issue can be reproduced by:

 # cd /sys/kernel/tracing
 # echo 0 > /sys/devices/system/cpu/cpu1/online
 # echo 0 > set_ftrace_pid
 # echo function_graph > current_tracer
 # echo 1 > options/funcgraph-proc
 # echo 1 > /sys/devices/system/cpu/cpu1/online
 # grep '<idle>' per_cpu/cpu1/trace | head

Before, nothing would show up.
After:

 1)    <idle>-0    |   0.811 us  |  __enqueue_entity();
 1)    <idle>-0    |   5.626 us  |  } /* enqueue_entity */
 1)    <idle>-0    |             |  dl_server_update_idle_time() {
 1)    <idle>-0    |             |    dl_scaled_delta_exec() {
 1)    <idle>-0    |   0.450 us  |      arch_scale_cpu_capacity();
 1)    <idle>-0    |   1.242 us  |    }
 1)    <idle>-0    |   1.908 us  |  }
 1)    <idle>-0    |             |  dl_server_start() {
 1)    <idle>-0    |             |    enqueue_dl_entity() {
 1)    <idle>-0    |             |      task_contending() {

Note, if tracing stops and restarts, the old way would then initialize
the onlined CPUs.

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Mark Rutland
Cc: Thomas Gleixner
Link: https://lore.kernel.org/20241018214300.6df82178@rorschach
Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/fgraph.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d7d4fb403f6f..43f4e3f57438 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void)
 static int start_graph_tracing(void)
 {
         unsigned long **ret_stack_list;
-        int ret, cpu;
+        int ret;
 
         ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
         if (!ret_stack_list)
                 return -ENOMEM;
 
-        /* The cpu_boot init_task->ret_stack will never be freed */
-        for_each_online_cpu(cpu) {
-                if (!idle_task(cpu)->ret_stack)
-                        ftrace_graph_init_idle_task(idle_task(cpu), cpu);
-        }
-
         do {
                 ret = alloc_retstack_tasklist(ret_stack_list);
         } while (ret == -EAGAIN);
 
@@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch)
         fgraph_direct_gops = &fgraph_stub;
 }
 
+/* The cpu_boot init_task->ret_stack will never be freed */
+static int fgraph_cpu_init(unsigned int cpu)
+{
+        if (!idle_task(cpu)->ret_stack)
+                ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+        return 0;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+        static bool fgraph_initialized;
         int command = 0;
         int ret = 0;
         int i = -1;
 
         mutex_lock(&ftrace_lock);
 
+        if (!fgraph_initialized) {
+                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init",
+                                        fgraph_cpu_init, NULL);
+                if (ret < 0) {
+                        pr_warn("fgraph: Error to init cpu hotplug support\n");
+                        return ret;
+                }
+                fgraph_initialized = true;
+                ret = 0;
+        }
+
         if (!fgraph_array[0]) {
                 /* The array must always have real data on it */
                 for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
-- 
2.51.0

From fae4078c289a2f24229c0de652249948b1cd6bdb Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 18 Oct 2024 21:52:12 -0400
Subject: [PATCH 06/16] fgraph: Allocate ret_stack_list with proper size

The ret_stack_list is an array of ret_stack shadow stacks for the
function graph usage. When the first function graph is enabled, all
tasks in the system get a shadow stack. The ret_stack_list is a 32
element array of pointers to these shadow stacks. It allocates the
shadow stack in batches (32 stacks at a time), assigns them to running
tasks, and continues until all tasks are covered.

When the function graph shadow stack changed from an array of
ftrace_ret_stack structures to an array of longs, the allocation of
ret_stack_list went from allocating an array of 32 elements to just a
block defined by SHADOW_STACK_SIZE. Luckily, that's defined as
PAGE_SIZE and is much more than enough to hold 32 pointers. But it is
way overkill for the amount needed to allocate.

Change the allocation of ret_stack_list back to a kcalloc() of
FTRACE_RETSTACK_ALLOC_SIZE pointers.
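
Back-of-the-envelope, assuming a 4K PAGE_SIZE and 8-byte pointers (both
config/arch dependent), the sketch below shows what the old and new
allocations come out to; FTRACE_RETSTACK_ALLOC_SIZE is the 32-entry
batch size mentioned above:

  #include <stdio.h>

  #define PAGE_SIZE                     4096    /* assumed, arch dependent */
  #define FTRACE_RETSTACK_ALLOC_SIZE    32

  int main(void)
  {
          /* Old: kmalloc(SHADOW_STACK_SIZE, ...), i.e. a whole page */
          printf("old: %d bytes\n", PAGE_SIZE);
          /* New: kcalloc() of 32 pointers, what the list actually needs */
          printf("new: %zu bytes\n",
                 FTRACE_RETSTACK_ALLOC_SIZE * sizeof(unsigned long *));
          return 0;
  }
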
Cc: Masami Hiramatsu
Cc: Mark Rutland
Cc: Mathieu Desnoyers
Link: https://lore.kernel.org/20241018215212.23f13f40@rorschach
Fixes: 42675b723b484 ("function_graph: Convert ret_stack to a series of longs")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/fgraph.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 43f4e3f57438..41e7a15dcb50 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1162,7 +1162,8 @@ static int start_graph_tracing(void)
         unsigned long **ret_stack_list;
         int ret;
 
-        ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
+        ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
+                                 sizeof(*ret_stack_list), GFP_KERNEL);
         if (!ret_stack_list)
                 return -ENOMEM;
 
-- 
2.51.0

From ae6a888a4357131c01d85f4c91fb32552dd0bf70 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 19 Oct 2024 09:16:51 -0600
Subject: [PATCH 07/16] io_uring/rw: fix wrong NOWAIT check in io_rw_init_file()

A previous commit improved how !FMODE_NOWAIT is dealt with, but
inadvertently negated a check whilst doing so. This caused -EAGAIN to
be returned from reading files with O_NONBLOCK set.

Fix up the check for REQ_F_SUPPORT_NOWAIT.

Reported-by: Julian Orth
Link: https://github.com/axboe/liburing/issues/1270
Fixes: f7c913438533 ("io_uring/rw: allow pollable non-blocking attempts for !FMODE_NOWAIT")
Signed-off-by: Jens Axboe
---
 io_uring/rw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 80ae3c2ebb70..354c4e175654 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -807,7 +807,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
          * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
          */
         if (kiocb->ki_flags & IOCB_NOWAIT ||
-            ((file->f_flags & O_NONBLOCK && (req->flags & REQ_F_SUPPORT_NOWAIT))))
+            ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT))))
                 req->flags |= REQ_F_NOWAIT;
 
         if (ctx->flags & IORING_SETUP_IOPOLL) {
-- 
2.51.0

From 42f7652d3eb527d03665b09edac47f85fb600924 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 20 Oct 2024 15:19:38 -0700
Subject: [PATCH 08/16] Linux 6.12-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8cf3cf528892..a9a7d9ffaa98 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
2.51.0

From 165126dc5e23979721122dc5c7cfb28b1ca234cc Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:47 -0600
Subject: [PATCH 09/16] io_uring/eventfd: abstract out ev_fd put helper

We call this in two spots, have a helper for it. In preparation for
extending this part.
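
The helper wraps the common "drop a reference, free via RCU once the
last one is gone" idiom. In isolation the pattern looks like the
following sketch (generic names, not the io_uring types; kfree() as a
stand-in for the real free routine):

  #include <linux/rcupdate.h>
  #include <linux/refcount.h>
  #include <linux/slab.h>

  struct obj {
          refcount_t refs;
          struct rcu_head rcu;
  };

  static void obj_free(struct rcu_head *rcu)
  {
          kfree(container_of(rcu, struct obj, rcu));
  }

  static void obj_put(struct obj *o)
  {
          /* Last reference dropped: defer the free past RCU readers */
          if (refcount_dec_and_test(&o->refs))
                  call_rcu(&o->rcu, obj_free);
  }
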
Link: https://lore.kernel.org/r/20240921080307.185186-2-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index e37fddd5d9ce..8b628ab6bbff 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -41,6 +41,12 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
         io_eventfd_free(rcu);
 }
 
+static void io_eventfd_put(struct io_ev_fd *ev_fd)
+{
+        if (refcount_dec_and_test(&ev_fd->refs))
+                call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
         struct io_ev_fd *ev_fd = NULL;
@@ -77,8 +83,7 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
                 }
         }
 out:
-        if (refcount_dec_and_test(&ev_fd->refs))
-                call_rcu(&ev_fd->rcu, io_eventfd_free);
+        io_eventfd_put(ev_fd);
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
@@ -152,8 +157,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
         if (ev_fd) {
                 ctx->has_evfd = false;
                 rcu_assign_pointer(ctx->io_ev_fd, NULL);
-                if (refcount_dec_and_test(&ev_fd->refs))
-                        call_rcu(&ev_fd->rcu, io_eventfd_free);
+                io_eventfd_put(ev_fd);
                 return 0;
         }
 
-- 
2.51.0

From 3c90b80df5b574c2c61626fd40fa3b23be21fa26 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:48 -0600
Subject: [PATCH 10/16] io_uring/eventfd: check for the need to async notify
 earlier

It's not necessary to do this after grabbing a reference. With that,
we can drop the out goto path as well.

Link: https://lore.kernel.org/r/20240921080307.185186-3-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 8b628ab6bbff..829873806f9f 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -69,10 +69,10 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
          */
         if (unlikely(!ev_fd))
                 return;
+        if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+                return;
         if (!refcount_inc_not_zero(&ev_fd->refs))
                 return;
-        if (ev_fd->eventfd_async && !io_wq_current_is_worker())
-                goto out;
 
         if (likely(eventfd_signal_allowed())) {
                 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
@@ -82,7 +82,6 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
                         return;
                 }
         }
-out:
         io_eventfd_put(ev_fd);
 }
 
-- 
2.51.0

From 60c5f15800f21883615689e2423217a9c8a1b502 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:49 -0600
Subject: [PATCH 11/16] io_uring/eventfd: move actual signaling part into
 separate helper

In preparation for using this from multiple spots, move the signaling
into a helper.

Link: https://lore.kernel.org/r/20240921080307.185186-4-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 829873806f9f..58e76f4d1e00 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -47,6 +47,22 @@ static void io_eventfd_put(struct io_ev_fd *ev_fd)
         call_rcu(&ev_fd->rcu, io_eventfd_free);
 }
 
+/*
+ * Returns true if the caller should put the ev_fd reference, false if not.
+ */
+static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
+{
+        if (eventfd_signal_allowed()) {
+                eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+                return true;
+        }
+        if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+                call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+                return false;
+        }
+        return true;
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
         struct io_ev_fd *ev_fd = NULL;
@@ -73,16 +89,8 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
                 return;
         if (!refcount_inc_not_zero(&ev_fd->refs))
                 return;
-
-        if (likely(eventfd_signal_allowed())) {
-                eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-        } else {
-                if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
-                        call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
-                        return;
-                }
-        }
-        io_eventfd_put(ev_fd);
+        if (__io_eventfd_signal(ev_fd))
+                io_eventfd_put(ev_fd);
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-- 
2.51.0

From 3ca5a356041438534ecbb74159df91736238c6b1 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:50 -0600
Subject: [PATCH 12/16] io_uring/eventfd: move trigger check into a helper

It's a bit hard to read what guards the triggering, move it into a
helper and add a comment explaining it too. This moves the
ev_fd == NULL check in there as well.

Link: https://lore.kernel.org/r/20240921080307.185186-5-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 58e76f4d1e00..0946d3da88d3 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -63,6 +63,17 @@ static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
         return true;
 }
 
+/*
+ * Trigger if eventfd_async isn't set, or if it's set and the caller is
+ * an async worker. If ev_fd isn't valid, obviously return false.
+ */
+static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
+{
+        if (ev_fd)
+                return !ev_fd->eventfd_async || io_wq_current_is_worker();
+        return false;
+}
+
 void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
         struct io_ev_fd *ev_fd = NULL;
@@ -83,9 +94,7 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
          * completed between the NULL check of ctx->io_ev_fd at the start of
          * the function and rcu_read_lock.
          */
-        if (unlikely(!ev_fd))
-                return;
-        if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+        if (!io_eventfd_trigger(ev_fd))
                 return;
         if (!refcount_inc_not_zero(&ev_fd->refs))
                 return;
-- 
2.51.0

From 83a4f865e273b83426eafdd3aa51334cc21ac0fd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:51 -0600
Subject: [PATCH 13/16] io_uring/eventfd: abstract out ev_fd grab + release
 helpers

In preparation for needing the ev_fd grabbing (and releasing) from
another path, abstract out two helpers for that.

Link: https://lore.kernel.org/r/20240921080307.185186-6-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 41 ++++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 0946d3da88d3..d1fdecd0c458 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -47,6 +47,13 @@ static void io_eventfd_put(struct io_ev_fd *ev_fd)
         call_rcu(&ev_fd->rcu, io_eventfd_free);
 }
 
+static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
+{
+        if (put_ref)
+                io_eventfd_put(ev_fd);
+        rcu_read_unlock();
+}
+
 /*
  * Returns true if the caller should put the ev_fd reference, false if not.
  */
@@ -74,14 +81,18 @@ static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
         return false;
 }
 
-void io_eventfd_signal(struct io_ring_ctx *ctx)
+/*
+ * On success, returns with an ev_fd reference grabbed and the RCU read
+ * lock held.
+ */
+static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
 {
-        struct io_ev_fd *ev_fd = NULL;
+        struct io_ev_fd *ev_fd;
 
         if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
-                return;
+                return NULL;
 
-        guard(rcu)();
+        rcu_read_lock();
 
         /*
          * rcu_dereference ctx->io_ev_fd once and use it for both for checking
@@ -90,16 +101,24 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
         ev_fd = rcu_dereference(ctx->io_ev_fd);
 
         /*
-         * Check again if ev_fd exists incase an io_eventfd_unregister call
+         * Check again if ev_fd exists in case an io_eventfd_unregister call
          * completed between the NULL check of ctx->io_ev_fd at the start of
         * the function and rcu_read_lock.
          */
-        if (!io_eventfd_trigger(ev_fd))
-                return;
-        if (!refcount_inc_not_zero(&ev_fd->refs))
-                return;
-        if (__io_eventfd_signal(ev_fd))
-                io_eventfd_put(ev_fd);
+        if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
+                return ev_fd;
+
+        rcu_read_unlock();
+        return NULL;
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+        struct io_ev_fd *ev_fd;
+
+        ev_fd = io_eventfd_grab(ctx);
+        if (ev_fd)
+                io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
 }
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
-- 
2.51.0

From f4bb2f65bb8154c1a2c2d7e01db0c98dffb5918f Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Sat, 21 Sep 2024 01:59:52 -0600
Subject: [PATCH 14/16] io_uring/eventfd: move ctx->evfd_last_cq_tail into
 io_ev_fd

Everything else about the io_uring eventfd support is nicely kept
private to that code, except the cached_cq_tail tracking. With
everything else in place, move io_eventfd_flush_signal() to using the
ev_fd grab+release helpers, which then enables the direct use of
io_ev_fd for this tracking too.

Link: https://lore.kernel.org/r/20240921080307.185186-7-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/eventfd.c | 50 +++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index d1fdecd0c458..fab936d31ba8 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -13,10 +13,12 @@
 struct io_ev_fd {
         struct eventfd_ctx      *cq_ev_fd;
-        unsigned int            eventfd_async: 1;
-        struct rcu_head         rcu;
+        unsigned int            eventfd_async;
+        /* protected by ->completion_lock */
+        unsigned                last_cq_tail;
         refcount_t              refs;
         atomic_t                ops;
+        struct rcu_head         rcu;
 };
 
 enum {
@@ -123,25 +125,31 @@ void io_eventfd_signal(struct io_ring_ctx *ctx)
 
 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
 {
-        bool skip;
-
-        spin_lock(&ctx->completion_lock);
-
-        /*
-         * Eventfd should only get triggered when at least one event has been
-         * posted. Some applications rely on the eventfd notification count
-         * only changing IFF a new CQE has been added to the CQ ring. There's
-         * no depedency on 1:1 relationship between how many times this
-         * function is called (and hence the eventfd count) and number of CQEs
-         * posted to the CQ ring.
-         */
-        skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
-        ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
-        spin_unlock(&ctx->completion_lock);
-        if (skip)
-                return;
+        struct io_ev_fd *ev_fd;
 
-        io_eventfd_signal(ctx);
+        ev_fd = io_eventfd_grab(ctx);
+        if (ev_fd) {
+                bool skip, put_ref = true;
+
+                /*
+                 * Eventfd should only get triggered when at least one event
+                 * has been posted. Some applications rely on the eventfd
+                 * notification count only changing IFF a new CQE has been
+                 * added to the CQ ring. There's no dependency on 1:1
+                 * relationship between how many times this function is called
+                 * (and hence the eventfd count) and number of CQEs posted to
+                 * the CQ ring.
+                 */
+                spin_lock(&ctx->completion_lock);
+                skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
+                ev_fd->last_cq_tail = ctx->cached_cq_tail;
+                spin_unlock(&ctx->completion_lock);
+
+                if (!skip)
+                        put_ref = __io_eventfd_signal(ev_fd);
+
+                io_eventfd_release(ev_fd, put_ref);
+        }
 }
 
 int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
@@ -172,7 +180,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
         }
 
         spin_lock(&ctx->completion_lock);
-        ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+        ev_fd->last_cq_tail = ctx->cached_cq_tail;
         spin_unlock(&ctx->completion_lock);
 
         ev_fd->eventfd_async = eventfd_async;
-- 
2.51.0

From 95d6c9229a04cc12d39034cd6be6446a55a85d6d Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 24 Sep 2024 05:57:30 -0600
Subject: [PATCH 15/16] io_uring/msg_ring: refactor a few helper functions

Mostly just to skip having them take an io_kiocb, and instead just
pass in the ctx and io_msg directly. In preparation for being able to
issue a MSG_RING request without having an io_kiocb.

No functional changes in this patch.

Link: https://lore.kernel.org/r/20240924115932.116167-2-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 io_uring/msg_ring.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 7fd9badcfaf8..b8c527f08cd5 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -116,14 +116,13 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
         return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
 }
 
-static int io_msg_data_remote(struct io_kiocb *req)
+static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
+                              struct io_msg *msg)
 {
-        struct io_ring_ctx *target_ctx = req->file->private_data;
-        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
         struct io_kiocb *target;
         u32 flags = 0;
 
-        target = io_msg_get_kiocb(req->ctx);
+        target = io_msg_get_kiocb(target_ctx);
         if (unlikely(!target))
                 return -ENOMEM;
 
@@ -134,10 +133,9 @@ static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
                         msg->user_data);
 }
 
-static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
+                              struct io_msg *msg, unsigned int issue_flags)
 {
-        struct io_ring_ctx *target_ctx = req->file->private_data;
-        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
         u32 flags = 0;
         int ret;
 
@@ -149,7 +147,7 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
                 return -EBADFD;
 
         if (io_msg_need_remote(target_ctx))
-                return io_msg_data_remote(req);
+                return io_msg_data_remote(target_ctx, msg);
 
         if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
                 flags = msg->cqe_flags;
@@ -166,6 +164,14 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
         return ret;
 }
 
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
+{
+        struct io_ring_ctx *target_ctx = req->file->private_data;
+        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+
+        return __io_msg_ring_data(target_ctx, msg, issue_flags);
+}
+
 static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
 {
         struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -271,10 +277,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
         return io_msg_install_complete(req, issue_flags);
 }
 
-int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
 {
-        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-
         if (unlikely(sqe->buf_index || sqe->personality))
                 return -EINVAL;
 
@@ -291,6 +295,11 @@ static int __io_msg_ring_prep(struct io_msg *msg, const struct io_uring_sqe *sqe)
         return 0;
 }
 
+int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+        return __io_msg_ring_prep(io_kiocb_to_cmd(req, struct io_msg), sqe);
+}
+
 int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 {
         struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-- 
2.51.0

From a377132154ab8404dafcc52e8bc0c73050a954c2 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 24 Sep 2024 05:57:31 -0600
Subject: [PATCH 16/16] io_uring/msg_ring: add support for sending a sync
 message

Normally MSG_RING requires both a source and a destination ring. But
some users don't always have a ring available to send a message from,
yet they still need to notify a target ring.

Add support for using io_uring_register(2) without having a source
ring, using a file descriptor of -1 for that. Internally those are
called blind registration opcodes. Implement
IORING_REGISTER_SEND_MSG_RING as a blind opcode, which simply takes an
sqe that the application can put on the stack and use the normal
liburing helpers to initialize it. Then the app can call:

io_uring_register(-1, IORING_REGISTER_SEND_MSG_RING, &sqe, 1);

and get the same behavior in terms of the target, where a CQE is
posted with the details given in the sqe.

For now this takes a single sqe pointer argument, and hence arg must
be set to that, and nr_args must be 1. Could easily be extended to
take an array of sqes, but for now let's keep it simple.
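
Put together, a ring-less sender could look like the sketch below
(liburing is used only to fill in the sqe via io_uring_prep_msg_ring();
the raw syscall is used since libc has no wrapper, and the opcode only
exists with this patch applied):

  #include <liburing.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* Post a CQE (res = len, user_data = data) to the ring behind
   * target_fd without owning a source ring ourselves.
   */
  static int send_msg_no_ring(int target_fd, unsigned int len, __u64 data)
  {
          struct io_uring_sqe sqe;

          memset(&sqe, 0, sizeof(sqe));
          io_uring_prep_msg_ring(&sqe, target_fd, len, data, 0);

          /* fd == -1 selects the blind registration path */
          return syscall(__NR_io_uring_register, -1,
                         IORING_REGISTER_SEND_MSG_RING, &sqe, 1);
  }
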
Link: https://lore.kernel.org/r/20240924115932.116167-3-axboe@kernel.dk
Signed-off-by: Jens Axboe
---
 include/uapi/linux/io_uring.h |  3 +++
 io_uring/msg_ring.c           | 29 +++++++++++++++++++++++++++++
 io_uring/msg_ring.h           |  1 +
 io_uring/register.c           | 30 ++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1fe79e750470..86cb385fe0b5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -612,6 +612,9 @@ enum io_uring_register_op {
         /* clone registered buffers from source ring to current ring */
         IORING_REGISTER_CLONE_BUFFERS           = 30,
 
+        /* send MSG_RING without having a ring */
+        IORING_REGISTER_SEND_MSG_RING           = 31,
+
         /* this goes last */
         IORING_REGISTER_LAST,
 
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index b8c527f08cd5..edea1ffd501c 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -331,6 +331,35 @@ done:
         return IOU_OK;
 }
 
+int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
+{
+        struct io_msg io_msg = { };
+        struct fd f;
+        int ret;
+
+        ret = __io_msg_ring_prep(&io_msg, sqe);
+        if (unlikely(ret))
+                return ret;
+
+        /*
+         * Only data sending supported, not IORING_MSG_SEND_FD as that one
+         * doesn't make sense without a source ring to send files from.
+         */
+        if (io_msg.cmd != IORING_MSG_DATA)
+                return -EINVAL;
+
+        ret = -EBADF;
+        f = fdget(sqe->fd);
+        if (fd_file(f)) {
+                ret = -EBADFD;
+                if (io_is_uring_fops(fd_file(f)))
+                        ret = __io_msg_ring_data(fd_file(f)->private_data,
+                                                 &io_msg, IO_URING_F_UNLOCKED);
+                fdput(f);
+        }
+        return ret;
+}
+
 void io_msg_cache_free(const void *entry)
 {
         struct io_kiocb *req = (struct io_kiocb *) entry;
diff --git a/io_uring/msg_ring.h b/io_uring/msg_ring.h
index 3030f3942f0f..38e7f8f0c944 100644
--- a/io_uring/msg_ring.h
+++ b/io_uring/msg_ring.h
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
+int io_uring_sync_msg_ring(struct io_uring_sqe *sqe);
 int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags);
 void io_msg_ring_cleanup(struct io_kiocb *req);
diff --git a/io_uring/register.c b/io_uring/register.c
index eca26d4884d9..52b2f9b74af8 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -28,6 +28,7 @@
 #include "kbuf.h"
 #include "napi.h"
 #include "eventfd.h"
+#include "msg_ring.h"
 
 #define IORING_MAX_RESTRICTIONS  (IORING_RESTRICTION_LAST + \
                                   IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -588,6 +589,32 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered)
         return ERR_PTR(-EOPNOTSUPP);
 }
 
+/*
+ * "blind" registration opcodes are ones where there's no ring given, and
+ * hence the source fd must be -1.
+ */
+static int io_uring_register_blind(unsigned int opcode, void __user *arg,
+                                   unsigned int nr_args)
+{
+        switch (opcode) {
+        case IORING_REGISTER_SEND_MSG_RING: {
+                struct io_uring_sqe sqe;
+
+                if (!arg || nr_args != 1)
+                        return -EINVAL;
+                if (copy_from_user(&sqe, arg, sizeof(sqe)))
+                        return -EFAULT;
+                /* no flags supported */
+                if (sqe.flags)
+                        return -EINVAL;
+                if (sqe.opcode == IORING_OP_MSG_RING)
+                        return io_uring_sync_msg_ring(&sqe);
+                }
+        }
+
+        return -EINVAL;
+}
+
 SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
                 void __user *, arg, unsigned int, nr_args)
 {
@@ -602,6 +629,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
         if (opcode >= IORING_REGISTER_LAST)
                 return -EINVAL;
 
+        if (fd == -1)
+                return io_uring_register_blind(opcode, arg, nr_args);
+
         file = io_uring_register_get_file(fd, use_registered_ring);
         if (IS_ERR(file))
                 return PTR_ERR(file);
-- 
2.51.0