From 76e961157e078bc5d3cd2df08317e00b00a829eb Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Sat, 11 Jan 2025 16:36:55 +0530
Subject: [PATCH 01/16] mm/hugetlb: fix hugepage allocation for interleaved
 memory nodes

gather_bootmem_prealloc() assumes the start nid as 0 and size as
num_node_state(N_MEMORY).  That means in case if memory attached numa
nodes are interleaved, then gather_bootmem_prealloc_parallel() will fail
to scan few of these nodes.

Since memory attached numa nodes can be interleaved in any fashion, hence
ensure that the current code checks for all numa node ids
(.size = nr_node_ids). Let's still keep max_threads as N_MEMORY, so that
it can distributes all nr_node_ids among the these many no. threads.

e.g. qemu cmdline
========================
numa_cmd="-numa node,nodeid=1,memdev=mem1,cpus=2-3 -numa node,nodeid=0,cpus=0-1 -numa dist,src=0,dst=1,val=20"
mem_cmd="-object memory-backend-ram,id=mem1,size=16G"

w/o this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2):
==========================
~ # cat /proc/meminfo  |grep -i huge
AnonHugePages:         0 kB
ShmemHugePages:        0 kB
FileHugePages:         0 kB
HugePages_Total:       0
HugePages_Free:        0
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:    1048576 kB
Hugetlb:               0 kB

with this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2):
===========================
~ # cat /proc/meminfo |grep -i huge
AnonHugePages:         0 kB
ShmemHugePages:        0 kB
FileHugePages:         0 kB
HugePages_Total:       2
HugePages_Free:        2
HugePages_Rsvd:        0
HugePages_Surp:        0
Hugepagesize:    1048576 kB
Hugetlb:         2097152 kB

Link: https://lkml.kernel.org/r/f8d8dad3a5471d284f54185f65d575a6aaab692b.1736592534.git.ritesh.list@gmail.com
Fixes: b78b27d02930 ("hugetlb: parallelize 1G hugetlb initialization")
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reported-by: Pavithra Prakash <pavrampu@linux.ibm.com>
Suggested-by: Muchun Song <muchun.song@linux.dev>
Tested-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Reviewed-by: Luiz Capitulino <luizcap@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Gang Li <gang.li@linux.dev>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3b25b69aa94f..65068671e460 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3309,7 +3309,7 @@ static void __init gather_bootmem_prealloc(void)
 		.thread_fn	= gather_bootmem_prealloc_parallel,
 		.fn_arg		= NULL,
 		.start		= 0,
-		.size		= num_node_state(N_MEMORY),
+		.size		= nr_node_ids,
 		.align		= 1,
 		.min_chunk	= 1,
 		.max_threads	= num_node_state(N_MEMORY),
-- 
2.50.1


From e5eaa1bbe2813ac34788e485283be75f9d07137b Mon Sep 17 00:00:00 2001
From: Carlos Bilbao <carlos.bilbao@kernel.org>
Date: Wed, 29 Jan 2025 19:22:44 -0600
Subject: [PATCH 02/16] mailmap, MAINTAINERS, docs: update Carlos's email
 address

Update .mailmap to reflect my new (and final) primary email address,
carlos.bilbao@kernel.org.  Also update contact information in files
Documentation/translations/sp_SP/index.rst and MAINTAINERS.

Link: https://lkml.kernel.org/r/20250130012248.1196208-1-carlos.bilbao@kernel.org
Signed-off-by: Carlos Bilbao <carlos.bilbao@kernel.org>
Cc: Carlos Bilbao <bilbao@vt.edu>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mattew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap                                   | 4 +++-
 Documentation/translations/sp_SP/index.rst | 2 +-
 MAINTAINERS                                | 8 ++++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.mailmap b/.mailmap
index fec6b455b576..9a270c53675b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -148,7 +148,9 @@ Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
 Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
 Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
 Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
-Carlos Bilbao <carlos.bilbao.osdev@gmail.com> <carlos.bilbao@amd.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com>
+Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu>
 Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
diff --git a/Documentation/translations/sp_SP/index.rst b/Documentation/translations/sp_SP/index.rst
index aae7018b0d1a..2b50283e1608 100644
--- a/Documentation/translations/sp_SP/index.rst
+++ b/Documentation/translations/sp_SP/index.rst
@@ -7,7 +7,7 @@ TraducciÃ³n al espaÃ±ol
 
 	\kerneldocCJKoff
 
-:maintainer: Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+:maintainer: Carlos Bilbao <carlos.bilbao@kernel.org>
 
 .. _sp_disclaimer:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index d269d3c6e317..1824df1f61f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1090,7 +1090,7 @@ F:	drivers/video/fbdev/geode/
 
 AMD HSMP DRIVER
 M:	Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
-R:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+R:	Carlos Bilbao <carlos.bilbao@kernel.org>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	Documentation/arch/x86/amd_hsmp.rst
@@ -5856,7 +5856,7 @@ F:	drivers/usb/atm/cxacru.c
 
 CONFIDENTIAL COMPUTING THREAT MODEL FOR X86 VIRTUALIZATION (SNP/TDX)
 M:	Elena Reshetova <elena.reshetova@intel.com>
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 S:	Maintained
 F:	Documentation/security/snp-tdx-threat-model.rst
 
@@ -11323,7 +11323,7 @@ S:	Orphan
 F:	drivers/video/fbdev/imsttfb.c
 
 INDEX OF FURTHER KERNEL DOCUMENTATION
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 S:	Maintained
 F:	Documentation/process/kernel-docs.rst
 
@@ -22205,7 +22205,7 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 F:	drivers/media/dvb-frontends/sp2*
 
 SPANISH DOCUMENTATION
-M:	Carlos Bilbao <carlos.bilbao.osdev@gmail.com>
+M:	Carlos Bilbao <carlos.bilbao@kernel.org>
 R:	Avadhut Naik <avadhut.naik@amd.com>
 S:	Maintained
 F:	Documentation/translations/sp_SP/
-- 
2.50.1


From 0ca2a41e0ccc573845428b686ff09e9322c82b16 Mon Sep 17 00:00:00 2001
From: Tamir Duberstein <tamird@gmail.com>
Date: Wed, 29 Jan 2025 16:13:49 -0500
Subject: [PATCH 03/16] MAINTAINERS: add lib/test_xarray.c

Ensure test-only changes are sent to the relevant maintainer.

Link: https://lkml.kernel.org/r/20250129-xarray-test-maintainer-v1-1-482e31f30f47@gmail.com
Signed-off-by: Tamir Duberstein <tamird@gmail.com>
Cc: Mattew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1824df1f61f0..f52a004982c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -25734,6 +25734,7 @@ F:	Documentation/core-api/xarray.rst
 F:	include/linux/idr.h
 F:	include/linux/xarray.h
 F:	lib/idr.c
+F:	lib/test_xarray.c
 F:	lib/xarray.c
 F:	tools/testing/radix-tree
 
-- 
2.50.1


From 050339050f6f2b18d32a61a0f725f423804ad2a5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 30 Jan 2025 16:09:20 -0800
Subject: [PATCH 04/16] revert "xarray: port tests to kunit"

Revert c7bb5cf9fc4e ("xarray: port tests to kunit").  It broke the build
when compiing the xarray userspace test harness code.

Reported-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Closes: https://lkml.kernel.org/r/07cf896e-adf8-414f-a629-a808fc26014a@oracle.com
Cc: David Gow <davidgow@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Tamir Duberstein <tamird@gmail.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/m68k/configs/amiga_defconfig    |   1 +
 arch/m68k/configs/apollo_defconfig   |   1 +
 arch/m68k/configs/atari_defconfig    |   1 +
 arch/m68k/configs/bvme6000_defconfig |   1 +
 arch/m68k/configs/hp300_defconfig    |   1 +
 arch/m68k/configs/mac_defconfig      |   1 +
 arch/m68k/configs/multi_defconfig    |   1 +
 arch/m68k/configs/mvme147_defconfig  |   1 +
 arch/m68k/configs/mvme16x_defconfig  |   1 +
 arch/m68k/configs/q40_defconfig      |   1 +
 arch/m68k/configs/sun3_defconfig     |   1 +
 arch/m68k/configs/sun3x_defconfig    |   1 +
 arch/powerpc/configs/ppc64_defconfig |   1 +
 lib/Kconfig.debug                    |  18 +-
 lib/Makefile                         |   2 +-
 lib/test_xarray.c                    | 671 +++++++++++----------------
 16 files changed, 294 insertions(+), 410 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 8acfa66e1095..dbf2ea561c85 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -626,6 +626,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 35e9a0872304..b0fd199cc0a4 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -583,6 +583,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 32891ddd3cc5..bb5b2d3b6c10 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -603,6 +603,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index ca276f0db3dd..8315a13bab73 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e83f14fe1a4f..350370657e5f 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -585,6 +585,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 6b58be24da79..f942b4755702 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,6 +602,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 0e8d24f82565..b1eaad02efab 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -689,6 +689,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 24a7608c13ac..6309a4442bb3 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index c415f75821f3..3feb0731f814 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -576,6 +576,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 2c715a8ff551..ea04b1b0da7d 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -592,6 +592,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 15ff37fcccbf..f52d9af92153 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -572,6 +572,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 40a44bf9f48d..f348447824da 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -573,6 +573,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index e9c46b59ebbc..465eb96c755e 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -448,6 +448,7 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
 CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 775966cf6114..1af972a92d06 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2456,22 +2456,8 @@ config TEST_BITMAP
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
-config XARRAY_KUNIT
-	tristate "KUnit test XArray code at runtime" if !KUNIT_ALL_TESTS
-	depends on KUNIT
-	default KUNIT_ALL_TESTS
-	help
-	  Enable this option to test the Xarray code at boot.
-
-	  KUnit tests run during boot and output the results to the debug log
-	  in TAP format (http://testanything.org/). Only useful for kernel devs
-	  running the KUnit test harness, and not intended for inclusion into a
-	  production build.
-
-	  For more information on KUnit and unit tests in general please refer
-	  to the KUnit documentation in Documentation/dev-tools/kunit/.
-
-	  If unsure, say N.
+config TEST_XARRAY
+	tristate "Test the XArray code at runtime"
 
 config TEST_MAPLE_TREE
 	tristate "Test the Maple Tree code at runtime or module load"
diff --git a/lib/Makefile b/lib/Makefile
index f1c6e9d76a7c..d5cfc7afbbb8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -94,6 +94,7 @@ GCOV_PROFILE_test_bitmap.o := n
 endif
 
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
@@ -372,7 +373,6 @@ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN)
 obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
 obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o
 obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o
-obj-$(CONFIG_XARRAY_KUNIT) += test_xarray.o
 obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index eab5971d0a48..6932a26f4927 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -6,10 +6,11 @@
  * Author: Matthew Wilcox <willy@infradead.org>
  */
 
-#include <kunit/test.h>
-
-#include <linux/module.h>
 #include <linux/xarray.h>
+#include <linux/module.h>
+
+static unsigned int tests_run;
+static unsigned int tests_passed;
 
 static const unsigned int order_limit =
 		IS_ENABLED(CONFIG_XARRAY_MULTI) ? BITS_PER_LONG : 1;
@@ -19,12 +20,15 @@ static const unsigned int order_limit =
 void xa_dump(const struct xarray *xa) { }
 # endif
 #undef XA_BUG_ON
-#define XA_BUG_ON(xa, x) do {		\
-	if (x) {			\
-		KUNIT_FAIL(test, #x);	\
-		xa_dump(xa);		\
-		dump_stack();		\
-	}				\
+#define XA_BUG_ON(xa, x) do {					\
+	tests_run++;						\
+	if (x) {						\
+		printk("BUG at %s:%d\n", __func__, __LINE__);	\
+		xa_dump(xa);					\
+		dump_stack();					\
+	} else {						\
+		tests_passed++;					\
+	}							\
 } while (0)
 #endif
 
@@ -38,13 +42,13 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return xa_store(xa, index, xa_mk_index(index), gfp);
 }
 
-static void xa_insert_index(struct kunit *test, struct xarray *xa, unsigned long index)
+static void xa_insert_index(struct xarray *xa, unsigned long index)
 {
 	XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index),
 				GFP_KERNEL) != 0);
 }
 
-static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long index, gfp_t gfp)
+static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	u32 id;
 
@@ -53,7 +57,7 @@ static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long
 	XA_BUG_ON(xa, id != index);
 }
 
-static void xa_erase_index(struct kunit *test, struct xarray *xa, unsigned long index)
+static void xa_erase_index(struct xarray *xa, unsigned long index)
 {
 	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index));
 	XA_BUG_ON(xa, xa_load(xa, index) != NULL);
@@ -79,15 +83,8 @@ static void *xa_store_order(struct xarray *xa, unsigned long index,
 	return curr;
 }
 
-static inline struct xarray *xa_param(struct kunit *test)
+static noinline void check_xa_err(struct xarray *xa)
 {
-	return *(struct xarray **)test->param_value;
-}
-
-static noinline void check_xa_err(struct kunit *test)
-{
-	struct xarray *xa = xa_param(test);
-
 	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
 	XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
 #ifndef __KERNEL__
@@ -102,10 +99,8 @@ static noinline void check_xa_err(struct kunit *test)
 //	XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
 }
 
-static noinline void check_xas_retry(struct kunit *test)
+static noinline void check_xas_retry(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 
@@ -114,7 +109,7 @@ static noinline void check_xas_retry(struct kunit *test)
 
 	rcu_read_lock();
 	XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 1);
 	XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
 	XA_BUG_ON(xa, xas_retry(&xas, NULL));
 	XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
@@ -145,14 +140,12 @@ static noinline void check_xas_retry(struct kunit *test)
 	}
 	xas_unlock(&xas);
 
-	xa_erase_index(test, xa, 0);
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, 1);
 }
 
-static noinline void check_xa_load(struct kunit *test)
+static noinline void check_xa_load(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j;
 
 	for (i = 0; i < 1024; i++) {
@@ -174,15 +167,13 @@ static noinline void check_xa_load(struct kunit *test)
 			else
 				XA_BUG_ON(xa, entry);
 		}
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
+static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
 
@@ -202,7 +193,7 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
 
 	/* Storing NULL clears marks, and they can't be set again */
-	xa_erase_index(test, xa, index);
+	xa_erase_index(xa, index);
 	XA_BUG_ON(xa, !xa_empty(xa));
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
 	xa_set_mark(xa, index, XA_MARK_0);
@@ -253,17 +244,15 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index)
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
-		xa_erase_index(test, xa, index);
-		xa_erase_index(test, xa, next);
+		xa_erase_index(xa, index);
+		xa_erase_index(xa, next);
 		XA_BUG_ON(xa, !xa_empty(xa));
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_mark_2(struct kunit *test)
+static noinline void check_xa_mark_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long index;
 	unsigned int count = 0;
@@ -300,11 +289,9 @@ static noinline void check_xa_mark_2(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_xa_mark_3(struct kunit *test)
+static noinline void check_xa_mark_3(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0x41);
 	void *entry;
 	int count = 0;
@@ -323,21 +310,19 @@ static noinline void check_xa_mark_3(struct kunit *test)
 #endif
 }
 
-static noinline void check_xa_mark(struct kunit *test)
+static noinline void check_xa_mark(struct xarray *xa)
 {
 	unsigned long index;
 
 	for (index = 0; index < 16384; index += 4)
-		check_xa_mark_1(test, index);
+		check_xa_mark_1(xa, index);
 
-	check_xa_mark_2(test);
-	check_xa_mark_3(test);
+	check_xa_mark_2(xa);
+	check_xa_mark_3(xa);
 }
 
-static noinline void check_xa_shrink(struct kunit *test)
+static noinline void check_xa_shrink(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 1);
 	struct xa_node *node;
 	unsigned int order;
@@ -362,7 +347,7 @@ static noinline void check_xa_shrink(struct kunit *test)
 	XA_BUG_ON(xa, xas_load(&xas) != NULL);
 	xas_unlock(&xas);
 	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
-	xa_erase_index(test, xa, 0);
+	xa_erase_index(xa, 0);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (order = 0; order < max_order; order++) {
@@ -379,49 +364,45 @@ static noinline void check_xa_shrink(struct kunit *test)
 		XA_BUG_ON(xa, xa_head(xa) == node);
 		rcu_read_unlock();
 		XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
-		xa_erase_index(test, xa, ULONG_MAX);
+		xa_erase_index(xa, ULONG_MAX);
 		XA_BUG_ON(xa, xa->xa_head != node);
-		xa_erase_index(test, xa, 0);
+		xa_erase_index(xa, 0);
 	}
 }
 
-static noinline void check_insert(struct kunit *test)
+static noinline void check_insert(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 
 	for (i = 0; i < 1024; i++) {
-		xa_insert_index(test, xa, i);
+		xa_insert_index(xa, i);
 		XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL);
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	}
 
 	for (i = 10; i < BITS_PER_LONG; i++) {
-		xa_insert_index(test, xa, 1UL << i);
+		xa_insert_index(xa, 1UL << i);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL);
-		xa_erase_index(test, xa, 1UL << i);
+		xa_erase_index(xa, 1UL << i);
 
-		xa_insert_index(test, xa, (1UL << i) - 1);
+		xa_insert_index(xa, (1UL << i) - 1);
 		XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL);
 		XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL);
-		xa_erase_index(test, xa, (1UL << i) - 1);
+		xa_erase_index(xa, (1UL << i) - 1);
 	}
 
-	xa_insert_index(test, xa, ~0UL);
+	xa_insert_index(xa, ~0UL);
 	XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL);
-	xa_erase_index(test, xa, ~0UL);
+	xa_erase_index(xa, ~0UL);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_cmpxchg(struct kunit *test)
+static noinline void check_cmpxchg(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *FIVE = xa_mk_value(5);
 	void *SIX = xa_mk_value(6);
 	void *LOTS = xa_mk_value(12345678);
@@ -437,16 +418,14 @@ static noinline void check_cmpxchg(struct kunit *test)
 	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE);
 	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY);
-	xa_erase_index(test, xa, 12345678);
-	xa_erase_index(test, xa, 5);
+	xa_erase_index(xa, 12345678);
+	xa_erase_index(xa, 5);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_cmpxchg_order(struct kunit *test)
+static noinline void check_cmpxchg_order(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	void *FIVE = xa_mk_value(5);
 	unsigned int i, order = 3;
 
@@ -497,10 +476,8 @@ static noinline void check_cmpxchg_order(struct kunit *test)
 #endif
 }
 
-static noinline void check_reserve(struct kunit *test)
+static noinline void check_reserve(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *entry;
 	unsigned long index;
 	int count;
@@ -517,7 +494,7 @@ static noinline void check_reserve(struct kunit *test)
 	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
-	xa_erase_index(test, xa, 12345678);
+	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* cmpxchg sees a reserved entry as ZERO */
@@ -525,7 +502,7 @@ static noinline void check_reserve(struct kunit *test)
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY,
 				xa_mk_value(12345678), GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
-	xa_erase_index(test, xa, 12345678);
+	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* xa_insert treats it as busy */
@@ -565,10 +542,8 @@ static noinline void check_reserve(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_xas_erase(struct kunit *test)
+static noinline void check_xas_erase(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	unsigned long i, j;
@@ -606,11 +581,9 @@ static noinline void check_xas_erase(struct kunit *test)
 }
 
 #ifdef CONFIG_XARRAY_MULTI
-static noinline void check_multi_store_1(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	unsigned long min = index & ~((1UL << order) - 1);
 	unsigned long max = min + (1UL << order);
@@ -629,15 +602,13 @@ static noinline void check_multi_store_1(struct kunit *test, unsigned long index
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
 
-	xa_erase_index(test, xa, min);
+	xa_erase_index(xa, min);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_store_2(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
 
@@ -649,11 +620,9 @@ static noinline void check_multi_store_2(struct kunit *test, unsigned long index
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_store_3(struct kunit *test, unsigned long index,
+static noinline void check_multi_store_3(struct xarray *xa, unsigned long index,
 		unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	int n = 0;
@@ -678,11 +647,9 @@ static noinline void check_multi_store_3(struct kunit *test, unsigned long index
 }
 #endif
 
-static noinline void check_multi_store(struct kunit *test)
+static noinline void check_multi_store(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j, k;
 	unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
 
@@ -747,28 +714,26 @@ static noinline void check_multi_store(struct kunit *test)
 	}
 
 	for (i = 0; i < 20; i++) {
-		check_multi_store_1(test, 200, i);
-		check_multi_store_1(test, 0, i);
-		check_multi_store_1(test, (1UL << i) + 1, i);
+		check_multi_store_1(xa, 200, i);
+		check_multi_store_1(xa, 0, i);
+		check_multi_store_1(xa, (1UL << i) + 1, i);
 	}
-	check_multi_store_2(test, 4095, 9);
+	check_multi_store_2(xa, 4095, 9);
 
 	for (i = 1; i < 20; i++) {
-		check_multi_store_3(test, 0, i);
-		check_multi_store_3(test, 1UL << i, i);
+		check_multi_store_3(xa, 0, i);
+		check_multi_store_3(xa, 1UL << i, i);
 	}
 #endif
 }
 
 #ifdef CONFIG_XARRAY_MULTI
 /* mimics page cache __filemap_add_folio() */
-static noinline void check_xa_multi_store_adv_add(struct kunit *test,
+static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
 						  unsigned long index,
 						  unsigned int order,
 						  void *p)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 	unsigned int nrpages = 1UL << order;
 
@@ -796,12 +761,10 @@ static noinline void check_xa_multi_store_adv_add(struct kunit *test,
 }
 
 /* mimics page_cache_delete() */
-static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test,
+static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa,
 							unsigned long index,
 							unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, index);
 
 	xas_set_order(&xas, index, order);
@@ -809,14 +772,12 @@ static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test,
 	xas_init_marks(&xas);
 }
 
-static noinline void check_xa_multi_store_adv_delete(struct kunit *test,
+static noinline void check_xa_multi_store_adv_delete(struct xarray *xa,
 						     unsigned long index,
 						     unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	xa_lock_irq(xa);
-	check_xa_multi_store_adv_del_entry(test, index, order);
+	check_xa_multi_store_adv_del_entry(xa, index, order);
 	xa_unlock_irq(xa);
 }
 
@@ -853,12 +814,10 @@ static unsigned long some_val = 0xdeadbeef;
 static unsigned long some_val_2 = 0xdeaddead;
 
 /* mimics the page cache usage */
-static noinline void check_xa_multi_store_adv(struct kunit *test,
+static noinline void check_xa_multi_store_adv(struct xarray *xa,
 					      unsigned long pos,
 					      unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int nrpages = 1UL << order;
 	unsigned long index, base, next_index, next_next_index;
 	unsigned int i;
@@ -868,7 +827,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	next_index = round_down(base + nrpages, nrpages);
 	next_next_index = round_down(next_index + nrpages, nrpages);
 
-	check_xa_multi_store_adv_add(test, base, order, &some_val);
+	check_xa_multi_store_adv_add(xa, base, order, &some_val);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val);
@@ -876,20 +835,20 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL);
 
 	/* Use order 0 for the next item */
-	check_xa_multi_store_adv_add(test, next_index, 0, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2);
 	XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2);
 
 	/* Remove the next item */
-	check_xa_multi_store_adv_delete(test, next_index, 0);
+	check_xa_multi_store_adv_delete(xa, next_index, 0);
 
 	/* Now use order for a new pointer */
-	check_xa_multi_store_adv_add(test, next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2);
 
-	check_xa_multi_store_adv_delete(test, next_index, order);
-	check_xa_multi_store_adv_delete(test, base, order);
+	check_xa_multi_store_adv_delete(xa, next_index, order);
+	check_xa_multi_store_adv_delete(xa, base, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* starting fresh again */
@@ -897,7 +856,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	/* let's test some holes now */
 
 	/* hole at base and next_next */
-	check_xa_multi_store_adv_add(test, next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
@@ -908,12 +867,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL);
 
-	check_xa_multi_store_adv_delete(test, next_index, order);
+	check_xa_multi_store_adv_delete(xa, next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* hole at base and next */
 
-	check_xa_multi_store_adv_add(test, next_next_index, order, &some_val_2);
+	check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2);
 
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
@@ -924,12 +883,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test,
 	for (i = 0; i < nrpages; i++)
 		XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2);
 
-	check_xa_multi_store_adv_delete(test, next_next_index, order);
+	check_xa_multi_store_adv_delete(xa, next_next_index, order);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 #endif
 
-static noinline void check_multi_store_advanced(struct kunit *test)
+static noinline void check_multi_store_advanced(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
@@ -941,59 +900,59 @@ static noinline void check_multi_store_advanced(struct kunit *test)
 	 */
 	for (pos = 7; pos < end; pos = (pos * pos) + 564) {
 		for (i = 0; i < max_order; i++) {
-			check_xa_multi_store_adv(test, pos, i);
-			check_xa_multi_store_adv(test, pos + 157, i);
+			check_xa_multi_store_adv(xa, pos, i);
+			check_xa_multi_store_adv(xa, pos + 157, i);
 		}
 	}
 #endif
 }
 
-static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 {
 	int i;
 	u32 id;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 	/* An empty array should assign %base to the first alloc */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Erasing it should make the array empty again */
-	xa_erase_index(test, xa, base);
+	xa_erase_index(xa, base);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* And it should assign %base again */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Allocating and then erasing a lot should not lose base */
 	for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++)
-		xa_alloc_index(test, xa, i, GFP_KERNEL);
+		xa_alloc_index(xa, i, GFP_KERNEL);
 	for (i = base; i < 2 * XA_CHUNK_SIZE; i++)
-		xa_erase_index(test, xa, i);
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+		xa_erase_index(xa, i);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Destroying the array should do the same as erasing */
 	xa_destroy(xa);
 
 	/* And it should assign %base again */
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* The next assigned ID should be base+1 */
-	xa_alloc_index(test, xa, base + 1, GFP_KERNEL);
-	xa_erase_index(test, xa, base + 1);
+	xa_alloc_index(xa, base + 1, GFP_KERNEL);
+	xa_erase_index(xa, base + 1);
 
 	/* Storing a value should mark it used */
 	xa_store_index(xa, base + 1, GFP_KERNEL);
-	xa_alloc_index(test, xa, base + 2, GFP_KERNEL);
+	xa_alloc_index(xa, base + 2, GFP_KERNEL);
 
 	/* If we then erase base, it should be free */
-	xa_erase_index(test, xa, base);
-	xa_alloc_index(test, xa, base, GFP_KERNEL);
+	xa_erase_index(xa, base);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
-	xa_erase_index(test, xa, base + 1);
-	xa_erase_index(test, xa, base + 2);
+	xa_erase_index(xa, base + 1);
+	xa_erase_index(xa, base + 2);
 
 	for (i = 1; i < 5000; i++) {
-		xa_alloc_index(test, xa, base + i, GFP_KERNEL);
+		xa_alloc_index(xa, base + i, GFP_KERNEL);
 	}
 
 	xa_destroy(xa);
@@ -1016,14 +975,14 @@ static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, uns
 
 	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
 				GFP_KERNEL) != -EBUSY);
-	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
 				GFP_KERNEL) != -EBUSY);
-	xa_erase_index(test, xa, 3);
+	xa_erase_index(xa, 3);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base)
 {
 	unsigned int i, id;
 	unsigned long index;
@@ -1059,7 +1018,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, id != 5);
 
 	xa_for_each(xa, index, entry) {
-		xa_erase_index(test, xa, index);
+		xa_erase_index(xa, index);
 	}
 
 	for (i = base; i < base + 9; i++) {
@@ -1074,7 +1033,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns
 	xa_destroy(xa);
 }
 
-static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, unsigned int base)
+static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base)
 {
 	struct xa_limit limit = XA_LIMIT(1, 0x3fff);
 	u32 next = 0;
@@ -1090,8 +1049,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit,
 				&next, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, id != 0x3ffd);
-	xa_erase_index(test, xa, 0x3ffd);
-	xa_erase_index(test, xa, 1);
+	xa_erase_index(xa, 0x3ffd);
+	xa_erase_index(xa, 1);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (i = 0x3ffe; i < 0x4003; i++) {
@@ -1106,8 +1065,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 
 	/* Check wrap-around is handled correctly */
 	if (base != 0)
-		xa_erase_index(test, xa, base);
-	xa_erase_index(test, xa, base + 1);
+		xa_erase_index(xa, base);
+	xa_erase_index(xa, base + 1);
 	next = UINT_MAX;
 	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX),
 				xa_limit_32b, &next, GFP_KERNEL) != 0);
@@ -1120,7 +1079,7 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 	XA_BUG_ON(xa, id != base + 1);
 
 	xa_for_each(xa, index, entry)
-		xa_erase_index(test, xa, index);
+		xa_erase_index(xa, index);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
@@ -1128,21 +1087,19 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns
 static DEFINE_XARRAY_ALLOC(xa0);
 static DEFINE_XARRAY_ALLOC1(xa1);
 
-static noinline void check_xa_alloc(struct kunit *test)
+static noinline void check_xa_alloc(void)
 {
-	check_xa_alloc_1(test, &xa0, 0);
-	check_xa_alloc_1(test, &xa1, 1);
-	check_xa_alloc_2(test, &xa0, 0);
-	check_xa_alloc_2(test, &xa1, 1);
-	check_xa_alloc_3(test, &xa0, 0);
-	check_xa_alloc_3(test, &xa1, 1);
+	check_xa_alloc_1(&xa0, 0);
+	check_xa_alloc_1(&xa1, 1);
+	check_xa_alloc_2(&xa0, 0);
+	check_xa_alloc_2(&xa1, 1);
+	check_xa_alloc_3(&xa0, 0);
+	check_xa_alloc_3(&xa1, 1);
 }
 
-static noinline void __check_store_iter(struct kunit *test, unsigned long start,
+static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
 			unsigned int order, unsigned int present)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, start, order);
 	void *entry;
 	unsigned int count = 0;
@@ -1166,54 +1123,50 @@ retry:
 	XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start));
 	XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
 			xa_mk_index(start));
-	xa_erase_index(test, xa, start);
+	xa_erase_index(xa, start);
 }
 
-static noinline void check_store_iter(struct kunit *test)
+static noinline void check_store_iter(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int i, j;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
 
 	for (i = 0; i < max_order; i++) {
 		unsigned int min = 1 << i;
 		unsigned int max = (2 << i) - 1;
-		__check_store_iter(test, 0, i, 0);
+		__check_store_iter(xa, 0, i, 0);
 		XA_BUG_ON(xa, !xa_empty(xa));
-		__check_store_iter(test, min, i, 0);
+		__check_store_iter(xa, min, i, 0);
 		XA_BUG_ON(xa, !xa_empty(xa));
 
 		xa_store_index(xa, min, GFP_KERNEL);
-		__check_store_iter(test, min, i, 1);
+		__check_store_iter(xa, min, i, 1);
 		XA_BUG_ON(xa, !xa_empty(xa));
 		xa_store_index(xa, max, GFP_KERNEL);
-		__check_store_iter(test, min, i, 1);
+		__check_store_iter(xa, min, i, 1);
 		XA_BUG_ON(xa, !xa_empty(xa));
 
 		for (j = 0; j < min; j++)
 			xa_store_index(xa, j, GFP_KERNEL);
-		__check_store_iter(test, 0, i, min);
+		__check_store_iter(xa, 0, i, min);
 		XA_BUG_ON(xa, !xa_empty(xa));
 		for (j = 0; j < min; j++)
 			xa_store_index(xa, min + j, GFP_KERNEL);
-		__check_store_iter(test, min, i, min);
+		__check_store_iter(xa, min, i, min);
 		XA_BUG_ON(xa, !xa_empty(xa));
 	}
 #ifdef CONFIG_XARRAY_MULTI
 	xa_store_index(xa, 63, GFP_KERNEL);
 	xa_store_index(xa, 65, GFP_KERNEL);
-	__check_store_iter(test, 64, 2, 1);
-	xa_erase_index(test, xa, 63);
+	__check_store_iter(xa, 64, 2, 1);
+	xa_erase_index(xa, 63);
 #endif
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_multi_find_1(struct kunit *test, unsigned int order)
+static noinline void check_multi_find_1(struct xarray *xa, unsigned order)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned long multi = 3 << order;
 	unsigned long next = 4 << order;
 	unsigned long index;
@@ -1236,17 +1189,15 @@ static noinline void check_multi_find_1(struct kunit *test, unsigned int order)
 	XA_BUG_ON(xa, xa_find_after(xa, &index, next, XA_PRESENT) != NULL);
 	XA_BUG_ON(xa, index != next);
 
-	xa_erase_index(test, xa, multi);
-	xa_erase_index(test, xa, next);
-	xa_erase_index(test, xa, next + 1);
+	xa_erase_index(xa, multi);
+	xa_erase_index(xa, next);
+	xa_erase_index(xa, next + 1);
 	XA_BUG_ON(xa, !xa_empty(xa));
 #endif
 }
 
-static noinline void check_multi_find_2(struct kunit *test)
+static noinline void check_multi_find_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
 	unsigned int i, j;
 	void *entry;
@@ -1260,19 +1211,17 @@ static noinline void check_multi_find_2(struct kunit *test)
 					GFP_KERNEL);
 			rcu_read_lock();
 			xas_for_each(&xas, entry, ULONG_MAX) {
-				xa_erase_index(test, xa, index);
+				xa_erase_index(xa, index);
 			}
 			rcu_read_unlock();
-			xa_erase_index(test, xa, index - 1);
+			xa_erase_index(xa, index - 1);
 			XA_BUG_ON(xa, !xa_empty(xa));
 		}
 	}
 }
 
-static noinline void check_multi_find_3(struct kunit *test)
+static noinline void check_multi_find_3(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 
 	for (order = 5; order < order_limit; order++) {
@@ -1281,14 +1230,12 @@ static noinline void check_multi_find_3(struct kunit *test)
 		XA_BUG_ON(xa, !xa_empty(xa));
 		xa_store_order(xa, 0, order - 4, xa_mk_index(0), GFP_KERNEL);
 		XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT));
-		xa_erase_index(test, xa, 0);
+		xa_erase_index(xa, 0);
 	}
 }
 
-static noinline void check_find_1(struct kunit *test)
+static noinline void check_find_1(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i, j, k;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -1325,20 +1272,18 @@ static noinline void check_find_1(struct kunit *test)
 				else
 					XA_BUG_ON(xa, entry != NULL);
 			}
-			xa_erase_index(test, xa, j);
+			xa_erase_index(xa, j);
 			XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
 			XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
 		}
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 		XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_find_2(struct kunit *test)
+static noinline void check_find_2(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	void *entry;
 	unsigned long i, j, index;
 
@@ -1358,10 +1303,8 @@ static noinline void check_find_2(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_find_3(struct kunit *test)
+static noinline void check_find_3(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long i, j, k;
 	void *entry;
@@ -1385,10 +1328,8 @@ static noinline void check_find_3(struct kunit *test)
 	xa_destroy(xa);
 }
 
-static noinline void check_find_4(struct kunit *test)
+static noinline void check_find_4(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long index = 0;
 	void *entry;
 
@@ -1400,22 +1341,22 @@ static noinline void check_find_4(struct kunit *test)
 	entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT);
 	XA_BUG_ON(xa, entry);
 
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 }
 
-static noinline void check_find(struct kunit *test)
+static noinline void check_find(struct xarray *xa)
 {
 	unsigned i;
 
-	check_find_1(test);
-	check_find_2(test);
-	check_find_3(test);
-	check_find_4(test);
+	check_find_1(xa);
+	check_find_2(xa);
+	check_find_3(xa);
+	check_find_4(xa);
 
 	for (i = 2; i < 10; i++)
-		check_multi_find_1(test, i);
-	check_multi_find_2(test);
-	check_multi_find_3(test);
+		check_multi_find_1(xa, i);
+	check_multi_find_2(xa);
+	check_multi_find_3(xa);
 }
 
 /* See find_swap_entry() in mm/shmem.c */
@@ -1441,10 +1382,8 @@ static noinline unsigned long xa_find_entry(struct xarray *xa, void *item)
 	return entry ? xas.xa_index : -1;
 }
 
-static noinline void check_find_entry(struct kunit *test)
+static noinline void check_find_entry(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 #ifdef CONFIG_XARRAY_MULTI
 	unsigned int order;
 	unsigned long offset, index;
@@ -1471,14 +1410,12 @@ static noinline void check_find_entry(struct kunit *test)
 	xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
 	XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
 	XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1);
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_pause(struct kunit *test)
+static noinline void check_pause(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	void *entry;
 	unsigned int order;
@@ -1548,10 +1485,8 @@ static noinline void check_pause(struct kunit *test)
 
 }
 
-static noinline void check_move_tiny(struct kunit *test)
+static noinline void check_move_tiny(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -1568,14 +1503,12 @@ static noinline void check_move_tiny(struct kunit *test)
 	XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0));
 	XA_BUG_ON(xa, xas_prev(&xas) != NULL);
 	rcu_read_unlock();
-	xa_erase_index(test, xa, 0);
+	xa_erase_index(xa, 0);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move_max(struct kunit *test)
+static noinline void check_move_max(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
@@ -1591,14 +1524,12 @@ static noinline void check_move_max(struct kunit *test)
 	XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL);
 	rcu_read_unlock();
 
-	xa_erase_index(test, xa, ULONG_MAX);
+	xa_erase_index(xa, ULONG_MAX);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move_small(struct kunit *test, unsigned long idx)
+static noinline void check_move_small(struct xarray *xa, unsigned long idx)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 	unsigned long i;
 
@@ -1640,15 +1571,13 @@ static noinline void check_move_small(struct kunit *test, unsigned long idx)
 	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
 	rcu_read_unlock();
 
-	xa_erase_index(test, xa, 0);
-	xa_erase_index(test, xa, idx);
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, idx);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_move(struct kunit *test)
+static noinline void check_move(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, (1 << 16) - 1);
 	unsigned long i;
 
@@ -1675,7 +1604,7 @@ static noinline void check_move(struct kunit *test)
 	rcu_read_unlock();
 
 	for (i = (1 << 8); i < (1 << 15); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 
 	i = xas.xa_index;
 
@@ -1706,17 +1635,17 @@ static noinline void check_move(struct kunit *test)
 
 	xa_destroy(xa);
 
-	check_move_tiny(test);
-	check_move_max(test);
+	check_move_tiny(xa);
+	check_move_max(xa);
 
 	for (i = 0; i < 16; i++)
-		check_move_small(test, 1UL << i);
+		check_move_small(xa, 1UL << i);
 
 	for (i = 2; i < 16; i++)
-		check_move_small(test, (1UL << i) - 1);
+		check_move_small(xa, (1UL << i) - 1);
 }
 
-static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa,
+static noinline void xa_store_many_order(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
 	XA_STATE_ORDER(xas, xa, index, order);
@@ -1739,34 +1668,30 @@ unlock:
 	XA_BUG_ON(xa, xas_error(&xas));
 }
 
-static noinline void check_create_range_1(struct kunit *test,
+static noinline void check_create_range_1(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 
-	xa_store_many_order(test, xa, index, order);
+	xa_store_many_order(xa, index, order);
 	for (i = index; i < index + (1UL << order); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_2(struct kunit *test, unsigned int order)
+static noinline void check_create_range_2(struct xarray *xa, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long i;
 	unsigned long nr = 1UL << order;
 
 	for (i = 0; i < nr * nr; i += nr)
-		xa_store_many_order(test, xa, i, order);
+		xa_store_many_order(xa, i, order);
 	for (i = 0; i < nr * nr; i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_3(struct kunit *test)
+static noinline void check_create_range_3(void)
 {
 	XA_STATE(xas, NULL, 0);
 	xas_set_err(&xas, -EEXIST);
@@ -1774,11 +1699,9 @@ static noinline void check_create_range_3(struct kunit *test)
 	XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
 }
 
-static noinline void check_create_range_4(struct kunit *test,
+static noinline void check_create_range_4(struct xarray *xa,
 		unsigned long index, unsigned order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, order);
 	unsigned long base = xas.xa_index;
 	unsigned long i = 0;
@@ -1804,15 +1727,13 @@ unlock:
 	XA_BUG_ON(xa, xas_error(&xas));
 
 	for (i = base; i < base + (1UL << order); i++)
-		xa_erase_index(test, xa, i);
+		xa_erase_index(xa, i);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_create_range_5(struct kunit *test,
+static noinline void check_create_range_5(struct xarray *xa,
 		unsigned long index, unsigned int order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, order);
 	unsigned int i;
 
@@ -1829,46 +1750,44 @@ static noinline void check_create_range_5(struct kunit *test,
 	xa_destroy(xa);
 }
 
-static noinline void check_create_range(struct kunit *test)
+static noinline void check_create_range(struct xarray *xa)
 {
 	unsigned int order;
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
 
 	for (order = 0; order < max_order; order++) {
-		check_create_range_1(test, 0, order);
-		check_create_range_1(test, 1U << order, order);
-		check_create_range_1(test, 2U << order, order);
-		check_create_range_1(test, 3U << order, order);
-		check_create_range_1(test, 1U << 24, order);
+		check_create_range_1(xa, 0, order);
+		check_create_range_1(xa, 1U << order, order);
+		check_create_range_1(xa, 2U << order, order);
+		check_create_range_1(xa, 3U << order, order);
+		check_create_range_1(xa, 1U << 24, order);
 		if (order < 10)
-			check_create_range_2(test, order);
-
-		check_create_range_4(test, 0, order);
-		check_create_range_4(test, 1U << order, order);
-		check_create_range_4(test, 2U << order, order);
-		check_create_range_4(test, 3U << order, order);
-		check_create_range_4(test, 1U << 24, order);
-
-		check_create_range_4(test, 1, order);
-		check_create_range_4(test, (1U << order) + 1, order);
-		check_create_range_4(test, (2U << order) + 1, order);
-		check_create_range_4(test, (2U << order) - 1, order);
-		check_create_range_4(test, (3U << order) + 1, order);
-		check_create_range_4(test, (3U << order) - 1, order);
-		check_create_range_4(test, (1U << 24) + 1, order);
-
-		check_create_range_5(test, 0, order);
-		check_create_range_5(test, (1U << order), order);
+			check_create_range_2(xa, order);
+
+		check_create_range_4(xa, 0, order);
+		check_create_range_4(xa, 1U << order, order);
+		check_create_range_4(xa, 2U << order, order);
+		check_create_range_4(xa, 3U << order, order);
+		check_create_range_4(xa, 1U << 24, order);
+
+		check_create_range_4(xa, 1, order);
+		check_create_range_4(xa, (1U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) - 1, order);
+		check_create_range_4(xa, (3U << order) + 1, order);
+		check_create_range_4(xa, (3U << order) - 1, order);
+		check_create_range_4(xa, (1U << 24) + 1, order);
+
+		check_create_range_5(xa, 0, order);
+		check_create_range_5(xa, (1U << order), order);
 	}
 
-	check_create_range_3(test);
+	check_create_range_3();
 }
 
-static noinline void __check_store_range(struct kunit *test, unsigned long first,
+static noinline void __check_store_range(struct xarray *xa, unsigned long first,
 		unsigned long last)
 {
-	struct xarray *xa = xa_param(test);
-
 #ifdef CONFIG_XARRAY_MULTI
 	xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL);
 
@@ -1883,28 +1802,26 @@ static noinline void __check_store_range(struct kunit *test, unsigned long first
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_store_range(struct kunit *test)
+static noinline void check_store_range(struct xarray *xa)
 {
 	unsigned long i, j;
 
 	for (i = 0; i < 128; i++) {
 		for (j = i; j < 128; j++) {
-			__check_store_range(test, i, j);
-			__check_store_range(test, 128 + i, 128 + j);
-			__check_store_range(test, 4095 + i, 4095 + j);
-			__check_store_range(test, 4096 + i, 4096 + j);
-			__check_store_range(test, 123456 + i, 123456 + j);
-			__check_store_range(test, (1 << 24) + i, (1 << 24) + j);
+			__check_store_range(xa, i, j);
+			__check_store_range(xa, 128 + i, 128 + j);
+			__check_store_range(xa, 4095 + i, 4095 + j);
+			__check_store_range(xa, 4096 + i, 4096 + j);
+			__check_store_range(xa, 123456 + i, 123456 + j);
+			__check_store_range(xa, (1 << 24) + i, (1 << 24) + j);
 		}
 	}
 }
 
 #ifdef CONFIG_XARRAY_MULTI
-static void check_split_1(struct kunit *test, unsigned long index,
+static void check_split_1(struct xarray *xa, unsigned long index,
 				unsigned int order, unsigned int new_order)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE_ORDER(xas, xa, index, new_order);
 	unsigned int i, found;
 	void *entry;
@@ -1940,30 +1857,26 @@ static void check_split_1(struct kunit *test, unsigned long index,
 	xa_destroy(xa);
 }
 
-static noinline void check_split(struct kunit *test)
+static noinline void check_split(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order, new_order;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
 		for (new_order = 0; new_order < order; new_order++) {
-			check_split_1(test, 0, order, new_order);
-			check_split_1(test, 1UL << order, order, new_order);
-			check_split_1(test, 3UL << order, order, new_order);
+			check_split_1(xa, 0, order, new_order);
+			check_split_1(xa, 1UL << order, order, new_order);
+			check_split_1(xa, 3UL << order, order, new_order);
 		}
 	}
 }
 #else
-static void check_split(struct kunit *test) { }
+static void check_split(struct xarray *xa) { }
 #endif
 
-static void check_align_1(struct kunit *test, char *name)
+static void check_align_1(struct xarray *xa, char *name)
 {
-	struct xarray *xa = xa_param(test);
-
 	int i;
 	unsigned int id;
 	unsigned long index;
@@ -1983,10 +1896,8 @@ static void check_align_1(struct kunit *test, char *name)
  * We should always be able to store without allocating memory after
  * reserving a slot.
  */
-static void check_align_2(struct kunit *test, char *name)
+static void check_align_2(struct xarray *xa, char *name)
 {
-	struct xarray *xa = xa_param(test);
-
 	int i;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -2005,15 +1916,15 @@ static void check_align_2(struct kunit *test, char *name)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
-static noinline void check_align(struct kunit *test)
+static noinline void check_align(struct xarray *xa)
 {
 	char name[] = "Motorola 68000";
 
-	check_align_1(test, name);
-	check_align_1(test, name + 1);
-	check_align_1(test, name + 2);
-	check_align_1(test, name + 3);
-	check_align_2(test, name);
+	check_align_1(xa, name);
+	check_align_1(xa, name + 1);
+	check_align_1(xa, name + 2);
+	check_align_1(xa, name + 3);
+	check_align_2(xa, name);
 }
 
 static LIST_HEAD(shadow_nodes);
@@ -2029,7 +1940,7 @@ static void test_update_node(struct xa_node *node)
 	}
 }
 
-static noinline void shadow_remove(struct kunit *test, struct xarray *xa)
+static noinline void shadow_remove(struct xarray *xa)
 {
 	struct xa_node *node;
 
@@ -2043,17 +1954,8 @@ static noinline void shadow_remove(struct kunit *test, struct xarray *xa)
 	xa_unlock(xa);
 }
 
-struct workingset_testcase {
-	struct xarray *xa;
-	unsigned long index;
-};
-
-static noinline void check_workingset(struct kunit *test)
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
 {
-	struct workingset_testcase tc = *(struct workingset_testcase *)test->param_value;
-	struct xarray *xa = tc.xa;
-	unsigned long index = tc.index;
-
 	XA_STATE(xas, xa, index);
 	xas_set_update(&xas, test_update_node);
 
@@ -2076,7 +1978,7 @@ static noinline void check_workingset(struct kunit *test)
 	xas_unlock(&xas);
 	XA_BUG_ON(xa, list_empty(&shadow_nodes));
 
-	shadow_remove(test, xa);
+	shadow_remove(xa);
 	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
@@ -2085,11 +1987,9 @@ static noinline void check_workingset(struct kunit *test)
  * Check that the pointer / value / sibling entries are accounted the
  * way we expect them to be.
  */
-static noinline void check_account(struct kunit *test)
+static noinline void check_account(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
-	struct xarray *xa = xa_param(test);
-
 	unsigned int order;
 
 	for (order = 1; order < 12; order++) {
@@ -2116,10 +2016,8 @@ static noinline void check_account(struct kunit *test)
 #endif
 }
 
-static noinline void check_get_order(struct kunit *test)
+static noinline void check_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
 	unsigned int order;
 	unsigned long i, j;
@@ -2138,10 +2036,8 @@ static noinline void check_get_order(struct kunit *test)
 	}
 }
 
-static noinline void check_xas_get_order(struct kunit *test)
+static noinline void check_xas_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
@@ -2173,10 +2069,8 @@ static noinline void check_xas_get_order(struct kunit *test)
 	}
 }
 
-static noinline void check_xas_conflict_get_order(struct kunit *test)
+static noinline void check_xas_conflict_get_order(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	XA_STATE(xas, xa, 0);
 
 	void *entry;
@@ -2233,10 +2127,8 @@ static noinline void check_xas_conflict_get_order(struct kunit *test)
 }
 
 
-static noinline void check_destroy(struct kunit *test)
+static noinline void check_destroy(struct xarray *xa)
 {
-	struct xarray *xa = xa_param(test);
-
 	unsigned long index;
 
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -2269,59 +2161,52 @@ static noinline void check_destroy(struct kunit *test)
 }
 
 static DEFINE_XARRAY(array);
-static struct xarray *arrays[] = { &array };
-KUNIT_ARRAY_PARAM(array, arrays, NULL);
-
-static struct xarray *xa0s[] = { &xa0 };
-KUNIT_ARRAY_PARAM(xa0, xa0s, NULL);
-
-static struct workingset_testcase workingset_testcases[] = {
-	{ &array, 0 },
-	{ &array, 64 },
-	{ &array, 4096 },
-};
-KUNIT_ARRAY_PARAM(workingset, workingset_testcases, NULL);
-
-static struct kunit_case xarray_cases[] = {
-	KUNIT_CASE_PARAM(check_xa_err, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_retry, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_load, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_mark, array_gen_params),
-	KUNIT_CASE_PARAM(check_xa_shrink, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_erase, array_gen_params),
-	KUNIT_CASE_PARAM(check_insert, array_gen_params),
-	KUNIT_CASE_PARAM(check_cmpxchg, array_gen_params),
-	KUNIT_CASE_PARAM(check_cmpxchg_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_reserve, array_gen_params),
-	KUNIT_CASE_PARAM(check_reserve, xa0_gen_params),
-	KUNIT_CASE_PARAM(check_multi_store, array_gen_params),
-	KUNIT_CASE_PARAM(check_multi_store_advanced, array_gen_params),
-	KUNIT_CASE_PARAM(check_get_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_get_order, array_gen_params),
-	KUNIT_CASE_PARAM(check_xas_conflict_get_order, array_gen_params),
-	KUNIT_CASE(check_xa_alloc),
-	KUNIT_CASE_PARAM(check_find, array_gen_params),
-	KUNIT_CASE_PARAM(check_find_entry, array_gen_params),
-	KUNIT_CASE_PARAM(check_pause, array_gen_params),
-	KUNIT_CASE_PARAM(check_account, array_gen_params),
-	KUNIT_CASE_PARAM(check_destroy, array_gen_params),
-	KUNIT_CASE_PARAM(check_move, array_gen_params),
-	KUNIT_CASE_PARAM(check_create_range, array_gen_params),
-	KUNIT_CASE_PARAM(check_store_range, array_gen_params),
-	KUNIT_CASE_PARAM(check_store_iter, array_gen_params),
-	KUNIT_CASE_PARAM(check_align, xa0_gen_params),
-	KUNIT_CASE_PARAM(check_split, array_gen_params),
-	KUNIT_CASE_PARAM(check_workingset, workingset_gen_params),
-	{},
-};
-
-static struct kunit_suite xarray_suite = {
-	.name = "xarray",
-	.test_cases = xarray_cases,
-};
-
-kunit_test_suite(xarray_suite);
 
+static int xarray_checks(void)
+{
+	check_xa_err(&array);
+	check_xas_retry(&array);
+	check_xa_load(&array);
+	check_xa_mark(&array);
+	check_xa_shrink(&array);
+	check_xas_erase(&array);
+	check_insert(&array);
+	check_cmpxchg(&array);
+	check_cmpxchg_order(&array);
+	check_reserve(&array);
+	check_reserve(&xa0);
+	check_multi_store(&array);
+	check_multi_store_advanced(&array);
+	check_get_order(&array);
+	check_xas_get_order(&array);
+	check_xas_conflict_get_order(&array);
+	check_xa_alloc();
+	check_find(&array);
+	check_find_entry(&array);
+	check_pause(&array);
+	check_account(&array);
+	check_destroy(&array);
+	check_move(&array);
+	check_create_range(&array);
+	check_store_range(&array);
+	check_store_iter(&array);
+	check_align(&xa0);
+	check_split(&array);
+
+	check_workingset(&array, 0);
+	check_workingset(&array, 64);
+	check_workingset(&array, 4096);
+
+	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
+	return (tests_run == tests_passed) ? 0 : -EINVAL;
+}
+
+static void xarray_exit(void)
+{
+}
+
+module_init(xarray_checks);
+module_exit(xarray_exit);
 MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
 MODULE_DESCRIPTION("XArray API test module");
 MODULE_LICENSE("GPL");
-- 
2.50.1


From e5b2a356dc8a88708d97bd47cca3b8f7ed7af6cb Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 30 Jan 2025 16:16:20 -0800
Subject: [PATCH 05/16] MAINTAINERS: include linux-mm for xarray maintenance

MM developers have an interest in the xarray code.

Cc: David Gow <davidgow@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Tamir Duberstein <tamird@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f52a004982c9..ab7463b2f165 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -25729,6 +25729,7 @@ F:	arch/x86/entry/vdso/
 XARRAY
 M:	Matthew Wilcox <willy@infradead.org>
 L:	linux-fsdevel@vger.kernel.org
+L:	linux-mm@kvack.org
 S:	Supported
 F:	Documentation/core-api/xarray.rst
 F:	include/linux/idr.h
-- 
2.50.1


From 2c4627c8ced77855b106c7104ecab70837d53799 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sun, 2 Feb 2025 10:43:02 -0600
Subject: [PATCH 06/16] tools/power turbostat: version 2025.02.02

Summary of Changes since 2024.11.30:

Fix regression in 2023.11.07 that affinitized forked child
in one-shot mode.

Harden one-shot mode against hotplug online/offline

Enable RAPL SysWatt column by default.

Add initial PTL, CWF platform support.

Harden initial PMT code in response to early use.

Enable first built-in PMT counter: CWF c1e residency

Refuse to run on unsupported platforms without --force,
to encourage updating to a version that supports the system,
and to avoid no-so-useful measurement results.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 364a44a7d7ae..8d5011a0bf60 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -9559,7 +9559,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2025.01.14 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
-- 
2.50.1


From 2014c95afecee3e76ca4a56956a936e23283f05b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Feb 2025 15:39:26 -0800
Subject: [PATCH 07/16] Linux 6.14-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 4117cc79748b..9e0d63d9d94b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 6
-PATCHLEVEL = 13
+PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
2.50.1


From b944249bcea97f2f6229852ae3f05f7acdcb0681 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jan 2025 17:57:59 +0100
Subject: [PATCH 08/16] fsnotify: add mount notification infrastructure

This is just the plumbing between the event source (fs/namespace.c) and the
event consumer (fanotify).  In itself it does nothing.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129165803.72138-2-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/mount.h                       |  4 +++
 fs/notify/fsnotify.c             | 47 +++++++++++++++++++++++++++-----
 fs/notify/fsnotify.h             | 11 ++++++++
 fs/notify/mark.c                 | 14 ++++++++--
 include/linux/fsnotify.h         | 20 ++++++++++++++
 include/linux/fsnotify_backend.h | 42 ++++++++++++++++++++++++++++
 6 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index ffb613cdfeee..82aa3bad7cf5 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -21,6 +21,10 @@ struct mnt_namespace {
 		struct rcu_head		mnt_ns_rcu;
 	};
 	u64 event;
+#ifdef CONFIG_FSNOTIFY
+	__u32			n_fsnotify_mask;
+	struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
+#endif
 	unsigned int		nr_mounts; /* # of mounts in the namespace */
 	unsigned int		pending_mounts;
 	struct rb_node		mnt_ns_tree_node; /* node in the mnt_ns_tree */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 8ee495a58d0a..c64b95cf50c7 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 	fsnotify_clear_marks_by_mount(mnt);
 }
 
+void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+	fsnotify_clear_marks_by_mntns(mntns);
+}
+
 /**
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  * @sb: superblock being unmounted.
@@ -420,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
 				     file_name, cookie, iter_info);
 }
 
-static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
+static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	struct hlist_node *node = NULL;
@@ -538,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 {
 	const struct path *path = fsnotify_data_path(data, data_type);
 	struct super_block *sb = fsnotify_data_sb(data, data_type);
-	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+	const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+	struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL;
 	struct fsnotify_iter_info iter_info = {};
 	struct mount *mnt = NULL;
 	struct inode *inode2 = NULL;
 	struct dentry *moved;
 	int inode2_type;
 	int ret = 0;
-	__u32 test_mask, marks_mask;
+	__u32 test_mask, marks_mask = 0;
 
 	if (path)
 		mnt = real_mount(path->mnt);
@@ -578,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	if ((!sbinfo || !sbinfo->sb_marks) &&
 	    (!mnt || !mnt->mnt_fsnotify_marks) &&
 	    (!inode || !inode->i_fsnotify_marks) &&
-	    (!inode2 || !inode2->i_fsnotify_marks))
+	    (!inode2 || !inode2->i_fsnotify_marks) &&
+	    (!mnt_data || !mnt_data->ns->n_fsnotify_marks))
 		return 0;
 
-	marks_mask = READ_ONCE(sb->s_fsnotify_mask);
+	if (sb)
+		marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
 	if (mnt)
 		marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
 	if (inode)
 		marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
 	if (inode2)
 		marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
-
+	if (mnt_data)
+		marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask);
 
 	/*
 	 * If this is a modify event we may need to clear some ignore masks.
@@ -618,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		iter_info.marks[inode2_type] =
 			fsnotify_first_mark(&inode2->i_fsnotify_marks);
 	}
+	if (mnt_data) {
+		iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] =
+			fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks);
+	}
 
 	/*
 	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -702,11 +715,31 @@ void file_set_fsnotify_mode(struct file *file)
 }
 #endif
 
+void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+	struct fsnotify_mnt data = {
+		.ns = ns,
+		.mnt_id = real_mount(mnt)->mnt_id_unique,
+	};
+
+	if (WARN_ON_ONCE(!ns))
+		return;
+
+	/*
+	 * This is an optimization as well as making sure fsnotify_init() has
+	 * been called.
+	 */
+	if (!ns->n_fsnotify_marks)
+		return;
+
+	fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
+}
+
 static __init int fsnotify_init(void)
 {
 	int ret;
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
 
 	ret = init_srcu_struct(&fsnotify_mark_srcu);
 	if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 663759ed6fbc..5950c7a67f41 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb(
 	return conn->obj;
 }
 
+static inline struct mnt_namespace *fsnotify_conn_mntns(
+				struct fsnotify_mark_connector *conn)
+{
+	return conn->obj;
+}
+
 static inline struct super_block *fsnotify_object_sb(void *obj,
 			enum fsnotify_obj_type obj_type)
 {
@@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 	fsnotify_destroy_marks(fsnotify_sb_marks(sb));
 }
 
+static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
+{
+	fsnotify_destroy_marks(&mntns->n_fsnotify_marks);
+}
+
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
  * about events that happen to its children.
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 4981439e6209..798340db69d7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj,
 		return &real_mount(obj)->mnt_fsnotify_marks;
 	case FSNOTIFY_OBJ_TYPE_SB:
 		return fsnotify_sb_marks(obj);
+	case FSNOTIFY_OBJ_TYPE_MNTNS:
+		return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
 	default:
 		return NULL;
 	}
@@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
 		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
 	else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
 		return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
+		return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
 	return NULL;
 }
 
@@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object(
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
 		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
+	} else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+		fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
 	}
 
 	rcu_assign_pointer(*connp, NULL);
 	conn->obj = NULL;
 	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
-	fsnotify_update_sb_watchers(sb, conn);
+	if (sb)
+		fsnotify_update_sb_watchers(sb, conn);
 
 	return inode;
 }
@@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
 	 * Attach the sb info before attaching a connector to any object on sb.
 	 * The sb info will remain attached as long as sb lives.
 	 */
-	if (!fsnotify_sb_info(sb)) {
+	if (sb && !fsnotify_sb_info(sb)) {
 		err = fsnotify_attach_info_to_sb(sb);
 		if (err)
 			return err;
@@ -770,7 +777,8 @@ restart:
 	/* mark should be the last entry.  last is the current last entry */
 	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
 added:
-	fsnotify_update_sb_watchers(sb, conn);
+	if (sb)
+		fsnotify_update_sb_watchers(sb, conn);
 	/*
 	 * Since connector is attached to object using cmpxchg() we are
 	 * guaranteed that connector initialization is fully visible by anyone
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 1a9ef8f6784d..589e274adc7d 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -299,6 +299,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
 	__fsnotify_vfsmount_delete(mnt);
 }
 
+static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{
+	__fsnotify_mntns_delete(mntns);
+}
+
 /*
  * fsnotify_inoderemove - an inode is going away
  */
@@ -507,4 +512,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
 			NULL, NULL, NULL, 0);
 }
 
+static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+	fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+	fsnotify_mnt(FS_MNT_DETACH, ns, mnt);
+}
+
+static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt)
+{
+	fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
+}
+
 #endif	/* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0d24a21a8e60..6cd8d1d28b8b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -59,6 +59,10 @@
 
 #define FS_PRE_ACCESS		0x00100000	/* Pre-content access hook */
 
+#define FS_MNT_ATTACH		0x01000000	/* Mount was attached */
+#define FS_MNT_DETACH		0x02000000	/* Mount was detached */
+#define FS_MNT_MOVE		(FS_MNT_ATTACH | FS_MNT_DETACH)
+
 /*
  * Set on inode mark that cares about things that happen to its children.
  * Always set for dnotify and inotify.
@@ -80,6 +84,9 @@
  */
 #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
 
+/* Mount namespace events */
+#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH)
+
 /* Content events can be used to inspect file content */
 #define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
 				      FS_ACCESS_PERM)
@@ -108,6 +115,7 @@
 
 /* Events that can be reported to backends */
 #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+			     FSNOTIFY_MNT_EVENTS | \
 			     FS_EVENTS_POSS_ON_CHILD | \
 			     FS_DELETE_SELF | FS_MOVE_SELF | \
 			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
@@ -298,6 +306,7 @@ enum fsnotify_data_type {
 	FSNOTIFY_EVENT_PATH,
 	FSNOTIFY_EVENT_INODE,
 	FSNOTIFY_EVENT_DENTRY,
+	FSNOTIFY_EVENT_MNT,
 	FSNOTIFY_EVENT_ERROR,
 };
 
@@ -318,6 +327,11 @@ static inline const struct path *file_range_path(const struct file_range *range)
 	return range->path;
 }
 
+struct fsnotify_mnt {
+	const struct mnt_namespace *ns;
+	u64 mnt_id;
+};
+
 static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
 {
 	switch (data_type) {
@@ -383,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
 	}
 }
 
+static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data,
+							   int data_type)
+{
+	switch (data_type) {
+	case FSNOTIFY_EVENT_MNT:
+		return data;
+	default:
+		return NULL;
+	}
+}
+
+static inline u64 fsnotify_data_mnt_id(const void *data, int data_type)
+{
+	const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
+
+	return mnt_data ? mnt_data->mnt_id : 0;
+}
+
 static inline struct fs_error_report *fsnotify_data_error_report(
 							const void *data,
 							int data_type)
@@ -420,6 +452,7 @@ enum fsnotify_iter_type {
 	FSNOTIFY_ITER_TYPE_SB,
 	FSNOTIFY_ITER_TYPE_PARENT,
 	FSNOTIFY_ITER_TYPE_INODE2,
+	FSNOTIFY_ITER_TYPE_MNTNS,
 	FSNOTIFY_ITER_TYPE_COUNT
 };
 
@@ -429,6 +462,7 @@ enum fsnotify_obj_type {
 	FSNOTIFY_OBJ_TYPE_INODE,
 	FSNOTIFY_OBJ_TYPE_VFSMOUNT,
 	FSNOTIFY_OBJ_TYPE_SB,
+	FSNOTIFY_OBJ_TYPE_MNTNS,
 	FSNOTIFY_OBJ_TYPE_COUNT,
 	FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
 };
@@ -613,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
 extern void __fsnotify_inode_delete(struct inode *inode);
 extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
 extern void fsnotify_sb_delete(struct super_block *sb);
+extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns);
 extern void fsnotify_sb_free(struct super_block *sb);
 extern u32 fsnotify_get_cookie(void);
+extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt);
 
 static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
 {
@@ -928,6 +964,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 static inline void fsnotify_sb_delete(struct super_block *sb)
 {}
 
+static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
+{}
+
 static inline void fsnotify_sb_free(struct super_block *sb)
 {}
 
@@ -942,6 +981,9 @@ static inline u32 fsnotify_get_cookie(void)
 static inline void fsnotify_unmount_inodes(struct super_block *sb)
 {}
 
+static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
+{}
+
 #endif	/* CONFIG_FSNOTIFY */
 
 #endif	/* __KERNEL __ */
-- 
2.50.1


From 0f46d81f2bce970b1c562aa3c944a271bbec2729 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jan 2025 17:58:00 +0100
Subject: [PATCH 09/16] fanotify: notify on mount attach and detach

Add notifications for attaching and detaching mounts.  The following new
event masks are added:

  FAN_MNT_ATTACH  - Mount was attached
  FAN_MNT_DETACH  - Mount was detached

If a mount is moved, then the event is reported with (FAN_MNT_ATTACH |
FAN_MNT_DETACH).

These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
these fields identifying the affected mounts:

  __u64 mnt_id    - the ID of the mount (see statmount(2))

FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events
and no other type of event can be received with this report type.

Marks are added with FAN_MARK_MNTNS, which records the mount namespace from
an nsfs file (e.g. /proc/self/ns/mnt).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129165803.72138-3-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/mount.h                         |  2 +
 fs/namespace.c                     | 14 ++++-
 fs/notify/fanotify/fanotify.c      | 38 ++++++++++++-
 fs/notify/fanotify/fanotify.h      | 18 ++++++
 fs/notify/fanotify/fanotify_user.c | 89 +++++++++++++++++++++++++-----
 fs/notify/fdinfo.c                 |  5 ++
 include/linux/fanotify.h           | 12 ++--
 include/uapi/linux/fanotify.h      | 10 ++++
 8 files changed, 164 insertions(+), 24 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 82aa3bad7cf5..5324a931b403 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -181,3 +181,5 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
 {
 	return container_of(ns, struct mnt_namespace, ns);
 }
+
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index a3ed3f2980cb..bc77b2e0130d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2145,16 +2145,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
 	}
 }
 
+struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
+{
+	if (!is_mnt_ns_file(dentry))
+		return NULL;
+
+	return to_mnt_ns(get_proc_ns(dentry->d_inode));
+}
+
 static bool mnt_ns_loop(struct dentry *dentry)
 {
 	/* Could bind mounting the mount namespace inode cause a
 	 * mount namespace loop?
 	 */
-	struct mnt_namespace *mnt_ns;
-	if (!is_mnt_ns_file(dentry))
+	struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
+
+	if (!mnt_ns)
 		return false;
 
-	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
 	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
 }
 
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 95646f7c46ca..6d386080faf2 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
 	case FANOTIFY_EVENT_TYPE_FS_ERROR:
 		return fanotify_error_event_equal(FANOTIFY_EE(old),
 						  FANOTIFY_EE(new));
+	case FANOTIFY_EVENT_TYPE_MNT:
+		return false;
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 	pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
 		 __func__, iter_info->report_mask, event_mask, data, data_type);
 
-	if (!fid_mode) {
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+		if (data_type != FSNOTIFY_EVENT_MNT)
+			return 0;
+	} else if (!fid_mode) {
 		/* Do we have path to open a file descriptor? */
 		if (!path)
 			return 0;
@@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
 	return &pevent->fae;
 }
 
+static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
+{
+	struct fanotify_mnt_event *pevent;
+
+	pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
+	if (!pevent)
+		return NULL;
+
+	pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
+	pevent->mnt_id = mnt_id;
+
+	return &pevent->fae;
+}
+
 static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
 							int data_type,
 							gfp_t gfp)
@@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
 					      fid_mode);
 	struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
 	const struct path *path = fsnotify_data_path(data, data_type);
+	u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
 	struct mem_cgroup *old_memcg;
 	struct dentry *moved = NULL;
 	struct inode *child = NULL;
@@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
 						  moved, &hash, gfp);
 	} else if (fid_mode) {
 		event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
-	} else {
+	} else if (path) {
 		event = fanotify_alloc_path_event(path, &hash, gfp);
+	} else if (mnt_id) {
+		event = fanotify_alloc_mnt_event(mnt_id, gfp);
+	} else {
+		WARN_ON_ONCE(1);
 	}
 
 	if (!event)
@@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
 	BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
 
 	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
 					 mask, data, data_type, dir);
@@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
 	mempool_free(fee, &group->fanotify_data.error_events_pool);
 }
 
+static void fanotify_free_mnt_event(struct fanotify_event *event)
+{
+	kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
+}
+
 static void fanotify_free_event(struct fsnotify_group *group,
 				struct fsnotify_event *fsn_event)
 {
@@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
 	case FANOTIFY_EVENT_TYPE_FS_ERROR:
 		fanotify_free_error_event(group, event);
 		break;
+	case FANOTIFY_EVENT_TYPE_MNT:
+		fanotify_free_mnt_event(event);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index c12cbc270539..b44e70e44be6 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
 extern struct kmem_cache *fanotify_fid_event_cachep;
 extern struct kmem_cache *fanotify_path_event_cachep;
 extern struct kmem_cache *fanotify_perm_event_cachep;
+extern struct kmem_cache *fanotify_mnt_event_cachep;
 
 /* Possible states of the permission event */
 enum {
@@ -244,6 +245,7 @@ enum fanotify_event_type {
 	FANOTIFY_EVENT_TYPE_PATH_PERM,
 	FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
 	FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
+	FANOTIFY_EVENT_TYPE_MNT,
 	__FANOTIFY_EVENT_TYPE_NUM
 };
 
@@ -409,12 +411,23 @@ struct fanotify_path_event {
 	struct path path;
 };
 
+struct fanotify_mnt_event {
+	struct fanotify_event fae;
+	u64 mnt_id;
+};
+
 static inline struct fanotify_path_event *
 FANOTIFY_PE(struct fanotify_event *event)
 {
 	return container_of(event, struct fanotify_path_event, fae);
 }
 
+static inline struct fanotify_mnt_event *
+FANOTIFY_ME(struct fanotify_event *event)
+{
+	return container_of(event, struct fanotify_mnt_event, fae);
+}
+
 /*
  * Structure for permission fanotify events. It gets allocated and freed in
  * fanotify_handle_event() since we wait there for user response. When the
@@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
 	return mask & FAN_FS_ERROR;
 }
 
+static inline bool fanotify_is_mnt_event(u32 mask)
+{
+	return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
+}
+
 static inline const struct path *fanotify_event_path(struct fanotify_event *event)
 {
 	if (event->type == FANOTIFY_EVENT_TYPE_PATH)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ba3e2d09eb44..f2d840ae4ded 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
 struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
 struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
 struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
+struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
 
 #define FANOTIFY_EVENT_ALIGN 4
 #define FANOTIFY_FID_INFO_HDR_LEN \
@@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
 	(sizeof(struct fanotify_event_info_error))
 #define FANOTIFY_RANGE_INFO_LEN \
 	(sizeof(struct fanotify_event_info_range))
+#define FANOTIFY_MNT_INFO_LEN \
+	(sizeof(struct fanotify_event_info_mnt))
 
 static int fanotify_fid_info_len(int fh_len, int name_len)
 {
@@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
 		fh_len = fanotify_event_object_fh_len(event);
 		event_len += fanotify_fid_info_len(fh_len, dot_len);
 	}
+	if (fanotify_is_mnt_event(event->mask))
+		event_len += FANOTIFY_MNT_INFO_LEN;
 
 	if (info_mode & FAN_REPORT_PIDFD)
 		event_len += FANOTIFY_PIDFD_INFO_LEN;
@@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
 	return -ENOENT;
 }
 
+static size_t copy_mnt_info_to_user(struct fanotify_event *event,
+				    char __user *buf, int count)
+{
+	struct fanotify_event_info_mnt info = { };
+
+	info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
+	info.hdr.len = FANOTIFY_MNT_INFO_LEN;
+
+	if (WARN_ON(count < info.hdr.len))
+		return -EFAULT;
+
+	info.mnt_id = FANOTIFY_ME(event)->mnt_id;
+
+	if (copy_to_user(buf, &info, sizeof(info)))
+		return -EFAULT;
+
+	return info.hdr.len;
+}
+
 static size_t copy_error_info_to_user(struct fanotify_event *event,
 				      char __user *buf, int count)
 {
@@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
 		total_bytes += ret;
 	}
 
+	if (fanotify_is_mnt_event(event->mask)) {
+		ret = copy_mnt_info_to_user(event, buf, count);
+		if (ret < 0)
+			return ret;
+		buf += ret;
+		count -= ret;
+		total_bytes += ret;
+	}
+
 	return total_bytes;
 }
 
@@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
 		return -EINVAL;
 
+	/* Don't allow mixing mnt events with inode events for now */
+	if (flags & FAN_REPORT_MNT) {
+		if (class != FAN_CLASS_NOTIF)
+			return -EINVAL;
+		if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
+			return -EINVAL;
+	}
+
 	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
 		return -EINVAL;
 
@@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 			    int dfd, const char  __user *pathname)
 {
 	struct inode *inode = NULL;
-	struct vfsmount *mnt = NULL;
 	struct fsnotify_group *group;
 	struct path path;
 	struct fan_fsid __fsid, *fsid = NULL;
@@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
 	unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
 	unsigned int obj_type, fid_mode;
-	void *obj;
+	void *obj = NULL;
 	u32 umask = 0;
 	int ret;
 
@@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	case FAN_MARK_FILESYSTEM:
 		obj_type = FSNOTIFY_OBJ_TYPE_SB;
 		break;
+	case FAN_MARK_MNTNS:
+		obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EINVAL;
 	group = fd_file(f)->private_data;
 
+	/* Only report mount events on mnt namespace */
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
+		if (mask & ~FANOTIFY_MOUNT_EVENTS)
+			return -EINVAL;
+		if (mark_type != FAN_MARK_MNTNS)
+			return -EINVAL;
+	} else {
+		if (mask & FANOTIFY_MOUNT_EVENTS)
+			return -EINVAL;
+		if (mark_type == FAN_MARK_MNTNS)
+			return -EINVAL;
+	}
+
 	/*
 	 * An unprivileged user is not allowed to setup mount nor filesystem
 	 * marks.  This also includes setting up such marks by a group that
@@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	 * point.
 	 */
 	fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
-	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
 	    (!fid_mode || mark_type == FAN_MARK_MOUNT))
 		return -EINVAL;
 
@@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	}
 
 	/* inode held in place by reference to path; group by fget on fd */
-	if (mark_type == FAN_MARK_INODE) {
+	if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
 		inode = path.dentry->d_inode;
 		obj = inode;
-	} else {
-		mnt = path.mnt;
-		if (mark_type == FAN_MARK_MOUNT)
-			obj = mnt;
-		else
-			obj = mnt->mnt_sb;
+	} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+		obj = path.mnt;
+	} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
+		obj = path.mnt->mnt_sb;
+	} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+		obj = mnt_ns_from_dentry(path.dentry);
 	}
 
+	ret = -EINVAL;
+	if (!obj)
+		goto path_put_and_out;
+
 	/*
 	 * If some other task has this inode open for write we should not add
 	 * an ignore mask, unless that ignore mask is supposed to survive
@@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	 */
 	if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
 	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
-		ret = mnt ? -EINVAL : -EISDIR;
+		ret = !inode ? -EINVAL : -EISDIR;
 		/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
 		if (ignore == FAN_MARK_IGNORE &&
-		    (mnt || S_ISDIR(inode->i_mode)))
+		    (!inode || S_ISDIR(inode->i_mode)))
 			goto path_put_and_out;
 
 		ret = 0;
@@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	}
 
 	/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
-	if (mnt || !S_ISDIR(inode->i_mode)) {
+	if (!inode || !S_ISDIR(inode->i_mode)) {
 		mask &= ~FAN_EVENT_ON_CHILD;
 		umask = FAN_EVENT_ON_CHILD;
 		/*
@@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
 				     FANOTIFY_DEFAULT_MAX_USER_MARKS);
 
 	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
 
 	fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
 		fanotify_perm_event_cachep =
 			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
 	}
+	fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
 
 	fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
 	init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index e933f9c65d90..1161eabf11ee 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 
 		seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
 			   sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+		struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
+
+		seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
+			   mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
 	}
 }
 
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 78f660ebc318..3c817dc6292e 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -25,7 +25,7 @@
 
 #define FANOTIFY_FID_BITS	(FAN_REPORT_DFID_NAME_TARGET)
 
-#define FANOTIFY_INFO_MODES	(FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+#define FANOTIFY_INFO_MODES	(FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
 
 /*
  * fanotify_init() flags that require CAP_SYS_ADMIN.
@@ -38,7 +38,8 @@
 					 FAN_REPORT_PIDFD | \
 					 FAN_REPORT_FD_ERROR | \
 					 FAN_UNLIMITED_QUEUE | \
-					 FAN_UNLIMITED_MARKS)
+					 FAN_UNLIMITED_MARKS | \
+					 FAN_REPORT_MNT)
 
 /*
  * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -58,7 +59,7 @@
 #define FANOTIFY_INTERNAL_GROUP_FLAGS	(FANOTIFY_UNPRIV)
 
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
-				 FAN_MARK_FILESYSTEM)
+				 FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
 
 #define FANOTIFY_MARK_CMD_BITS	(FAN_MARK_ADD | FAN_MARK_REMOVE | \
 				 FAN_MARK_FLUSH)
@@ -109,10 +110,13 @@
 /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
 #define FANOTIFY_ERROR_EVENTS	(FAN_FS_ERROR)
 
+#define FANOTIFY_MOUNT_EVENTS	(FAN_MNT_ATTACH | FAN_MNT_DETACH)
+
 /* Events that user can request to be notified on */
 #define FANOTIFY_EVENTS		(FANOTIFY_PATH_EVENTS | \
 				 FANOTIFY_INODE_EVENTS | \
-				 FANOTIFY_ERROR_EVENTS)
+				 FANOTIFY_ERROR_EVENTS | \
+				 FANOTIFY_MOUNT_EVENTS)
 
 /* Extra flags that may be reported with event or control handling of events */
 #define FANOTIFY_EVENT_FLAGS	(FAN_EVENT_ON_CHILD | FAN_ONDIR)
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index bd8167979707..e710967c7c26 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -28,6 +28,8 @@
 /* #define FAN_DIR_MODIFY	0x00080000 */	/* Deprecated (reserved) */
 
 #define FAN_PRE_ACCESS		0x00100000	/* Pre-content access hook */
+#define FAN_MNT_ATTACH		0x01000000	/* Mount was attached */
+#define FAN_MNT_DETACH		0x02000000	/* Mount was detached */
 
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
@@ -64,6 +66,7 @@
 #define FAN_REPORT_NAME		0x00000800	/* Report events with name */
 #define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
 #define FAN_REPORT_FD_ERROR	0x00002000	/* event->fd can report error */
+#define FAN_REPORT_MNT		0x00004000	/* Report mount events */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@@ -94,6 +97,7 @@
 #define FAN_MARK_INODE		0x00000000
 #define FAN_MARK_MOUNT		0x00000010
 #define FAN_MARK_FILESYSTEM	0x00000100
+#define FAN_MARK_MNTNS		0x00000110
 
 /*
  * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
@@ -147,6 +151,7 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_PIDFD	4
 #define FAN_EVENT_INFO_TYPE_ERROR	5
 #define FAN_EVENT_INFO_TYPE_RANGE	6
+#define FAN_EVENT_INFO_TYPE_MNT		7
 
 /* Special info types for FAN_RENAME */
 #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME	10
@@ -200,6 +205,11 @@ struct fanotify_event_info_range {
 	__u64 count;
 };
 
+struct fanotify_event_info_mnt {
+	struct fanotify_event_info_header hdr;
+	__u64 mnt_id;
+};
+
 /*
  * User space may need to record additional information about its decision.
  * The extra information type records what kind of information is included.
-- 
2.50.1


From bf630c40164162ba1d3933c2f5e3397d083e0948 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jan 2025 17:58:01 +0100
Subject: [PATCH 10/16] vfs: add notifications for mount attach and detach

Add notifications for attaching and detaching mounts to fs/namespace.c

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129165803.72138-4-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/mount.h     | 20 +++++++++++++
 fs/namespace.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/pnode.c     |  4 ++-
 3 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 5324a931b403..946dc8b792d7 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -5,6 +5,8 @@
 #include <linux/ns_common.h>
 #include <linux/fs_pin.h>
 
+extern struct list_head notify_list;
+
 struct mnt_namespace {
 	struct ns_common	ns;
 	struct mount *	root;
@@ -80,6 +82,8 @@ struct mount {
 #ifdef CONFIG_FSNOTIFY
 	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
+	struct list_head to_notify;	/* need to queue notification */
+	struct mnt_namespace *prev_ns;	/* previous namespace (NULL if none) */
 #endif
 	int mnt_id;			/* mount identifier, reused */
 	u64 mnt_id_unique;		/* mount ID unique until reboot */
@@ -182,4 +186,20 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
 	return container_of(ns, struct mnt_namespace, ns);
 }
 
+#ifdef CONFIG_FSNOTIFY
+static inline void mnt_notify_add(struct mount *m)
+{
+	/* Optimize the case where there are no watches */
+	if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) ||
+	    (m->prev_ns && m->prev_ns->n_fsnotify_marks))
+		list_add_tail(&m->to_notify, &notify_list);
+	else
+		m->prev_ns = m->mnt_ns;
+}
+#else
+static inline void mnt_notify_add(struct mount *m)
+{
+}
+#endif
+
 struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index bc77b2e0130d..05f6680e2251 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -81,6 +81,9 @@ static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 static DEFINE_SEQLOCK(mnt_ns_tree_lock);
 
+#ifdef CONFIG_FSNOTIFY
+LIST_HEAD(notify_list); /* protected by namespace_sem */
+#endif
 static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
 static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
 
@@ -163,6 +166,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
 {
 	/* keep alive for {list,stat}mount() */
 	if (refcount_dec_and_test(&ns->passive)) {
+		fsnotify_mntns_delete(ns);
 		put_user_ns(ns->user_ns);
 		kfree(ns);
 	}
@@ -1176,6 +1180,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
 		ns->mnt_first_node = &mnt->mnt_node;
 	rb_link_node(&mnt->mnt_node, parent, link);
 	rb_insert_color(&mnt->mnt_node, &ns->mounts);
+
+	mnt_notify_add(mnt);
 }
 
 /*
@@ -1723,6 +1729,50 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
+#ifdef CONFIG_FSNOTIFY
+static void mnt_notify(struct mount *p)
+{
+	if (!p->prev_ns && p->mnt_ns) {
+		fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+	} else if (p->prev_ns && !p->mnt_ns) {
+		fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+	} else if (p->prev_ns == p->mnt_ns) {
+		fsnotify_mnt_move(p->mnt_ns, &p->mnt);
+	} else {
+		fsnotify_mnt_detach(p->prev_ns, &p->mnt);
+		fsnotify_mnt_attach(p->mnt_ns, &p->mnt);
+	}
+	p->prev_ns = p->mnt_ns;
+}
+
+static void notify_mnt_list(void)
+{
+	struct mount *m, *tmp;
+	/*
+	 * Notify about mounts that were added/reparented/detached/remain
+	 * connected after unmount.
+	 */
+	list_for_each_entry_safe(m, tmp, &notify_list, to_notify) {
+		mnt_notify(m);
+		list_del_init(&m->to_notify);
+	}
+}
+
+static bool need_notify_mnt_list(void)
+{
+	return !list_empty(&notify_list);
+}
+#else
+static void notify_mnt_list(void)
+{
+}
+
+static bool need_notify_mnt_list(void)
+{
+	return false;
+}
+#endif
+
 static void namespace_unlock(void)
 {
 	struct hlist_head head;
@@ -1733,7 +1783,18 @@ static void namespace_unlock(void)
 	hlist_move_list(&unmounted, &head);
 	list_splice_init(&ex_mountpoints, &list);
 
-	up_write(&namespace_sem);
+	if (need_notify_mnt_list()) {
+		/*
+		 * No point blocking out concurrent readers while notifications
+		 * are sent. This will also allow statmount()/listmount() to run
+		 * concurrently.
+		 */
+		downgrade_write(&namespace_sem);
+		notify_mnt_list();
+		up_read(&namespace_sem);
+	} else {
+		up_write(&namespace_sem);
+	}
 
 	shrink_dentry_list(&list);
 
@@ -1846,6 +1907,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 		change_mnt_propagation(p, MS_PRIVATE);
 		if (disconnect)
 			hlist_add_head(&p->mnt_umount, &unmounted);
+
+		/*
+		 * At this point p->mnt_ns is NULL, notification will be queued
+		 * only if
+		 *
+		 *  - p->prev_ns is non-NULL *and*
+		 *  - p->prev_ns->n_fsnotify_marks is non-NULL
+		 *
+		 * This will preclude queuing the mount if this is a cleanup
+		 * after a failed copy_tree() or destruction of an anonymous
+		 * namespace, etc.
+		 */
+		mnt_notify_add(p);
 	}
 }
 
@@ -2555,6 +2629,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 			dest_mp = smp;
 		unhash_mnt(source_mnt);
 		attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
+		mnt_notify_add(source_mnt);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		if (source_mnt->mnt_ns) {
@@ -4476,6 +4551,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	list_del_init(&new_mnt->mnt_expire);
 	put_mountpoint(root_mp);
 	unlock_mount_hash();
+	mnt_notify_add(root_mnt);
+	mnt_notify_add(new_mnt);
 	chroot_fs_refs(&root, &new);
 	error = 0;
 out4:
diff --git a/fs/pnode.c b/fs/pnode.c
index ef048f008bdd..82d809c785ec 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore)
 			mp = parent->mnt_mp;
 			parent = parent->mnt_parent;
 		}
-		if (parent != mnt->mnt_parent)
+		if (parent != mnt->mnt_parent) {
 			mnt_change_mountpoint(parent, mp, mnt);
+			mnt_notify_add(mnt);
+		}
 	}
 }
 
-- 
2.50.1


From 0ff053b98a0f039e52c2bd8d0cb38f2831edfaf5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 10 Feb 2025 13:38:59 +0100
Subject: [PATCH 11/16] fs: support O_PATH fds with FSCONFIG_SET_FD

Let FSCONFIG_SET_FD handle O_PATH file descriptors. This is particularly
useful in the context of overlayfs where layers can be specified via
file descriptors instead of paths. But userspace must currently use
non-O_PATH file desriptors which is often pointless especially if
the file descriptors have been created via open_tree(OPEN_TREE_CLONE).

Link: https://lore.kernel.org/r/20250210-work-overlayfs-v2-1-ed2a949b674b@kernel.org
Fixes: a08557d19ef41 ("ovl: specify layers via file descriptors")
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/autofs/autofs_i.h | 2 ++
 fs/fsopen.c          | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 77c7991d89aa..23cea74f9933 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -218,6 +218,8 @@ void autofs_clean_ino(struct autofs_info *);
 
 static inline int autofs_check_pipe(struct file *pipe)
 {
+	if (pipe->f_mode & FMODE_PATH)
+		return -EINVAL;
 	if (!(pipe->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 	if (!S_ISFIFO(file_inode(pipe)->i_mode))
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 094a7f510edf..1aaf4cb2afb2 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -453,7 +453,7 @@ SYSCALL_DEFINE5(fsconfig,
 	case FSCONFIG_SET_FD:
 		param.type = fs_value_is_file;
 		ret = -EBADF;
-		param.file = fget(aux);
+		param.file = fget_raw(aux);
 		if (!param.file)
 			goto out_key;
 		param.dirfd = aux;
-- 
2.50.1


From 85c8700cb6e67cac228bfb313fa8e33d1750410f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 10 Feb 2025 13:39:00 +0100
Subject: [PATCH 12/16] selftests/overlayfs: test specifying layers as O_PATH
 file descriptors

Verify that userspace can specify layers via O_PATH file descriptors.

Link: https://lore.kernel.org/r/20250210-work-overlayfs-v2-2-ed2a949b674b@kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .../overlayfs/set_layers_via_fds.c            | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 1d0ae785a667..e693e4102d22 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -214,4 +214,69 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
 	ASSERT_EQ(close(fd_overlay), 0);
 }
 
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+	int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+	int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		char path[100];
+
+		sprintf(path, "l%d", i);
+		ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+		layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+		ASSERT_GE(layer_fds[i], 0);
+	}
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+	fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_work, 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_upper, 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+	fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_lower, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, fd_work), 0);
+	ASSERT_EQ(close(fd_work), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, fd_upper), 0);
+	ASSERT_EQ(close(fd_upper), 0);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+		ASSERT_EQ(close(layer_fds[i]), 0);
+	}
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+	ASSERT_EQ(close(fd_lower), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+}
+
 TEST_HARNESS_MAIN
-- 
2.50.1


From db04662e2f4fced10b93032c65ff03caaae21053 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 23 Jan 2025 20:19:48 +0100
Subject: [PATCH 13/16] fs: allow detached mounts in clone_private_mount()

In container workloads idmapped mounts are often used as layers for
overlayfs. Recently I added the ability to specify layers in overlayfs
as file descriptors instead of path names. It should be possible to
simply use the detached mounts directly when specifying layers instead
of having to attach them beforehand. They are discarded after overlayfs
is mounted anyway so it's pointless system calls for userspace and
pointless locking for the kernel.

This just recently come up again in [1]. So enable clone_private_mount()
to use detached mounts directly. Following conditions must be met:

- Provided path must be the root of a detached mount tree.
- Provided path may not create mount namespace loops.
- Provided path must be mounted.

It would be possible to be stricter and require that the caller must
have CAP_SYS_ADMIN in the owning user namespace of the anonymous mount
namespace but since this restriction isn't enforced for move_mount()
there's no point in enforcing it for clone_private_mount().

This contains a folded fix for:
Reported-by: syzbot+62dfea789a2cedac1298@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=62dfea789a2cedac1298
provided by Lizhi Xu <lizhi.xu@windriver.com> in [2].

Link: https://lore.kernel.org/r/20250207071331.550952-1-lizhi.xu@windriver.com [2]
Link: https://lore.kernel.org/r/fd8f6574-f737-4743-b220-79c815ee1554@mbaynton.com [1]
Link: https://lore.kernel.org/r/20250123-avancieren-erfreuen-3d61f6588fdd@brauner
Tested-by: Mike Baynton <mike@mbaynton.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namespace.c | 78 ++++++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 05f6680e2251..d470a369d42b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2369,6 +2369,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
 	return false;
 }
 
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree.  Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+	struct mount *p;
+	bool ret = false;
+
+	lock_mount_hash();
+	for (p = subtree; p; p = next_mnt(p, subtree))
+		if (mnt_ns_loop(p->mnt.mnt_root))
+			goto out;
+
+	ret = true;
+out:
+	unlock_mount_hash();
+	return ret;
+}
+
 /**
  * clone_private_mount - create a private clone of a path
  * @path: path to clone
@@ -2377,6 +2399,8 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
  * will not be attached anywhere in the namespace and will be private (i.e.
  * changes to the originating mount won't be propagated into this).
  *
+ * This assumes caller has called or done the equivalent of may_mount().
+ *
  * Release with mntput().
  */
 struct vfsmount *clone_private_mount(const struct path *path)
@@ -2384,30 +2408,36 @@ struct vfsmount *clone_private_mount(const struct path *path)
 	struct mount *old_mnt = real_mount(path->mnt);
 	struct mount *new_mnt;
 
-	down_read(&namespace_sem);
+	scoped_guard(rwsem_read, &namespace_sem)
 	if (IS_MNT_UNBINDABLE(old_mnt))
-		goto invalid;
+		return ERR_PTR(-EINVAL);
+
+	if (mnt_has_parent(old_mnt)) {
+		if (!check_mnt(old_mnt))
+			return ERR_PTR(-EINVAL);
+	} else {
+		if (!is_mounted(&old_mnt->mnt))
+			return ERR_PTR(-EINVAL);
 
-	if (!check_mnt(old_mnt))
-		goto invalid;
+		/* Make sure this isn't something purely kernel internal. */
+		if (!is_anon_ns(old_mnt->mnt_ns))
+			return ERR_PTR(-EINVAL);
+
+		/* Make sure we don't create mount namespace loops. */
+		if (!check_for_nsfs_mounts(old_mnt))
+			return ERR_PTR(-EINVAL);
+	}
 
 	if (has_locked_children(old_mnt, path->dentry))
-		goto invalid;
+		return ERR_PTR(-EINVAL);
 
 	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
-	up_read(&namespace_sem);
-
 	if (IS_ERR(new_mnt))
-		return ERR_CAST(new_mnt);
+		return ERR_PTR(-EINVAL);
 
 	/* Longterm mount to be removed by kern_unmount*() */
 	new_mnt->mnt_ns = MNT_NS_INTERNAL;
-
 	return &new_mnt->mnt;
-
-invalid:
-	up_read(&namespace_sem);
-	return ERR_PTR(-EINVAL);
 }
 EXPORT_SYMBOL_GPL(clone_private_mount);
 
@@ -3206,28 +3236,6 @@ static inline int tree_contains_unbindable(struct mount *mnt)
 	return 0;
 }
 
-/*
- * Check that there aren't references to earlier/same mount namespaces in the
- * specified subtree.  Such references can act as pins for mount namespaces
- * that aren't checked by the mount-cycle checking code, thereby allowing
- * cycles to be made.
- */
-static bool check_for_nsfs_mounts(struct mount *subtree)
-{
-	struct mount *p;
-	bool ret = false;
-
-	lock_mount_hash();
-	for (p = subtree; p; p = next_mnt(p, subtree))
-		if (mnt_ns_loop(p->mnt.mnt_root))
-			goto out;
-
-	ret = true;
-out:
-	unlock_mount_hash();
-	return ret;
-}
-
 static int do_set_group(struct path *from_path, struct path *to_path)
 {
 	struct mount *from, *to;
-- 
2.50.1


From ccc829b15d48a450b186cab097f4f5d01df445d4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 23 Jan 2025 20:19:49 +0100
Subject: [PATCH 14/16] selftests: add tests for using detached mount with
 overlayfs

Test that it is possible to use detached mounts as overlayfs layers.

Link: https://lore.kernel.org/r/20250123-erstbesteigung-angeeignet-1d30e64b7df2@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .../overlayfs/set_layers_via_fds.c            | 130 ++++++++++++++++++
 .../filesystems/overlayfs/wrappers.h          |  17 +++
 2 files changed, 147 insertions(+)

diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index e693e4102d22..e65d95d97846 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -20,12 +20,16 @@ FIXTURE(set_layers_via_fds) {
 FIXTURE_SETUP(set_layers_via_fds)
 {
 	ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+	ASSERT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
 }
 
 FIXTURE_TEARDOWN(set_layers_via_fds)
 {
 	umount2("/set_layers_via_fds", 0);
 	ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+	umount2("/set_layers_via_fds_tmpfs", 0);
+	ASSERT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
 }
 
 TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -279,4 +283,130 @@ TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
 	ASSERT_EQ(close(fd_overlay), 0);
 }
 
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+	int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+	int layer_fds[] = { [0 ... 8] = -EBADF };
+	bool layers_found[] = { [0 ... 8] =  false };
+	size_t len = 0;
+	char *line = NULL;
+	FILE *f_mountinfo;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(fd_tmp, 0);
+
+	layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+	ASSERT_GE(layer_fds[0], 0);
+
+	layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+	ASSERT_GE(layer_fds[1], 0);
+
+	layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[2], 0);
+
+	layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[3], 0);
+
+	layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[4], 0);
+
+	layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[5], 0);
+
+	layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[6], 0);
+
+	layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[7], 0);
+
+	layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[8], 0);
+
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[0]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[1]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[6]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[7]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[8]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	f_mountinfo = fopen("/proc/self/mountinfo", "r");
+	ASSERT_NE(f_mountinfo, NULL);
+
+	while (getline(&line, &len, f_mountinfo) != -1) {
+		char *haystack = line;
+
+		if (strstr(haystack, "workdir=/tmp/w"))
+			layers_found[0] = true;
+		if (strstr(haystack, "upperdir=/tmp/u"))
+			layers_found[1] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l1"))
+			layers_found[2] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l2"))
+			layers_found[3] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l3"))
+			layers_found[4] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l4"))
+			layers_found[5] = true;
+		if (strstr(haystack, "datadir+=/tmp/d1"))
+			layers_found[6] = true;
+		if (strstr(haystack, "datadir+=/tmp/d2"))
+			layers_found[7] = true;
+		if (strstr(haystack, "datadir+=/tmp/d3"))
+			layers_found[8] = true;
+	}
+	free(line);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		ASSERT_EQ(layers_found[i], true);
+		ASSERT_EQ(close(layer_fds[i]), 0);
+	}
+
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+	ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
index 071b95fd2ac0..c38bc48e0cfa 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
@@ -44,4 +44,21 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
 		       to_pathname, flags);
 }
 
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+	return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
 #endif
-- 
2.50.1


From 784ed4354c9077501b3a9236bafea23e5b878703 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 4 Feb 2025 12:27:46 +0100
Subject: [PATCH 15/16] uidgid: add map_id_range_up()

Add map_id_range_up() to verify that the full kernel id range can be
mapped up in a given idmapping. This will be used in follow-up patches.

Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-1-007720f39f2e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/uidgid.h  |  6 ++++++
 kernel/user_namespace.c | 26 +++++++++++++++++---------
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
index f85ec5613721..2dc767e08f54 100644
--- a/include/linux/uidgid.h
+++ b/include/linux/uidgid.h
@@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
 
 u32 map_id_down(struct uid_gid_map *map, u32 id);
 u32 map_id_up(struct uid_gid_map *map, u32 id);
+u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count);
 
 #else
 
@@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
 	return id;
 }
 
+static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count)
+{
+	return id;
+}
+
 static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
 {
 	return id;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index aa0b2e47f2f2..682f40d5632d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(__put_user_ns);
 struct idmap_key {
 	bool map_up; /* true  -> id from kid; false -> kid from id */
 	u32 id; /* id to find */
-	u32 count; /* == 0 unless used with map_id_range_down() */
+	u32 count;
 };
 
 /*
@@ -343,16 +343,19 @@ u32 map_id_down(struct uid_gid_map *map, u32 id)
  * UID_GID_MAP_MAX_BASE_EXTENTS.
  */
 static struct uid_gid_extent *
-map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
+map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
 {
 	unsigned idx;
-	u32 first, last;
+	u32 first, last, id2;
+
+	id2 = id + count - 1;
 
 	/* Find the matching extent */
 	for (idx = 0; idx < extents; idx++) {
 		first = map->extent[idx].lower_first;
 		last = first + map->extent[idx].count - 1;
-		if (id >= first && id <= last)
+		if (id >= first && id <= last &&
+		    (id2 >= first && id2 <= last))
 			return &map->extent[idx];
 	}
 	return NULL;
@@ -363,28 +366,28 @@ map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
  * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
  */
 static struct uid_gid_extent *
-map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
+map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
 {
 	struct idmap_key key;
 
 	key.map_up = true;
-	key.count = 1;
+	key.count = count;
 	key.id = id;
 
 	return bsearch(&key, map->reverse, extents,
 		       sizeof(struct uid_gid_extent), cmp_map_id);
 }
 
-u32 map_id_up(struct uid_gid_map *map, u32 id)
+u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count)
 {
 	struct uid_gid_extent *extent;
 	unsigned extents = map->nr_extents;
 	smp_rmb();
 
 	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
-		extent = map_id_up_base(extents, map, id);
+		extent = map_id_range_up_base(extents, map, id, count);
 	else
-		extent = map_id_up_max(extents, map, id);
+		extent = map_id_range_up_max(extents, map, id, count);
 
 	/* Map the id or note failure */
 	if (extent)
@@ -395,6 +398,11 @@ u32 map_id_up(struct uid_gid_map *map, u32 id)
 	return id;
 }
 
+u32 map_id_up(struct uid_gid_map *map, u32 id)
+{
+	return map_id_range_up(map, id, 1);
+}
+
 /**
  *	make_kuid - Map a user-namespace uid pair into a kuid.
  *	@ns:  User namespace that the uid is in
-- 
2.50.1


From 37c4a9590e1efcae7749682239fc22a330d2d325 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 4 Feb 2025 12:27:47 +0100
Subject: [PATCH 16/16] statmount: allow to retrieve idmappings

This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options.
It allows the retrieval of idmappings via statmount().

Currently it isn't possible to figure out what idmappings are applied to
an idmapped mount. This information is often crucial. Before statmount()
the only realistic options for an interface like this would have been to
add it to /proc/<pid>/fdinfo/<nr> or to expose it in
/proc/<pid>/mountinfo. Both solution would have been pretty ugly and
would've shown information that is of strong interest to some
application but not all. statmount() is perfect for this.

The idmappings applied to an idmapped mount are shown relative to the
caller's user namespace. This is the most useful solution that doesn't
risk leaking information or confuse the caller.

For example, an idmapped mount might have been created with the
following idmappings:

    mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt

Listing the idmappings through statmount() in the same context shows:

    mnt_id:        2147485088
    mnt_parent_id: 2147484816
    fs_type:       btrfs
    mnt_root:      /srv
    mnt_point:     /opt
    mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/
    mnt_uidmap[0]: 0 10000 1000
    mnt_uidmap[1]: 2000 2000 1
    mnt_uidmap[2]: 3000 3000 1
    mnt_gidmap[0]: 0 10000 1000
    mnt_gidmap[1]: 2000 2000 1
    mnt_gidmap[2]: 3000 3000 1

But the idmappings might not always be resolvable in the caller's user
namespace. For example:

    unshare --user --map-root

In this case statmount() will skip any mappings that fil to resolve in
the caller's idmapping:

    mnt_id:        2147485087
    mnt_parent_id: 2147484016
    fs_type:       btrfs
    mnt_root:      /srv
    mnt_point:     /opt
    mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/

The caller can differentiate between a mount not being idmapped and a
mount that is idmapped but where all mappings fail to resolve in the
caller's idmapping by check for the STATMOUNT_MNT_{G,U}IDMAP flag being
raised but the number of mappings in ->mnt_{g,u}idmap_num being zero.

Note that statmount() requires that the whole range must be resolvable
in the caller's user namespace. If a subrange fails to map it will still
list the map as not resolvable. This is a practical compromise to avoid
having to find which subranges are resovable and wich aren't.

Idmappings are listed as a string array with each mapping separated by
zero bytes. This allows to retrieve the idmappings and immediately use
them for writing to e.g., /proc/<pid>/{g,u}id_map and it also allow for
simple iteration like:

    if (stmnt->mask & STATMOUNT_MNT_UIDMAP) {
            const char *idmap = stmnt->str + stmnt->mnt_uidmap;

            for (size_t idx = 0; idx < stmnt->mnt_uidmap_nr; idx++) {
                    printf("mnt_uidmap[%lu]: %s\n", idx, idmap);
                    idmap += strlen(idmap) + 1;
            }
    }

Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-2-007720f39f2e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/internal.h                 |  1 +
 fs/mnt_idmapping.c            | 51 ++++++++++++++++++++++++++++++
 fs/namespace.c                | 59 ++++++++++++++++++++++++++++++++++-
 include/linux/mnt_idmapping.h |  5 +++
 include/uapi/linux/mount.h    |  8 ++++-
 5 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index e7f02ae1e098..db6094d5cb0b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path)
 	return path->mnt->mnt_root == path->dentry;
 }
 void file_f_owner_release(struct file *file);
+int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 7b1df8cc2821..a37991fdb194 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -6,6 +6,7 @@
 #include <linux/mnt_idmapping.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/seq_file.h>
 
 #include "internal.h"
 
@@ -334,3 +335,53 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
 		free_mnt_idmap(idmap);
 }
 EXPORT_SYMBOL_GPL(mnt_idmap_put);
+
+int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map)
+{
+	struct uid_gid_map *map, *map_up;
+	u32 idx, nr_mappings;
+
+	if (!is_valid_mnt_idmap(idmap))
+		return 0;
+
+	/*
+	 * Idmappings are shown relative to the caller's idmapping.
+	 * This is both the most intuitive and most useful solution.
+	 */
+	if (uid_map) {
+		map = &idmap->uid_map;
+		map_up = &current_user_ns()->uid_map;
+	} else {
+		map = &idmap->gid_map;
+		map_up = &current_user_ns()->gid_map;
+	}
+
+	for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) {
+		uid_t lower;
+		struct uid_gid_extent *extent;
+
+		if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
+			extent = &map->extent[idx];
+		else
+			extent = &map->forward[idx];
+
+		/*
+		 * Verify that the whole range of the mapping can be
+		 * resolved in the caller's idmapping. If it cannot be
+		 * resolved skip the mapping.
+		 */
+		lower = map_id_range_up(map_up, extent->lower_first, extent->count);
+		if (lower == (uid_t) -1)
+			continue;
+
+		seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);
+
+		seq->count++; /* mappings are separated by \0 */
+		if (seq_has_overflowed(seq))
+			return -EAGAIN;
+
+		nr_mappings++;
+	}
+
+	return nr_mappings;
+}
diff --git a/fs/namespace.c b/fs/namespace.c
index d470a369d42b..967ee2a5bacf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5008,6 +5008,7 @@ struct kstatmount {
 	struct statmount __user *buf;
 	size_t bufsize;
 	struct vfsmount *mnt;
+	struct mnt_idmap *idmap;
 	u64 mask;
 	struct path root;
 	struct statmount sm;
@@ -5278,6 +5279,46 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
 	return 0;
 }
 
+static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq)
+{
+	int ret;
+
+	ret = statmount_mnt_idmap(s->idmap, seq, true);
+	if (ret < 0)
+		return ret;
+
+	s->sm.mnt_uidmap_num = ret;
+	/*
+	 * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid
+	 * mappings. This allows userspace to distinguish between a
+	 * non-idmapped mount and an idmapped mount where none of the
+	 * individual mappings are valid in the caller's idmapping.
+	 */
+	if (is_valid_mnt_idmap(s->idmap))
+		s->sm.mask |= STATMOUNT_MNT_UIDMAP;
+	return 0;
+}
+
+static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq)
+{
+	int ret;
+
+	ret = statmount_mnt_idmap(s->idmap, seq, false);
+	if (ret < 0)
+		return ret;
+
+	s->sm.mnt_gidmap_num = ret;
+	/*
+	 * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid
+	 * mappings. This allows userspace to distinguish between a
+	 * non-idmapped mount and an idmapped mount where none of the
+	 * individual mappings are valid in the caller's idmapping.
+	 */
+	if (is_valid_mnt_idmap(s->idmap))
+		s->sm.mask |= STATMOUNT_MNT_GIDMAP;
+	return 0;
+}
+
 static int statmount_string(struct kstatmount *s, u64 flag)
 {
 	int ret = 0;
@@ -5319,6 +5360,14 @@ static int statmount_string(struct kstatmount *s, u64 flag)
 		sm->sb_source = start;
 		ret = statmount_sb_source(s, seq);
 		break;
+	case STATMOUNT_MNT_UIDMAP:
+		sm->mnt_uidmap = start;
+		ret = statmount_mnt_uidmap(s, seq);
+		break;
+	case STATMOUNT_MNT_GIDMAP:
+		sm->mnt_gidmap = start;
+		ret = statmount_mnt_gidmap(s, seq);
+		break;
 	default:
 		WARN_ON_ONCE(true);
 		return -EINVAL;
@@ -5443,6 +5492,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
 		return err;
 
 	s->root = root;
+	s->idmap = mnt_idmap(s->mnt);
 	if (s->mask & STATMOUNT_SB_BASIC)
 		statmount_sb_basic(s);
 
@@ -5476,6 +5526,12 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
 	if (!err && s->mask & STATMOUNT_SB_SOURCE)
 		err = statmount_string(s, STATMOUNT_SB_SOURCE);
 
+	if (!err && s->mask & STATMOUNT_MNT_UIDMAP)
+		err = statmount_string(s, STATMOUNT_MNT_UIDMAP);
+
+	if (!err && s->mask & STATMOUNT_MNT_GIDMAP)
+		err = statmount_string(s, STATMOUNT_MNT_GIDMAP);
+
 	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
 		statmount_mnt_ns_id(s, ns);
 
@@ -5499,7 +5555,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
 #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
 			      STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \
 			      STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \
-			      STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY)
+			      STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \
+			      STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP)
 
 static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
 			      struct statmount __user *buf, size_t bufsize,
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index b1b219bc3422..e71a6070a8f8 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t));
 static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val));
 static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val));
 
+static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap)
+{
+	return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap;
+}
+
 #ifdef CONFIG_MULTIUSER
 static inline uid_t __vfsuid_val(vfsuid_t uid)
 {
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index c07008816aca..0be6ac4c1624 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -179,7 +179,11 @@ struct statmount {
 	__u32 opt_array;	/* [str] Array of nul terminated fs options */
 	__u32 opt_sec_num;	/* Number of security options */
 	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
-	__u64 __spare2[46];
+	__u32 mnt_uidmap_num;	/* Number of uid mappings */
+	__u32 mnt_uidmap;	/* [str] Array of uid mappings (as seen from callers namespace) */
+	__u32 mnt_gidmap_num;	/* Number of gid mappings */
+	__u32 mnt_gidmap;	/* [str] Array of gid mappings (as seen from callers namespace) */
+	__u64 __spare2[44];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -217,6 +221,8 @@ struct mnt_id_req {
 #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
 #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
 #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
+#define STATMOUNT_MNT_UIDMAP		0x00001000U	/* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP		0x00002000U	/* Want/got gidmap... */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
2.50.1