From 1aaf8c122918aa8897605a9aa1e8ed6600d6f930 Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Tue, 21 Jan 2025 10:01:59 +0800 Subject: [PATCH 01/16] mm: gup: fix infinite loop within __get_longterm_locked We can run into an infinite loop in __get_longterm_locked() when collect_longterm_unpinnable_folios() finds only folios that are isolated from the LRU or were never added to the LRU. This can happen when all folios to be pinned are never added to the LRU, for example when vm_ops->fault allocated pages using cma_alloc() and never added them to the LRU. Fix it by simply taking a look at the list in the single caller, to see if anything was added. [zhaoyang.huang@unisoc.com: move definition of local] Link: https://lkml.kernel.org/r/20250122012604.3654667-1-zhaoyang.huang@unisoc.com Link: https://lkml.kernel.org/r/20250121020159.3636477-1-zhaoyang.huang@unisoc.com Fixes: 67e139b02d99 ("mm/gup.c: refactor check_and_migrate_movable_pages()") Signed-off-by: Zhaoyang Huang Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Aijun Sun Cc: Alistair Popple Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 9aaf338cc1f4..3883b307780e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_folios *pofs) /* * Returns the number of collected folios. Return value is always >= 0. 
*/ -static unsigned long collect_longterm_unpinnable_folios( +static void collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { - unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; + unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2338,8 +2338,6 @@ static unsigned long collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; - collected++; - if (folio_is_device_coherent(folio)) continue; @@ -2361,8 +2359,6 @@ static unsigned long collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } - - return collected; } /* @@ -2439,11 +2435,9 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); - unsigned long collected; - collected = collect_longterm_unpinnable_folios(&movable_folio_list, - pofs); - if (!collected) + collect_longterm_unpinnable_folios(&movable_folio_list, pofs); + if (list_empty(&movable_folio_list)) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); -- 2.51.0 From 76e961157e078bc5d3cd2df08317e00b00a829eb Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Sat, 11 Jan 2025 16:36:55 +0530 Subject: [PATCH 02/16] mm/hugetlb: fix hugepage allocation for interleaved memory nodes gather_bootmem_prealloc() assumes the start nid as 0 and size as num_node_state(N_MEMORY). That means in case if memory attached numa nodes are interleaved, then gather_bootmem_prealloc_parallel() will fail to scan few of these nodes. Since memory attached numa nodes can be interleaved in any fashion, hence ensure that the current code checks for all numa node ids (.size = nr_node_ids). Let's still keep max_threads as N_MEMORY, so that it can distributes all nr_node_ids among the these many no. threads. e.g. 
qemu cmdline ======================== numa_cmd="-numa node,nodeid=1,memdev=mem1,cpus=2-3 -numa node,nodeid=0,cpus=0-1 -numa dist,src=0,dst=1,val=20" mem_cmd="-object memory-backend-ram,id=mem1,size=16G" w/o this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2): ========================== ~ # cat /proc/meminfo |grep -i huge AnonHugePages: 0 kB ShmemHugePages: 0 kB FileHugePages: 0 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 1048576 kB Hugetlb: 0 kB with this patch for cmdline (default_hugepagesz=1GB hugepagesz=1GB hugepages=2): =========================== ~ # cat /proc/meminfo |grep -i huge AnonHugePages: 0 kB ShmemHugePages: 0 kB FileHugePages: 0 kB HugePages_Total: 2 HugePages_Free: 2 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 1048576 kB Hugetlb: 2097152 kB Link: https://lkml.kernel.org/r/f8d8dad3a5471d284f54185f65d575a6aaab692b.1736592534.git.ritesh.list@gmail.com Fixes: b78b27d02930 ("hugetlb: parallelize 1G hugetlb initialization") Signed-off-by: Ritesh Harjani (IBM) Reported-by: Pavithra Prakash Suggested-by: Muchun Song Tested-by: Sourabh Jain Reviewed-by: Luiz Capitulino Acked-by: David Rientjes Cc: Donet Tom Cc: Gang Li Cc: Daniel Jordan Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3b25b69aa94f..65068671e460 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3309,7 +3309,7 @@ static void __init gather_bootmem_prealloc(void) .thread_fn = gather_bootmem_prealloc_parallel, .fn_arg = NULL, .start = 0, - .size = num_node_state(N_MEMORY), + .size = nr_node_ids, .align = 1, .min_chunk = 1, .max_threads = num_node_state(N_MEMORY), -- 2.51.0 From e5eaa1bbe2813ac34788e485283be75f9d07137b Mon Sep 17 00:00:00 2001 From: Carlos Bilbao Date: Wed, 29 Jan 2025 19:22:44 -0600 Subject: [PATCH 03/16] mailmap, MAINTAINERS, docs: update Carlos's email address Update .mailmap to reflect my new (and final) 
primary email address, carlos.bilbao@kernel.org. Also update contact information in files Documentation/translations/sp_SP/index.rst and MAINTAINERS. Link: https://lkml.kernel.org/r/20250130012248.1196208-1-carlos.bilbao@kernel.org Signed-off-by: Carlos Bilbao Cc: Carlos Bilbao Cc: Jonathan Corbet Cc: Mattew Wilcox Signed-off-by: Andrew Morton --- .mailmap | 4 +++- Documentation/translations/sp_SP/index.rst | 2 +- MAINTAINERS | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.mailmap b/.mailmap index fec6b455b576..9a270c53675b 100644 --- a/.mailmap +++ b/.mailmap @@ -148,7 +148,9 @@ Bryan Tan Cai Huoqing Can Guo Carl Huang -Carlos Bilbao +Carlos Bilbao +Carlos Bilbao +Carlos Bilbao Changbin Du Changbin Du Chao Yu diff --git a/Documentation/translations/sp_SP/index.rst b/Documentation/translations/sp_SP/index.rst index aae7018b0d1a..2b50283e1608 100644 --- a/Documentation/translations/sp_SP/index.rst +++ b/Documentation/translations/sp_SP/index.rst @@ -7,7 +7,7 @@ Traducción al español \kerneldocCJKoff -:maintainer: Carlos Bilbao +:maintainer: Carlos Bilbao .. 
_sp_disclaimer: diff --git a/MAINTAINERS b/MAINTAINERS index d269d3c6e317..1824df1f61f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1090,7 +1090,7 @@ F: drivers/video/fbdev/geode/ AMD HSMP DRIVER M: Naveen Krishna Chatradhi -R: Carlos Bilbao +R: Carlos Bilbao L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/arch/x86/amd_hsmp.rst @@ -5856,7 +5856,7 @@ F: drivers/usb/atm/cxacru.c CONFIDENTIAL COMPUTING THREAT MODEL FOR X86 VIRTUALIZATION (SNP/TDX) M: Elena Reshetova -M: Carlos Bilbao +M: Carlos Bilbao S: Maintained F: Documentation/security/snp-tdx-threat-model.rst @@ -11323,7 +11323,7 @@ S: Orphan F: drivers/video/fbdev/imsttfb.c INDEX OF FURTHER KERNEL DOCUMENTATION -M: Carlos Bilbao +M: Carlos Bilbao S: Maintained F: Documentation/process/kernel-docs.rst @@ -22205,7 +22205,7 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/dvb-frontends/sp2* SPANISH DOCUMENTATION -M: Carlos Bilbao +M: Carlos Bilbao R: Avadhut Naik S: Maintained F: Documentation/translations/sp_SP/ -- 2.51.0 From 0ca2a41e0ccc573845428b686ff09e9322c82b16 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 29 Jan 2025 16:13:49 -0500 Subject: [PATCH 04/16] MAINTAINERS: add lib/test_xarray.c Ensure test-only changes are sent to the relevant maintainer. 
Link: https://lkml.kernel.org/r/20250129-xarray-test-maintainer-v1-1-482e31f30f47@gmail.com Signed-off-by: Tamir Duberstein Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1824df1f61f0..f52a004982c9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25734,6 +25734,7 @@ F: Documentation/core-api/xarray.rst F: include/linux/idr.h F: include/linux/xarray.h F: lib/idr.c +F: lib/test_xarray.c F: lib/xarray.c F: tools/testing/radix-tree -- 2.51.0 From 050339050f6f2b18d32a61a0f725f423804ad2a5 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 30 Jan 2025 16:09:20 -0800 Subject: [PATCH 05/16] revert "xarray: port tests to kunit" Revert c7bb5cf9fc4e ("xarray: port tests to kunit"). It broke the build when compiling the xarray userspace test harness code. Reported-by: Sidhartha Kumar Closes: https://lkml.kernel.org/r/07cf896e-adf8-414f-a629-a808fc26014a@oracle.com Cc: David Gow Cc: Matthew Wilcox Cc: Tamir Duberstein Cc: "Liam R. 
Howlett" Cc: Geert Uytterhoeven Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- arch/m68k/configs/amiga_defconfig | 1 + arch/m68k/configs/apollo_defconfig | 1 + arch/m68k/configs/atari_defconfig | 1 + arch/m68k/configs/bvme6000_defconfig | 1 + arch/m68k/configs/hp300_defconfig | 1 + arch/m68k/configs/mac_defconfig | 1 + arch/m68k/configs/multi_defconfig | 1 + arch/m68k/configs/mvme147_defconfig | 1 + arch/m68k/configs/mvme16x_defconfig | 1 + arch/m68k/configs/q40_defconfig | 1 + arch/m68k/configs/sun3_defconfig | 1 + arch/m68k/configs/sun3x_defconfig | 1 + arch/powerpc/configs/ppc64_defconfig | 1 + lib/Kconfig.debug | 18 +- lib/Makefile | 2 +- lib/test_xarray.c | 671 +++++++++++---------------- 16 files changed, 294 insertions(+), 410 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 8acfa66e1095..dbf2ea561c85 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -626,6 +626,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 35e9a0872304..b0fd199cc0a4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -583,6 +583,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 32891ddd3cc5..bb5b2d3b6c10 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -603,6 +603,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/bvme6000_defconfig 
b/arch/m68k/configs/bvme6000_defconfig index ca276f0db3dd..8315a13bab73 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e83f14fe1a4f..350370657e5f 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -585,6 +585,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 6b58be24da79..f942b4755702 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -602,6 +602,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 0e8d24f82565..b1eaad02efab 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -689,6 +689,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 24a7608c13ac..6309a4442bb3 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -575,6 +575,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/mvme16x_defconfig 
b/arch/m68k/configs/mvme16x_defconfig index c415f75821f3..3feb0731f814 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -576,6 +576,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 2c715a8ff551..ea04b1b0da7d 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -592,6 +592,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 15ff37fcccbf..f52d9af92153 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -572,6 +572,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 40a44bf9f48d..f348447824da 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -573,6 +573,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index e9c46b59ebbc..465eb96c755e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -448,6 +448,7 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m +CONFIG_TEST_XARRAY=m CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 
775966cf6114..1af972a92d06 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2456,22 +2456,8 @@ config TEST_BITMAP config TEST_UUID tristate "Test functions located in the uuid module at runtime" -config XARRAY_KUNIT - tristate "KUnit test XArray code at runtime" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - Enable this option to test the Xarray code at boot. - - KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs - running the KUnit test harness, and not intended for inclusion into a - production build. - - For more information on KUnit and unit tests in general please refer - to the KUnit documentation in Documentation/dev-tools/kunit/. - - If unsure, say N. +config TEST_XARRAY + tristate "Test the XArray code at runtime" config TEST_MAPLE_TREE tristate "Test the Maple Tree code at runtime or module load" diff --git a/lib/Makefile b/lib/Makefile index f1c6e9d76a7c..d5cfc7afbbb8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -94,6 +94,7 @@ GCOV_PROFILE_test_bitmap.o := n endif obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o @@ -372,7 +373,6 @@ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o -obj-$(CONFIG_XARRAY_KUNIT) += test_xarray.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o diff --git a/lib/test_xarray.c b/lib/test_xarray.c index eab5971d0a48..6932a26f4927 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -6,10 +6,11 @@ * Author: Matthew Wilcox */ -#include - -#include #include +#include + 
+static unsigned int tests_run; +static unsigned int tests_passed; static const unsigned int order_limit = IS_ENABLED(CONFIG_XARRAY_MULTI) ? BITS_PER_LONG : 1; @@ -19,12 +20,15 @@ static const unsigned int order_limit = void xa_dump(const struct xarray *xa) { } # endif #undef XA_BUG_ON -#define XA_BUG_ON(xa, x) do { \ - if (x) { \ - KUNIT_FAIL(test, #x); \ - xa_dump(xa); \ - dump_stack(); \ - } \ +#define XA_BUG_ON(xa, x) do { \ + tests_run++; \ + if (x) { \ + printk("BUG at %s:%d\n", __func__, __LINE__); \ + xa_dump(xa); \ + dump_stack(); \ + } else { \ + tests_passed++; \ + } \ } while (0) #endif @@ -38,13 +42,13 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_index(index), gfp); } -static void xa_insert_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_insert_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index), GFP_KERNEL) != 0); } -static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long index, gfp_t gfp) +static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { u32 id; @@ -53,7 +57,7 @@ static void xa_alloc_index(struct kunit *test, struct xarray *xa, unsigned long XA_BUG_ON(xa, id != index); } -static void xa_erase_index(struct kunit *test, struct xarray *xa, unsigned long index) +static void xa_erase_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_index(index)); XA_BUG_ON(xa, xa_load(xa, index) != NULL); @@ -79,15 +83,8 @@ static void *xa_store_order(struct xarray *xa, unsigned long index, return curr; } -static inline struct xarray *xa_param(struct kunit *test) +static noinline void check_xa_err(struct xarray *xa) { - return *(struct xarray **)test->param_value; -} - -static noinline void check_xa_err(struct kunit *test) -{ - struct xarray *xa = xa_param(test); - XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 
0); XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0); #ifndef __KERNEL__ @@ -102,10 +99,8 @@ static noinline void check_xa_err(struct kunit *test) // XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } -static noinline void check_xas_retry(struct kunit *test) +static noinline void check_xas_retry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -114,7 +109,7 @@ static noinline void check_xas_retry(struct kunit *test) rcu_read_lock(); XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0)); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas))); XA_BUG_ON(xa, xas_retry(&xas, NULL)); XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0))); @@ -145,14 +140,12 @@ static noinline void check_xas_retry(struct kunit *test) } xas_unlock(&xas); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0); + xa_erase_index(xa, 1); } -static noinline void check_xa_load(struct kunit *test) +static noinline void check_xa_load(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j; for (i = 0; i < 1024; i++) { @@ -174,15 +167,13 @@ static noinline void check_xa_load(struct kunit *test) else XA_BUG_ON(xa, entry); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) +static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) { - struct xarray *xa = xa_param(test); - unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
8 : 1; @@ -202,7 +193,7 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1)); /* Storing NULL clears marks, and they can't be set again */ - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); @@ -253,17 +244,15 @@ static noinline void check_xa_mark_1(struct kunit *test, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2)); - xa_erase_index(test, xa, index); - xa_erase_index(test, xa, next); + xa_erase_index(xa, index); + xa_erase_index(xa, next); XA_BUG_ON(xa, !xa_empty(xa)); } XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_mark_2(struct kunit *test) +static noinline void check_xa_mark_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long index; unsigned int count = 0; @@ -300,11 +289,9 @@ static noinline void check_xa_mark_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_xa_mark_3(struct kunit *test) +static noinline void check_xa_mark_3(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0x41); void *entry; int count = 0; @@ -323,21 +310,19 @@ static noinline void check_xa_mark_3(struct kunit *test) #endif } -static noinline void check_xa_mark(struct kunit *test) +static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; for (index = 0; index < 16384; index += 4) - check_xa_mark_1(test, index); + check_xa_mark_1(xa, index); - check_xa_mark_2(test); - check_xa_mark_3(test); + check_xa_mark_2(xa); + check_xa_mark_3(xa); } -static noinline void check_xa_shrink(struct kunit *test) +static noinline void check_xa_shrink(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 1); struct 
xa_node *node; unsigned int order; @@ -362,7 +347,7 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xas_load(&xas) != NULL); xas_unlock(&xas); XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); for (order = 0; order < max_order; order++) { @@ -379,49 +364,45 @@ static noinline void check_xa_shrink(struct kunit *test) XA_BUG_ON(xa, xa_head(xa) == node); rcu_read_unlock(); XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, xa->xa_head != node); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_insert(struct kunit *test) +static noinline void check_insert(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i; for (i = 0; i < 1024; i++) { - xa_insert_index(test, xa, i); + xa_insert_index(xa, i); XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL); - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); } for (i = 10; i < BITS_PER_LONG; i++) { - xa_insert_index(test, xa, 1UL << i); + xa_insert_index(xa, 1UL << i); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL); XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL); - xa_erase_index(test, xa, 1UL << i); + xa_erase_index(xa, 1UL << i); - xa_insert_index(test, xa, (1UL << i) - 1); + xa_insert_index(xa, (1UL << i) - 1); XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL); XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL); - xa_erase_index(test, xa, (1UL << i) - 1); + xa_erase_index(xa, (1UL << i) - 1); } - xa_insert_index(test, xa, ~0UL); + xa_insert_index(xa, ~0UL); XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL); XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL); - xa_erase_index(test, xa, ~0UL); + xa_erase_index(xa, ~0UL); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg(struct kunit *test) +static noinline void check_cmpxchg(struct xarray *xa) 
{ - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); void *SIX = xa_mk_value(6); void *LOTS = xa_mk_value(12345678); @@ -437,16 +418,14 @@ static noinline void check_cmpxchg(struct kunit *test) XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); - xa_erase_index(test, xa, 12345678); - xa_erase_index(test, xa, 5); + xa_erase_index(xa, 12345678); + xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_cmpxchg_order(struct kunit *test) +static noinline void check_cmpxchg_order(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - void *FIVE = xa_mk_value(5); unsigned int i, order = 3; @@ -497,10 +476,8 @@ static noinline void check_cmpxchg_order(struct kunit *test) #endif } -static noinline void check_reserve(struct kunit *test) +static noinline void check_reserve(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long index; int count; @@ -517,7 +494,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* cmpxchg sees a reserved entry as ZERO */ @@ -525,7 +502,7 @@ static noinline void check_reserve(struct kunit *test) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY, xa_mk_value(12345678), GFP_NOWAIT) != NULL); xa_release(xa, 12345678); - xa_erase_index(test, xa, 12345678); + xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* xa_insert treats it as busy */ @@ -565,10 +542,8 @@ static noinline void check_reserve(struct kunit *test) xa_destroy(xa); } -static noinline void check_xas_erase(struct kunit *test) +static noinline void check_xas_erase(struct 
xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned long i, j; @@ -606,11 +581,9 @@ static noinline void check_xas_erase(struct kunit *test) } #ifdef CONFIG_XARRAY_MULTI -static noinline void check_multi_store_1(struct kunit *test, unsigned long index, +static noinline void check_multi_store_1(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); @@ -629,15 +602,13 @@ static noinline void check_multi_store_1(struct kunit *test, unsigned long index XA_BUG_ON(xa, xa_load(xa, max) != NULL); XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL); - xa_erase_index(test, xa, min); + xa_erase_index(xa, min); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_2(struct kunit *test, unsigned long index, +static noinline void check_multi_store_2(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL); @@ -649,11 +620,9 @@ static noinline void check_multi_store_2(struct kunit *test, unsigned long index XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_store_3(struct kunit *test, unsigned long index, +static noinline void check_multi_store_3(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; int n = 0; @@ -678,11 +647,9 @@ static noinline void check_multi_store_3(struct kunit *test, unsigned long index } #endif -static noinline void check_multi_store(struct kunit *test) +static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long i, j, k; unsigned int max_order = (sizeof(long) == 4) ? 
30 : 60; @@ -747,28 +714,26 @@ static noinline void check_multi_store(struct kunit *test) } for (i = 0; i < 20; i++) { - check_multi_store_1(test, 200, i); - check_multi_store_1(test, 0, i); - check_multi_store_1(test, (1UL << i) + 1, i); + check_multi_store_1(xa, 200, i); + check_multi_store_1(xa, 0, i); + check_multi_store_1(xa, (1UL << i) + 1, i); } - check_multi_store_2(test, 4095, 9); + check_multi_store_2(xa, 4095, 9); for (i = 1; i < 20; i++) { - check_multi_store_3(test, 0, i); - check_multi_store_3(test, 1UL << i, i); + check_multi_store_3(xa, 0, i); + check_multi_store_3(xa, 1UL << i, i); } #endif } #ifdef CONFIG_XARRAY_MULTI /* mimics page cache __filemap_add_folio() */ -static noinline void check_xa_multi_store_adv_add(struct kunit *test, +static noinline void check_xa_multi_store_adv_add(struct xarray *xa, unsigned long index, unsigned int order, void *p) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); unsigned int nrpages = 1UL << order; @@ -796,12 +761,10 @@ static noinline void check_xa_multi_store_adv_add(struct kunit *test, } /* mimics page_cache_delete() */ -static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, +static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, index); xas_set_order(&xas, index, order); @@ -809,14 +772,12 @@ static noinline void check_xa_multi_store_adv_del_entry(struct kunit *test, xas_init_marks(&xas); } -static noinline void check_xa_multi_store_adv_delete(struct kunit *test, +static noinline void check_xa_multi_store_adv_delete(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - xa_lock_irq(xa); - check_xa_multi_store_adv_del_entry(test, index, order); + check_xa_multi_store_adv_del_entry(xa, index, order); xa_unlock_irq(xa); } @@ -853,12 +814,10 @@ static unsigned long some_val = 0xdeadbeef; static unsigned 
long some_val_2 = 0xdeaddead; /* mimics the page cache usage */ -static noinline void check_xa_multi_store_adv(struct kunit *test, +static noinline void check_xa_multi_store_adv(struct xarray *xa, unsigned long pos, unsigned int order) { - struct xarray *xa = xa_param(test); - unsigned int nrpages = 1UL << order; unsigned long index, base, next_index, next_next_index; unsigned int i; @@ -868,7 +827,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, next_index = round_down(base + nrpages, nrpages); next_next_index = round_down(next_index + nrpages, nrpages); - check_xa_multi_store_adv_add(test, base, order, &some_val); + check_xa_multi_store_adv_add(xa, base, order, &some_val); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val); @@ -876,20 +835,20 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL); /* Use order 0 for the next item */ - check_xa_multi_store_adv_add(test, next_index, 0, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2); XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2); /* Remove the next item */ - check_xa_multi_store_adv_delete(test, next_index, 0); + check_xa_multi_store_adv_delete(xa, next_index, 0); /* Now use order for a new pointer */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_index, order); - check_xa_multi_store_adv_delete(test, base, order); + check_xa_multi_store_adv_delete(xa, next_index, order); + check_xa_multi_store_adv_delete(xa, base, order); XA_BUG_ON(xa, !xa_empty(xa)); /* starting fresh again */ @@ -897,7 +856,7 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, /* let's test some holes now */ /* hole at base and 
next_next */ - check_xa_multi_store_adv_add(test, next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -908,12 +867,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL); - check_xa_multi_store_adv_delete(test, next_index, order); + check_xa_multi_store_adv_delete(xa, next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); /* hole at base and next */ - check_xa_multi_store_adv_add(test, next_next_index, order, &some_val_2); + check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2); for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL); @@ -924,12 +883,12 @@ static noinline void check_xa_multi_store_adv(struct kunit *test, for (i = 0; i < nrpages; i++) XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2); - check_xa_multi_store_adv_delete(test, next_next_index, order); + check_xa_multi_store_adv_delete(xa, next_next_index, order); XA_BUG_ON(xa, !xa_empty(xa)); } #endif -static noinline void check_multi_store_advanced(struct kunit *test) +static noinline void check_multi_store_advanced(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
20 : 1; @@ -941,59 +900,59 @@ static noinline void check_multi_store_advanced(struct kunit *test) */ for (pos = 7; pos < end; pos = (pos * pos) + 564) { for (i = 0; i < max_order; i++) { - check_xa_multi_store_adv(test, pos, i); - check_xa_multi_store_adv(test, pos + 157, i); + check_xa_multi_store_adv(xa, pos, i); + check_xa_multi_store_adv(xa, pos + 157, i); } } #endif } -static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; XA_BUG_ON(xa, !xa_empty(xa)); /* An empty array should assign %base to the first alloc */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(test, xa, base); + xa_erase_index(xa, base); XA_BUG_ON(xa, !xa_empty(xa)); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* Allocating and then erasing a lot should not lose base */ for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) - xa_alloc_index(test, xa, i, GFP_KERNEL); + xa_alloc_index(xa, i, GFP_KERNEL); for (i = base; i < 2 * XA_CHUNK_SIZE; i++) - xa_erase_index(test, xa, i); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); /* Destroying the array should do the same as erasing */ xa_destroy(xa); /* And it should assign %base again */ - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_alloc_index(xa, base, GFP_KERNEL); /* The next assigned ID should be base+1 */ - xa_alloc_index(test, xa, base + 1, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ xa_store_index(xa, base + 1, GFP_KERNEL); - xa_alloc_index(test, xa, base + 2, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); /* If we then erase base, it 
should be free */ - xa_erase_index(test, xa, base); - xa_alloc_index(test, xa, base, GFP_KERNEL); + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(test, xa, base + 1); - xa_erase_index(test, xa, base + 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(test, xa, base + i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } xa_destroy(xa); @@ -1016,14 +975,14 @@ static noinline void check_xa_alloc_1(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), GFP_KERNEL) != -EBUSY); - xa_erase_index(test, xa, 3); + xa_erase_index(xa, 3); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) { unsigned int i, id; unsigned long index; @@ -1059,7 +1018,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != 5); xa_for_each(xa, index, entry) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } for (i = base; i < base + 9; i++) { @@ -1074,7 +1033,7 @@ static noinline void check_xa_alloc_2(struct kunit *test, struct xarray *xa, uns xa_destroy(xa); } -static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, unsigned int base) +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) { struct xa_limit limit = XA_LIMIT(1, 0x3fff); u32 next = 0; @@ -1090,8 +1049,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, &next, GFP_KERNEL) != 0); XA_BUG_ON(xa, id != 0x3ffd); - 
xa_erase_index(test, xa, 0x3ffd); - xa_erase_index(test, xa, 1); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (i = 0x3ffe; i < 0x4003; i++) { @@ -1106,8 +1065,8 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns /* Check wrap-around is handled correctly */ if (base != 0) - xa_erase_index(test, xa, base); - xa_erase_index(test, xa, base + 1); + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); next = UINT_MAX; XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), xa_limit_32b, &next, GFP_KERNEL) != 0); @@ -1120,7 +1079,7 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns XA_BUG_ON(xa, id != base + 1); xa_for_each(xa, index, entry) - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -1128,21 +1087,19 @@ static noinline void check_xa_alloc_3(struct kunit *test, struct xarray *xa, uns static DEFINE_XARRAY_ALLOC(xa0); static DEFINE_XARRAY_ALLOC1(xa1); -static noinline void check_xa_alloc(struct kunit *test) +static noinline void check_xa_alloc(void) { - check_xa_alloc_1(test, &xa0, 0); - check_xa_alloc_1(test, &xa1, 1); - check_xa_alloc_2(test, &xa0, 0); - check_xa_alloc_2(test, &xa1, 1); - check_xa_alloc_3(test, &xa0, 0); - check_xa_alloc_3(test, &xa1, 1); + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } -static noinline void __check_store_iter(struct kunit *test, unsigned long start, +static noinline void __check_store_iter(struct xarray *xa, unsigned long start, unsigned int order, unsigned int present) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, start, order); void *entry; unsigned int count = 0; @@ -1166,54 +1123,50 @@ retry: XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_index(start)); XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) != 
xa_mk_index(start)); - xa_erase_index(test, xa, start); + xa_erase_index(xa, start); } -static noinline void check_store_iter(struct kunit *test) +static noinline void check_store_iter(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int i, j; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; for (i = 0; i < max_order; i++) { unsigned int min = 1 << i; unsigned int max = (2 << i) - 1; - __check_store_iter(test, 0, i, 0); + __check_store_iter(xa, 0, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); - __check_store_iter(test, min, i, 0); + __check_store_iter(xa, min, i, 0); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, min, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); xa_store_index(xa, max, GFP_KERNEL); - __check_store_iter(test, min, i, 1); + __check_store_iter(xa, min, i, 1); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, j, GFP_KERNEL); - __check_store_iter(test, 0, i, min); + __check_store_iter(xa, 0, i, min); XA_BUG_ON(xa, !xa_empty(xa)); for (j = 0; j < min; j++) xa_store_index(xa, min + j, GFP_KERNEL); - __check_store_iter(test, min, i, min); + __check_store_iter(xa, min, i, min); XA_BUG_ON(xa, !xa_empty(xa)); } #ifdef CONFIG_XARRAY_MULTI xa_store_index(xa, 63, GFP_KERNEL); xa_store_index(xa, 65, GFP_KERNEL); - __check_store_iter(test, 64, 2, 1); - xa_erase_index(test, xa, 63); + __check_store_iter(xa, 64, 2, 1); + xa_erase_index(xa, 63); #endif XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_multi_find_1(struct kunit *test, unsigned int order) +static noinline void check_multi_find_1(struct xarray *xa, unsigned order) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned long multi = 3 << order; unsigned long next = 4 << order; unsigned long index; @@ -1236,17 +1189,15 @@ static noinline void check_multi_find_1(struct kunit *test, unsigned int order) XA_BUG_ON(xa, xa_find_after(xa, &index, next, 
XA_PRESENT) != NULL); XA_BUG_ON(xa, index != next); - xa_erase_index(test, xa, multi); - xa_erase_index(test, xa, next); - xa_erase_index(test, xa, next + 1); + xa_erase_index(xa, multi); + xa_erase_index(xa, next); + xa_erase_index(xa, next + 1); XA_BUG_ON(xa, !xa_empty(xa)); #endif } -static noinline void check_multi_find_2(struct kunit *test) +static noinline void check_multi_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1; unsigned int i, j; void *entry; @@ -1260,19 +1211,17 @@ static noinline void check_multi_find_2(struct kunit *test) GFP_KERNEL); rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { - xa_erase_index(test, xa, index); + xa_erase_index(xa, index); } rcu_read_unlock(); - xa_erase_index(test, xa, index - 1); + xa_erase_index(xa, index - 1); XA_BUG_ON(xa, !xa_empty(xa)); } } } -static noinline void check_multi_find_3(struct kunit *test) +static noinline void check_multi_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 5; order < order_limit; order++) { @@ -1281,14 +1230,12 @@ static noinline void check_multi_find_3(struct kunit *test) XA_BUG_ON(xa, !xa_empty(xa)); xa_store_order(xa, 0, order - 4, xa_mk_index(0), GFP_KERNEL); XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); } } -static noinline void check_find_1(struct kunit *test) +static noinline void check_find_1(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long i, j, k; XA_BUG_ON(xa, !xa_empty(xa)); @@ -1325,20 +1272,18 @@ static noinline void check_find_1(struct kunit *test) else XA_BUG_ON(xa, entry != NULL); } - xa_erase_index(test, xa, j); + xa_erase_index(xa, j); XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0)); XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); } - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0)); 
} XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_find_2(struct kunit *test) +static noinline void check_find_2(struct xarray *xa) { - struct xarray *xa = xa_param(test); - void *entry; unsigned long i, j, index; @@ -1358,10 +1303,8 @@ static noinline void check_find_2(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_3(struct kunit *test) +static noinline void check_find_3(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i, j, k; void *entry; @@ -1385,10 +1328,8 @@ static noinline void check_find_3(struct kunit *test) xa_destroy(xa); } -static noinline void check_find_4(struct kunit *test) +static noinline void check_find_4(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index = 0; void *entry; @@ -1400,22 +1341,22 @@ static noinline void check_find_4(struct kunit *test) entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT); XA_BUG_ON(xa, entry); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); } -static noinline void check_find(struct kunit *test) +static noinline void check_find(struct xarray *xa) { unsigned i; - check_find_1(test); - check_find_2(test); - check_find_3(test); - check_find_4(test); + check_find_1(xa); + check_find_2(xa); + check_find_3(xa); + check_find_4(xa); for (i = 2; i < 10; i++) - check_multi_find_1(test, i); - check_multi_find_2(test); - check_multi_find_3(test); + check_multi_find_1(xa, i); + check_multi_find_2(xa); + check_multi_find_3(xa); } /* See find_swap_entry() in mm/shmem.c */ @@ -1441,10 +1382,8 @@ static noinline unsigned long xa_find_entry(struct xarray *xa, void *item) return entry ? 
xas.xa_index : -1; } -static noinline void check_find_entry(struct kunit *test) +static noinline void check_find_entry(struct xarray *xa) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI unsigned int order; unsigned long offset, index; @@ -1471,14 +1410,12 @@ static noinline void check_find_entry(struct kunit *test) xa_store_index(xa, ULONG_MAX, GFP_KERNEL); XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_index(ULONG_MAX)) != -1); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_pause(struct kunit *test) +static noinline void check_pause(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; unsigned int order; @@ -1548,10 +1485,8 @@ static noinline void check_pause(struct kunit *test) } -static noinline void check_move_tiny(struct kunit *test) +static noinline void check_move_tiny(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); @@ -1568,14 +1503,12 @@ static noinline void check_move_tiny(struct kunit *test) XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0)); XA_BUG_ON(xa, xas_prev(&xas) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, 0); + xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_max(struct kunit *test) +static noinline void check_move_max(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); xa_store_index(xa, ULONG_MAX, GFP_KERNEL); @@ -1591,14 +1524,12 @@ static noinline void check_move_max(struct kunit *test) XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != NULL); rcu_read_unlock(); - xa_erase_index(test, xa, ULONG_MAX); + xa_erase_index(xa, ULONG_MAX); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move_small(struct kunit *test, unsigned long idx) +static noinline void check_move_small(struct xarray *xa, unsigned long idx) { - struct 
xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned long i; @@ -1640,15 +1571,13 @@ static noinline void check_move_small(struct kunit *test, unsigned long idx) XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); rcu_read_unlock(); - xa_erase_index(test, xa, 0); - xa_erase_index(test, xa, idx); + xa_erase_index(xa, 0); + xa_erase_index(xa, idx); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_move(struct kunit *test) +static noinline void check_move(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, (1 << 16) - 1); unsigned long i; @@ -1675,7 +1604,7 @@ static noinline void check_move(struct kunit *test) rcu_read_unlock(); for (i = (1 << 8); i < (1 << 15); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); i = xas.xa_index; @@ -1706,17 +1635,17 @@ static noinline void check_move(struct kunit *test) xa_destroy(xa); - check_move_tiny(test); - check_move_max(test); + check_move_tiny(xa); + check_move_max(xa); for (i = 0; i < 16; i++) - check_move_small(test, 1UL << i); + check_move_small(xa, 1UL << i); for (i = 2; i < 16; i++) - check_move_small(test, (1UL << i) - 1); + check_move_small(xa, (1UL << i) - 1); } -static noinline void xa_store_many_order(struct kunit *test, struct xarray *xa, +static noinline void xa_store_many_order(struct xarray *xa, unsigned long index, unsigned order) { XA_STATE_ORDER(xas, xa, index, order); @@ -1739,34 +1668,30 @@ unlock: XA_BUG_ON(xa, xas_error(&xas)); } -static noinline void check_create_range_1(struct kunit *test, +static noinline void check_create_range_1(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; - xa_store_many_order(test, xa, index, order); + xa_store_many_order(xa, index, order); for (i = index; i < index + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_2(struct kunit *test, unsigned int order) +static 
noinline void check_create_range_2(struct xarray *xa, unsigned order) { - struct xarray *xa = xa_param(test); - unsigned long i; unsigned long nr = 1UL << order; for (i = 0; i < nr * nr; i += nr) - xa_store_many_order(test, xa, i, order); + xa_store_many_order(xa, i, order); for (i = 0; i < nr * nr; i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_3(struct kunit *test) +static noinline void check_create_range_3(void) { XA_STATE(xas, NULL, 0); xas_set_err(&xas, -EEXIST); @@ -1774,11 +1699,9 @@ static noinline void check_create_range_3(struct kunit *test) XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST); } -static noinline void check_create_range_4(struct kunit *test, +static noinline void check_create_range_4(struct xarray *xa, unsigned long index, unsigned order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned long base = xas.xa_index; unsigned long i = 0; @@ -1804,15 +1727,13 @@ unlock: XA_BUG_ON(xa, xas_error(&xas)); for (i = base; i < base + (1UL << order); i++) - xa_erase_index(test, xa, i); + xa_erase_index(xa, i); XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_create_range_5(struct kunit *test, +static noinline void check_create_range_5(struct xarray *xa, unsigned long index, unsigned int order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, order); unsigned int i; @@ -1829,46 +1750,44 @@ static noinline void check_create_range_5(struct kunit *test, xa_destroy(xa); } -static noinline void check_create_range(struct kunit *test) +static noinline void check_create_range(struct xarray *xa) { unsigned int order; unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
12 : 1; for (order = 0; order < max_order; order++) { - check_create_range_1(test, 0, order); - check_create_range_1(test, 1U << order, order); - check_create_range_1(test, 2U << order, order); - check_create_range_1(test, 3U << order, order); - check_create_range_1(test, 1U << 24, order); + check_create_range_1(xa, 0, order); + check_create_range_1(xa, 1U << order, order); + check_create_range_1(xa, 2U << order, order); + check_create_range_1(xa, 3U << order, order); + check_create_range_1(xa, 1U << 24, order); if (order < 10) - check_create_range_2(test, order); - - check_create_range_4(test, 0, order); - check_create_range_4(test, 1U << order, order); - check_create_range_4(test, 2U << order, order); - check_create_range_4(test, 3U << order, order); - check_create_range_4(test, 1U << 24, order); - - check_create_range_4(test, 1, order); - check_create_range_4(test, (1U << order) + 1, order); - check_create_range_4(test, (2U << order) + 1, order); - check_create_range_4(test, (2U << order) - 1, order); - check_create_range_4(test, (3U << order) + 1, order); - check_create_range_4(test, (3U << order) - 1, order); - check_create_range_4(test, (1U << 24) + 1, order); - - check_create_range_5(test, 0, order); - check_create_range_5(test, (1U << order), order); + check_create_range_2(xa, order); + + check_create_range_4(xa, 0, order); + check_create_range_4(xa, 1U << order, order); + check_create_range_4(xa, 2U << order, order); + check_create_range_4(xa, 3U << order, order); + check_create_range_4(xa, 1U << 24, order); + + check_create_range_4(xa, 1, order); + check_create_range_4(xa, (1U << order) + 1, order); + check_create_range_4(xa, (2U << order) + 1, order); + check_create_range_4(xa, (2U << order) - 1, order); + check_create_range_4(xa, (3U << order) + 1, order); + check_create_range_4(xa, (3U << order) - 1, order); + check_create_range_4(xa, (1U << 24) + 1, order); + + check_create_range_5(xa, 0, order); + check_create_range_5(xa, (1U << order), order); } - 
check_create_range_3(test); + check_create_range_3(); } -static noinline void __check_store_range(struct kunit *test, unsigned long first, +static noinline void __check_store_range(struct xarray *xa, unsigned long first, unsigned long last) { - struct xarray *xa = xa_param(test); - #ifdef CONFIG_XARRAY_MULTI xa_store_range(xa, first, last, xa_mk_index(first), GFP_KERNEL); @@ -1883,28 +1802,26 @@ static noinline void __check_store_range(struct kunit *test, unsigned long first XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_store_range(struct kunit *test) +static noinline void check_store_range(struct xarray *xa) { unsigned long i, j; for (i = 0; i < 128; i++) { for (j = i; j < 128; j++) { - __check_store_range(test, i, j); - __check_store_range(test, 128 + i, 128 + j); - __check_store_range(test, 4095 + i, 4095 + j); - __check_store_range(test, 4096 + i, 4096 + j); - __check_store_range(test, 123456 + i, 123456 + j); - __check_store_range(test, (1 << 24) + i, (1 << 24) + j); + __check_store_range(xa, i, j); + __check_store_range(xa, 128 + i, 128 + j); + __check_store_range(xa, 4095 + i, 4095 + j); + __check_store_range(xa, 4096 + i, 4096 + j); + __check_store_range(xa, 123456 + i, 123456 + j); + __check_store_range(xa, (1 << 24) + i, (1 << 24) + j); } } } #ifdef CONFIG_XARRAY_MULTI -static void check_split_1(struct kunit *test, unsigned long index, +static void check_split_1(struct xarray *xa, unsigned long index, unsigned int order, unsigned int new_order) { - struct xarray *xa = xa_param(test); - XA_STATE_ORDER(xas, xa, index, new_order); unsigned int i, found; void *entry; @@ -1940,30 +1857,26 @@ static void check_split_1(struct kunit *test, unsigned long index, xa_destroy(xa); } -static noinline void check_split(struct kunit *test) +static noinline void check_split(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int order, new_order; XA_BUG_ON(xa, !xa_empty(xa)); for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { for 
(new_order = 0; new_order < order; new_order++) { - check_split_1(test, 0, order, new_order); - check_split_1(test, 1UL << order, order, new_order); - check_split_1(test, 3UL << order, order, new_order); + check_split_1(xa, 0, order, new_order); + check_split_1(xa, 1UL << order, order, new_order); + check_split_1(xa, 3UL << order, order, new_order); } } } #else -static void check_split(struct kunit *test) { } +static void check_split(struct xarray *xa) { } #endif -static void check_align_1(struct kunit *test, char *name) +static void check_align_1(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; unsigned int id; unsigned long index; @@ -1983,10 +1896,8 @@ static void check_align_1(struct kunit *test, char *name) * We should always be able to store without allocating memory after * reserving a slot. */ -static void check_align_2(struct kunit *test, char *name) +static void check_align_2(struct xarray *xa, char *name) { - struct xarray *xa = xa_param(test); - int i; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2005,15 +1916,15 @@ static void check_align_2(struct kunit *test, char *name) XA_BUG_ON(xa, !xa_empty(xa)); } -static noinline void check_align(struct kunit *test) +static noinline void check_align(struct xarray *xa) { char name[] = "Motorola 68000"; - check_align_1(test, name); - check_align_1(test, name + 1); - check_align_1(test, name + 2); - check_align_1(test, name + 3); - check_align_2(test, name); + check_align_1(xa, name); + check_align_1(xa, name + 1); + check_align_1(xa, name + 2); + check_align_1(xa, name + 3); + check_align_2(xa, name); } static LIST_HEAD(shadow_nodes); @@ -2029,7 +1940,7 @@ static void test_update_node(struct xa_node *node) } } -static noinline void shadow_remove(struct kunit *test, struct xarray *xa) +static noinline void shadow_remove(struct xarray *xa) { struct xa_node *node; @@ -2043,17 +1954,8 @@ static noinline void shadow_remove(struct kunit *test, struct xarray *xa) xa_unlock(xa); } -struct 
workingset_testcase { - struct xarray *xa; - unsigned long index; -}; - -static noinline void check_workingset(struct kunit *test) +static noinline void check_workingset(struct xarray *xa, unsigned long index) { - struct workingset_testcase tc = *(struct workingset_testcase *)test->param_value; - struct xarray *xa = tc.xa; - unsigned long index = tc.index; - XA_STATE(xas, xa, index); xas_set_update(&xas, test_update_node); @@ -2076,7 +1978,7 @@ static noinline void check_workingset(struct kunit *test) xas_unlock(&xas); XA_BUG_ON(xa, list_empty(&shadow_nodes)); - shadow_remove(test, xa); + shadow_remove(xa); XA_BUG_ON(xa, !list_empty(&shadow_nodes)); XA_BUG_ON(xa, !xa_empty(xa)); } @@ -2085,11 +1987,9 @@ static noinline void check_workingset(struct kunit *test) * Check that the pointer / value / sibling entries are accounted the * way we expect them to be. */ -static noinline void check_account(struct kunit *test) +static noinline void check_account(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI - struct xarray *xa = xa_param(test); - unsigned int order; for (order = 1; order < 12; order++) { @@ -2116,10 +2016,8 @@ static noinline void check_account(struct kunit *test) #endif } -static noinline void check_get_order(struct kunit *test) +static noinline void check_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; unsigned int order; unsigned long i, j; @@ -2138,10 +2036,8 @@ static noinline void check_get_order(struct kunit *test) } } -static noinline void check_xas_get_order(struct kunit *test) +static noinline void check_xas_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
20 : 1; @@ -2173,10 +2069,8 @@ static noinline void check_xas_get_order(struct kunit *test) } } -static noinline void check_xas_conflict_get_order(struct kunit *test) +static noinline void check_xas_conflict_get_order(struct xarray *xa) { - struct xarray *xa = xa_param(test); - XA_STATE(xas, xa, 0); void *entry; @@ -2233,10 +2127,8 @@ static noinline void check_xas_conflict_get_order(struct kunit *test) } -static noinline void check_destroy(struct kunit *test) +static noinline void check_destroy(struct xarray *xa) { - struct xarray *xa = xa_param(test); - unsigned long index; XA_BUG_ON(xa, !xa_empty(xa)); @@ -2269,59 +2161,52 @@ static noinline void check_destroy(struct kunit *test) } static DEFINE_XARRAY(array); -static struct xarray *arrays[] = { &array }; -KUNIT_ARRAY_PARAM(array, arrays, NULL); - -static struct xarray *xa0s[] = { &xa0 }; -KUNIT_ARRAY_PARAM(xa0, xa0s, NULL); - -static struct workingset_testcase workingset_testcases[] = { - { &array, 0 }, - { &array, 64 }, - { &array, 4096 }, -}; -KUNIT_ARRAY_PARAM(workingset, workingset_testcases, NULL); - -static struct kunit_case xarray_cases[] = { - KUNIT_CASE_PARAM(check_xa_err, array_gen_params), - KUNIT_CASE_PARAM(check_xas_retry, array_gen_params), - KUNIT_CASE_PARAM(check_xa_load, array_gen_params), - KUNIT_CASE_PARAM(check_xa_mark, array_gen_params), - KUNIT_CASE_PARAM(check_xa_shrink, array_gen_params), - KUNIT_CASE_PARAM(check_xas_erase, array_gen_params), - KUNIT_CASE_PARAM(check_insert, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg, array_gen_params), - KUNIT_CASE_PARAM(check_cmpxchg_order, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, array_gen_params), - KUNIT_CASE_PARAM(check_reserve, xa0_gen_params), - KUNIT_CASE_PARAM(check_multi_store, array_gen_params), - KUNIT_CASE_PARAM(check_multi_store_advanced, array_gen_params), - KUNIT_CASE_PARAM(check_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_get_order, array_gen_params), - KUNIT_CASE_PARAM(check_xas_conflict_get_order, 
array_gen_params), - KUNIT_CASE(check_xa_alloc), - KUNIT_CASE_PARAM(check_find, array_gen_params), - KUNIT_CASE_PARAM(check_find_entry, array_gen_params), - KUNIT_CASE_PARAM(check_pause, array_gen_params), - KUNIT_CASE_PARAM(check_account, array_gen_params), - KUNIT_CASE_PARAM(check_destroy, array_gen_params), - KUNIT_CASE_PARAM(check_move, array_gen_params), - KUNIT_CASE_PARAM(check_create_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_range, array_gen_params), - KUNIT_CASE_PARAM(check_store_iter, array_gen_params), - KUNIT_CASE_PARAM(check_align, xa0_gen_params), - KUNIT_CASE_PARAM(check_split, array_gen_params), - KUNIT_CASE_PARAM(check_workingset, workingset_gen_params), - {}, -}; - -static struct kunit_suite xarray_suite = { - .name = "xarray", - .test_cases = xarray_cases, -}; - -kunit_test_suite(xarray_suite); +static int xarray_checks(void) +{ + check_xa_err(&array); + check_xas_retry(&array); + check_xa_load(&array); + check_xa_mark(&array); + check_xa_shrink(&array); + check_xas_erase(&array); + check_insert(&array); + check_cmpxchg(&array); + check_cmpxchg_order(&array); + check_reserve(&array); + check_reserve(&xa0); + check_multi_store(&array); + check_multi_store_advanced(&array); + check_get_order(&array); + check_xas_get_order(&array); + check_xas_conflict_get_order(&array); + check_xa_alloc(); + check_find(&array); + check_find_entry(&array); + check_pause(&array); + check_account(&array); + check_destroy(&array); + check_move(&array); + check_create_range(&array); + check_store_range(&array); + check_store_iter(&array); + check_align(&xa0); + check_split(&array); + + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); + return (tests_run == tests_passed) ? 
0 : -EINVAL; +} + +static void xarray_exit(void) +{ +} + +module_init(xarray_checks); +module_exit(xarray_exit); MODULE_AUTHOR("Matthew Wilcox "); MODULE_DESCRIPTION("XArray API test module"); MODULE_LICENSE("GPL"); -- 2.51.0 From e5b2a356dc8a88708d97bd47cca3b8f7ed7af6cb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 30 Jan 2025 16:16:20 -0800 Subject: [PATCH 06/16] MAINTAINERS: include linux-mm for xarray maintenance MM developers have an interest in the xarray code. Cc: David Gow Cc: Geert Uytterhoeven Cc: "Liam R. Howlett" Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Sidhartha Kumar Cc: Tamir Duberstein Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f52a004982c9..ab7463b2f165 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25729,6 +25729,7 @@ F: arch/x86/entry/vdso/ XARRAY M: Matthew Wilcox L: linux-fsdevel@vger.kernel.org +L: linux-mm@kvack.org S: Supported F: Documentation/core-api/xarray.rst F: include/linux/idr.h -- 2.51.0 From 2c4627c8ced77855b106c7104ecab70837d53799 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 2 Feb 2025 10:43:02 -0600 Subject: [PATCH 07/16] tools/power turbostat: version 2025.02.02 Summary of Changes since 2024.11.30: Fix regression in 2023.11.07 that affinitized forked child in one-shot mode. Harden one-shot mode against hotplug online/offline. Enable RAPL SysWatt column by default. Add initial PTL, CWF platform support. Harden initial PMT code in response to early use. Enable first built-in PMT counter: CWF c1e residency. Refuse to run on unsupported platforms without --force, to encourage updating to a version that supports the system, and to avoid not-so-useful measurement results. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 364a44a7d7ae..8d5011a0bf60 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9559,7 +9559,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.01.14 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From 2014c95afecee3e76ca4a56956a936e23283f05b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Feb 2025 15:39:26 -0800 Subject: [PATCH 08/16] Linux 6.14-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4117cc79748b..9e0d63d9d94b 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 13 +PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From c50105933f0c75aacc4f95c9bf36f7fbd9a83884 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:39:59 +0100 Subject: [PATCH 09/16] iomap: allow the file system to submit the writeback bios Change ->prepare_ioend to ->submit_ioend and require file systems that implement it to submit the bio. This is needed for file systems that do their own work on the bios before submitting them to the block layer like btrfs or zoned xfs. To make this easier also pass the writeback context to the method. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-2-hch@lst.de Reviewed-by: "Darrick J. 
Wong" Signed-off-by: Christian Brauner --- Documentation/filesystems/iomap/operations.rst | 11 +++++------ fs/iomap/buffered-io.c | 10 +++++----- fs/xfs/xfs_aops.c | 13 +++++++++---- include/linux/iomap.h | 12 +++++++----- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 2c7f5df9d8b0..04fc7a49067d 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -283,7 +283,7 @@ The ``ops`` structure must be specified and is as follows: struct iomap_writeback_ops { int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned len); - int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); void (*discard_folio)(struct folio *folio, loff_t pos); }; @@ -306,13 +306,12 @@ The fields are as follows: purpose. This function must be supplied by the filesystem. - - ``prepare_ioend``: Enables filesystems to transform the writeback - ioend or perform any other preparatory work before the writeback I/O - is submitted. + - ``submit_ioend``: Allows the file systems to hook into writeback bio + submission. This might include pre-write space accounting updates, or installing a custom ``->bi_end_io`` function for internal purposes, such as deferring the ioend completion to a workqueue to run metadata update - transactions from process context. + transactions from process context before submitting the bio. This function is optional. - ``discard_folio``: iomap calls this function after ``->map_blocks`` @@ -341,7 +340,7 @@ This can happen in interrupt or process context, depending on the storage device. Filesystems that need to update internal bookkeeping (e.g. 
unwritten -extent conversions) should provide a ``->prepare_ioend`` function to +extent conversions) should provide a ``->submit_ioend`` function to set ``struct iomap_end::bio::bi_end_io`` to its own function. This function should call ``iomap_finish_ioends`` after finishing its own work (e.g. unwritten extent conversion). diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d303e6c8900c..7952bf004bdb 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1675,7 +1675,7 @@ static void iomap_writepage_end_bio(struct bio *bio) } /* - * Submit the final bio for an ioend. + * Submit an ioend. * * If @error is non-zero, it means that we have a situation where some part of * the submission process has failed after we've marked pages for writeback. @@ -1694,14 +1694,14 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) * failure happened so that the file system end I/O handler gets called * to clean up. */ - if (wpc->ops->prepare_ioend) - error = wpc->ops->prepare_ioend(wpc->ioend, error); + if (wpc->ops->submit_ioend) + error = wpc->ops->submit_ioend(wpc, error); + else if (!error) + submit_bio(&wpc->ioend->io_bio); if (error) { wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); bio_endio(&wpc->ioend->io_bio); - } else { - submit_bio(&wpc->ioend->io_bio); } wpc->ioend = NULL; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 67877c36ed11..aa88895673d8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -395,10 +395,11 @@ allocate_blocks: } static int -xfs_prepare_ioend( - struct iomap_ioend *ioend, +xfs_submit_ioend( + struct iomap_writepage_ctx *wpc, int status) { + struct iomap_ioend *ioend = wpc->ioend; unsigned int nofs_flag; /* @@ -420,7 +421,11 @@ xfs_prepare_ioend( if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || (ioend->io_flags & IOMAP_F_SHARED)) ioend->io_bio.bi_end_io = xfs_end_bio; - return status; + + if (status) + return status; + submit_bio(&ioend->io_bio); + 
return 0; } /* @@ -462,7 +467,7 @@ xfs_discard_folio( static const struct iomap_writeback_ops xfs_writeback_ops = { .map_blocks = xfs_map_blocks, - .prepare_ioend = xfs_prepare_ioend, + .submit_ioend = xfs_submit_ioend, .discard_folio = xfs_discard_folio, }; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 75bf54e76f3b..dc8df4f779d4 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -362,12 +362,14 @@ struct iomap_writeback_ops { loff_t offset, unsigned len); /* - * Optional, allows the file systems to perform actions just before - * submitting the bio and/or override the bio end_io handler for complex - * operations like copy on write extent manipulation or unwritten extent - * conversions. + * Optional, allows the file systems to hook into bio submission, + * including overriding the bi_end_io handler. + * + * Returns 0 if the bio was successfully submitted, or a negative + * error code if status was non-zero or another error happened and + * the bio could not be submitted. */ - int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); /* * Optional, allows the file system to discard state on a page where -- 2.51.0 From 710273330663241d9ca5fbed51909e65807556ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:00 +0100 Subject: [PATCH 10/16] iomap: simplify io_flags and io_type in struct iomap_ioend The ioend fields for distinct types of I/O are a bit complicated. Consolidate them into a single io_flags field with its own flags decoupled from the iomap flags. This also prepares for adding a new flag that is unrelated to both of the iomap namespaces. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-3-hch@lst.de Reviewed-by: "Darrick J. 
Wong" Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 39 ++++++++++++++++++++++----------------- fs/xfs/xfs_aops.c | 12 ++++++------ include/linux/iomap.h | 20 ++++++++++++++++++-- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7952bf004bdb..d8d271107e60 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1605,13 +1605,10 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next) { if (ioend->io_bio.bi_status != next->io_bio.bi_status) return false; - if (next->io_flags & IOMAP_F_BOUNDARY) + if (next->io_flags & IOMAP_IOEND_BOUNDARY) return false; - if ((ioend->io_flags & IOMAP_F_SHARED) ^ - (next->io_flags & IOMAP_F_SHARED)) - return false; - if ((ioend->io_type == IOMAP_UNWRITTEN) ^ - (next->io_type == IOMAP_UNWRITTEN)) + if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) != + (next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) return false; if (ioend->io_offset + ioend->io_size != next->io_offset) return false; @@ -1709,7 +1706,8 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) } static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, - struct writeback_control *wbc, struct inode *inode, loff_t pos) + struct writeback_control *wbc, struct inode *inode, loff_t pos, + u16 ioend_flags) { struct iomap_ioend *ioend; struct bio *bio; @@ -1724,8 +1722,7 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, ioend = iomap_ioend_from_bio(bio); INIT_LIST_HEAD(&ioend->io_list); - ioend->io_type = wpc->iomap.type; - ioend->io_flags = wpc->iomap.flags; + ioend->io_flags = ioend_flags; if (pos > wpc->iomap.offset) wpc->iomap.flags &= ~IOMAP_F_BOUNDARY; ioend->io_inode = inode; @@ -1737,14 +1734,13 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, return ioend; } -static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos) +static bool 
iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, + u16 ioend_flags) { - if (wpc->iomap.offset == pos && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) - return false; - if ((wpc->iomap.flags & IOMAP_F_SHARED) != - (wpc->ioend->io_flags & IOMAP_F_SHARED)) + if (ioend_flags & IOMAP_IOEND_BOUNDARY) return false; - if (wpc->iomap.type != wpc->ioend->io_type) + if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) != + (wpc->ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) return false; if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) return false; @@ -1779,14 +1775,23 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, { struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); + unsigned int ioend_flags = 0; int error; - if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) { + if (wpc->iomap.type == IOMAP_UNWRITTEN) + ioend_flags |= IOMAP_IOEND_UNWRITTEN; + if (wpc->iomap.flags & IOMAP_F_SHARED) + ioend_flags |= IOMAP_IOEND_SHARED; + if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) + ioend_flags |= IOMAP_IOEND_BOUNDARY; + + if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) { new_ioend: error = iomap_submit_ioend(wpc, 0); if (error) return error; - wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos); + wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos, + ioend_flags); } if (!bio_add_folio(&wpc->ioend->io_bio, folio, len, poff)) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index aa88895673d8..8e60ceeb1520 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -114,7 +114,7 @@ xfs_end_ioend( */ error = blk_status_to_errno(ioend->io_bio.bi_status); if (unlikely(error)) { - if (ioend->io_flags & IOMAP_F_SHARED) { + if (ioend->io_flags & IOMAP_IOEND_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset, offset + size); @@ -125,9 +125,9 @@ xfs_end_ioend( /* * Success: commit the COW or unwritten blocks if 
needed. */ - if (ioend->io_flags & IOMAP_F_SHARED) + if (ioend->io_flags & IOMAP_IOEND_SHARED) error = xfs_reflink_end_cow(ip, offset, size); - else if (ioend->io_type == IOMAP_UNWRITTEN) + else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN) error = xfs_iomap_write_unwritten(ip, offset, size, false); if (!error && xfs_ioend_is_append(ioend)) @@ -410,7 +410,7 @@ xfs_submit_ioend( nofs_flag = memalloc_nofs_save(); /* Convert CoW extents to regular */ - if (!status && (ioend->io_flags & IOMAP_F_SHARED)) { + if (!status && (ioend->io_flags & IOMAP_IOEND_SHARED)) { status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); } @@ -418,8 +418,8 @@ xfs_submit_ioend( memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ - if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || - (ioend->io_flags & IOMAP_F_SHARED)) + if (xfs_ioend_is_append(ioend) || + (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) ioend->io_bio.bi_end_io = xfs_end_bio; if (status) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index dc8df4f779d4..9583f6456165 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -327,13 +327,29 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset, sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); +/* + * Flags for iomap_ioend->io_flags. + */ +/* shared COW extent */ +#define IOMAP_IOEND_SHARED (1U << 0) +/* unwritten extent */ +#define IOMAP_IOEND_UNWRITTEN (1U << 1) +/* don't merge into previous ioend */ +#define IOMAP_IOEND_BOUNDARY (1U << 2) + +/* + * Flags that if set on either ioend prevent the merge of two ioends. + * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) + */ +#define IOMAP_IOEND_NOMERGE_FLAGS \ + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN) + /* * Structure for writeback I/O completions. 
*/ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ - u16 io_type; - u16 io_flags; /* IOMAP_F_* */ + u16 io_flags; /* IOMAP_IOEND_* */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of data within eof */ loff_t io_offset; /* offset in the file */ -- 2.51.0 From 034c29fb3e7c119c42e650986e280f025a1bec7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:01 +0100 Subject: [PATCH 11/16] iomap: add a IOMAP_F_ANON_WRITE flag Add a IOMAP_F_ANON_WRITE flag that indicates that the write I/O does not have a target block assigned to it yet at iomap time and the file system will do that in the bio submission handler, splitting the I/O as needed. This is used to implement Zone Append based I/O for zoned XFS, where splitting writes to the hardware limits and assigning a zone to them happens just before sending the I/O off to the block layer, but could also be useful for other things like compressed I/O. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-4-hch@lst.de Reviewed-by: "Darrick J. Wong" Signed-off-by: Christian Brauner --- Documentation/filesystems/iomap/design.rst | 4 ++++ fs/iomap/buffered-io.c | 13 +++++++++---- fs/iomap/direct-io.c | 6 ++++-- include/linux/iomap.h | 7 +++++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst index b0d0188a095e..28ab3758c474 100644 --- a/Documentation/filesystems/iomap/design.rst +++ b/Documentation/filesystems/iomap/design.rst @@ -246,6 +246,10 @@ The fields are as follows: * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can be set by the filesystem for its own purposes. + * **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target + block assigned to it yet and the file system will do that in the bio + submission handler, splitting the I/O as needed. 
+ These flags can be set by iomap itself during file operations. The filesystem should supply an ``->iomap_end`` function if it needs to observe these flags: diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d8d271107e60..ba795d72e546 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1691,10 +1691,14 @@ static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error) * failure happened so that the file system end I/O handler gets called * to clean up. */ - if (wpc->ops->submit_ioend) + if (wpc->ops->submit_ioend) { error = wpc->ops->submit_ioend(wpc, error); - else if (!error) - submit_bio(&wpc->ioend->io_bio); + } else { + if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE)) + error = -EIO; + if (!error) + submit_bio(&wpc->ioend->io_bio); + } if (error) { wpc->ioend->io_bio.bi_status = errno_to_blk_status(error); @@ -1744,7 +1748,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, return false; if (pos != wpc->ioend->io_offset + wpc->ioend->io_size) return false; - if (iomap_sector(&wpc->iomap, pos) != + if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) && + iomap_sector(&wpc->iomap, pos) != bio_end_sector(&wpc->ioend->io_bio)) return false; /* diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b521eb15759e..641649a04614 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -81,10 +81,12 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, WRITE_ONCE(iocb->private, bio); } - if (dio->dops && dio->dops->submit_io) + if (dio->dops && dio->dops->submit_io) { dio->dops->submit_io(iter, bio, pos); - else + } else { + WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_ANON_WRITE); submit_bio(bio); + } } ssize_t iomap_dio_complete(struct iomap_dio *dio) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 9583f6456165..eb0764945b42 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -56,6 +56,10 @@ struct vm_fault; * * IOMAP_F_BOUNDARY 
indicates that I/O and I/O completions for this iomap must * never be merged with the mapping before it. + * + * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block + * assigned to it yet and the file system will do that in the bio submission + * handler, splitting the I/O as needed. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -68,6 +72,7 @@ struct vm_fault; #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) #define IOMAP_F_BOUNDARY (1U << 6) +#define IOMAP_F_ANON_WRITE (1U << 7) /* * Flags set by the core iomap code during operations: @@ -111,6 +116,8 @@ struct iomap { static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) { + if (iomap->flags & IOMAP_F_ANON_WRITE) + return U64_MAX; /* invalid */ return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } -- 2.51.0 From 5fcbd555d48390a8c819ba7fdf55fbfcabe05c80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:02 +0100 Subject: [PATCH 12/16] iomap: split bios to zone append limits in the submission handlers Provide helpers for file systems to split bios in the direct I/O and writeback I/O submission handlers. The split ioends are chained to the parent ioend so that only the parent ioend originally generated by the iomap layer will be processed after all the chained off children have completed. This is based on the block layer bio chaining that has supported a similar mechanism for a long time. This follows btrfs' lead and doesn't try to build bios to hardware limits for zone append commands, but instead builds them as normal unconstrained bios and splits them to the hardware limits in the I/O submission handler. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-5-hch@lst.de Reviewed-by: "Darrick J. 
Wong" Signed-off-by: Christian Brauner --- fs/iomap/Makefile | 1 + fs/iomap/buffered-io.c | 49 ++++++++++++++---------- fs/iomap/ioend.c | 86 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iomap.h | 15 +++++++- 4 files changed, 130 insertions(+), 21 deletions(-) create mode 100644 fs/iomap/ioend.c diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile index 381d76c5c232..69e8ebb41302 100644 --- a/fs/iomap/Makefile +++ b/fs/iomap/Makefile @@ -12,6 +12,7 @@ iomap-y += trace.o \ iter.o iomap-$(CONFIG_BLOCK) += buffered-io.o \ direct-io.o \ + ioend.o \ fiemap.o \ seek.o iomap-$(CONFIG_SWAP) += swapfile.o diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ba795d72e546..f67e13a9807a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -40,7 +40,8 @@ struct iomap_folio_state { unsigned long state[]; }; -static struct bio_set iomap_ioend_bioset; +struct bio_set iomap_ioend_bioset; +EXPORT_SYMBOL_GPL(iomap_ioend_bioset); static inline bool ifs_is_fully_uptodate(struct folio *folio, struct iomap_folio_state *ifs) @@ -1539,15 +1540,15 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, * ioend after this. 
*/ static u32 -iomap_finish_ioend(struct iomap_ioend *ioend, int error) +iomap_finish_ioend_buffered(struct iomap_ioend *ioend) { struct inode *inode = ioend->io_inode; struct bio *bio = &ioend->io_bio; struct folio_iter fi; u32 folio_count = 0; - if (error) { - mapping_set_error(inode->i_mapping, error); + if (ioend->io_error) { + mapping_set_error(inode->i_mapping, ioend->io_error); if (!bio_flagged(bio, BIO_QUIET)) { pr_err_ratelimited( "%s: writeback error on inode %lu, offset %lld, sector %llu", @@ -1566,6 +1567,24 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) return folio_count; } +static u32 +iomap_finish_ioend(struct iomap_ioend *ioend, int error) +{ + if (ioend->io_parent) { + struct bio *bio = &ioend->io_bio; + + ioend = ioend->io_parent; + bio_put(bio); + } + + if (error) + cmpxchg(&ioend->io_error, 0, error); + + if (!atomic_dec_and_test(&ioend->io_remaining)) + return 0; + return iomap_finish_ioend_buffered(ioend); +} + /* * Ioend completion routine for merged bios. This can only be called from task * contexts as merged ioends can be of unbound length. 
Hence we have to break up @@ -1667,8 +1686,10 @@ EXPORT_SYMBOL_GPL(iomap_sort_ioends); static void iomap_writepage_end_bio(struct bio *bio) { - iomap_finish_ioend(iomap_ioend_from_bio(bio), - blk_status_to_errno(bio->bi_status)); + struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); + + ioend->io_error = blk_status_to_errno(bio->bi_status); + iomap_finish_ioend_buffered(ioend); } /* @@ -1713,7 +1734,6 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, loff_t pos, u16 ioend_flags) { - struct iomap_ioend *ioend; struct bio *bio; bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS, @@ -1721,21 +1741,10 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, GFP_NOFS, &iomap_ioend_bioset); bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); bio->bi_end_io = iomap_writepage_end_bio; - wbc_init_bio(wbc, bio); bio->bi_write_hint = inode->i_write_hint; - - ioend = iomap_ioend_from_bio(bio); - INIT_LIST_HEAD(&ioend->io_list); - ioend->io_flags = ioend_flags; - if (pos > wpc->iomap.offset) - wpc->iomap.flags &= ~IOMAP_F_BOUNDARY; - ioend->io_inode = inode; - ioend->io_size = 0; - ioend->io_offset = pos; - ioend->io_sector = bio->bi_iter.bi_sector; - + wbc_init_bio(wbc, bio); wpc->nr_folios = 0; - return ioend; + return iomap_init_ioend(inode, bio, pos, ioend_flags); } static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos, diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c new file mode 100644 index 000000000000..3ff38c665c31 --- /dev/null +++ b/fs/iomap/ioend.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024-2025 Christoph Hellwig. 
+ */ +#include + +struct iomap_ioend *iomap_init_ioend(struct inode *inode, + struct bio *bio, loff_t file_offset, u16 ioend_flags) +{ + struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); + + atomic_set(&ioend->io_remaining, 1); + ioend->io_error = 0; + ioend->io_parent = NULL; + INIT_LIST_HEAD(&ioend->io_list); + ioend->io_flags = ioend_flags; + ioend->io_inode = inode; + ioend->io_offset = file_offset; + ioend->io_size = bio->bi_iter.bi_size; + ioend->io_sector = bio->bi_iter.bi_sector; + return ioend; +} +EXPORT_SYMBOL_GPL(iomap_init_ioend); + +/* + * Split up to the first @max_len bytes from @ioend if the ioend covers more + * than @max_len bytes. + * + * If @is_append is set, the split will be based on the hardware limits for + * REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware + * limits don't allow the entire @max_len length. + * + * The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer + * does not allow splitting REQ_OP_ZONE_APPEND bios. The file systems has to + * switch the operation after this call, but before submitting the bio. 
+ */ +struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, + unsigned int max_len, bool is_append) +{ + struct bio *bio = &ioend->io_bio; + struct iomap_ioend *split_ioend; + unsigned int nr_segs; + int sector_offset; + struct bio *split; + + if (is_append) { + struct queue_limits *lim = bdev_limits(bio->bi_bdev); + + max_len = min(max_len, + lim->max_zone_append_sectors << SECTOR_SHIFT); + + sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len); + if (unlikely(sector_offset < 0)) + return ERR_PTR(sector_offset); + if (!sector_offset) + return NULL; + } else { + if (bio->bi_iter.bi_size <= max_len) + return NULL; + sector_offset = max_len >> SECTOR_SHIFT; + } + + /* ensure the split ioend is still block size aligned */ + sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT, + i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT; + + split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset); + if (IS_ERR(split)) + return ERR_CAST(split); + split->bi_private = bio->bi_private; + split->bi_end_io = bio->bi_end_io; + + split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset, + ioend->io_flags); + split_ioend->io_parent = ioend; + + atomic_inc(&ioend->io_remaining); + ioend->io_offset += split_ioend->io_size; + ioend->io_size -= split_ioend->io_size; + + split_ioend->io_sector = ioend->io_sector; + if (!is_append) + ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT); + return split_ioend; +} +EXPORT_SYMBOL_GPL(iomap_split_ioend); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index eb0764945b42..90c27875e39d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -353,12 +353,19 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, /* * Structure for writeback I/O completions. + * + * File systems implementing ->submit_ioend can split a bio generated + * by iomap. In that case the parent ioend it was split from is recorded + * in ioend->io_parent. 
*/ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_flags; /* IOMAP_IOEND_* */ struct inode *io_inode; /* file being written to */ - size_t io_size; /* size of data within eof */ + size_t io_size; /* size of the extent */ + atomic_t io_remaining; /* completetion defer count */ + int io_error; /* stashed away status */ + struct iomap_ioend *io_parent; /* parent for completions */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ struct bio io_bio; /* MUST BE LAST! */ @@ -408,6 +415,10 @@ struct iomap_writepage_ctx { u32 nr_folios; /* folios added to the ioend */ }; +struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, + loff_t file_offset, u16 ioend_flags); +struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, + unsigned int max_len, bool is_append); void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); @@ -479,4 +490,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, # define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO) #endif /* CONFIG_SWAP */ +extern struct bio_set iomap_ioend_bioset; + #endif /* LINUX_IOMAP_H */ -- 2.51.0 From 63b66913d11c5f3572dfdee38e78d510d0f90aa8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:03 +0100 Subject: [PATCH 13/16] iomap: move common ioend code to ioend.c This code will be reused for direct I/O soon, so split it out of buffered-io.c. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-6-hch@lst.de Reviewed-by: "Darrick J. 
Wong" Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 135 +---------------------------------------- fs/iomap/internal.h | 9 +++ fs/iomap/ioend.c | 127 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 133 deletions(-) create mode 100644 fs/iomap/internal.h diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f67e13a9807a..4abff64998fe 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -12,17 +12,15 @@ #include #include #include -#include #include #include #include #include +#include "internal.h" #include "trace.h" #include "../internal.h" -#define IOEND_BATCH_SIZE 4096 - /* * Structure allocated for each folio to track per-block uptodate, dirty state * and I/O completions. @@ -40,9 +38,6 @@ struct iomap_folio_state { unsigned long state[]; }; -struct bio_set iomap_ioend_bioset; -EXPORT_SYMBOL_GPL(iomap_ioend_bioset); - static inline bool ifs_is_fully_uptodate(struct folio *folio, struct iomap_folio_state *ifs) { @@ -1539,8 +1534,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, * state, release holds on bios, and finally free up memory. Do not use the * ioend after this. */ -static u32 -iomap_finish_ioend_buffered(struct iomap_ioend *ioend) +u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend) { struct inode *inode = ioend->io_inode; struct bio *bio = &ioend->io_bio; @@ -1567,123 +1561,6 @@ iomap_finish_ioend_buffered(struct iomap_ioend *ioend) return folio_count; } -static u32 -iomap_finish_ioend(struct iomap_ioend *ioend, int error) -{ - if (ioend->io_parent) { - struct bio *bio = &ioend->io_bio; - - ioend = ioend->io_parent; - bio_put(bio); - } - - if (error) - cmpxchg(&ioend->io_error, 0, error); - - if (!atomic_dec_and_test(&ioend->io_remaining)) - return 0; - return iomap_finish_ioend_buffered(ioend); -} - -/* - * Ioend completion routine for merged bios. This can only be called from task - * contexts as merged ioends can be of unbound length. 
Hence we have to break up - * the writeback completions into manageable chunks to avoid long scheduler - * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get - * good batch processing throughput without creating adverse scheduler latency - * conditions. - */ -void -iomap_finish_ioends(struct iomap_ioend *ioend, int error) -{ - struct list_head tmp; - u32 completions; - - might_sleep(); - - list_replace_init(&ioend->io_list, &tmp); - completions = iomap_finish_ioend(ioend, error); - - while (!list_empty(&tmp)) { - if (completions > IOEND_BATCH_SIZE * 8) { - cond_resched(); - completions = 0; - } - ioend = list_first_entry(&tmp, struct iomap_ioend, io_list); - list_del_init(&ioend->io_list); - completions += iomap_finish_ioend(ioend, error); - } -} -EXPORT_SYMBOL_GPL(iomap_finish_ioends); - -/* - * We can merge two adjacent ioends if they have the same set of work to do. - */ -static bool -iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next) -{ - if (ioend->io_bio.bi_status != next->io_bio.bi_status) - return false; - if (next->io_flags & IOMAP_IOEND_BOUNDARY) - return false; - if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) != - (next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) - return false; - if (ioend->io_offset + ioend->io_size != next->io_offset) - return false; - /* - * Do not merge physically discontiguous ioends. The filesystem - * completion functions will have to iterate the physical - * discontiguities even if we merge the ioends at a logical level, so - * we don't gain anything by merging physical discontiguities here. - * - * We cannot use bio->bi_iter.bi_sector here as it is modified during - * submission so does not point to the start sector of the bio at - * completion. 
- */ - if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector) - return false; - return true; -} - -void -iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends) -{ - struct iomap_ioend *next; - - INIT_LIST_HEAD(&ioend->io_list); - - while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend, - io_list))) { - if (!iomap_ioend_can_merge(ioend, next)) - break; - list_move_tail(&next->io_list, &ioend->io_list); - ioend->io_size += next->io_size; - } -} -EXPORT_SYMBOL_GPL(iomap_ioend_try_merge); - -static int -iomap_ioend_compare(void *priv, const struct list_head *a, - const struct list_head *b) -{ - struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list); - struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list); - - if (ia->io_offset < ib->io_offset) - return -1; - if (ia->io_offset > ib->io_offset) - return 1; - return 0; -} - -void -iomap_sort_ioends(struct list_head *ioend_list) -{ - list_sort(NULL, ioend_list, iomap_ioend_compare); -} -EXPORT_SYMBOL_GPL(iomap_sort_ioends); - static void iomap_writepage_end_bio(struct bio *bio) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); @@ -2081,11 +1958,3 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, return iomap_submit_ioend(wpc, error); } EXPORT_SYMBOL_GPL(iomap_writepages); - -static int __init iomap_buffered_init(void) -{ - return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), - offsetof(struct iomap_ioend, io_bio), - BIOSET_NEED_BVECS); -} -fs_initcall(iomap_buffered_init); diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h new file mode 100644 index 000000000000..36d5c56e073e --- /dev/null +++ b/fs/iomap/internal.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IOMAP_INTERNAL_H +#define _IOMAP_INTERNAL_H 1 + +#define IOEND_BATCH_SIZE 4096 + +u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend); + +#endif /* _IOMAP_INTERNAL_H */ diff --git 
a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 3ff38c665c31..97d43c50cdf7 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -3,6 +3,11 @@ * Copyright (c) 2024-2025 Christoph Hellwig. */ #include +#include +#include "internal.h" + +struct bio_set iomap_ioend_bioset; +EXPORT_SYMBOL_GPL(iomap_ioend_bioset); struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, loff_t file_offset, u16 ioend_flags) @@ -22,6 +27,120 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode, } EXPORT_SYMBOL_GPL(iomap_init_ioend); +static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error) +{ + if (ioend->io_parent) { + struct bio *bio = &ioend->io_bio; + + ioend = ioend->io_parent; + bio_put(bio); + } + + if (error) + cmpxchg(&ioend->io_error, 0, error); + + if (!atomic_dec_and_test(&ioend->io_remaining)) + return 0; + return iomap_finish_ioend_buffered(ioend); +} + +/* + * Ioend completion routine for merged bios. This can only be called from task + * contexts as merged ioends can be of unbound length. Hence we have to break up + * the writeback completions into manageable chunks to avoid long scheduler + * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get + * good batch processing throughput without creating adverse scheduler latency + * conditions. + */ +void iomap_finish_ioends(struct iomap_ioend *ioend, int error) +{ + struct list_head tmp; + u32 completions; + + might_sleep(); + + list_replace_init(&ioend->io_list, &tmp); + completions = iomap_finish_ioend(ioend, error); + + while (!list_empty(&tmp)) { + if (completions > IOEND_BATCH_SIZE * 8) { + cond_resched(); + completions = 0; + } + ioend = list_first_entry(&tmp, struct iomap_ioend, io_list); + list_del_init(&ioend->io_list); + completions += iomap_finish_ioend(ioend, error); + } +} +EXPORT_SYMBOL_GPL(iomap_finish_ioends); + +/* + * We can merge two adjacent ioends if they have the same set of work to do. 
+ */ +static bool iomap_ioend_can_merge(struct iomap_ioend *ioend, + struct iomap_ioend *next) +{ + if (ioend->io_bio.bi_status != next->io_bio.bi_status) + return false; + if (next->io_flags & IOMAP_IOEND_BOUNDARY) + return false; + if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) != + (next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS)) + return false; + if (ioend->io_offset + ioend->io_size != next->io_offset) + return false; + /* + * Do not merge physically discontiguous ioends. The filesystem + * completion functions will have to iterate the physical + * discontiguities even if we merge the ioends at a logical level, so + * we don't gain anything by merging physical discontiguities here. + * + * We cannot use bio->bi_iter.bi_sector here as it is modified during + * submission so does not point to the start sector of the bio at + * completion. + */ + if (ioend->io_sector + (ioend->io_size >> SECTOR_SHIFT) != + next->io_sector) + return false; + return true; +} + +void iomap_ioend_try_merge(struct iomap_ioend *ioend, + struct list_head *more_ioends) +{ + struct iomap_ioend *next; + + INIT_LIST_HEAD(&ioend->io_list); + + while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend, + io_list))) { + if (!iomap_ioend_can_merge(ioend, next)) + break; + list_move_tail(&next->io_list, &ioend->io_list); + ioend->io_size += next->io_size; + } +} +EXPORT_SYMBOL_GPL(iomap_ioend_try_merge); + +static int iomap_ioend_compare(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list); + struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list); + + if (ia->io_offset < ib->io_offset) + return -1; + if (ia->io_offset > ib->io_offset) + return 1; + return 0; +} + +void iomap_sort_ioends(struct list_head *ioend_list) +{ + list_sort(NULL, ioend_list, iomap_ioend_compare); +} +EXPORT_SYMBOL_GPL(iomap_sort_ioends); + /* * Split up to the first @max_len bytes from @ioend if the ioend 
covers more * than @max_len bytes. @@ -84,3 +203,11 @@ struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, return split_ioend; } EXPORT_SYMBOL_GPL(iomap_split_ioend); + +static int __init iomap_ioend_init(void) +{ + return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), + offsetof(struct iomap_ioend, io_bio), + BIOSET_NEED_BVECS); +} +fs_initcall(iomap_ioend_init); -- 2.51.0 From ae2f33a519af3730cacd1c787ebe1f7475df5ba8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:04 +0100 Subject: [PATCH 14/16] iomap: factor out a iomap_dio_done helper Split out the struct iomap-dio level final completion from iomap_dio_bio_end_io into a helper to clean up the code and make it reusable. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-7-hch@lst.de Reviewed-by: "Darrick J. Wong" Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 76 ++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 641649a04614..277ece243770 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -165,43 +165,31 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret) cmpxchg(&dio->error, 0, ret); } -void iomap_dio_bio_end_io(struct bio *bio) +/* + * Called when dio->ref reaches zero from an I/O completion. + */ +static void iomap_dio_done(struct iomap_dio *dio) { - struct iomap_dio *dio = bio->bi_private; - bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); struct kiocb *iocb = dio->iocb; - if (bio->bi_status) - iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); - if (!atomic_dec_and_test(&dio->ref)) - goto release_bio; - - /* - * Synchronous dio, task itself will handle any completion work - * that needs after IO. All we need to do is wake the task. 
- */ if (dio->wait_for_completion) { + /* + * Synchronous I/O, task itself will handle any completion work + * that needs after IO. All we need to do is wake the task. + */ struct task_struct *waiter = dio->submit.waiter; WRITE_ONCE(dio->submit.waiter, NULL); blk_wake_io_task(waiter); - goto release_bio; - } - - /* - * Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline - */ - if (dio->flags & IOMAP_DIO_INLINE_COMP) { + } else if (dio->flags & IOMAP_DIO_INLINE_COMP) { WRITE_ONCE(iocb->private, NULL); iomap_dio_complete_work(&dio->aio.work); - goto release_bio; - } - - /* - * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule - * our completion that way to avoid an async punt to a workqueue. - */ - if (dio->flags & IOMAP_DIO_CALLER_COMP) { + } else if (dio->flags & IOMAP_DIO_CALLER_COMP) { + /* + * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then + * schedule our completion that way to avoid an async punt to a + * workqueue. + */ /* only polled IO cares about private cleared */ iocb->private = dio; iocb->dio_complete = iomap_dio_deferred_complete; @@ -219,19 +207,31 @@ void iomap_dio_bio_end_io(struct bio *bio) * issuer. */ iocb->ki_complete(iocb, 0); - goto release_bio; + } else { + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Async DIO completion that requires filesystem level + * completion work gets punted to a work queue to complete as + * the operation may require more IO to be issued to finalise + * filesystem metadata changes or guarantee data integrity. 
+ */ + INIT_WORK(&dio->aio.work, iomap_dio_complete_work); + queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); } +} + +void iomap_dio_bio_end_io(struct bio *bio) +{ + struct iomap_dio *dio = bio->bi_private; + bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); + + if (bio->bi_status) + iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); + + if (atomic_dec_and_test(&dio->ref)) + iomap_dio_done(dio); - /* - * Async DIO completion that requires filesystem level completion work - * gets punted to a work queue to complete as the operation may require - * more IO to be issued to finalise filesystem metadata changes or - * guarantee data integrity. - */ - INIT_WORK(&dio->aio.work, iomap_dio_complete_work); - queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq, - &dio->aio.work); -release_bio: if (should_dirty) { bio_check_pages_dirty(bio); } else { -- 2.51.0 From e523f2d4c974a819730830ce1c38834ee0cd7318 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:05 +0100 Subject: [PATCH 15/16] iomap: optionally use ioends for direct I/O struct iomap_ioend currently tracks outstanding buffered writes and has some really nice code in core iomap and XFS to merge contiguous I/Os and defer them to userspace for completion in a very efficient way. For zoned writes we'll also need a per-bio user context completion to record the written blocks, and the infrastructure for that would look basically like the ioend handling for buffered I/O. So instead of reinventing the wheel, reuse the existing infrastructure. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-8-hch@lst.de Reviewed-by: "Darrick J.
Wong" Signed-off-by: Christian Brauner --- fs/iomap/direct-io.c | 48 +++++++++++++++++++++++++++++++++++++++++-- fs/iomap/internal.h | 1 + fs/iomap/ioend.c | 2 ++ include/linux/iomap.h | 10 +++++---- 4 files changed, 55 insertions(+), 6 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 277ece243770..138d246ec29d 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. - * Copyright (c) 2016-2021 Christoph Hellwig. + * Copyright (c) 2016-2025 Christoph Hellwig. */ #include #include @@ -12,6 +12,7 @@ #include #include #include +#include "internal.h" #include "trace.h" #include "../internal.h" @@ -20,6 +21,7 @@ * Private flags for iomap_dio, must not overlap with the public ones in * iomap.h: */ +#define IOMAP_DIO_NO_INVALIDATE (1U << 25) #define IOMAP_DIO_CALLER_COMP (1U << 26) #define IOMAP_DIO_INLINE_COMP (1U << 27) #define IOMAP_DIO_WRITE_THROUGH (1U << 28) @@ -119,7 +121,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) * ->end_io() when necessary, otherwise a racing buffer read would cache * zeros from unwritten extents. 
*/ - if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE)) + if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) && + !(dio->flags & IOMAP_DIO_NO_INVALIDATE)) kiocb_invalidate_post_direct_write(iocb, dio->size); inode_dio_end(file_inode(iocb->ki_filp)); @@ -241,6 +244,47 @@ void iomap_dio_bio_end_io(struct bio *bio) } EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io); +u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend) +{ + struct iomap_dio *dio = ioend->io_bio.bi_private; + bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); + u32 vec_count = ioend->io_bio.bi_vcnt; + + if (ioend->io_error) + iomap_dio_set_error(dio, ioend->io_error); + + if (atomic_dec_and_test(&dio->ref)) { + /* + * Try to avoid another context switch for the completion given + * that we are already called from the ioend completion + * workqueue, but never invalidate pages from this thread to + * avoid deadlocks with buffered I/O completions. Tough luck if + * you hit the tiny race with someone dirtying the range now + * between this check and the actual completion. + */ + if (!dio->iocb->ki_filp->f_mapping->nrpages) { + dio->flags |= IOMAP_DIO_INLINE_COMP; + dio->flags |= IOMAP_DIO_NO_INVALIDATE; + } + dio->flags &= ~IOMAP_DIO_CALLER_COMP; + iomap_dio_done(dio); + } + + if (should_dirty) { + bio_check_pages_dirty(&ioend->io_bio); + } else { + bio_release_pages(&ioend->io_bio, false); + bio_put(&ioend->io_bio); + } + + /* + * Return the number of bvecs completed as even direct I/O completions + * do significant per-folio work and we'll still want to give up the + * CPU after a lot of completions. 
+ */ + return vec_count; +} + static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, loff_t pos, unsigned len) { diff --git a/fs/iomap/internal.h b/fs/iomap/internal.h index 36d5c56e073e..f6992a3bf66a 100644 --- a/fs/iomap/internal.h +++ b/fs/iomap/internal.h @@ -5,5 +5,6 @@ #define IOEND_BATCH_SIZE 4096 u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend); +u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend); #endif /* _IOMAP_INTERNAL_H */ diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 97d43c50cdf7..44f254ecab55 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -41,6 +41,8 @@ static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error) if (!atomic_dec_and_test(&ioend->io_remaining)) return 0; + if (ioend->io_flags & IOMAP_IOEND_DIRECT) + return iomap_finish_ioend_direct(ioend); return iomap_finish_ioend_buffered(ioend); } diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 90c27875e39d..5768b9f2a1cc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -343,20 +343,22 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, #define IOMAP_IOEND_UNWRITTEN (1U << 1) /* don't merge into previous ioend */ #define IOMAP_IOEND_BOUNDARY (1U << 2) +/* is direct I/O */ +#define IOMAP_IOEND_DIRECT (1U << 3) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN) + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) /* * Structure for writeback I/O completions. * - * File systems implementing ->submit_ioend can split a bio generated - * by iomap. In that case the parent ioend it was split from is recorded - * in ioend->io_parent. + * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io + * (for direct I/O) can split a bio generated by iomap.
In that case the parent + * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ -- 2.51.0 From d06244c60aec1d5d1589efe6b611a5b91a49465c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Feb 2025 07:40:06 +0100 Subject: [PATCH 16/16] iomap: add a io_private field to struct iomap_ioend Add a private data field to struct iomap_ioend so that the file system can attach information to it. Zoned XFS will use this for a pointer to the open zone. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250206064035.2323428-9-hch@lst.de Signed-off-by: Christian Brauner --- fs/iomap/ioend.c | 1 + include/linux/iomap.h | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index 44f254ecab55..18894ebba6db 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -23,6 +23,7 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode, ioend->io_offset = file_offset; ioend->io_size = bio->bi_iter.bi_size; ioend->io_sector = bio->bi_iter.bi_sector; + ioend->io_private = NULL; return ioend; } EXPORT_SYMBOL_GPL(iomap_init_ioend); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 5768b9f2a1cc..b4be07e8ec94 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -370,6 +370,7 @@ struct iomap_ioend { struct iomap_ioend *io_parent; /* parent for completions */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ + void *io_private; /* file system private data */ struct bio io_bio; /* MUST BE LAST! */ }; -- 2.51.0