From cfc695109a6cd9b7228ad19ef7e74a851856ce3d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 22 May 2025 17:29:00 +0100 Subject: [PATCH 01/16] selftests/mm: deduplicate test names in madv_populate The madv_populate selftest has some repetitive code for several different cases that it covers, included repeated test names used in ksft_test_result() reports. This causes problems for automation, the test name is used to both track the test between runs and distinguish between multiple tests within the same run. Fix this by tweaking the messages with duplication to be more specific about the contexts they're in. Link: https://lkml.kernel.org/r/20250522-selftests-mm-madv-populate-dedupe-v1-1-fd1dedd79b4b@kernel.org Signed-off-by: Mark Brown Reviewed-by: Liam R. Howlett Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/madv_populate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index ef7d911da13e..b6fabd5c27ed 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -172,12 +172,12 @@ static void test_populate_read(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "read range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_READ); ksft_test_result(!ret, "MADV_POPULATE_READ\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "read range is populated\n"); munmap(addr, SIZE); } @@ -194,12 +194,12 @@ static void test_populate_write(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "write range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "write range is populated\n"); munmap(addr, SIZE); } @@ -247,19 +247,19 @@ static void test_softdirty(void) /* Clear any softdirty bits. */ clear_softdirty(); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "cleared range is not softdirty\n"); /* Populating READ should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_READ); - ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n"); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "range is not softdirty after MADV_POPULATE_READ\n"); /* Populating WRITE should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); - ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_softdirty(addr, SIZE), - "range is softdirty\n"); + "range is softdirty after MADV_POPULATE_WRITE \n"); munmap(addr, SIZE); } -- 2.50.1 From 49c69504f4d340d870f2c3f3d2f404c118ff7b23 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 23 May 2025 00:30:17 +0200 Subject: [PATCH 02/16] mmu_notifiers: remove leftover stub macros Commit ec8832d007cb ("mmu_notifiers: don't invalidate secondary TLBs as part of mmu_notifier_invalidate_range_end()") removed the main definitions of {ptep,pmdp_huge,pudp_huge}_clear_flush_notify; just their !CONFIG_MMU_NOTIFIER stubs are left behind, remove them. Link: https://lkml.kernel.org/r/20250523-mmu-notifier-cleanup-unused-v1-1-cc1f47ebec33@google.com Signed-off-by: Jann Horn Reviewed-by: Alistair Popple Reviewed-by: Qi Zheng Reviewed-by: Jason Gunthorpe Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index bc2402a45741..d1094c2d5fb6 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young -#define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pudp_huge_clear_flush_notify pudp_huge_clear_flush static inline void mmu_notifier_synchronize(void) { -- 2.50.1 From e13e7922d03439e374c263049af5f740ceae6346 Mon Sep 17 00:00:00 2001 From: Juan Yescas Date: Wed, 21 May 2025 14:57:45 -0700 Subject: [PATCH 03/16] mm: add CONFIG_PAGE_BLOCK_ORDER to select page block order Problem: On large page size configurations (16KiB, 64KiB), the CMA alignment requirement (CMA_MIN_ALIGNMENT_BYTES) increases considerably, and this causes the CMA reservations to be larger than necessary. This means that system will have less available MIGRATE_UNMOVABLE and MIGRATE_RECLAIMABLE page blocks since MIGRATE_CMA can't fallback to them. The CMA_MIN_ALIGNMENT_BYTES increases because it depends on MAX_PAGE_ORDER which depends on ARCH_FORCE_MAX_ORDER. The value of ARCH_FORCE_MAX_ORDER increases on 16k and 64k kernels. For example, in ARM, the CMA alignment requirement when: - CONFIG_ARCH_FORCE_MAX_ORDER default value is used - CONFIG_TRANSPARENT_HUGEPAGE is set: PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES ----------------------------------------------------------------------- 4KiB | 10 | 9 | 4KiB * (2 ^ 9) = 2MiB 16Kib | 11 | 11 | 16KiB * (2 ^ 11) = 32MiB 64KiB | 13 | 13 | 64KiB * (2 ^ 13) = 512MiB There are some extreme cases for the CMA alignment requirement when: - CONFIG_ARCH_FORCE_MAX_ORDER maximum value is set - CONFIG_TRANSPARENT_HUGEPAGE is NOT set: - CONFIG_HUGETLB_PAGE is NOT set PAGE_SIZE | MAX_PAGE_ORDER | pageblock_order | CMA_MIN_ALIGNMENT_BYTES ------------------------------------------------------------------------ 4KiB | 15 | 15 | 4KiB * (2 ^ 15) = 128MiB 16Kib | 13 | 13 | 16KiB * (2 ^ 13) = 128MiB 64KiB | 13 | 13 | 64KiB * (2 ^ 13) = 512MiB This affects the CMA reservations for the drivers. If a driver in a 4KiB kernel needs 4MiB of CMA memory, in a 16KiB kernel, the minimal reservation has to be 32MiB due to the alignment requirements: reserved-memory { ... cma_test_reserve: cma_test_reserve { compatible = "shared-dma-pool"; size = <0x0 0x400000>; /* 4 MiB */ ... }; }; reserved-memory { ... cma_test_reserve: cma_test_reserve { compatible = "shared-dma-pool"; size = <0x0 0x2000000>; /* 32 MiB */ ... }; }; Solution: Add a new config CONFIG_PAGE_BLOCK_ORDER that allows to set the page block order in all the architectures. The maximum page block order will be given by ARCH_FORCE_MAX_ORDER. By default, CONFIG_PAGE_BLOCK_ORDER will have the same value that ARCH_FORCE_MAX_ORDER. This will make sure that current kernel configurations won't be affected by this change. It is a opt-in change. This patch will allow to have the same CMA alignment requirements for large page sizes (16KiB, 64KiB) as that in 4kb kernels by setting a lower pageblock_order. Tests: - Verified that HugeTLB pages work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. - Verified that Transparent Huge Pages work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. - Verified that dma-buf heaps allocations work when pageblock_order is 1, 7, 10 on 4k and 16k kernels. Benchmarks: The benchmarks compare 16kb kernels with pageblock_order 10 and 7. The reason for the pageblock_order 7 is because this value makes the min CMA alignment requirement the same as that in 4kb kernels (2MB). - Perform 100K dma-buf heaps (/dev/dma_heap/system) allocations of SZ_8M, SZ_4M, SZ_2M, SZ_1M, SZ_64, SZ_8, SZ_4. Use simpleperf (https://developer.android.com/ndk/guides/simpleperf) to measure the # of instructions and page-faults on 16k kernels. The benchmark was executed 10 times. The averages are below: # instructions | #page-faults order 10 | order 7 | order 10 | order 7 -------------------------------------------------------- 13,891,765,770 | 11,425,777,314 | 220 | 217 14,456,293,487 | 12,660,819,302 | 224 | 219 13,924,261,018 | 13,243,970,736 | 217 | 221 13,910,886,504 | 13,845,519,630 | 217 | 221 14,388,071,190 | 13,498,583,098 | 223 | 224 13,656,442,167 | 12,915,831,681 | 216 | 218 13,300,268,343 | 12,930,484,776 | 222 | 218 13,625,470,223 | 14,234,092,777 | 219 | 218 13,508,964,965 | 13,432,689,094 | 225 | 219 13,368,950,667 | 13,683,587,37 | 219 | 225 ------------------------------------------------------------------- 13,803,137,433 | 13,131,974,268 | 220 | 220 Averages There were 4.85% #instructions when order was 7, in comparison with order 10. 13,803,137,433 - 13,131,974,268 = -671,163,166 (-4.86%) The number of page faults in order 7 and 10 were the same. These results didn't show any significant regression when the pageblock_order is set to 7 on 16kb kernels. - Run speedometer 3.1 (https://browserbench.org/Speedometer3.1/) 5 times on the 16k kernels with pageblock_order 7 and 10. order 10 | order 7 | order 7 - order 10 | (order 7 - order 10) % ------------------------------------------------------------------- 15.8 | 16.4 | 0.6 | 3.80% 16.4 | 16.2 | -0.2 | -1.22% 16.6 | 16.3 | -0.3 | -1.81% 16.8 | 16.3 | -0.5 | -2.98% 16.6 | 16.8 | 0.2 | 1.20% ------------------------------------------------------------------- 16.44 16.4 -0.04 -0.24% Averages The results didn't show any significant regression when the pageblock_order is set to 7 on 16kb kernels. Link: https://lkml.kernel.org/r/20250521215807.1860663-1-jyescas@google.com Signed-off-by: Juan Yescas Acked-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: David Hildenbrand Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 16 ++++++++++++++++ include/linux/pageblock-flags.h | 8 ++++---- mm/Kconfig | 34 +++++++++++++++++++++++++++++++++ mm/mm_init.c | 2 +- 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b19a98c20de8..87a667533d6d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,6 +37,22 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +/* Defines the order for the number of pages that have a migrate type. */ +#ifndef CONFIG_PAGE_BLOCK_ORDER +#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#else +#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER +#endif /* CONFIG_PAGE_BLOCK_ORDER */ + +/* + * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * which defines the order for the number of pages that can have a migrate type + */ +#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#endif + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fc6b9c87cb0a..e73a4292ef02 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,18 +41,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_PAGE_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ +#define pageblock_order PAGE_BLOCK_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/mm/Kconfig b/mm/Kconfig index bd08e151fa1b..f8bb8f070d0d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -993,6 +993,40 @@ config CMA_AREAS If unsure, leave the default value "8" in UMA and "20" in NUMA. +# +# Select this config option from the architecture Kconfig, if available, to set +# the max page order for physically contiguous allocations. +# +config ARCH_FORCE_MAX_ORDER + int + +# +# When ARCH_FORCE_MAX_ORDER is not defined, +# the default page block order is MAX_PAGE_ORDER (10) as per +# include/linux/mmzone.h. +# +config PAGE_BLOCK_ORDER + int "Page Block Order" + range 1 10 if ARCH_FORCE_MAX_ORDER = 0 + default 10 if ARCH_FORCE_MAX_ORDER = 0 + range 1 ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0 + default ARCH_FORCE_MAX_ORDER if ARCH_FORCE_MAX_ORDER != 0 + help + The page block order refers to the power of two number of pages that + are physically contiguous and can have a migrate type associated to + them. The maximum size of the page block order is limited by + ARCH_FORCE_MAX_ORDER. + + This config allows overriding the default page block order when the + page block order is required to be smaller than ARCH_FORCE_MAX_ORDER + or MAX_PAGE_ORDER. + + Reducing pageblock order can negatively impact THP generation + success rate. If your workloads uses THP heavily, please use this + option with caution. + + Don't change if unsure. + config MEM_SOFT_DIRTY bool "Track memory changes" depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS diff --git a/mm/mm_init.c b/mm/mm_init.c index 1c5444e188f8..8684fa851b84 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1509,7 +1509,7 @@ static inline void setup_usemap(struct zone *zone) {} /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */ void __init set_pageblock_order(void) { - unsigned int order = MAX_PAGE_ORDER; + unsigned int order = PAGE_BLOCK_ORDER; /* Check that pageblock_nr_pages has not already been setup */ if (pageblock_order) -- 2.50.1 From 595cf683519ab5a277d258a2251ee8cc7b838d6d Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Mon, 26 May 2025 18:28:18 +0000 Subject: [PATCH 04/16] mm/khugepaged: fix race with folio split/free using temporary reference hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn calls folio_mapcount(). folio_mapcount() checks folio_test_large() before proceeding to folio_large_mapcount(), but there is a race window where the folio may get split/freed between these checks, triggering: VM_WARN_ON_FOLIO(!folio_test_large(folio), folio) Take a temporary reference to the folio in hpage_collapse_scan_file(). This stabilizes the folio during refcount check and prevents incorrect large folio detection due to concurrent split/free. Use helper folio_expected_ref_count() + 1 to compare with folio_ref_count() instead of using is_refcount_suitable(). Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value") Signed-off-by: Shivank Garg Reported-by: syzbot+2b99589e33edbe9475ca@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Acked-by: Dev Jain Reviewed-by: Baolin Wang Cc: Bharata B Rao Cc: Fengwei Yin Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Ryan Roberts Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index cdf5a581368b..7731a162a1a7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2293,6 +2293,17 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, continue; } + if (!folio_try_get(folio)) { + xas_reset(&xas); + continue; + } + + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); + xas_reset(&xas); + continue; + } + if (folio_order(folio) == HPAGE_PMD_ORDER && folio->index == start) { /* Maybe PMD-mapped */ @@ -2303,23 +2314,27 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, * it's safe to skip LRU and refcount checks before * returning. */ + folio_put(folio); break; } node = folio_nid(folio); if (hpage_collapse_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; + folio_put(folio); break; } cc->node_load[node]++; if (!folio_test_lru(folio)) { result = SCAN_PAGE_LRU; + folio_put(folio); break; } - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) { result = SCAN_PAGE_COUNT; + folio_put(folio); break; } @@ -2331,6 +2346,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, */ present += folio_nr_pages(folio); + folio_put(folio); if (need_resched()) { xas_pause(&xas); -- 2.50.1 From bb084994d38fd36518ac50a33c8ddcea2239067e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 28 May 2025 21:52:44 +0200 Subject: [PATCH 05/16] selftests/mm: two fixes for the pfnmap test When unregistering the signal handler, we have to pass SIG_DFL, and blindly reading from PFN 0 and PFN 1 seems to be problematic on !x86 systems. In particularly, on arm64 tx2 machines where noting resides at these physical memory locations, we can generate RAS errors. Let's fix it by scanning /proc/iomem for actual "System RAM". Link: https://lkml.kernel.org/r/20250528195244.1182810-1-david@redhat.com Fixes: 2616b370323a ("selftests/mm: add simple VM_PFNMAP tests based on mmap'ing /dev/mem") Signed-off-by: David Hildenbrand Reported-by: Ryan Roberts Closes: https://lore.kernel.org/all/232960c2-81db-47ca-a337-38c4bce5f997@arm.com/T/#u Reviewed-by: Ryan Roberts Tested-by: Aishwarya TCV Cc: Shuah Khan Cc: Lorenzo Stoakes Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pfnmap.c | 61 +++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 8a9d19b6020c..866ac023baf5 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -43,14 +45,62 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) /* Force a read that the compiler cannot optimize out. */ *((volatile char *)(addr + offs)); } - if (signal(SIGSEGV, signal_handler) == SIG_ERR) + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) return -EINVAL; return ret; } +static int find_ram_target(off_t *phys_addr, + unsigned long long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + FIXTURE(pfnmap) { + off_t phys_addr; size_t pagesize; int dev_mem_fd; char *addr1; @@ -63,14 +113,17 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + self->dev_mem_fd = open("/dev/mem", O_RDONLY); if (self->dev_mem_fd < 0) SKIP(return, "Cannot open '/dev/mem'\n"); - /* We'll require the first two pages throughout our tests ... */ self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); if (self->addr1 == MAP_FAILED) SKIP(return, "Cannot mmap '/dev/mem'\n"); @@ -129,7 +182,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, 0); + self->dev_mem_fd, self->phys_addr); ASSERT_NE(self->addr2, MAP_FAILED); } -- 2.50.1 From 52084f258e46f09a71063447df31cbd48c0cacd0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 28 May 2025 23:06:17 +0200 Subject: [PATCH 06/16] mm/gup: update comment explaining why gup_fast() disables IRQs The current comment in gup_fast() talks about "IPIs that come from THPs splitting", which is outdated and refers to the old THP splitting implementation that was removed in commit ad0bed24e98b ("thp: drop all split_huge_page()-related code"), which landed in v4.5. Before then, THP splitting involved a pmdp_splitting_flush(), which sent an IPI to serialize against gup_fast(). Nowadays, we use tlb_remove_table_sync_one() to send IPIs that serialize against gup_fast(); this is used, for example, in THP *collapsing* to stop gup_fast() walks of a page table before depositing it. Link: https://lkml.kernel.org/r/20250528-gup-irq-comment-fix-v1-1-b9d83c345333@google.com Signed-off-by: Jann Horn Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kirill A. Shuemov Cc: Peter Xu Signed-off-by: Andrew Morton --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index 329c5f7acc7a..e065a49842a8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -3299,7 +3299,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end, * include/asm-generic/tlb.h for more details. * * We do not adopt an rcu_read_lock() here as we also want to block IPIs - * that come from THPs splitting. + * that come from callers of tlb_remove_table_sync_one(). */ local_irq_save(flags); gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned); -- 2.50.1 From 918850c13608c7b138512c2ecbfd3436b7a51797 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 28 May 2025 15:15:39 +0100 Subject: [PATCH 07/16] tools/testing/vma: add missing function stub The hugetlb fix introduced in commit ee40c9920ac2 ("mm: fix copy_vma() error handling for hugetlb mappings") mistakenly did not provide a stub for the VMA userland testing, which results in a compile error when trying to build this. Provide this stub to resolve the issue. Link: https://lkml.kernel.org/r/20250528-fix-vma-test-v1-1-c8a5f533b38f@oracle.com Fixes: ee40c9920ac2 ("mm: fix copy_vma() error handling for hugetlb mappings") Signed-off-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/vma/vma_internal.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index f6e45e62da3a..441feb21aa5a 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1461,4 +1461,9 @@ static inline int __call_mmap_prepare(struct file *file, return file->f_op->mmap_prepare(desc); } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + (void)vma; +} + #endif /* __MM_VMA_INTERNAL_H */ -- 2.50.1 From 83da212b7fca407f6f30c7d6f02a8f910db8724d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 28 May 2025 11:13:45 +0300 Subject: [PATCH 08/16] tools/testing: check correct variable in open_procmap() Check if "procmap_out->fd" is negative instead of "procmap_out" (which is a pointer). Link: https://lkml.kernel.org/r/aDbFuUTlJTBqziVd@stanley.mountain Fixes: bd23f293a0d5 ("tools/testing: add PROCMAP_QUERY helper functions in mm self tests") Signed-off-by: Dan Carpenter Reviewed-by: Lorenzo Stoakes Cc: levi.yun Cc: Shuah Khan Cc: Wei Yang Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/vm_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 1357e2d6a7b6..61d7bf1f8c62 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -439,7 +439,7 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out) sprintf(path, "/proc/%d/maps", pid); procmap_out->query.size = sizeof(procmap_out->query); procmap_out->fd = open(path, O_RDONLY); - if (procmap_out < 0) + if (procmap_out->fd < 0) ret = -errno; return ret; -- 2.50.1 From 9709eb0f845b713ba163f2c461537d8add3e4e04 Mon Sep 17 00:00:00 2001 From: Libo Chen Date: Fri, 23 May 2025 20:51:01 +0800 Subject: [PATCH 09/16] sched/numa: fix task swap by skipping kernel threads MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Patch series "sched/numa: add statistics of numa balance task migration", v6. Introduce task migration and swap statistics in the following places: /sys/fs/cgroup/{GROUP}/memory.stat /proc/{PID}/sched /proc/vmstat These statistics facilitate a rapid evaluation of the performance and resource utilization of the target workload. This patch (of 2): Task swapping is triggered when there are no idle CPUs in task A's preferred node. In this case, the NUMA load balancer chooses a task B on A's preferred node and swaps B with A. This helps improve NUMA locality without introducing load imbalance between nodes. In the current implementation, B's NUMA node preference is not mandatory. That is to say, a kernel thread might be incorrectly chosen as B. However, kernel thread and user space thread that does not have mm are not supposed to be covered by NUMA balancing because NUMA balancing only considers user pages via VMAs. According to Peter's suggestion for fixing this issue, we use PF_KTHREAD to skip the kernel thread. curr->mm is also checked because it is possible that user_mode_thread() might create a user thread without an mm. As per Prateek's analysis, after adding the PF_KTHREAD check, there is no need to further check the PF_IDLE flag: : - play_idle_precise() already ensures PF_KTHREAD is set before adding : PF_IDLE : : - cpu_startup_entry() is only called from the startup thread which : should be marked with PF_KTHREAD (based on my understanding looking at : commit cff9b2332ab7 ("kernel/sched: Modify initial boot task idle : setup")) In summary, the check in task_numa_compare() now aligns with task_tick_numa(). Link: https://lkml.kernel.org/r/cover.1748493462.git.yu.c.chen@intel.com Link: https://lkml.kernel.org/r/43d68b356b25d124f0d222ebedf3859e86eefb9f.1748493462.git.yu.c.chen@intel.com Link: https://lkml.kernel.org/r/cover.1748002400.git.yu.c.chen@intel.com Link: https://lkml.kernel.org/r/eaacc9c9bd37bac92d43a671867d85b2fdad3b06.1748002400.git.yu.c.chen@intel.com Signed-off-by: Chen Yu Signed-off-by: Libo Chen Suggested-by: Michal Koutný Tested-by: Ayush Jain Tested-by: Venkat Rao Bagalkote Reviewed-by: Shakeel Butt Cc: Aubrey Li Cc: "Chen, Tim C" Cc: Ingo Molnar Cc: Johannes Weiner Cc: Jonathan Corbet Cc: K Prateek Nayak Cc: Madadi Vineeth Reddy Cc: Mel Gorman Cc: Michal Hocko Cc: Muchun Song Cc: Peter Zijlstra Cc: Roman Gushchin Cc: Tejun Heo Signed-off-by: Andrew Morton --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cef163c174bd..1f9b7df8dfc5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2273,7 +2273,8 @@ static bool task_numa_compare(struct task_numa_env *env, rcu_read_lock(); cur = rcu_dereference(dst_rq->curr); - if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) + if (cur && ((cur->flags & (PF_EXITING | PF_KTHREAD)) || + !cur->mm)) cur = NULL; /* -- 2.50.1 From ad6b26b6a0a79166b53209df2ca1cf8636296382 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 23 May 2025 20:51:15 +0800 Subject: [PATCH 10/16] sched/numa: add statistics of numa balance task MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit On systems with NUMA balancing enabled, it has been found that tracking task activities resulting from NUMA balancing is beneficial. NUMA balancing employs two mechanisms for task migration: one is to migrate a task to an idle CPU within its preferred node, and the other is to swap tasks located on different nodes when they are on each other's preferred nodes. The kernel already provides NUMA page migration statistics in /sys/fs/cgroup/mytest/memory.stat and /proc/{PID}/sched. However, it lacks statistics regarding task migration and swapping. Therefore, relevant counts for task migration and swapping should be added. The following two new fields: numa_task_migrated numa_task_swapped will be shown in /sys/fs/cgroup/{GROUP}/memory.stat, /proc/{PID}/sched and /proc/vmstat. Introducing both per-task and per-memory cgroup (memcg) NUMA balancing statistics facilitates a rapid evaluation of the performance and resource utilization of the target workload. For instance, users can first identify the container with high NUMA balancing activity and then further pinpoint a specific task within that group, and subsequently adjust the memory policy for that task. In short, although it is possible to iterate through /proc/$pid/sched to locate the problematic task, the introduction of aggregated NUMA balancing activity for tasks within each memcg can assist users in identifying the task more efficiently through a divide-and-conquer approach. As Libo Chen pointed out, the memcg event relies on the text names in vmstat_text, and /proc/vmstat generates corresponding items based on vmstat_text. Thus, the relevant task migration and swapping events introduced in vmstat_text also need to be populated by count_vm_numa_event(), otherwise these values are zero in /proc/vmstat. In theory, task migration and swap events are part of the scheduler's activities. The reason for exposing them through the memory.stat/vmstat interface is that we already have NUMA balancing statistics in memory.stat/vmstat, and these events are closely related to each other. Following Shakeel's suggestion, we describe the end-to-end flow/story of all these events occurring on a timeline for future reference: The goal of NUMA balancing is to co-locate a task and its memory pages on the same NUMA node. There are two strategies: migrate the pages to the task's node, or migrate the task to the node where its pages reside. Suppose a task p1 is running on Node 0, but its pages are located on Node 1. NUMA page fault statistics for p1 reveal its "page footprint" across nodes. If NUMA balancing detects that most of p1's pages are on Node 1: 1.Page Migration Attempt: The Numa balance first tries to migrate p1's pages to Node 0. The numa_page_migrate counter increments. 2.Task Migration Strategies: After the page migration finishes, Numa balance checks every 1 second to see if p1 can be migrated to Node 1. Case 2.1: Idle CPU Available If Node 1 has an idle CPU, p1 is directly scheduled there. This event is logged as numa_task_migrated. Case 2.2: No Idle CPU (Task Swap) If all CPUs on Node1 are busy, direct migration could cause CPU contention or load imbalance. Instead: The Numa balance selects a candidate task p2 on Node 1 that prefers Node 0 (e.g., due to its own page footprint). p1 and p2 are swapped. This cross-node swap is recorded as numa_task_swapped. Link: https://lkml.kernel.org/r/d00edb12ba0f0de3c5222f61487e65f2ac58f5b1.1748493462.git.yu.c.chen@intel.com Link: https://lkml.kernel.org/r/7ef90a88602ed536be46eba7152ed0d33bad5790.1748002400.git.yu.c.chen@intel.com Signed-off-by: Chen Yu Tested-by: K Prateek Nayak Tested-by: Madadi Vineeth Reddy Acked-by: Peter Zijlstra (Intel) Tested-by: Venkat Rao Bagalkote Cc: Aubrey Li Cc: Ayush Jain Cc: "Chen, Tim C" Cc: Ingo Molnar Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Libo Chen Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tejun Heo Signed-off-by: Andrew Morton --- Documentation/admin-guide/cgroup-v2.rst | 6 ++++++ include/linux/sched.h | 4 ++++ include/linux/vm_event_item.h | 2 ++ kernel/sched/core.c | 9 +++++++-- kernel/sched/debug.c | 4 ++++ mm/memcontrol.c | 2 ++ mm/vmstat.c | 2 ++ 7 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index acf855851c03..cb279c69925e 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1697,6 +1697,12 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. + numa_task_migrated (npn) + Number of task migration by NUMA balancing. + + numa_task_swapped (npn) + Number of task swap by NUMA balancing. + pgdemote_kswapd Number of pages demoted by kswapd. diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..1c50e30b5c01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -549,6 +549,10 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; +#ifdef CONFIG_NUMA_BALANCING + u64 numa_task_migrated; + u64 numa_task_swapped; +#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9e15a088ba38..91a3ce9a2687 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,6 +66,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, + NUMA_TASK_MIGRATE, + NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c81cf642dba0..62b033199e9c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3352,6 +3352,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { + __schedstat_inc(p->stats.numa_task_swapped); + count_vm_numa_event(NUMA_TASK_SWAP); + count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); + if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -7953,8 +7957,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - /* TODO: This is not properly updating schedstats */ - + __schedstat_inc(p->stats.numa_task_migrated); + count_vm_numa_event(NUMA_TASK_MIGRATE); + count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 56ae54e0ce6a..f971c2af7912 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1206,6 +1206,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); +#ifdef CONFIG_NUMA_BALANCING + P_SCHEDSTAT(numa_task_migrated); + P_SCHEDSTAT(numa_task_swapped); +#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7e64dbf578d7..4e9771e6e340 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,6 +474,8 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, + NUMA_TASK_MIGRATE, + NUMA_TASK_SWAP, #endif }; diff --git a/mm/vmstat.c b/mm/vmstat.c index d888c248d99f..6f740f070b3d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1347,6 +1347,8 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", + "numa_task_migrated", + "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", -- 2.50.1 From 79509ec1d253c536d223cbc06a5b51c13841fec2 Mon Sep 17 00:00:00 2001 From: Enze Li Date: Sat, 31 May 2025 17:39:37 +0800 Subject: [PATCH 11/16] selftests/damon/_damon_sysfs: skip testcases if CONFIG_DAMON_SYSFS is disabled When CONFIG_DAMON_SYSFS is disabled, the selftests fail with the following outputs, not ok 2 selftests: damon: sysfs_update_schemes_tried_regions_wss_estimation.py # exit=1 not ok 3 selftests: damon: damos_quota.py # exit=1 not ok 4 selftests: damon: damos_quota_goal.py # exit=1 not ok 5 selftests: damon: damos_apply_interval.py # exit=1 not ok 6 selftests: damon: damos_tried_regions.py # exit=1 not ok 7 selftests: damon: damon_nr_regions.py # exit=1 not ok 11 selftests: damon: sysfs_update_schemes_tried_regions_hang.py # exit=1 The root cause of this issue is that all the testcases above do not check the sysfs interface of DAMON whether it exists or not. With this patch applied, all the testcases above now pass successfully. Link: https://lkml.kernel.org/r/20250531093937.1555159-1-lienze@kylinos.cn Signed-off-by: Enze Li Reviewed-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 1e587e0b1a39..5b1cb6b3ce4e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -15,6 +15,10 @@ if sysfs_root is None: print('Seems sysfs not mounted?') exit(ksft_skip) +if not os.path.exists(sysfs_root): + print('Seems DAMON disabled?') + exit(ksft_skip) + def write_file(path, string): "Returns error string if failed, or None otherwise" string = '%s' % string -- 2.50.1 From 109364fce504dae70b13eccc51a7bfc71528d154 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 May 2025 17:04:45 +0100 Subject: [PATCH 12/16] selftests/mm: use standard ksft_finished() in cow and gup_longterm Patch series "selftests/mm: cow and gup_longterm cleanups", v2. The bulk of these changes modify the cow and gup_longterm tests to report unique and stable names for each test, bringing them into line with the expectations of tooling that works with kselftest. The string reported as a test result is used by tooling to both deduplicate tests and track tests between test runs, using the same string for multiple tests or changing the string depending on test result causes problems for user interfaces and automation such as bisection. It was suggested that converting to use kselftest_harness.h would be a good way of addressing this, however that really wants the set of tests to run to be known at compile time but both test programs dynamically enumarate the set of huge page sizes the system supports and test each. Refactoring to handle this would be even more invasive than these changes which are large but straightforward and repetitive. A version of the main gup_longterm cleanup was previously sent separately, this version factors out the helpers for logging the start of the test since the cow test looks very similar. This patch (of 4): The cow and gup_longterm test programs open code something that looks a lot like the standard ksft_finished() helper to summarise the test results and provide an exit code, convert to use ksft_finished(). Link: https://lkml.kernel.org/r/20250527-selftests-mm-cow-dedupe-v2-0-ff198df8e38e@kernel.org Link: https://lkml.kernel.org/r/20250527-selftests-mm-cow-dedupe-v2-1-ff198df8e38e@kernel.org Signed-off-by: Mark Brown Acked-by: David Hildenbrand Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 7 +------ tools/testing/selftests/mm/gup_longterm.c | 8 ++------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index b6cfe0a4b7df..e70cd3d900cc 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1771,7 +1771,6 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { - int err; struct thp_settings default_settings; ksft_print_header(); @@ -1811,9 +1810,5 @@ int main(int argc, char **argv) thp_restore_settings(); } - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 21595b20bbc3..e60e62809186 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -455,7 +455,7 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int i, err; + int i; pagesize = getpagesize(); nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, @@ -469,9 +469,5 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_test_case(&test_cases[i]); - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } -- 2.50.1 From 3f192afbede24c60e59db1272ad155a3a44f5fe7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 May 2025 17:04:46 +0100 Subject: [PATCH 13/16] selftests/mm: add helper for logging test start and results Several of the MM tests have a pattern of printing a description of the test to be run then reporting the actual TAP result using a generic string not connected to the specific test, often in a shared function used by many tests. The name reported typically varies depending on the specific result rather than the test too. This causes problems for tooling that works with test results, the names reported with the results are used to deduplicate tests and track them between runs so both duplicated names and changing names cause trouble for things like UIs and automated bisection. As a first step towards matching these tests better with the expectations of kselftest provide helpers which record the test name as part of the initial print and then use that as part of reporting a result. This is not added as a generic kselftest helper partly because the use of a variable to store the test name doesn't fit well with the header only implementation of kselftest.h and partly because it's not really an intended pattern. Ideally at some point the mm tests that use it will be updated to not need it. Link: https://lkml.kernel.org/r/20250527-selftests-mm-cow-dedupe-v2-2-ff198df8e38e@kernel.org Signed-off-by: Mark Brown Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/vm_util.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9211ba640d9c..adb5d294a220 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,6 +3,7 @@ #include #include #include +#include #include /* ffsl() */ #include /* _SC_PAGESIZE */ #include "../kselftest.h" @@ -95,6 +96,25 @@ static inline int open_self_procmap(struct procmap_fd *procmap_out) return open_procmap(pid, procmap_out); } +/* These helpers need to be inline to match the kselftest.h idiom. */ +static char test_name[1024]; + +static inline void log_test_start(const char *name, ...) +{ + va_list args; + va_start(args, name); + + vsnprintf(test_name, sizeof(test_name), name, args); + ksft_print_msg("[RUN] %s\n", test_name); + + va_end(args); +} + +static inline void log_test_result(int result) +{ + ksft_test_result_report(result, "%s\n", test_name); +} + /* * On ppc64 this will only work with radix 2M hugepage size */ -- 2.50.1 From 3f2d9a9ac544694e26c8faeb1a044c2bdcd0c793 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 May 2025 17:04:47 +0100 Subject: [PATCH 14/16] selftests/mm: report unique test names for each cow test The kselftest framework uses the string logged when a test result is reported as the unique identifier for a test, using it to track test results between runs. The cow test completely fails to follow this pattern, it runs test functions repeatedly with various parameters with each result report from those functions being a string logging an error message which is fixed between runs. Since the code already logs each test uniquely before it starts refactor to also print this to a buffer, then use that name as the test result. This isn't especially pretty but is relatively straightforward and is a great help to tooling. Link: https://lkml.kernel.org/r/20250527-selftests-mm-cow-dedupe-v2-3-ff198df8e38e@kernel.org Signed-off-by: Mark Brown Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 333 ++++++++++++++++++++----------- 1 file changed, 217 insertions(+), 116 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index e70cd3d900cc..dbbcc5eb3dce 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -112,9 +112,12 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { - if (pipe(comm_pipes->child_ready) < 0) + if (pipe(comm_pipes->child_ready) < 0) { + ksft_perror("pipe()"); return -errno; + } if (pipe(comm_pipes->parent_ready) < 0) { + ksft_perror("pipe()"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { exit(fn(mem, size, &comm_pipes)); @@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * write-faults by directly mapping pages writable. */ ret = mprotect(mem, size, PROT_READ); - ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + + ret = mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = -EINVAL; if (!ret) { - ksft_test_result_pass("No leak from parent into child\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from parent into child\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from parent into child\n"); + log_test_result(KSFT_FAIL); } close_comm_pipes: close_comm_pipes(&comm_pipes); @@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free; } if (pipe(fds) < 0) { - ksft_test_result_fail("pipe() failed\n"); + ksft_perror("pipe() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_print_msg("vmsplice() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (!before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_perror("vmsplice() failed"); + log_test_result(KSFT_FAIL); wait(&ret); goto close_pipe; } @@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; if (munmap(mem, size) < 0) { - ksft_test_result_fail("munmap() failed\n"); + ksft_perror("munmap() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } write(comm_pipes.parent_ready[1], "0", 1); @@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, /* Wait until the child is done writing. */ wait(&ret); if (!WIFEXITED(ret)) { - ksft_test_result_fail("wait() failed\n"); + ksft_perror("wait() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } @@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) { - ksft_test_result_fail("read() failed\n"); + ksft_perror("read() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } } if (!memcmp(old, new, transferred)) { - ksft_test_result_pass("No leak from child into parent\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from child into parent\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from child into parent\n"); + log_test_result(KSFT_FAIL); } close_pipe: close(fds[0]); @@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } fd = fileno(file); @@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); goto close_file; } /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed\n"); + log_test_result(KSFT_SKIP); goto free_tmp; } @@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) iov.iov_len = size; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed\n"); + log_test_result(KSFT_SKIP); goto queue_exit; } @@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) * if the page is mapped R/O vs. R/W). */ ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + goto unregister_buffers; + } + clear_softdirty(); - ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + ret = mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } } @@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) memset(mem, 0xff, size); sqe = io_uring_get_sqe(&ring); if (!sqe) { - ksft_test_result_fail("io_uring_get_sqe() failed\n"); + ksft_print_msg("io_uring_get_sqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); ret = io_uring_submit(&ring); if (ret < 0) { - ksft_test_result_fail("io_uring_submit() failed\n"); + ksft_print_msg("io_uring_submit() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { - ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + ksft_print_msg("io_uring_wait_cqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } if (cqe->res != size) { - ksft_test_result_fail("write_fixed failed\n"); + ksft_print_msg("write_fixed failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_cqe_seen(&ring, cqe); @@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_test_result_fail("pread() failed\n"); + ksft_print_msg("pread() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } total += cur; } /* Finally, check if we read what we expected. */ - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/W pin is reliable\n"); + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); quit_child: if (use_fork) { @@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, int ret; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + log_test_result(KSFT_SKIP); return; } tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free_tmp; } @@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, clear_softdirty(); ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret) { if (errno == EINVAL) - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_SKIP; else - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_FAIL; + ksft_perror("PIN_LONGTERM_TEST_START failed"); + log_test_result(ret); goto wait; } @@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); - if (ret) - ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); - else - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/O pin is reliable\n"); + if (ret) { + ksft_perror("PIN_LONGTERM_TEST_READ failed"); + log_test_result(KSFT_FAIL); + } else { + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); + } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); if (ret) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_perror("PIN_LONGTERM_TEST_STOP failed"); wait: switch (test) { case RO_PIN_TEST_SHARED: write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); if (!WIFEXITED(ret)) - ksft_print_msg("[INFO] wait() failed\n"); + ksft_perror("wait() failed"); break; default: break; @@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout) mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); /* Ignore if not around on a kernel. */ if (ret && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_perror("MADV_NOHUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout) if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); if (!pagemap_is_swapped(pagemap_fd, mem)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } } @@ -775,13 +827,13 @@ munmap: static void run_with_base_page(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with base page\n", desc); + log_test_start("%s ... with base page", desc); do_run_with_base_page(fn, false); } static void run_with_base_page_swap(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + log_test_start("%s ... with swapped out base page", desc); do_run_with_base_page(fn, true); } @@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } @@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) ret = madvise(mem, thpsize, MADV_HUGEPAGE); if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + ksft_perror("MADV_HUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { - ksft_test_result_skip("Did not get a THP populated\n"); + ksft_print_msg("Did not get a THP populated\n"); + log_test_result(KSFT_SKIP); goto munmap; } memset(mem, 1, thpsize); @@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); if (ret) { - ksft_test_result_fail("MADV_DONTNEED failed\n"); + ksft_perror("MADV_DONTNEED failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = pagesize; @@ -877,13 +935,15 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mremap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); if (tmp != mremap_mem) { - ksft_test_result_fail("mremap() failed\n"); + ksft_perror("mremap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = mremap_size; @@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto munmap; } else if (!ret) { exit(0); @@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) /* Allow for sharing all pages again. */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) case THP_RUN_SINGLE_PTE_SWAPOUT: madvise(mem, size, MADV_PAGEOUT); if (!range_is_swapped(mem, size)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } break; @@ -941,56 +1005,56 @@ munmap: static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + log_test_start("%s ... with THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD, size); } static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + log_test_start("%s ... with swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE, size); } static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + log_test_start("%s ... with single PTE of THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + log_test_start("%s ... with partially shared THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } @@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; char *mem, *dummy; - ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); return; } @@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) */ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (dummy == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto munmap; } munmap(dummy, hugetlbsize); @@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } @@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } @@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before actually COW-sharing the page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); goto close_comm_pipes; } break; @@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the upper part of the THP. */ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the lower part of the THP. */ ret = madvise(mem, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { switch (test) { @@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = madvise(mem, size, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before anyone modified the COW-shared page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void) for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { struct test_case const *test_case = &anon_thp_test_cases[i]; - ksft_print_msg("[RUN] %s\n", test_case->desc); + log_test_start("%s", test_case->desc); do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } @@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - ksft_test_result(!memcmp(smem, old, size), - "Other mapping not modified\n"); + if (!memcmp(smem, old, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); free(old); } @@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; - ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + log_test_start("%s ... with shared zeropage", desc); mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) size_t mmap_size; int ret; - ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + log_test_start("%s ... with huge zeropage", desc); if (!has_huge_zeropage) { - ksft_test_result_skip("Huge zeropage not enabled\n"); + ksft_print_msg("Huge zeropage not enabled\n"); + log_test_result(KSFT_SKIP); return; } @@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } mmap_smem = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); + goto munmap; + } ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_FAIL); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_skip("fileno() failed\n"); + ksft_perror("fileno() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, hugetlbsize)) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } @@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); if (smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } -- 2.50.1 From 66bce7afbaca6ca9022210b0cd9fa3405da36667 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 May 2025 17:04:48 +0100 Subject: [PATCH 15/16] selftests/mm: fix test result reporting in gup_longterm The kselftest framework uses the string logged when a test result is reported as the unique identifier for a test, using it to track test results between runs. The gup_longterm test fails to follow this pattern, it runs a single test function repeatedly with various parameters but each result report is a string logging an error message which is fixed between runs. Since the code already logs each test uniquely before it starts refactor to also print this to a buffer, then use that name as the test result. This isn't especially pretty but is relatively straightforward and is a great help to tooling. Link: https://lkml.kernel.org/r/20250527-selftests-mm-cow-dedupe-v2-4-ff198df8e38e@kernel.org Signed-off-by: Mark Brown Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 150 ++++++++++++++-------- 1 file changed, 94 insertions(+), 56 deletions(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index e60e62809186..f84ea97c2543 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -93,33 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) __fsword_t fs_type = get_fs_type(fd); bool should_work; char *mem; + int result = KSFT_PASS; int ret; + if (fd < 0) { + result = KSFT_FAIL; + goto report; + } + if (ftruncate(fd, size)) { if (errno == ENOENT) { skip_test_dodgy_fs("ftruncate()"); } else { - ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + ksft_print_msg("ftruncate() failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + goto report; } return; } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) - ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize) { + ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } mem = mmap(NULL, size, PROT_READ | PROT_WRITE, shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno)); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize || shared) { + ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } /* Fault in the page such that GUP-fast can pin it directly. */ @@ -134,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno)); + ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; goto munmap; } /* FALLTHROUGH */ @@ -147,12 +163,14 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) type == TEST_TYPE_RW_FAST; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + result = KSFT_SKIP; break; } if (rw && shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; return; } /* @@ -169,14 +187,19 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + result = KSFT_SKIP; break; } else if (ret && errno == EFAULT) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) + result = KSFT_FAIL; + else + result = KSFT_PASS; break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n", - strerror(errno)); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; break; } @@ -189,7 +212,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) * some previously unsupported filesystems, we might want to * perform some additional tests for possible data corruptions. */ - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) + result = KSFT_PASS; + else + result = KSFT_FAIL; break; } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -199,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* io_uring always pins pages writable. */ if (shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); - return; + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; + goto report; } should_work = !shared || fs_supports_writable_longterm_pinning(fs_type); @@ -208,8 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; break; } /* @@ -222,17 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed (%s)\n", - strerror(errno)); + if (should_work) { + ksft_print_msg("Should have failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + } else { + result = KSFT_PASS; + } } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n", - strerror(-ret)); + ksft_print_msg("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; } else { - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) { + result = KSFT_PASS; + } else { + ksft_print_msg("Should have worked\n"); + result = KSFT_FAIL; + } io_uring_unregister_buffers(&ring); } @@ -246,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) munmap: munmap(mem, size); +report: + log_test_result(result); } typedef void (*test_fn)(int fd, size_t size); @@ -254,13 +295,11 @@ static void run_with_memfd(test_fn fn, const char *desc) { int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) { - ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); fn(fd, pagesize); close(fd); @@ -271,23 +310,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno)); - return; - } - - fd = fileno(file); - if (fd < 0) { - ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); + fd = -1; + } else { + fd = fileno(file); + if (fd < 0) { + ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); + } } fn(fd, pagesize); -close: - fclose(file); + + if (file) + fclose(file); } static void run_with_local_tmpfile(test_fn fn, const char *desc) @@ -295,22 +334,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) char filename[] = __FILE__"_tmpfile_XXXXXX"; int fd; - ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + log_test_start("%s ... with local tmpfile", desc); fd = mkstemp(filename); - if (fd < 0) { - ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno)); - return; - } + if (fd < 0) + ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); if (unlink(filename)) { - ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno)); - goto close; + ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); + close(fd); + fd = -1; } fn(fd, pagesize); -close: - close(fd); + + if (fd >= 0) + close(fd); } static void run_with_memfd_hugetlb(test_fn fn, const char *desc, @@ -319,15 +358,14 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, int flags = MFD_HUGETLB; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno)); - return; + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); } fn(fd, hugetlbsize); -- 2.50.1 From 0b43b8bc8ef88bb45b018b2d4853d38bfc5ce2a7 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Mon, 26 May 2025 18:28:20 +0000 Subject: [PATCH 16/16] mm/khugepaged: clean up refcount check using folio_expected_ref_count() Use folio_expected_ref_count() instead of open-coded logic in is_refcount_suitable(). This avoids code duplication and improves clarity. Drop is_refcount_suitable() as it is no longer needed. Link: https://lkml.kernel.org/r/20250526182818.37978-2-shivankg@amd.com Signed-off-by: Shivank Garg Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Acked-by: Dev Jain Reviewed-by: Baolin Wang Cc: Bharata B Rao Cc: Fengwei Yin Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mariano Pache Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/khugepaged.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 7731a162a1a7..15203ea7d007 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -548,19 +548,6 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte, } } -static bool is_refcount_suitable(struct folio *folio) -{ - int expected_refcount = folio_mapcount(folio); - - if (!folio_test_anon(folio) || folio_test_swapcache(folio)) - expected_refcount += folio_nr_pages(folio); - - if (folio_test_private(folio)) - expected_refcount++; - - return folio_ref_count(folio) == expected_refcount; -} - static int __collapse_huge_page_isolate(struct vm_area_struct *vma, unsigned long address, pte_t *pte, @@ -652,7 +639,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * but not from this process. The other process cannot write to * the page, only trigger CoW. */ - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) != folio_ref_count(folio)) { folio_unlock(folio); result = SCAN_PAGE_COUNT; goto out; @@ -1402,7 +1389,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, * has excessive GUP pins (i.e. 512). Anyway the same check * will be done again later the risk seems low. */ - if (!is_refcount_suitable(folio)) { + if (folio_expected_ref_count(folio) != folio_ref_count(folio)) { result = SCAN_PAGE_COUNT; goto out_unmap; } -- 2.50.1