From 0046dbed80e67f57014bdfdcabd7a8ae5e73824a Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:13 +0000 Subject: [PATCH 01/16] selftests/mm: skip uffd-stress if userfaultfd not available It's pretty obvious that the test wouldn't work if you don't have the feature enabled. But, it's still useful to SKIP instead of failing so the reader can immediately tell that this is the reason why. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-2-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 944d559ade21..91174e9425cd 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -412,8 +412,8 @@ static void parse_test_type_arg(const char *raw_type) * feature. 
*/ - if (uffd_get_features(&features)) - err("failed to get available features"); + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); -- 2.50.1 From f4b3e6c7f14c3e84c4faf228868a62289efed22b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:14 +0000 Subject: [PATCH 02/16] selftests/mm: skip uffd-wp-mremap if userfaultfd not available It's obvious that this should fail in that case, but still, save the reader the effort of figuring out that they've run into this by just SKIPping Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-3-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-wp-mremap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index 2c4f984bd73c..c2ba7d46c7b4 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -182,7 +182,10 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb /* Register range for uffd-wp. */ if (userfaultfd_open(&features)) { - ksft_test_result_fail("userfaultfd_open() failed\n"); + if (errno == ENOENT) + ksft_test_result_skip("userfaultfd not available\n"); + else + ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } if (uffd_register(uffd, mem, size, false, true, false)) { -- 2.50.1 From f3b5535abce9f05318ee52de7f0a97be58d032a0 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:15 +0000 Subject: [PATCH 03/16] selftests/mm/uffd: rename nr_cpus -> nr_parallel A later commit will bound this variable so it no longer necessarily matches the number of CPUs. Rename it appropriately. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-4-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-common.c | 8 +++--- tools/testing/selftests/mm/uffd-common.h | 2 +- tools/testing/selftests/mm/uffd-stress.c | 28 ++++++++++---------- tools/testing/selftests/mm/uffd-unit-tests.c | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 5457a078690d..a37088a23ffe 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -10,7 +10,7 @@ #define BASE_PMD_ADDR ((void *)(1UL << 30)) volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; int uffd = -1, uffd_flags, finished, *pipefd, test_type; bool map_shared; @@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void) size_t i; if (pipefd) { - for (i = 0; i < nr_cpus * 2; ++i) { + for (i = 0; i < nr_parallel * 2; ++i) { if (close(pipefd[i])) err("close pipefd"); } @@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) */ uffd_test_ops->release_pages(area_dst); - pipefd = malloc(sizeof(int) * nr_cpus * 2); + pipefd = malloc(sizeof(int) * nr_parallel * 2); if (!pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index a70ae10b5f62..7700cbfa3975 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -98,7 +98,7 @@ struct uffd_test_case_ops { }; typedef 
struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; extern bool map_shared; diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 91174e9425cd..d6b57e5a2e1d 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -180,12 +180,12 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; + pthread_t locking_threads[nr_parallel]; + pthread_t uffd_threads[nr_parallel]; + pthread_t background_threads[nr_parallel]; finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, locking_thread, (void *)cpu)) return 1; @@ -203,7 +203,7 @@ static int stress(struct uffd_args *args) background_thread, (void *)cpu)) return 1; } - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -219,11 +219,11 @@ static int stress(struct uffd_args *args) uffd_test_ops->release_pages(area_src); finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { if (write(pipefd[cpu*2+1], &c, 1) != 1) @@ -246,11 +246,11 @@ static int userfaultfd_stress(void) { void *area; unsigned long nr; - struct uffd_args args[nr_cpus]; + struct uffd_args args[nr_parallel]; uint64_t mem_size = nr_pages * 
page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + memset(args, 0, sizeof(struct uffd_args) * nr_parallel); if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; @@ -325,7 +325,7 @@ static int userfaultfd_stress(void) */ uffd_test_ops->release_pages(area_dst); - uffd_stats_reset(args, nr_cpus); + uffd_stats_reset(args, nr_parallel); /* bounce pass */ if (stress(args)) { @@ -359,7 +359,7 @@ static int userfaultfd_stress(void) swap(area_src_alias, area_dst_alias); - uffd_stats_report(args, nr_cpus); + uffd_stats_report(args, nr_parallel); } uffd_test_ctx_clear(); @@ -453,9 +453,9 @@ int main(int argc, char **argv) return KSFT_SKIP; } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + nr_parallel = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = bytes / page_size / nr_cpus; + nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { _err("invalid MiB"); usage(); @@ -466,7 +466,7 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_cpus; + nr_pages = nr_pages_per_cpu * nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 74c8bc02b506..24ea82ee2231 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -198,7 +198,7 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, nr_pages = UFFD_TEST_MEM_SIZE / page_size; /* TODO: remove this global var.. 
it's so ugly */ - nr_cpus = 1; + nr_parallel = 1; /* Initialize test arguments */ args->mem_type = mem_type; -- 2.50.1 From db0f1c138f18296e9c1c91619a0517c05ee50f1b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:16 +0000 Subject: [PATCH 04/16] selftests/mm: print some details when uffd-stress gets bad params So this can be debugged more easily. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-5-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index d6b57e5a2e1d..4ba5bf13a010 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -457,7 +457,8 @@ int main(int argc, char **argv) nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { - _err("invalid MiB"); + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, page_size, nr_parallel); usage(); } -- 2.50.1 From bf6d575e24ee91f7ba8a752c0354bb00db1d3bf2 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:17 +0000 Subject: [PATCH 05/16] selftests/mm: don't fail uffd-stress if too many CPUs This calculation divides a fixed parameter by an environment-dependent parameter i.e. the number of CPUs. The simple way to avoid machine-specific failures here is to just put a cap on the max value of the latter. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-6-dec210a658f5@google.com Signed-off-by: Brendan Jackman Suggested-by: Mateusz Guzik Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-stress.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 4ba5bf13a010..40af7f67c407 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -435,6 +435,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + unsigned long nr_cpus; size_t bytes; if (argc < 4) @@ -453,7 +454,15 @@ int main(int argc, char **argv) return KSFT_SKIP; } - nr_parallel = sysconf(_SC_NPROCESSORS_ONLN); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + nr_parallel = 32; + } else { + nr_parallel = nr_cpus; + } nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { -- 2.50.1 From 571a4b62ed63cace383619b0b4ef0c7e012237e1 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:18 +0000 Subject: [PATCH 06/16] selftests/mm: skip map_populate on weird filesystems It seems that 9pfs does not allow truncating unlinked files, Mark Brown has noted that NFS may also behave this way. It doesn't seem quite right to call this a "bug" but it's probably a special enough case that it makes sense for the test to just SKIP if it happens. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-7-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/map_populate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 5c8a53869b1b..433e54fb634f 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -87,6 +87,13 @@ int main(int argc, char **argv) BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); + if (ret < 0 && errno == ENOENT) { + /* + * This probably means tmpfile() made a file on a filesystem + * that doesn't handle temporary files the way we want. + */ + ksft_exit_skip("ftruncate(fileno(tmpfile())) gave ENOENT, weird filesystem?\n"); + } BUG_ON(ret, "ftruncate()"); smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, -- 2.50.1 From 32b42970e8614c0b8652fcd441acec937bc2595e Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:19 +0000 Subject: [PATCH 07/16] selftests/mm: skip gup_longterm tests on weird filesystems Some filesystems don't support ftruncate()ing unlinked files. They return ENOENT. In that case, skip the test. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-8-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 15335820656b..03271442aae5 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -96,7 +96,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) int ret; if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + if (errno == ENOENT) { + /* + * This can happen if the file has been unlinked and the + * filesystem doesn't support truncating unlinked files. + */ + ksft_test_result_skip("ftruncate() failed with ENOENT\n"); + } else { + ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); + } return; } -- 2.50.1 From e9269b2cc403b7681980e7219cf2dc339fca8d38 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:20 +0000 Subject: [PATCH 08/16] selftests/mm: drop unnecessary sudo usage This script must be run as root anyway (see all the writing to privileged files in /proc etc). Remove the unnecessary use of sudo to avoid breaking on single-user systems that don't have sudo. This also avoids confusing readers. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-9-dec210a658f5@google.com Signed-off-by: Brendan Jackman Reviewed-by: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 4b5e45a10219..31a576d70b57 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -402,7 +402,7 @@ CATEGORY="madv_populate" run_test ./madv_populate if [ -x ./memfd_secret ] then -(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret fi -- 2.50.1 From f896c6de833342bda61b8fe39f612023f7daf2a5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:21 +0000 Subject: [PATCH 09/16] selftests/mm: ensure uffd-wp-mremap gets pages of each size This test allocates a page of every available size and doesn't have any SKIP logic if the allocation fails. So, ensure it's available and skip the test if we can't do so. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-10-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 31a576d70b57..e1c20dcf8486 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -325,9 +325,30 @@ CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 3 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 -CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +# uffd-wp-mremap requires at least one page of each size. +have_all_size_hugepgs=true +declare -A nr_size_hugepgs +for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do + old=$(cat $f) + nr_size_hugepgs["$f"]="$old" + if [ "$old" == 0 ]; then + echo 1 > "$f" + fi + if [ $(cat "$f") == 0 ]; then + have_all_size_hugepgs=false + break + fi +done +if $have_all_size_hugepgs; then + CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +else + echo "# SKIP ./uffd-wp-mremap" +fi #cleanup +for f in "${!nr_size_hugepgs[@]}"; do + echo "${nr_size_hugepgs["$f"]}" > "$f" +done echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -- 2.50.1 From 5d2146a3354f8eeb1f9f9581ee9a40e0a9d2c714 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:22 +0000 Subject: [PATCH 10/16] selftests/mm: skip mlock tests if nobody user can't read it If running from a directory that can't be read by unprivileged users, executing on-fault-test via the nobody user will fail. 
The kselftest build does give the file the correct permissions, but after being installed it might be in a directory without global execute permissions. Since the script can't safely fix that, just skip if it happens. Note that the stderr of the `ls` command is unfiltered meaning the user sees a "permission denied" error that can help inform them why the test was skipped. Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-11-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index e1c20dcf8486..9aff33b10999 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -353,7 +353,7 @@ echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -if command -v sudo &> /dev/null; +if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null; then CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit else -- 2.50.1 From 1ddae9d67ee11886f9a35b78ad837eb26559e9ab Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 11 Mar 2025 13:18:23 +0000 Subject: [PATCH 11/16] selftests/mm/mlock: print error on failure It's not really possible to start diagnosing this without knowing the actual error. Also update the mlock2 helper to behave like libc would by setting errno and returning -1. 
Link: https://lkml.kernel.org/r/20250311-mm-selftests-v4-12-dec210a658f5@google.com Signed-off-by: Brendan Jackman Cc: Dev Jain Cc: Lorenzo Stoakes Cc: Mateusz Guzik Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mlock-random-test.c | 4 ++-- tools/testing/selftests/mm/mlock2.h | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 1cd80b0f76c3..b8d7e966f44c 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size) MLOCK_ONFAULT); if (ret) - ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n", is_mlock ? "mlock" : "mlock2", - p, alloc_size, + strerror(errno), p, alloc_size, p + start_offset, lock_size); } diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..81e77fa41901 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -6,7 +6,13 @@ static int mlock2_(void *start, size_t len, int flags) { - return syscall(__NR_mlock2, start, len, flags); + int ret = syscall(__NR_mlock2, start, len, flags); + + if (ret) { + /* syscall() already returned -1 and set errno; keep that errno. */ + return -1; + } + return 0; } static FILE *seek_to_smaps_entry(unsigned long addr) -- 2.50.1 From 43e9bbc3bb19893377364379e224c35db0256b88 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:48 +0800 Subject: [PATCH 12/16] mm, swap: remove setting SWAP_MAP_BAD for discard cluster Before alloc from a cluster, we will acquire cluster's lock and make sure it is usable by cluster_is_usable(), so there is no need to set SWAP_MAP_BAD for cluster to be discarded. 
Link: https://lkml.kernel.org/r/20250222160850.505274-5-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Reviewed-by: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index cab68e57f4cc..80e4ad24fe53 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -479,15 +479,6 @@ static void move_cluster(struct swap_info_struct *si, static void swap_cluster_schedule_discard(struct swap_info_struct *si, struct swap_cluster_info *ci) { - unsigned int idx = cluster_index(si, ci); - /* - * If scan_swap_map_slots() can't find a free cluster, it will check - * si->swap_map directly. To make sure the discarding cluster isn't - * taken by scan_swap_map_slots(), mark the swap entries bad (occupied). - * It will be cleared after discard - */ - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - SWAP_MAP_BAD, SWAPFILE_CLUSTER); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD); schedule_work(&si->discard_work); @@ -571,8 +562,6 @@ static bool swap_do_scheduled_discard(struct swap_info_struct *si) * return the cluster to allocation list. */ ci->flags = CLUSTER_FLAG_NONE; - memset(si->swap_map + idx * SWAPFILE_CLUSTER, - 0, SWAPFILE_CLUSTER); __free_cluster(si, ci); spin_unlock(&ci->lock); ret = true; -- 2.50.1 From 2310f0894225024397dfa193ccfc69b74366072e Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:49 +0800 Subject: [PATCH 13/16] mm, swap: correct comment in swap_usage_sub() We will add si back to plist in swap_usage_sub(), just correct the wrong comment which says we will remove si from plist in swap_usage_sub(). 
Link: https://lkml.kernel.org/r/20250222160850.505274-6-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 80e4ad24fe53..dc9f93b66f69 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1098,7 +1098,7 @@ static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries) /* * If device is not full, and SWAP_USAGE_OFFLIST_BIT is set, - * remove it from the plist. + * add it to the plist. */ if (unlikely(val & SWAP_USAGE_OFFLIST_BIT)) add_to_avail_list(si, false); -- 2.50.1 From 0a8a5b6c4129e61070eb9a45979c395fb6ab31c4 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 23 Feb 2025 00:08:50 +0800 Subject: [PATCH 14/16] mm: swap: remove stale comment of swap_reclaim_full_clusters() swap_reclaim_full_clusters() has no return value now, just remove the stale comment which says swap_reclaim_full_clusters() will return a bool value. Link: https://lkml.kernel.org/r/20250222160850.505274-7-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi Cc: Kairui Song Signed-off-by: Andrew Morton --- mm/swapfile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index dc9f93b66f69..a7f60006c52c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -820,7 +820,6 @@ out: return found; } -/* Return true if reclaimed a whole cluster */ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) { long to_scan = 1; -- 2.50.1 From 8e2f2aeb8b48aceef6e6c07b2d9bede4eaa50c06 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 21 Feb 2025 12:05:22 +0000 Subject: [PATCH 15/16] fs/proc/task_mmu: add guard region bit to pagemap Patch series "fs/proc/task_mmu: add guard region bit to pagemap". Currently there is no means of determining whether a given page in a mapping range is designated a guard region (as installed via madvise() using the MADV_GUARD_INSTALL flag). 
This is generally not an issue, but in some instances users may wish to determine whether this is the case. This series adds this ability via /proc/$pid/pagemap, updates the documentation and adds a self test to assert that this functions correctly. This patch (of 2): Currently there is no means by which users can determine whether a given page in memory is in fact a guard region, that is having had the MADV_GUARD_INSTALL madvise() flag applied to it. This is intentional, as to provide this information in VMA metadata would contradict the intent of the feature (providing a means to change fault behaviour at a page table level rather than a VMA level), and would require VMA metadata operations to scan page tables, which is unacceptable. In many cases, users have no need to reflect and determine what regions have been designated guard regions, as it is the user who has established them in the first place. But in some instances, such as monitoring software, or software that relies upon being able to ascertain the nature of mappings within a remote process for instance, it becomes useful to be able to determine which pages have the guard region marker applied. This patch makes use of an unused pagemap bit (58) to provide this information. This patch updates the documentation at the same time as making the change such that the implementation of the feature and the documentation of it are tied together. Link: https://lkml.kernel.org/r/cover.1740139449.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/521d99c08b975fb06a1e7201e971cc24d68196d1.1740139449.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Jann Horn Cc: Jonathan Corbet Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: "Paul E . 
McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/pagemap.rst | 3 ++- fs/proc/task_mmu.c | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index caba0f52dd36..a297e824f990 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -21,7 +21,8 @@ There are four components to pagemap: * Bit 56 page exclusively mapped (since 4.2) * Bit 57 pte is uffd-wp write-protected (since 5.13) (see Documentation/admin-guide/mm/userfaultfd.rst) - * Bits 58-60 zero + * Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page) + * Bits 59-60 zero * Bit 61 page is file-page or shared-anon (since 3.5) * Bit 62 page swapped * Bit 63 page present diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f02cd362309a..c17615e21a5d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1632,6 +1632,7 @@ struct pagemapread { #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -1732,6 +1733,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, page = pfn_swap_entry_to_page(entry); if (pte_marker_entry_uffd_wp(entry)) flags |= PM_UFFD_WP; + if (is_guard_swp_entry(entry)) + flags |= PM_GUARD_REGION; } if (page) { @@ -1931,7 +1934,8 @@ static const struct mm_walk_ops pagemap_ops = { * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bit 57 pte is uffd-wp write-protected - * Bits 58-60 zero + * Bit 58 pte is a guard region + * Bits 59-60 zero * Bit 61 page is file-page or shared-anon * Bit 62 page swapped * Bit 63 page present -- 2.50.1 From f3b92176f4f7100f7e150975f0378f31ea5ce040 Mon Sep 17 
00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 21 Feb 2025 12:05:23 +0000 Subject: [PATCH 16/16] tools/selftests: add guard region test for /proc/$pid/pagemap Add a test to the guard region self tests to assert that the /proc/$pid/pagemap information now made available to the user correctly identifies and reports guard regions. As a part of this change, update vm_util.h to add the new bit (note there is no header file in the kernel where this is exposed, the user is expected to provide their own mask) and utilise the helper functions there for pagemap functionality. [lorenzo.stoakes@oracle.com: fixup define name] Link: https://lkml.kernel.org/r/32e83941-e6f5-42ee-9292-a44c16463cf1@lucifer.local Link: https://lkml.kernel.org/r/164feb0a43ae72650e6b20c3910213f469566311.1740139449.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Jonathan Corbet Cc: Kalesh Singh Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: "Paul E . McKenney" Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 47 ++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 1 + 2 files changed, 48 insertions(+) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index ea9b5815e828..280d1831bf73 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -19,6 +19,7 @@ #include #include #include +#include "vm_util.h" /* * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: @@ -2032,4 +2033,50 @@ TEST_F(guard_regions, anon_zeropage) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Assert that /proc/$pid/pagemap correctly identifies guard region ranges. 
+ */ +TEST_F(guard_regions, pagemap) +{ + const unsigned long page_size = self->page_size; + int proc_fd; + char *ptr; + int i; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Read from pagemap, and assert no guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, 0); + } + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b60ac68a9dc8..0e629586556b 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -10,6 +10,7 @@ #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) -- 2.50.1