From c616f36a1002a1364f3b0ab5c938d796d6096837 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:29:56 -0800 Subject: [PATCH 01/16] KVM: selftests: Use continue to handle all "pass" scenarios in dirty_log_test When verifying pages in dirty_log_test, immediately continue on all "pass" scenarios to make the logic consistent in how it handles pass vs. fail. No functional change intended. Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-13-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 8544e8425f9c..d7cf1840bd80 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -510,8 +510,6 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } if (__test_and_clear_bit_le(page, bmap)) { - bool matched; - nr_dirty_pages++; /* @@ -519,9 +517,10 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) * the corresponding page should be either the * previous iteration number or the current one. */ - matched = (val == iteration || val == iteration - 1); + if (val == iteration || val == iteration - 1) + continue; - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { + if (host_log_mode == LOG_MODE_DIRTY_RING) { if (val == iteration - 2 && min_iter <= iteration - 2) { /* * Short answer: this case is special @@ -567,10 +566,8 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, val, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu)", + page, val, iteration); } else { nr_clean_pages++; /* -- 2.51.0 From 24b9a2a613779cf5a0c6f7629cb7a70e6a769838 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:29:57 -0800 Subject: [PATCH 02/16] KVM: selftests: Print (previous) last_page on dirty page value mismatch Print out the last dirty pages from the current and previous iteration on verification failures. In many cases, bugs (especially test bugs) occur on the edges, i.e. on or near the last pages, and being able to correlate failures with the last pages can aid in debug. Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index d7cf1840bd80..fe8cc7f77e22 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -566,8 +566,10 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } } - TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu)", - page, val, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { nr_clean_pages++; /* @@ -590,9 +592,10 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) * value "iteration-1". */ TEST_ASSERT(val <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, val, iteration); + "Clear page %lu value (%lu) > iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); if (val == iteration) { /* * This page is _just_ modified; it -- 2.51.0 From d0bd72cb916072f88d9eda5ee2a7f85a999af404 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:29:58 -0800 Subject: [PATCH 03/16] KVM: selftests: Collect *all* dirty entries in each dirty_log_test iteration Collect all dirty entries during each iteration of dirty_log_test by doing a final collection after the vCPU has been stopped. To deal with KVM's destructive approach to getting the dirty bitmaps, use a second bitmap for the post-stop collection. Collecting all entries that were dirtied during an iteration simplifies the verification logic *and* improves test coverage. - If a page is written during iteration X, but not seen as dirty until X+1, the test can get a false pass if the page is also written during X+1. - If a dirty page used a stale value from a previous iteration, the test would grant a false pass. - If a missed dirty log occurs in the last iteration, the test would fail to detect the issue. E.g. modifying mark_page_dirty_in_slot() to dirty an unwritten gfn: if (memslot && kvm_slot_dirty_track_enabled(memslot)) { unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; if (!vcpu->extra_dirty && gfn_to_memslot(kvm, gfn + 1) == memslot) { vcpu->extra_dirty = true; mark_page_dirty_in_slot(kvm, memslot, gfn + 1); } if (kvm->dirty_ring_size && vcpu) kvm_dirty_ring_push(vcpu, slot, rel_gfn); else if (memslot->dirty_bitmap) set_bit_le(rel_gfn, memslot->dirty_bitmap); } isn't detected with the current approach, even with an interval of 1ms (when running nested in a VM; bare metal would be even *less* likely to detect the bug due to the vCPU being able to dirty more memory). Whereas collecting all dirty entries consistently detects failures with an interval of 700ms or more (the longer interval means a higher probability of an actual write to the prematurely-dirtied page). Link: https://lore.kernel.org/r/20250111003004.1235645-15-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 149 ++++++------------- 1 file changed, 45 insertions(+), 104 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index fe8cc7f77e22..3a4e411353d7 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -134,7 +134,6 @@ static uint64_t host_num_pages; /* For statistics only */ static uint64_t host_dirty_count; static uint64_t host_clear_count; -static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; @@ -422,15 +421,6 @@ struct log_mode { }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -491,79 +481,52 @@ static void *vcpu_worker(void *data) return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; uint64_t step = vm_num_host_pages(mode, 1); - uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } - - if (__test_and_clear_bit_le(page, bmap)) { + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - if (val == iteration || val == iteration - 1) + if (val == iteration) continue; if (host_log_mode == LOG_MODE_DIRTY_RING) { - if (val == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! "1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; - continue; - } else if (page == dirty_ring_last_page || - page == dirty_ring_prev_iteration_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + /* + * The last page in the ring from this iteration + * or the previous can be written with the value + * from the previous iteration (relative to the + * last page's iteration), as the value to be + * written may be cached in a CPU register. + */ + if (page == dirty_ring_last_page || + page == dirty_ring_prev_iteration_last_page) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " @@ -574,36 +537,13 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(val <= iteration, - "Clear page %lu value (%lu) > iteration (%lu) " + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " "(last = %lu, prev_last = %lu)", page, val, iteration, dirty_ring_last_page, dirty_ring_prev_iteration_last_page); - if (val == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } } } @@ -639,7 +579,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long *bmap[2]; uint32_t ring_buf_idx = 0; int sem_val; @@ -695,8 +635,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -723,7 +663,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); /* @@ -774,7 +713,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) continue; log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, + bmap[0], host_num_pages, &ring_buf_idx); } @@ -804,6 +743,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. */ + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); /* @@ -824,12 +766,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } -- 2.51.0 From 485e27ed208f0d9667608c0e3fa9440654e01399 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:29:59 -0800 Subject: [PATCH 04/16] KVM: sefltests: Verify value of dirty_log_test last page isn't bogus Add a sanity check that a completely garbage value wasn't written to the last dirty page in the ring, e.g. that it doesn't contain the *next* iteration's value. Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-16-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3a4e411353d7..500257b712e3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -514,8 +514,9 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) * last page's iteration), as the value to be * written may be cached in a CPU register. */ - if (page == dirty_ring_last_page || - page == dirty_ring_prev_iteration_last_page) + if ((page == dirty_ring_last_page || + page == dirty_ring_prev_iteration_last_page) && + val < iteration) continue; } else if (!val && iteration == 1 && bmap0_dirty) { /* -- 2.51.0 From 73eaa2aa14b73fde5a160785b49d99c975bc9609 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:30:00 -0800 Subject: [PATCH 05/16] KVM: selftests: Ensure guest writes min number of pages in dirty_log_test Ensure the vCPU fully completes at least one write in each dirty_log_test iteration, as failure to dirty any pages complicates verification and forces the test to be overly conservative about possible values. E.g. verification needs to allow the last dirty page from a previous iteration to have *any* value, because the vCPU could get stuck for multiple iterations, which is unlikely but can happen in heavily overloaded and/or nested virtualization setups. Somewhat arbitrarily set the minimum to 0x100/256; high enough to be interesting, but not so high as to lead to pointlessly long runtimes. Opportunistically report the number of writes per iteration for debug purposes, and so that a human can sanity check the test. Due to each write targeting a random page, the number of dirty pages will likely be lower than the number of total writes, but it shouldn't be absurdly lower (which would suggest the pRNG is broken) Reported-by: Maxim Levitsky Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-17-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 40 ++++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 500257b712e3..c6b843ec8e0c 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -37,6 +37,12 @@ /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. + */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -72,6 +78,7 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; static uint64_t iteration; +static uint64_t nr_writes; static bool vcpu_stop; /* @@ -107,6 +114,7 @@ static void guest_code(void) for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } #endif @@ -118,6 +126,7 @@ static void guest_code(void) addr = align_down(addr, host_page_size); vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } GUEST_SYNC(1); @@ -548,8 +557,8 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) } } - pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu\n", - iteration, nr_dirty_pages, nr_clean_pages); + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); host_dirty_count += nr_dirty_pages; host_clear_count += nr_clean_pages; @@ -665,6 +674,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_dirty_count = 0; host_clear_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. @@ -683,10 +694,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) dirty_ring_prev_iteration_last_page = dirty_ring_last_page; - /* Give the vcpu thread some time to dirty some pages */ - for (i = 0; i < p->interval; i++) { + /* + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. + */ + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ usleep(1000); + sync_global_from_guest(vm, nr_writes); + /* * Reap dirty pages while the guest is running so that * dirty ring full events are resolved, i.e. so that a @@ -737,6 +760,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) WRITE_ONCE(vcpu_stop, false); sync_global_to_guest(vm, vcpu_stop); + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -760,6 +789,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) WRITE_ONCE(host_quit, true); sync_global_to_guest(vm, iteration); + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); sem_post(&sem_vcpu_cont); -- 2.51.0 From 2020d3b77a5a40a133e18fb4c870e9fe384405fd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:30:01 -0800 Subject: [PATCH 06/16] KVM: selftests: Tighten checks around prev iter's last dirty page in ring Now that each iteration collects all dirty entries and ensures the guest *completes* at least one write, tighten the exemptions for the last dirty page of the previous iteration. Specifically, the only legal value (other than the current iteration) is N-1. Unlike the last page for the current iteration, the in-progress write from the previous iteration is guaranteed to have completed, otherwise the test would have hung. Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-18-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index c6b843ec8e0c..e9854b5a28f1 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -517,14 +517,22 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) if (host_log_mode == LOG_MODE_DIRTY_RING) { /* - * The last page in the ring from this iteration - * or the previous can be written with the value - * from the previous iteration (relative to the - * last page's iteration), as the value to be - * written may be cached in a CPU register. + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. */ - if ((page == dirty_ring_last_page || - page == dirty_ring_prev_iteration_last_page) && + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) + continue; + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && val < iteration) continue; } else if (!val && iteration == 1 && bmap0_dirty) { -- 2.51.0 From 2680dcfb34e2c9ac88bff4e01d7bf366aa9976c0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:30:02 -0800 Subject: [PATCH 07/16] KVM: selftests: Set per-iteration variables at the start of each iteration Set the per-iteration variables at the start of each iteration instead of setting them before the loop, and at the end of each iteration. To ensure the vCPU doesn't race ahead before the first iteration, simply have the vCPU worker want for sem_vcpu_cont, which conveniently avoids the need to special case posting sem_vcpu_cont from the loop. Link: https://lore.kernel.org/r/20250111003004.1235645-19-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 43 ++++++++------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index e9854b5a28f1..40567257ebea 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -481,6 +481,8 @@ static void *vcpu_worker(void *data) { struct kvm_vcpu *vcpu = data; + sem_wait(&sem_vcpu_cont); + while (!READ_ONCE(host_quit)) { /* Let the guest dirty the random pages */ vcpu_run(vcpu); @@ -675,15 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); - WRITE_ONCE(nr_writes, 0); - sync_global_to_guest(vm, nr_writes); + WRITE_ONCE(host_quit, false); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. @@ -695,12 +691,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) sem_getvalue(&sem_vcpu_cont, &sem_val); TEST_ASSERT_EQ(sem_val, 0); + TEST_ASSERT_EQ(vcpu_stop, false); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { + for (iteration = 1; iteration < p->iterations; iteration++) { unsigned long i; + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); /* * Let the vCPU run beyond the configured interval until it has @@ -785,26 +791,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) bmap[1], host_num_pages, &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - - /* - * Set host_quit before sem_vcpu_cont in the final iteration to - * ensure that the vCPU worker doesn't resume the guest. As - * above, the dirty ring test may stop and wait even when not - * explicitly request to do so, i.e. would hang waiting for a - * "continue" if it's allowed to resume the guest. - */ - if (++iteration == p->iterations) - WRITE_ONCE(host_quit, true); - sync_global_to_guest(vm, iteration); - - WRITE_ONCE(nr_writes, 0); - sync_global_to_guest(vm, nr_writes); - - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); - - sem_post(&sem_vcpu_cont); } + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", -- 2.51.0 From 7f225650e0991c7b9fdfc1524e0381ba61039730 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:30:03 -0800 Subject: [PATCH 08/16] KVM: selftests: Fix an off-by-one in the number of dirty_log_test iterations Actually run all requested iterations, instead of iterations-1 (the count starts at '1' due to the need to avoid '0' as an in-memory value for a dirty page). Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20250111003004.1235645-20-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 40567257ebea..79a7ee189f28 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -695,7 +695,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - for (iteration = 1; iteration < p->iterations; iteration++) { + for (iteration = 1; iteration <= p->iterations; iteration++) { unsigned long i; sync_global_to_guest(vm, iteration); -- 2.51.0 From dae7d81e8d5819bb47d63918c98539d9666a45d1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:30:04 -0800 Subject: [PATCH 09/16] KVM: selftests: Allow running a single iteration of dirty_log_test Now that dirty_log_test doesn't require running multiple iterations to verify dirty pages, and actually runs the requested number of iterations, drop the requirement that the test run at least "3" (which was really "2" at the time the test was written) iterations. Link: https://lore.kernel.org/r/20250111003004.1235645-21-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 79a7ee189f28..23593d9eeba9 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -886,7 +886,7 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", -- 2.51.0 From fd546aba1967bb05ddb569272f56abb75f604bd7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:41 -0800 Subject: [PATCH 10/16] KVM: selftests: Fix mostly theoretical leak of VM's binary stats FD When allocating and freeing a VM's cached binary stats info, check for a NULL descriptor, not a '0' file descriptor, as '0' is a legal FD. E.g. in the unlikely scenario the kernel installs the stats FD at entry '0', selftests would reallocate on the next __vm_get_stat() and/or fail to free the stats in kvm_vm_free(). Fixes: 83f6e109f562 ("KVM: selftests: Cache binary stats metadata for duration of test") Link: https://lore.kernel.org/r/20250111005049.1247555-2-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 33fefeb3ca44..91d295ef5d02 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -749,7 +749,7 @@ void kvm_vm_free(struct kvm_vm *vmp) return; /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { + if (vmp->stats_desc) { free(vmp->stats_desc); close(vmp->stats_fd); } @@ -2218,7 +2218,7 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, size_t size_desc; int i; - if (!vm->stats_fd) { + if (!vm->stats_desc) { vm->stats_fd = vm_get_stats_fd(vm); read_stats_header(vm->stats_fd, &vm->stats_header); vm->stats_desc = read_stats_descriptors(vm->stats_fd, -- 2.51.0 From f7f232a01f3d26a5bd67a4309ea54640f625dbe5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:42 -0800 Subject: [PATCH 11/16] KVM: selftests: Close VM's binary stats FD when releasing VM Close/free a VM's binary stats cache when the VM is released, not when the VM is fully freed. When a VM is re-created, e.g. for state save/restore tests, the stats FD and descriptor points at the old, defunct VM. The FD is still valid, in that the underlying stats file won't be freed until the FD is closed, but reading stats will always pull information from the old VM. Note, this is a benign bug in the current code base as none of the tests that recreate VMs use binary stats. Fixes: 83f6e109f562 ("KVM: selftests: Cache binary stats metadata for duration of test") Link: https://lore.kernel.org/r/20250111005049.1247555-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 91d295ef5d02..9138801ecb60 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -709,6 +709,15 @@ void kvm_vm_release(struct kvm_vm *vmp) ret = close(vmp->kvm_fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + + /* Free cached stats metadata and close FD */ + if (vmp->stats_desc) { + free(vmp->stats_desc); + vmp->stats_desc = NULL; + + ret = close(vmp->stats_fd); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + } } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -748,12 +757,6 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_desc) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region); -- 2.51.0 From eead13d493af0c2d3b8025da4acb2a9ef854a26a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:43 -0800 Subject: [PATCH 12/16] KVM: selftests: Assert that __vm_get_stat() actually finds a stat Fail the test if it attempts to read a stat that doesn't exist, e.g. due to a typo (hooray, strings), or because the test tried to get a stat for the wrong scope. As is, there's no indiciation of failure and @data is left untouched, e.g. holds '0' or random stack data in most cases. Fixes: 8448ec5993be ("KVM: selftests: Add NX huge pages test") Link: https://lore.kernel.org/r/20250111005049.1247555-4-seanjc@google.com [sean: fixup spelling mistake, courtesy of Colin Ian King] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9138801ecb60..e3cb3ee74491 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2238,9 +2238,10 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, read_stat_data(vm->stats_fd, &vm->stats_header, desc, data, max_elements); - - break; + return; } + + TEST_FAIL("Unable to find stat '%s'", stat_name); } __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) -- 2.51.0 From b0c3f5df92913de6b9936cb0707cf1c8cdad66a2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:44 -0800 Subject: [PATCH 13/16] KVM: selftests: Macrofy vm_get_stat() to auto-generate stat name string Turn vm_get_stat() into a macro that generates a string for the stat name, as opposed to taking a string. This will allow hardening stat usage in the future to generate errors on unknown stats at compile time. No functional change intended. Link: https://lore.kernel.org/r/20250111005049.1247555-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 14 +++++++------- .../kvm/x86/dirty_log_page_splitting_test.c | 6 +++--- .../testing/selftests/kvm/x86/nx_huge_pages_test.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 4c4e5a847f67..044c2231431e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -534,13 +534,13 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, size_t max_elements); -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; - - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} +#define vm_get_stat(vm, stat) \ +({ \ + uint64_t data; \ + \ + __vm_get_stat(vm, #stat, &data, 1); \ + data; \ +}) void vm_create_irqchip(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..b0d2b04a7ff2 100644 --- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -41,9 +41,9 @@ struct kvm_page_stats { static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..c0d84827f736 100644 --- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. Expected %d, got %d", -- 2.51.0 From e65faf71bd54ac957aed3c2e81a964fea63f3e82 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:45 -0800 Subject: [PATCH 14/16] KVM: selftests: Add struct and helpers to wrap binary stats cache Add a struct and helpers to manage the binary stats cache, which is currently used only for VM-scoped stats. This will allow expanding the selftests infrastructure to provide support for vCPU-scoped binary stats, which, except for the ioctl to get the stats FD are identical to VM-scoped stats. Defer converting __vm_get_stat() to a scope-agnostic helper to a future patch, as getting the stats FD from KVM needs to be moved elsewhere before it can be made completely scope-agnostic. Link: https://lore.kernel.org/r/20250111005049.1247555-6-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util.h | 11 +++-- tools/testing/selftests/kvm/lib/kvm_util.c | 47 +++++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 044c2231431e..9a64bab42f89 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -46,6 +46,12 @@ struct userspace_mem_region { struct hlist_node slot_node; }; +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + struct kvm_vcpu { struct list_head list; uint32_t id; @@ -99,10 +105,7 @@ struct kvm_vm { struct kvm_vm_arch arch; - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; + struct kvm_binary_stats stats; /* * KVM region slots. These are the default memslots used by page diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e3cb3ee74491..6729268c8160 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -657,6 +657,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + int ret; + + if (!stats->desc) + return; + + free(stats->desc); + stats->desc = NULL; + + ret = close(stats->fd); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -711,13 +725,7 @@ void kvm_vm_release(struct kvm_vm *vmp) TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); /* Free cached stats metadata and close FD */ - if (vmp->stats_desc) { - free(vmp->stats_desc); - vmp->stats_desc = NULL; - - ret = close(vmp->stats_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); - } + kvm_stats_release(&vmp->stats); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -2214,34 +2222,33 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, * * Read the data values of a specified stat from the binary stats interface. */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, +void __vm_get_stat(struct kvm_vm *vm, const char *name, uint64_t *data, size_t max_elements) { + struct kvm_binary_stats *stats = &vm->stats; struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_desc) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + stats->fd = vm_get_stats_fd(vm); + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); return; } - TEST_FAIL("Unable to find stat '%s'", stat_name); + TEST_FAIL("Unable to find stat '%s'", name); } __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) -- 2.51.0 From ea7179f99514f8119c6327d7c3b84a439a273533 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:46 -0800 Subject: [PATCH 15/16] KVM: selftests: Get VM's binary stats FD when opening VM Get and cache a VM's binary stats FD when the VM is opened, as opposed to waiting until the stats are first used. Opening the stats FD outside of __vm_get_stat() will allow converting it to a scope-agnostic helper. Note, this doesn't interfere with kvm_binary_stats_test's testcase that verifies a stats FD can be used after its own VM's FD is closed, as the cached FD is also closed during kvm_vm_free(). Link: https://lore.kernel.org/r/20250111005049.1247555-7-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6729268c8160..8cacb7fd1ad9 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -196,6 +196,11 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } const char *vm_guest_mode_string(uint32_t i) @@ -661,14 +666,17 @@ static void kvm_stats_release(struct kvm_binary_stats *stats) { int ret; - if (!stats->desc) + if (stats->fd < 0) return; - free(stats->desc); - stats->desc = NULL; + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } ret = close(stats->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + stats->fd = -1; } __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) @@ -2231,7 +2239,6 @@ void __vm_get_stat(struct kvm_vm *vm, const char *name, uint64_t *data, int i; if (!stats->desc) { - stats->fd = vm_get_stats_fd(vm); read_stats_header(stats->fd, &stats->header); stats->desc = read_stats_descriptors(stats->fd, &stats->header); } -- 2.51.0 From 9b56532b8a593c3a3cbbd8b17fb87ffb4beee436 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 10 Jan 2025 16:50:47 -0800 Subject: [PATCH 16/16] KVM: selftests: Adjust number of files rlimit for all "standard" VMs Move the max vCPUs test's RLIMIT_NOFILE adjustments to common code, and use the new helper to adjust the resource limit for non-barebones VMs by default. x86's recalc_apic_map_test creates 512 vCPUs, and a future change will open the binary stats fd for all vCPUs, which will put the recalc APIC test above some distros' default limit of 1024. Link: https://lore.kernel.org/r/20250111005049.1247555-8-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util.h | 2 ++ .../selftests/kvm/kvm_create_max_vcpus.c | 28 +-------------- tools/testing/selftests/kvm/lib/kvm_util.c | 34 +++++++++++++++++++ 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 9a64bab42f89..d4670b5962ab 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -966,6 +966,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_set_files_rlimit(uint32_t nr_vcpus); + void kvm_pin_this_task_to_pcpu(uint32_t pcpu); void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index c78f34699f73..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "test_util.h" @@ -39,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8cacb7fd1ad9..faeeb576be90 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -411,6 +412,37 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } +void kvm_set_files_rlimit(uint32_t nr_vcpus) +{ + /* + * Number of file descriptors required, nr_vpucs vCPU fds + an arbitrary + * number for everything else. + */ + int nr_fds_wanted = nr_vcpus + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { @@ -420,6 +452,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, struct kvm_vm *vm; int i; + kvm_set_files_rlimit(nr_runnable_vcpus); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); -- 2.51.0