mm: Introduce for_each_valid_pfn() and use it from reserve_bootmem_region()
author: David Woodhouse <dwmw@amazon.co.uk>
Wed, 2 Apr 2025 17:02:50 +0000 (18:02 +0100)
committer: David Woodhouse <dwmw@amazon.co.uk>
Fri, 4 Apr 2025 13:45:08 +0000 (14:45 +0100)
Especially since commit 9092d4f7a1f8 ("memblock: update initialization
of reserved pages"), the reserve_bootmem_region() function can spend a
significant amount of time iterating over every 4KiB PFN in a range,
calling pfn_valid() on each one, and ultimately doing absolutely nothing.

On a platform used for virtualization, with large NOMAP regions that
eventually get used for guest RAM, this leads to a significant increase
in steal time experienced during kexec for a live update.

Introduce for_each_valid_pfn() and use it from reserve_bootmem_region().
This implementation is precisely the same naïve loop that the function
used to have, but subsequent commits will provide optimised versions
for FLATMEM and SPARSEMEM, and this version will remain for those
architectures which provide their own pfn_valid() implementation,
until/unless they also provide a matching for_each_valid_pfn().

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
include/linux/mmzone.h
mm/mm_init.c

index 25e80b2ca7f41aa26d2c625590b4e4272fc6000e..32ecb5cadbaf440c074a2833829330409c221224 100644 (file)
@@ -2176,6 +2176,16 @@ void sparse_init(void);
 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * Fallback case for when the architecture provides its own pfn_valid() but
+ * not a corresponding for_each_valid_pfn().
+ */
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn)                 \
+       for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++)      \
+               if (pfn_valid(_pfn))
+#endif
+
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
index a38a1909b407de6c3f7033b7edca9078c73d928a..7c699bad42ad0b7d49f3b05abf81cb1f1b5c7fe5 100644 (file)
@@ -777,22 +777,19 @@ static inline void init_deferred_page(unsigned long pfn, int nid)
 void __meminit reserve_bootmem_region(phys_addr_t start,
                                      phys_addr_t end, int nid)
 {
-       unsigned long start_pfn = PFN_DOWN(start);
-       unsigned long end_pfn = PFN_UP(end);
+       unsigned long pfn;
 
-       for (; start_pfn < end_pfn; start_pfn++) {
-               if (pfn_valid(start_pfn)) {
-                       struct page *page = pfn_to_page(start_pfn);
+       for_each_valid_pfn (pfn, PFN_DOWN(start), PFN_UP(end)) {
+               struct page *page = pfn_to_page(pfn);
 
-                       init_deferred_page(start_pfn, nid);
+               init_deferred_page(pfn, nid);
 
-                       /*
-                        * no need for atomic set_bit because the struct
-                        * page is not visible yet so nobody should
-                        * access it yet.
-                        */
-                       __SetPageReserved(page);
-               }
+               /*
+                * no need for atomic set_bit because the struct
+                * page is not visible yet so nobody should
+                * access it yet.
+                */
+               __SetPageReserved(page);
        }
 }