From ae8fd5b6666b0d99f485748b2b2259de1a7458dc Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Wed, 5 Mar 2025 14:27:31 -0800
Subject: [PATCH 01/16] mm/damon/sysfs-schemes: return error for attempts to
 install filters on the wrong sysfs directory

Return an error if the user tries to install a DAMOS filter in a DAMOS
filters sysfs directory that is assumed to be used for filters handled by a
DAMON layer different from the one that handles the filter being installed.

Link: https://lkml.kernel.org/r/20250305222733.59089-7-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
---
 mm/damon/sysfs-schemes.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index e4e36c34ec0e..1895d2d2c295 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -362,6 +362,23 @@ static ssize_t type_show(struct kobject *kobj,
 			damon_sysfs_scheme_filter_type_strs[filter->type]);
 }
 
+static bool damos_sysfs_scheme_filter_valid_type(
+		enum damos_sysfs_filter_handle_layer layer,
+		enum damos_filter_type type)
+{
+	switch (layer) {
+	case DAMOS_SYSFS_FILTER_HANDLE_LAYER_BOTH:
+		return true;
+	case DAMOS_SYSFS_FILTER_HANDLE_LAYER_CORE:
+		return !damos_filter_for_ops(type);
+	case DAMOS_SYSFS_FILTER_HANDLE_LAYER_OPS:
+		return damos_filter_for_ops(type);
+	default:
+		break;
+	}
+	return false;
+}
+
 static ssize_t type_store(struct kobject *kobj,
 		struct kobj_attribute *attr, const char *buf, size_t count)
 {
@@ -373,6 +390,9 @@ static ssize_t type_store(struct kobject *kobj,
 	for (type = 0; type < NR_DAMOS_FILTER_TYPES; type++) {
 		if (sysfs_streq(buf, damon_sysfs_scheme_filter_type_strs[
 					type])) {
+			if (!damos_sysfs_scheme_filter_valid_type(
+						filter->handle_layer, type))
+				break;
 			filter->type = type;
 			ret = count;
 			break;
-- 
2.50.1


From 899e4c14afa640b8f7374e162f2336b67f07123d Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Wed, 5 Mar 2025 14:27:32 -0800
Subject: [PATCH 02/16] Docs/ABI/damon: document {core,ops}_filters directories

Document the new DAMOS filters sysfs directories in the ABI document.

Link: https://lkml.kernel.org/r/20250305222733.59089-8-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
---
 Documentation/ABI/testing/sysfs-kernel-mm-damon | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index 76da77d7f7b6..293197f180ad 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -409,6 +409,22 @@ Description:	Writing 'Y' or 'N' to this file sets whether to allow or reject
 		applying the scheme's action to the memory that satisfies the
 		'type' and the 'matching' of the directory.
 
+What:		/sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//core_filters
+Date:		Feb 2025
+Contact:	SeongJae Park
+Description:	Directory for DAMON core layer-handled DAMOS filters.  Files
+		under this directory work the same as those of the
+		/sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters
+		directory.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//ops_filters
+Date:		Feb 2025
+Contact:	SeongJae Park
+Description:	Directory for DAMON operations set layer-handled DAMOS filters.
+		Files under this directory work the same as those of the
+		/sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//filters
+		directory.
+
 What:		/sys/kernel/mm/damon/admin/kdamonds//contexts//schemes//stats/nr_tried
 Date:		Mar 2022
 Contact:	SeongJae Park
-- 
2.50.1


From 114b480877698f7835a5ba95c6fbd97b63b119f6 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Wed, 5 Mar 2025 14:27:33 -0800
Subject: [PATCH 03/16] Docs/admin-guide/mm/damon/usage: update for
 {core,ops}_filters directories

Document the {core,ops}_filters directories in the usage document.

Link: https://lkml.kernel.org/r/20250305222733.59089-9-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
---
 Documentation/admin-guide/mm/damon/usage.rst | 31 ++++++++++++++------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index de549dd18107..ced2013db3df 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -83,7 +83,7 @@ comma (",").
     │ │ │ │ │ │ │ │ :ref:`goals `/nr_goals
     │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value
     │ │ │ │ │ │ │ :ref:`watermarks `/metric,interval_us,high,mid,low
-    │ │ │ │ │ │ │ :ref:`filters `/nr_filters
+    │ │ │ │ │ │ │ :ref:`{core_,ops_,}filters `/nr_filters
     │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max
     │ │ │ │ │ │ │ :ref:`stats `/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
     │ │ │ │ │ │ │ :ref:`tried_regions `/total_bytes
@@ -307,9 +307,10 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme.
 schemes//
 ------------
 
-In each scheme directory, five directories (``access_pattern``, ``quotas``,
-``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and three files
-(``action``, ``target_nid`` and ``apply_interval``) exist.
+In each scheme directory, eight directories (``access_pattern``, ``quotas``,
+``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``stats``, and
+``tried_regions``) and three files (``action``, ``target_nid`` and
+``apply_interval``) exist.
 
 The ``action`` file is for setting and getting the scheme's
 :ref:`action `.  The keywords that can be written to and read
@@ -420,13 +421,24 @@ The ``interval`` should written in microseconds unit.
 
 .. _sysfs_filters:
 
-schemes//filters/
---------------------
+schemes//{core\_,ops\_,}filters/
+-----------------------------------
 
-The directory for the :ref:`filters ` of the given
+Directories for the :ref:`filters ` of the given
 DAMON-based operation scheme.
 
-In the beginning, this directory has only one file, ``nr_filters``.  Writing a
+The ``core_filters`` and ``ops_filters`` directories are for filters handled by
+the DAMON core layer and the operations set layer, respectively.  The ``filters``
+directory can be used to install filters regardless of the handling layer.
+Filters requested via ``core_filters`` and ``ops_filters`` are installed before
+those of ``filters``.  All three directories have the same files.
+
+Using the ``filters`` directory can make the expected evaluation order of the
+given filters somewhat confusing.  Users are hence recommended to use the
+``core_filters`` and ``ops_filters`` directories instead.  The ``filters``
+directory could be deprecated in the future.
+
+In the beginning, the directory has only one file, ``nr_filters``.  Writing a
 number (``N``) to the file creates the number of child directories named ``0``
 to ``N-1``.  Each directory represents each filter.  The filters are evaluated
 in the numeric order.
@@ -435,7 +447,7 @@ Each filter directory contains nine files, namely ``type``, ``matching``,
 ``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max`` and
 ``target_idx``.  To ``type`` file, you can write the type of the filter.
 Refer to :ref:`the design doc ` for available type
-names and their meanings.
+names, their meanings, and the layer at which each type is handled.
 
 For ``memcg`` type, you can specify the memory cgroup of the interest by
 writing the path of the memory cgroup from the cgroups mount point to
@@ -455,6 +467,7 @@ the ``type`` and ``matching`` should be allowed or not. For example, below
 restricts a DAMOS action to be applied to only non-anonymous pages of all
 memory cgroups except ``/having_care_already``.::
 
+    # cd ops_filters/0/
     # echo 2 > nr_filters
     # # disallow anonymous pages
     echo anon > 0/type
-- 
2.50.1


From 2273dea6b1e1fcdd06d207048a2cd563ed80111a Mon Sep 17 00:00:00 2001
From: Liu Shixin
Date: Wed, 5 Mar 2025 11:54:09 +0800
Subject: [PATCH 04/16] mm/hugetlb: update nr_huge_pages and surplus_huge_pages
 together

In alloc_surplus_hugetlb_folio(), we increase nr_huge_pages and
surplus_huge_pages separately.  In the window between the two updates, if
nr_hugepages is set to a smaller value and count < persistent_huge_pages(h)
is satisfied, surplus_huge_pages will be increased by adjust_pool_surplus().

After adding a delay in that window, the problem can be reproduced easily
with the following steps:

1. echo 3 > /proc/sys/vm/nr_overcommit_hugepages
2. mmap two hugepages.  When nr_huge_pages=2 and surplus_huge_pages=1, go to
   step 3.
3. echo 0 > /proc/sys/vm/nr_huge_pages

Finally, nr_huge_pages is less than surplus_huge_pages.

To fix the problem, call only_alloc_fresh_hugetlb_folio() instead and move
__prep_account_new_huge_page() down into the hugetlb_lock.

Link: https://lkml.kernel.org/r/20250305035409.2391344-1-liushixin2@huawei.com
Fixes: 0c397daea1d4 ("mm, hugetlb: further simplify hugetlb allocation API")
Signed-off-by: Liu Shixin
Acked-by: Peter Xu
Acked-by: Oscar Salvador
Cc: David Hildenbrand
Cc: Kefeng Wang
Cc: Liu Shixin
Cc: Muchun Song
Signed-off-by: Andrew Morton
---
 mm/hugetlb.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 438de55dd38d..af9b8c1fca67 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2259,11 +2259,20 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
+	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
 	if (!folio)
 		return NULL;
 
+	hugetlb_vmemmap_optimize_folio(h, folio);
+
 	spin_lock_irq(&hugetlb_lock);
+	/*
+	 * nr_huge_pages needs to be adjusted within the same lock cycle
+	 * as surplus_pages, otherwise it might confuse
+	 * persistent_huge_pages() momentarily.
+	 */
+	__prep_account_new_huge_page(h, nid);
+
 	/*
 	 * We could have raced with the pool size change.
 	 * Double check that and simply deallocate the new page
-- 
2.50.1


From ff22f9299d7b2c7874b560993c21543708b7e1b6 Mon Sep 17 00:00:00 2001
From: Nhat Pham
Date: Thu, 6 Mar 2025 12:50:10 -0800
Subject: [PATCH 05/16] page_io: zswap: do not crash the kernel on
 decompression failure

Currently, we crash the kernel when a decompression failure occurs in zswap
(either because of memory corruption, or a bug in the compression algorithm).
This is overkill.  We should only SIGBUS the unfortunate process asking for
the zswap entry on zswap load, and skip the corrupted entry in zswap
writeback.
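As an illustration only (not part of this patch): a minimal sketch of how a
swap-in caller is expected to act on the new zswap_load() return convention
described below.  The surrounding code and the "finish" label are assumptions
for the example, mirroring the mm/page_io.c hunk later in this patch:

    /*
     * Anything other than -ENOENT means zswap owned the entry: on success
     * the folio is uptodate; on -EIO/-EINVAL it is unlocked but left
     * !uptodate, so the faulting task gets SIGBUS instead of the kernel
     * crashing.  Only -ENOENT falls through to the backing device.
     */
    if (zswap_load(folio) != -ENOENT)
            goto finish;

    /* Not in zswap: read from the slower backing swap device. */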
See [1] for a recent upstream discussion about this. The zswap writeback case is relatively straightforward to fix. For the zswap_load() case, we change the return behavior: * Return 0 on success. * Return -ENOENT (with the folio locked) if zswap does not own the swapped out content. * Return -EIO if zswap owns the swapped out content, but encounters a decompression failure for some reasons. The folio will be unlocked, but not be marked up-to-date, which will eventually cause the process requesting the page to SIGBUS (see the handling of not-up-to-date folio in do_swap_page() in mm/memory.c), without crashing the kernel. * Return -EINVAL if we encounter a large folio, as large folio should not be swapped in while zswap is being used. Similar to the -EIO case, we also unlock the folio but do not mark it as up-to-date to SIGBUS the faulting process. As a side effect, we require one extra zswap tree traversal in the load and writeback paths. Quick benchmarking on a kernel build test shows no performance difference: With the new scheme: real: mean: 125.1s, stdev: 0.12s user: mean: 3265.23s, stdev: 9.62s sys: mean: 2156.41s, stdev: 13.98s The old scheme: real: mean: 125.78s, stdev: 0.45s user: mean: 3287.18s, stdev: 5.95s sys: mean: 2177.08s, stdev: 26.52s [nphamcs@gmail.com: fix documentation of zswap_load()] Link: https://lkml.kernel.org/r/20250306222453.1269456-1-nphamcs@gmail.com Link: https://lore.kernel.org/all/ZsiLElTykamcYZ6J@casper.infradead.org/ [1] Link: https://lkml.kernel.org/r/20250306205011.784787-1-nphamcs@gmail.com Signed-off-by: Nhat Pham Suggested-by: Matthew Wilcox Suggested-by: Yosry Ahmed Suggested-by: Johannes Weiner Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Signed-off-by: Andrew Morton --- include/linux/zswap.h | 6 +-- mm/page_io.c | 6 +-- mm/zswap.c | 119 +++++++++++++++++++++++++++++------------- 3 files changed, 88 insertions(+), 43 deletions(-) diff --git a/include/linux/zswap.h b/include/linux/zswap.h index d961ead91bf1..30c193a1207e 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -26,7 +26,7 @@ struct zswap_lruvec_state { unsigned long zswap_total_pages(void); bool zswap_store(struct folio *folio); -bool zswap_load(struct folio *folio); +int zswap_load(struct folio *folio); void zswap_invalidate(swp_entry_t swp); int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); @@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio) return false; } -static inline bool zswap_load(struct folio *folio) +static inline int zswap_load(struct folio *folio) { - return false; + return -ENOENT; } static inline void zswap_invalidate(swp_entry_t swp) {} diff --git a/mm/page_io.c b/mm/page_io.c index 9b983de351f9..4bce19df557b 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -638,11 +638,11 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug) if (swap_read_folio_zeromap(folio)) { folio_unlock(folio); goto finish; - } else if (zswap_load(folio)) { - folio_unlock(folio); - goto finish; } + if (zswap_load(folio) != -ENOENT) + goto finish; + /* We have to read from slower devices. Increase zswap protection. 
*/ zswap_folio_swapin(folio); diff --git a/mm/zswap.c b/mm/zswap.c index 5f0e62289444..0dcc54eab58b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -62,6 +62,8 @@ static u64 zswap_reject_reclaim_fail; static u64 zswap_reject_compress_fail; /* Compressed page was too big for the allocator to (optimally) store */ static u64 zswap_reject_compress_poor; +/* Load or writeback failed due to decompression failure */ +static u64 zswap_decompress_fail; /* Store failed because underlying allocator could not get memory */ static u64 zswap_reject_alloc_fail; /* Store failed because the entry metadata could not be allocated (rare) */ @@ -985,11 +987,12 @@ unlock: return comp_ret == 0 && alloc_ret == 0; } -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) +static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) { struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; + int decomp_ret, dlen; u8 *src, *obj; acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool); @@ -1012,11 +1015,21 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); - BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); - BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); + decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); + dlen = acomp_ctx->req->dlen; zpool_obj_read_end(zpool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); + + if (!decomp_ret && dlen == PAGE_SIZE) + return true; + + zswap_decompress_fail++; + pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n", + swp_type(entry->swpentry), + swp_offset(entry->swpentry), + entry->pool->tfm_name, entry->length, dlen); + return false; } /********************************* @@ -1046,6 +1059,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; + int ret = 0; /* try to allocate swap cache folio */ si = get_swap_device(swpentry); @@ -1067,8 +1081,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry, * and freed when invalidated by the concurrent shrinker anyway. */ if (!folio_was_allocated) { - folio_put(folio); - return -EEXIST; + ret = -EEXIST; + goto out; } /* @@ -1081,14 +1095,17 @@ static int zswap_writeback_entry(struct zswap_entry *entry, * be dereferenced. 
*/ tree = swap_zswap_tree(swpentry); - if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) { - delete_from_swap_cache(folio); - folio_unlock(folio); - folio_put(folio); - return -ENOMEM; + if (entry != xa_load(tree, offset)) { + ret = -ENOMEM; + goto out; } - zswap_decompress(entry, folio); + if (!zswap_decompress(entry, folio)) { + ret = -EIO; + goto out; + } + + xa_erase(tree, offset); count_vm_event(ZSWPWB); if (entry->objcg) @@ -1104,9 +1121,14 @@ static int zswap_writeback_entry(struct zswap_entry *entry, /* start writeback */ __swap_writepage(folio, &wbc); - folio_put(folio); - return 0; +out: + if (ret && ret != -EEXIST) { + delete_from_swap_cache(folio); + folio_unlock(folio); + } + folio_put(folio); + return ret; } /********************************* @@ -1606,7 +1628,27 @@ check_old: return ret; } -bool zswap_load(struct folio *folio) +/** + * zswap_load() - load a folio from zswap + * @folio: folio to load + * + * Return: 0 on success, with the folio unlocked and marked up-to-date, or one + * of the following error codes: + * + * -EIO: if the swapped out content was in zswap, but could not be loaded + * into the page due to a decompression failure. The folio is unlocked, but + * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page() + * will SIGBUS). + * + * -EINVAL: if the swapped out content was in zswap, but the page belongs + * to a large folio, which is not supported by zswap. The folio is unlocked, + * but NOT marked up-to-date, so that an IO error is emitted (e.g. + * do_swap_page() will SIGBUS). + * + * -ENOENT: if the swapped out content was not in zswap. The folio remains + * locked on return. + */ +int zswap_load(struct folio *folio) { swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); @@ -1617,18 +1659,32 @@ bool zswap_load(struct folio *folio) VM_WARN_ON_ONCE(!folio_test_locked(folio)); if (zswap_never_enabled()) - return false; + return -ENOENT; /* * Large folios should not be swapped in while zswap is being used, as * they are not properly handled. Zswap does not properly load large * folios, and a large folio may only be partially in zswap. - * - * Return true without marking the folio uptodate so that an IO error is - * emitted (e.g. do_swap_page() will sigbus). */ - if (WARN_ON_ONCE(folio_test_large(folio))) - return true; + if (WARN_ON_ONCE(folio_test_large(folio))) { + folio_unlock(folio); + return -EINVAL; + } + + entry = xa_load(tree, offset); + if (!entry) + return -ENOENT; + + if (!zswap_decompress(entry, folio)) { + folio_unlock(folio); + return -EIO; + } + + folio_mark_uptodate(folio); + + count_vm_event(ZSWPIN); + if (entry->objcg) + count_objcg_events(entry->objcg, ZSWPIN, 1); /* * When reading into the swapcache, invalidate our entry. The @@ -1642,27 +1698,14 @@ bool zswap_load(struct folio *folio) * files, which reads into a private page and may free it if * the fault fails. We remain the primary owner of the entry.) 
*/ - if (swapcache) - entry = xa_erase(tree, offset); - else - entry = xa_load(tree, offset); - - if (!entry) - return false; - - zswap_decompress(entry, folio); - - count_vm_event(ZSWPIN); - if (entry->objcg) - count_objcg_events(entry->objcg, ZSWPIN, 1); - if (swapcache) { - zswap_entry_free(entry); folio_mark_dirty(folio); + xa_erase(tree, offset); + zswap_entry_free(entry); } - folio_mark_uptodate(folio); - return true; + folio_unlock(folio); + return 0; } void zswap_invalidate(swp_entry_t swp) @@ -1757,6 +1800,8 @@ static int zswap_debugfs_init(void) zswap_debugfs_root, &zswap_reject_compress_fail); debugfs_create_u64("reject_compress_poor", 0444, zswap_debugfs_root, &zswap_reject_compress_poor); + debugfs_create_u64("decompress_fail", 0444, + zswap_debugfs_root, &zswap_decompress_fail); debugfs_create_u64("written_back_pages", 0444, zswap_debugfs_root, &zswap_written_back_pages); debugfs_create_file("pool_total_size", 0444, -- 2.50.1 From e11079dd25c525aaf238d81287851ef16a521ef3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:51 +0200 Subject: [PATCH 06/16] arm: mem_init: use memblock_phys_free() to free DMA memory on SA1111 Patch series "arch, mm: reduce code duplication in mem_init()", v2. Every architecture has implementation of mem_init() function and some even more than one. All these release free memory to the buddy allocator, most of them set high_memory to the end of directly addressable memory and many of them set max_mapnr for FLATMEM case. These patches pull the commonalities into the generic code and refactor some of the mem_init() implementations so that many of them can be just dropped. This patch (of 13): This will help to pull out memblock_free_all() to generic code. Link: https://lkml.kernel.org/r/20250313135003.836600-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20250313135003.836600-2-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: "Mike Rapoport (IBM)" Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/mm/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 5345d218899a..9aec1cb2386f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -277,14 +277,14 @@ void __init mem_init(void) set_max_mapnr(pfn_to_page(max_pfn) - mem_map); - /* this will put all unused low memory onto the freelists */ - memblock_free_all(); - #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); + memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); #endif + /* this will put all unused low memory onto the freelists */ + memblock_free_all(); + free_highpages(); /* -- 2.50.1 From 2b1d532e106ee63acb61a8e11608fafd75e52c4d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:52 +0200 Subject: [PATCH 07/16] csky: move setup_initrd() to setup.c Memory used by initrd should be reserved as soon as possible before there any memblock allocations that might overwrite that memory. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-3-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Guo Ren (csky) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/csky/kernel/setup.c | 43 ++++++++++++++++++++++++++++++++++++++++ arch/csky/mm/init.c | 43 ---------------------------------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index fe715b707fd0..e0d6ca86ea8c 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -12,6 +12,45 @@ #include #include +#ifdef CONFIG_BLK_DEV_INITRD +static void __init setup_initrd(void) +{ + unsigned long size; + + if (initrd_start >= initrd_end) { + pr_err("initrd not found or empty"); + goto disable; + } + + if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + pr_err("initrd extends beyond end of memory"); + goto disable; + } + + size = initrd_end - initrd_start; + + if (memblock_is_region_reserved(__pa(initrd_start), size)) { + pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", + __pa(initrd_start), size); + goto disable; + } + + memblock_reserve(__pa(initrd_start), size); + + pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), size); + + initrd_below_start_ok = 1; + + return; + +disable: + initrd_start = initrd_end = 0; + + pr_err(" - disabling initrd\n"); +} +#endif + static void __init csky_memblock_init(void) { unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); @@ -40,6 +79,10 @@ static void __init csky_memblock_init(void) max_low_pfn = min_low_pfn + sseg_size; } +#ifdef CONFIG_BLK_DEV_INITRD + setup_initrd(); +#endif + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; mmu_init(min_low_pfn, max_low_pfn); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index bde7cabd23df..ab51acbc19b2 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -42,45 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -#ifdef CONFIG_BLK_DEV_INITRD -static void __init setup_initrd(void) -{ - unsigned long size; - - if (initrd_start >= initrd_end) { - pr_err("initrd not found or empty"); - goto disable; - } - - if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { - pr_err("initrd extends beyond end of memory"); - goto disable; - } - - size = initrd_end - initrd_start; - - if (memblock_is_region_reserved(__pa(initrd_start), size)) { - pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", - __pa(initrd_start), size); - goto disable; - } - - memblock_reserve(__pa(initrd_start), size); - - pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", - (void *)(initrd_start), size); - - initrd_below_start_ok = 1; - - return; - -disable: - initrd_start = initrd_end = 0; - - pr_err(" - disabling initrd\n"); -} -#endif - void __init mem_init(void) { #ifdef CONFIG_HIGHMEM @@ -92,10 +53,6 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); -#ifdef CONFIG_BLK_DEV_INITRD - setup_initrd(); -#endif - memblock_free_all(); #ifdef CONFIG_HIGHMEM -- 2.50.1 From 30686816214b6062246e4918f3eadd1f55382425 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 
15:49:53 +0200 Subject: [PATCH 08/16] hexagon: move initialization of init_mm.context init to paging_init() This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-4-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/hexagon/mm/init.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 3458f39ca2ac..508bb6a8dcc9 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -59,14 +59,6 @@ void __init mem_init(void) * To-Do: someone somewhere should wipe out the bootmem map * after we're done? */ - - /* - * This can be moved to some more virtual-memory-specific - * initialization hook at some point. Set the init_mm - * descriptors "context" value to point to the initial - * kernel segment table's physical address. - */ - init_mm.context.ptbase = __pa(init_mm.pgd); } void sync_icache_dcache(pte_t pte) @@ -103,6 +95,12 @@ static void __init paging_init(void) free_area_init(max_zone_pfn); /* sets up the zonelists and mem_map */ + /* + * Set the init_mm descriptors "context" value to point to the + * initial kernel segment table's physical address. + */ + init_mm.context.ptbase = __pa(init_mm.pgd); + /* * Start of high memory area. Will probably need something more * fancy if we... get more fancy. -- 2.50.1 From 67e7a600869ca557e259f1ce20f8b89bb95ca97d Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:54 +0200 Subject: [PATCH 09/16] MIPS: consolidate mem_init() for NUMA machines Both MIPS systems that support numa (loongsoon3 and sgi-ip27) have identical mem_init() for NUMA case. Move that into arch/mips/mm/init.c and drop duplicate per-machine definitions. Link: https://lkml.kernel.org/r/20250313135003.836600-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/mips/loongson64/numa.c | 7 ------- arch/mips/mm/init.c | 7 +++++++ arch/mips/sgi-ip27/ip27-memory.c | 9 --------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 8388400d052f..95d5f553ce19 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -164,13 +164,6 @@ void __init paging_init(void) free_area_init(zones_size); } -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} - /* All PCI device belongs to logical Node-0 */ int pcibus_to_node(struct pci_bus *bus) { diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4583d1a2a73e..3db6082c611e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -482,6 +482,13 @@ void __init mem_init(void) 0x80000000 - 4, KCORE_TEXT); #endif } +#else /* CONFIG_NUMA */ +void __init mem_init(void) +{ + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + memblock_free_all(); + setup_zero_pages(); /* This comes from node 0 */ +} #endif /* !CONFIG_NUMA */ void free_init_pages(const char *what, unsigned long begin, unsigned long end) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 1963313f55d8..2b3e46e2e607 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -406,8 +406,6 @@ void __init prom_meminit(void) } } -extern void setup_zero_pages(void); - void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; @@ -416,10 +414,3 @@ void __init paging_init(void) zones_size[ZONE_NORMAL] = max_low_pfn; free_area_init(zones_size); } - -void __init mem_init(void) -{ - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); - setup_zero_pages(); /* This comes from node 0 */ -} -- 2.50.1 From e74e2b8eb424c26dff35727d242437edb87684aa Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:55 +0200 Subject: [PATCH 10/16] MIPS: make setup_zero_pages() use memblock Allocating the zero pages from memblock is simpler because the memory is already reserved. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-6-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/mips/include/asm/mmzone.h | 2 -- arch/mips/mm/init.c | 18 +++++------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h index 14226ea42036..602a21aee9d4 100644 --- a/arch/mips/include/asm/mmzone.h +++ b/arch/mips/include/asm/mmzone.h @@ -20,6 +20,4 @@ #define nid_to_addrbase(nid) 0 #endif -extern void setup_zero_pages(void); - #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 3db6082c611e..820e35a59d4d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -59,24 +59,16 @@ EXPORT_SYMBOL(zero_page_mask); /* * Not static inline because used by IP27 special magic initialization code */ -void setup_zero_pages(void) +static void __init setup_zero_pages(void) { - unsigned int order, i; - struct page *page; + unsigned int order; if (cpu_has_vce) order = 3; else order = 0; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Oh boy, that early out of memory?"); - - page = virt_to_page((void *)empty_zero_page); - split_page(page, order); - for (i = 0; i < (1 << order); i++, page++) - mark_page_reserved(page); + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -470,9 +462,9 @@ void __init mem_init(void) BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT)); maar_init(); - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ mem_init_free_highmem(); + memblock_free_all(); #ifdef CONFIG_64BIT if ((unsigned long) &_text > (unsigned long) CKSEG0) @@ -486,8 +478,8 @@ void __init mem_init(void) void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ + memblock_free_all(); } #endif /* !CONFIG_NUMA */ -- 2.50.1 From be971f957a80e2bbd7747a295886df1472803ff1 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:56 +0200 Subject: [PATCH 11/16] nios2: move pr_debug() about memory start and end to setup_arch() This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-7-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/nios2/kernel/setup.c | 2 ++ arch/nios2/mm/init.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index da122a5fa43b..a4cffbfc1399 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -149,6 +149,8 @@ void __init setup_arch(char **cmdline_p) memory_start = memblock_start_of_DRAM(); memory_end = memblock_end_of_DRAM(); + pr_debug("%s: start=%lx, end=%lx\n", __func__, memory_start, memory_end); + setup_initial_init_mm(_stext, _etext, _edata, _end); init_task.thread.kregs = &fake_regs; diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index a2278485de19..aa692ad30044 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -65,8 +65,6 @@ void __init mem_init(void) unsigned long end_mem = memory_end; /* this must not include kernel stack at top */ - pr_debug("mem_init: start=%lx, end=%lx\n", memory_start, memory_end); - end_mem &= PAGE_MASK; high_memory = __va(end_mem); -- 2.50.1 From 54ccf66f99d6d2630895eb13156be19a033d4566 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:57 +0200 Subject: [PATCH 12/16] s390: make setup_zero_pages() use memblock Allocating the zero pages from memblock is simpler because the memory is already reserved. This will also help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-8-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Heiko Carstens Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/s390/mm/init.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index d88cb1c13f7d..2b41dc9b1fa3 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -73,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -83,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -176,9 +165,10 @@ void __init mem_init(void) pv_init(); kfence_split_mapping(); + setup_zero_pages(); /* Setup zeroed pages. */ + /* this will put all low memory onto the freelists */ memblock_free_all(); - setup_zero_pages(); /* Setup zeroed pages. */ } unsigned long memory_block_size_bytes(void) -- 2.50.1 From d319c8b4918d24aea6fd90bd39cd5bc9fcf40859 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:58 +0200 Subject: [PATCH 13/16] xtensa: split out printing of virtual memory layout to a function This will help with pulling out memblock_free_all() to the generic code and reducing code duplication in arch::mem_init(). Link: https://lkml.kernel.org/r/20250313135003.836600-9-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Max Filippov Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Mark Brown Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/xtensa/mm/init.c | 97 ++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index b2587a1a7c46..01577d33e602 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -66,6 +66,55 @@ void __init bootmem_init(void) memblock_dump_all(); } +static void __init print_vm_layout(void) +{ + pr_info("virtual kernel memory layout:\n" +#ifdef CONFIG_KASAN + " kasan : 0x%08lx - 0x%08lx (%5lu MB)\n" +#endif +#ifdef CONFIG_MMU + " vmalloc : 0x%08lx - 0x%08lx (%5lu MB)\n" +#endif +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%5lu kB)\n" + " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" +#endif + " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n" + " .text : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .rodata : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .data : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .init : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .bss : 0x%08lx - 0x%08lx (%5lu kB)\n", +#ifdef CONFIG_KASAN + KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE, + KASAN_SHADOW_SIZE >> 20, +#endif +#ifdef CONFIG_MMU + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, + FIXADDR_START, FIXADDR_END, + (FIXADDR_END - FIXADDR_START) >> 10, +#endif + PAGE_OFFSET, PAGE_OFFSET + + (max_low_pfn - min_low_pfn) * PAGE_SIZE, +#else + min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE, +#endif + ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20, + (unsigned long)_text, (unsigned long)_etext, + (unsigned long)(_etext - _text) >> 10, + (unsigned long)__start_rodata, (unsigned long)__end_rodata, + (unsigned long)(__end_rodata - __start_rodata) >> 10, + (unsigned long)_sdata, (unsigned long)_edata, + (unsigned long)(_edata - _sdata) >> 10, + (unsigned long)__init_begin, (unsigned long)__init_end, + (unsigned long)(__init_end - __init_begin) >> 10, + (unsigned long)__bss_start, (unsigned long)__bss_stop, + (unsigned long)(__bss_stop - __bss_start) >> 10); +} void __init zones_init(void) { @@ -77,6 +126,7 @@ void __init zones_init(void) #endif }; free_area_init(max_zone_pfn); + print_vm_layout(); } static void __init free_highpages(void) @@ -118,53 +168,6 @@ void __init mem_init(void) high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); - - pr_info("virtual kernel memory layout:\n" -#ifdef CONFIG_KASAN - " kasan : 0x%08lx - 0x%08lx (%5lu MB)\n" -#endif -#ifdef CONFIG_MMU - " vmalloc : 0x%08lx - 0x%08lx (%5lu MB)\n" -#endif -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%5lu kB)\n" - " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" -#endif - " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n" - " .text : 0x%08lx - 0x%08lx (%5lu kB)\n" - " .rodata : 0x%08lx - 0x%08lx (%5lu kB)\n" - " .data : 0x%08lx - 0x%08lx (%5lu kB)\n" - " .init : 0x%08lx - 0x%08lx (%5lu kB)\n" - " .bss : 0x%08lx - 0x%08lx (%5lu kB)\n", -#ifdef CONFIG_KASAN - KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE, - KASAN_SHADOW_SIZE >> 
20, -#endif -#ifdef CONFIG_MMU - VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, - FIXADDR_START, FIXADDR_END, - (FIXADDR_END - FIXADDR_START) >> 10, -#endif - PAGE_OFFSET, PAGE_OFFSET + - (max_low_pfn - min_low_pfn) * PAGE_SIZE, -#else - min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE, -#endif - ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20, - (unsigned long)_text, (unsigned long)_etext, - (unsigned long)(_etext - _text) >> 10, - (unsigned long)__start_rodata, (unsigned long)__end_rodata, - (unsigned long)(__end_rodata - __start_rodata) >> 10, - (unsigned long)_sdata, (unsigned long)_edata, - (unsigned long)(_edata - _sdata) >> 10, - (unsigned long)__init_begin, (unsigned long)__init_end, - (unsigned long)(__init_end - __init_begin) >> 10, - (unsigned long)__bss_start, (unsigned long)__bss_stop, - (unsigned long)(__bss_stop - __bss_start) >> 10); } static void __init parse_memmap_one(char *p) -- 2.50.1 From 8268af309d07d1c6279080b4e6fd16ec75cc977c Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:49:59 +0200 Subject: [PATCH 14/16] arch, mm: set max_mapnr when allocating memory map for FLATMEM max_mapnr is essentially the size of the memory map for systems that use FLATMEM. There is no reason to calculate it in each and every architecture when it's anyway calculated in alloc_node_mem_map(). Drop setting of max_mapnr from architecture code and set it once in alloc_node_mem_map(). While on it, move definition of mem_map and max_mapnr to mm/mm_init.c so there won't be two copies for MMU and !MMU variants. Link: https://lkml.kernel.org/r/20250313135003.836600-10-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/mm/init.c | 1 - arch/arc/mm/init.c | 5 ----- arch/arm/mm/init.c | 2 -- arch/csky/mm/init.c | 4 ---- arch/loongarch/mm/init.c | 1 - arch/microblaze/mm/init.c | 4 ---- arch/mips/mm/init.c | 8 -------- arch/nios2/kernel/setup.c | 1 - arch/nios2/mm/init.c | 2 +- arch/openrisc/mm/init.c | 1 - arch/parisc/mm/init.c | 1 - arch/powerpc/kernel/setup-common.c | 2 -- arch/riscv/mm/init.c | 1 - arch/s390/mm/init.c | 1 - arch/sh/mm/init.c | 1 - arch/sparc/mm/init_32.c | 1 - arch/um/include/shared/mem_user.h | 1 - arch/um/kernel/physmem.c | 12 ------------ arch/um/kernel/um_arch.c | 1 - arch/x86/mm/init_32.c | 3 --- arch/xtensa/mm/init.c | 1 - include/asm-generic/memory_model.h | 5 +++-- include/linux/mm.h | 11 ----------- mm/memory.c | 8 -------- mm/mm_init.c | 25 +++++++++++++++++-------- mm/nommu.c | 4 ---- 26 files changed, 21 insertions(+), 86 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 61c2198b1359..ec0eeae9c653 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,7 +276,6 @@ srm_paging_stop (void) void __init mem_init(void) { - set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); memblock_free_all(); } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 6a71b23f1383..7ef883d58dc1 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -154,11 +154,6 @@ void __init setup_arch_memory(void) arch_pfn_offset = min(min_low_pfn, min_high_pfn); kmap_init(); - -#else /* CONFIG_HIGHMEM */ - /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */ - max_mapnr = max_low_pfn - min_low_pfn; - #endif /* CONFIG_HIGHMEM */ free_area_init(max_zone_pfn); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9aec1cb2386f..d4bcc745a044 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -275,8 +275,6 @@ void __init mem_init(void) swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); #endif - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); - #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index ab51acbc19b2..ba6694d6170a 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -46,10 +46,6 @@ void __init mem_init(void) { #ifdef CONFIG_HIGHMEM unsigned long tmp; - - set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET); -#else - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index ca5aa5f46a9f..00449df50db1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -78,7 +78,6 @@ void __init paging_init(void) void __init mem_init(void) { - max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 4520c5741579..857cd2b44bcf 100644 --- a/arch/microblaze/mm/init.c +++ 
b/arch/microblaze/mm/init.c @@ -104,17 +104,13 @@ void __init setup_memory(void) * * min_low_pfn - the first page (mm/bootmem.c - node_boot_start) * max_low_pfn - * max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn) */ /* memory start is from the kernel end (aligned) to higher addr */ min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */ - /* RAM is assumed contiguous */ - max_mapnr = memory_size >> PAGE_SHIFT; max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT; max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT; - pr_info("%s: max_mapnr: %#lx\n", __func__, max_mapnr); pr_info("%s: min_low_pfn: %#lx\n", __func__, min_low_pfn); pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 820e35a59d4d..eb61a73520a0 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -415,15 +415,7 @@ void __init paging_init(void) " %ldk highmem ignored\n", (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; - - max_mapnr = max_low_pfn; - } else if (highend_pfn) { - max_mapnr = highend_pfn; - } else { - max_mapnr = max_low_pfn; } -#else - max_mapnr = max_low_pfn; #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a4cffbfc1399..2a40150142c3 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -158,7 +158,6 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; find_limits(&min_low_pfn, &max_low_pfn, &max_pfn); - max_mapnr = max_low_pfn; memblock_reserve(__pa_symbol(_stext), _end - _stext); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index aa692ad30044..3cafa87ead9e 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -51,7 +51,7 @@ void __init paging_init(void) pagetable_init(); pgd_current = swapper_pg_dir; - max_zone_pfn[ZONE_NORMAL] = max_mapnr; + max_zone_pfn[ZONE_NORMAL] = max_low_pfn; /* pass the memory from the bootmem allocator to the main allocator */ free_area_init(max_zone_pfn); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d0cb1a0126f9..9093c336e158 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -193,7 +193,6 @@ void __init mem_init(void) { BUG_ON(!mem_map); - max_mapnr = max_low_pfn; high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* clear the zero-page */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 61c0a2477072..2cdfc0b1195c 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -563,7 +563,6 @@ void __init mem_init(void) #endif high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(max_low_pfn); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a08b0ede4e64..68d47c53876c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -957,8 +957,6 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); - /* Set max_mapnr before paging_init() */ - set_max_mapnr(max_pfn); high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..157c9ca51541 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -298,7 +298,6 @@ static void __init setup_bootmem(void) high_memory = (void 
*)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); - set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2b41dc9b1fa3..ad567e2100b7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -159,7 +159,6 @@ void __init mem_init(void) cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); pv_init(); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 289a2fecebef..72aea5cd1b85 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -290,7 +290,6 @@ void __init paging_init(void) */ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = __MEMORY_START >> PAGE_SHIFT; - set_max_mapnr(max_low_pfn - min_low_pfn); nodes_clear(node_online_map); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index d96a14ffceeb..6b58da14edc6 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -275,7 +275,6 @@ void __init mem_init(void) taint_real_pages(); - max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index adfa08062f88..d4727efcf23d 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -47,7 +47,6 @@ extern int iomem_size; #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) extern unsigned long find_iomem(char *driver, unsigned long *len_out); -extern void mem_total_pages(unsigned long physmem, unsigned long iomem); extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index a74f17b033c4..af02b5f9911d 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -22,18 +22,6 @@ static int physmem_fd = -1; unsigned long high_physmem; EXPORT_SYMBOL(high_physmem); -void __init mem_total_pages(unsigned long physmem, unsigned long iomem) -{ - unsigned long phys_pages, iomem_pages, total_pages; - - phys_pages = physmem >> PAGE_SHIFT; - iomem_pages = iomem >> PAGE_SHIFT; - - total_pages = phys_pages + iomem_pages; - - max_mapnr = total_pages; -} - void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 79ea97d4797e..6414cbf00572 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -419,7 +419,6 @@ void __init setup_arch(char **cmdline_p) stack_protections((unsigned long) init_task.stack); setup_physmem(uml_physmem, uml_reserved, physmem_size); - mem_total_pages(physmem_size, iomem_size); uml_dtb_init(); read_initrd(); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac41b1e0940d..6d2f8cb9451e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -650,9 +650,6 @@ void __init initmem_init(void) memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); -#ifdef CONFIG_FLATMEM - max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? 
highend_pfn : max_low_pfn; -#endif __vmalloc_start_set = true; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 01577d33e602..9f1b0d5fccc7 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -164,7 +164,6 @@ void __init mem_init(void) { free_highpages(); - max_mapnr = max_pfn - ARCH_PFN_OFFSET; high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index 6d1fb6162ac1..a3b5029aebbd 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -19,11 +19,12 @@ #define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \ ARCH_PFN_OFFSET) +/* avoid include hell */ +extern unsigned long max_mapnr; + #ifndef pfn_valid static inline int pfn_valid(unsigned long pfn) { - /* avoid include hell */ - extern unsigned long max_mapnr; unsigned long pfn_offset = ARCH_PFN_OFFSET; return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr; diff --git a/include/linux/mm.h b/include/linux/mm.h index 82776b409391..8c4cb8c28507 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -46,17 +46,6 @@ extern int sysctl_page_lock_unfairness; void mm_core_init(void); void init_mm_internals(void); -#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ -extern unsigned long max_mapnr; - -static inline void set_max_mapnr(unsigned long limit) -{ - max_mapnr = limit; -} -#else -static inline void set_max_mapnr(unsigned long limit) { } -#endif - extern atomic_long_t _totalram_pages; static inline unsigned long totalram_pages(void) { diff --git a/mm/memory.c b/mm/memory.c index 8873b7a4962c..a1d7664855f2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -95,14 +95,6 @@ #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif -#ifndef CONFIG_NUMA -unsigned long max_mapnr; -EXPORT_SYMBOL(max_mapnr); - -struct page *mem_map; -EXPORT_SYMBOL(mem_map); -#endif - static vm_fault_t do_fault(struct vm_fault *vmf); static vm_fault_t do_anonymous_page(struct vm_fault *vmf); static bool vmf_pte_changed(struct vm_fault *vmf); diff --git a/mm/mm_init.c b/mm/mm_init.c index 133640a93d1d..7fd48d2d5064 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -37,6 +37,14 @@ #include +#ifndef CONFIG_NUMA +unsigned long max_mapnr; +EXPORT_SYMBOL(max_mapnr); + +struct page *mem_map; +EXPORT_SYMBOL(mem_map); +#endif + #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; @@ -1639,7 +1647,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); offset = pgdat->node_start_pfn - start; /* - * The zone's endpoints aren't required to be MAX_PAGE_ORDER + * The zone's endpoints aren't required to be MAX_PAGE_ORDER * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. 
*/ @@ -1655,14 +1663,15 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); -#ifndef CONFIG_NUMA + /* the global mem_map is just set as node 0's */ - if (pgdat == NODE_DATA(0)) { - mem_map = NODE_DATA(0)->node_mem_map; - if (page_to_pfn(mem_map) != pgdat->node_start_pfn) - mem_map -= offset; - } -#endif + WARN_ON(pgdat != NODE_DATA(0)); + + mem_map = pgdat->node_mem_map; + if (page_to_pfn(mem_map) != pgdat->node_start_pfn) + mem_map -= offset; + + max_mapnr = end - start; } #else static inline void alloc_node_mem_map(struct pglist_data *pgdat) { } diff --git a/mm/nommu.c b/mm/nommu.c index 8b31d8396297..43751726f977 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -44,16 +44,12 @@ void *high_memory; EXPORT_SYMBOL(high_memory); -struct page *mem_map; -unsigned long max_mapnr; -EXPORT_SYMBOL(max_mapnr); unsigned long highest_memmap_pfn; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; atomic_long_t mmap_pages_allocated; -EXPORT_SYMBOL(mem_map); /* list of mapped, potentially shareable regions */ static struct kmem_cache *vm_region_jar; -- 2.50.1 From e120d1bc12da5c1bb871c346f741296610fd6fcb Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:00 +0200 Subject: [PATCH 15/16] arch, mm: set high_memory in free_area_init() high_memory defines upper bound on the directly mapped memory. This bound is defined by the beginning of ZONE_HIGHMEM when a system has high memory and by the end of memory otherwise. All this is known to generic memory management initialization code that can set high_memory while initializing core mm structures. Add a generic calculation of high_memory to free_area_init() and remove per-architecture calculation except for the architectures that set and use high_memory earlier than that. Link: https://lkml.kernel.org/r/20250313135003.836600-11-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/alpha/mm/init.c | 1 - arch/arc/mm/init.c | 2 -- arch/arm64/mm/init.c | 2 -- arch/csky/mm/init.c | 1 - arch/hexagon/mm/init.c | 6 ------ arch/loongarch/kernel/numa.c | 1 - arch/loongarch/mm/init.c | 2 -- arch/microblaze/mm/init.c | 2 -- arch/mips/mm/init.c | 2 -- arch/nios2/mm/init.c | 6 ------ arch/openrisc/mm/init.c | 2 -- arch/parisc/mm/init.c | 1 - arch/riscv/mm/init.c | 1 - arch/s390/mm/init.c | 2 -- arch/sh/mm/init.c | 7 ------- arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 2 -- arch/um/kernel/um_arch.c | 1 - arch/x86/kernel/setup.c | 2 -- arch/x86/mm/init_32.c | 3 --- arch/x86/mm/numa_32.c | 3 --- arch/xtensa/mm/init.c | 2 -- mm/memory.c | 8 -------- mm/mm_init.c | 30 ++++++++++++++++++++++++++++++ mm/nommu.c | 2 -- 25 files changed, 30 insertions(+), 62 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index ec0eeae9c653..3ab2d2f3c917 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -276,7 +276,6 @@ srm_paging_stop (void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); memblock_free_all(); } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 7ef883d58dc1..05025122e965 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -150,8 +150,6 @@ void __init setup_arch_memory(void) */ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; - high_memory = (void *)(min_high_pfn << PAGE_SHIFT); - arch_pfn_offset = min(min_low_pfn, min_high_pfn); kmap_init(); #endif /* CONFIG_HIGHMEM */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ccdef53872a0..53a0b105890b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -309,8 +309,6 @@ void __init arm64_memblock_init(void) } early_init_fdt_scan_reserved_mem(); - - high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } void __init bootmem_init(void) diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index ba6694d6170a..a22801aa503a 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -47,7 +47,6 @@ void __init mem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 508bb6a8dcc9..d412c2314509 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -100,12 +100,6 @@ static void __init paging_init(void) * initial kernel segment table's physical address. */ init_mm.context.ptbase = __pa(init_mm.pgd); - - /* - * Start of high memory area. Will probably need something more - * fancy if we... get more fancy. 
- */ - high_memory = (void *)((bootmem_lastpg + 1) << PAGE_SHIFT); } #ifndef DMA_RESERVE diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 84fe7f854820..8eb489725b1a 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -389,7 +389,6 @@ void __init paging_init(void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 00449df50db1..6affa3609188 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -78,8 +78,6 @@ void __init paging_init(void) void __init mem_init(void) { - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); } #endif /* !CONFIG_NUMA */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 857cd2b44bcf..7e2e342e84c5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -120,8 +120,6 @@ void __init setup_memory(void) void __init mem_init(void) { - high_memory = (void *)__va(memory_start + lowmem_size - 1); - /* this will put all memory onto the freelists */ memblock_free_all(); #ifdef CONFIG_HIGHMEM diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index eb61a73520a0..ed9dde6a00f7 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -417,7 +417,6 @@ void __init paging_init(void) max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; } #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); free_area_init(max_zone_pfns); } @@ -469,7 +468,6 @@ void __init mem_init(void) #else /* CONFIG_NUMA */ void __init mem_init(void) { - high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); setup_zero_pages(); /* This comes from node 0 */ memblock_free_all(); } diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 3cafa87ead9e..4ba8dfa0d238 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -62,12 +62,6 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long end_mem = memory_end; /* this must not include - kernel stack at top */ - - end_mem &= PAGE_MASK; - high_memory = __va(end_mem); - /* this will put all memory onto the freelists */ memblock_free_all(); } diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 9093c336e158..72c5952607ac 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -193,8 +193,6 @@ void __init mem_init(void) { BUG_ON(!mem_map); - high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ memset((void *)empty_zero_page, 0, PAGE_SIZE); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 2cdfc0b1195c..4fbe354dc9b4 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -562,7 +562,6 @@ void __init mem_init(void) BUILD_BUG_ON(TMPALIAS_MAP_START >= 0x80000000); #endif - high_memory = __va((max_pfn << PAGE_SHIFT)); memblock_free_all(); #ifdef CONFIG_PA11 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 157c9ca51541..ac6d41e86243 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -295,7 +295,6 @@ static void __init setup_bootmem(void) phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ad567e2100b7..4bd6f316d71f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -159,8 +159,6 @@ 
void __init mem_init(void) cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); kfence_split_mapping(); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 72aea5cd1b85..6d459ffba4bc 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -330,13 +330,6 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - pg_data_t *pgdat; - - high_memory = NULL; - for_each_online_pgdat(pgdat) - high_memory = max_t(void *, high_memory, - __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - memblock_free_all(); /* Set this up early, so we can take care of the zero page */ diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 6b58da14edc6..81a468a9c223 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -275,7 +275,6 @@ void __init mem_init(void) taint_real_pages(); - high_memory = __va(max_low_pfn << PAGE_SHIFT); memblock_free_all(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 05882bca5b73..34d46adb9571 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2505,8 +2505,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - high_memory = __va(last_valid_pfn << PAGE_SHIFT); - memblock_free_all(); /* diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 6414cbf00572..f24a3ce37ab7 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -385,7 +385,6 @@ int __init linux_main(int argc, char **argv, char **envp) high_physmem = uml_physmem + physmem_size; end_iomem = high_physmem + iomem_size; - high_memory = (void *) end_iomem; start_vm = VMALLOC_START; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ff8604007b08..74ac686d441a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -972,8 +972,6 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = e820__end_of_low_ram_pfn(); else max_low_pfn = max_pfn; - - high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif /* Find and reserve MPTABLE area */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 6d2f8cb9451e..801b659ead0c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -643,9 +643,6 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 65fda406e6f2..442ef3facff0 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -41,9 +41,6 @@ void __init initmem_init(void) highstart_pfn = max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 9f1b0d5fccc7..9b662477b3d4 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -164,8 +164,6 @@ void __init mem_init(void) { free_highpages(); - high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); - memblock_free_all(); } diff --git 
a/mm/memory.c b/mm/memory.c index a1d7664855f2..3900225d99c5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -113,14 +113,6 @@ static __always_inline bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) return pte_marker_uffd_wp(vmf->orig_pte); } -/* - * A number of key systems in x86 including ioremap() rely on the assumption - * that high_memory defines the upper bound on direct map memory, then end - * of ZONE_NORMAL. - */ -void *high_memory; -EXPORT_SYMBOL(high_memory); - /* * Randomize the address space (stacks, mmaps, brk, etc.). * diff --git a/mm/mm_init.c b/mm/mm_init.c index 7fd48d2d5064..bd7071c32a44 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -45,6 +45,13 @@ struct page *mem_map; EXPORT_SYMBOL(mem_map); #endif +/* + * high_memory defines the upper bound on direct map memory, then end + * of ZONE_NORMAL. + */ +void *high_memory; +EXPORT_SYMBOL(high_memory); + #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; @@ -1778,6 +1785,27 @@ static bool arch_has_descending_max_zone_pfns(void) return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); } +static void set_high_memory(void) +{ + phys_addr_t highmem = memblock_end_of_DRAM(); + + /* + * Some architectures (e.g. ARM) set high_memory very early and + * use it in arch setup code. + * If an architecture already set high_memory don't overwrite it + */ + if (high_memory) + return; + +#ifdef CONFIG_HIGHMEM + if (arch_has_descending_max_zone_pfns() || + highmem > PFN_PHYS(arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])) + highmem = PFN_PHYS(arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]); +#endif + + high_memory = phys_to_virt(highmem - 1) + 1; +} + /** * free_area_init - Initialise all pg_data_t and zone data * @max_zone_pfn: an array of max PFNs for each zone @@ -1900,6 +1928,8 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* disable hash distribution for systems with a single node */ fixup_hashdist(); + + set_high_memory(); } /** diff --git a/mm/nommu.c b/mm/nommu.c index 43751726f977..15a396ce2553 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -42,8 +42,6 @@ #include #include "internal.h" -void *high_memory; -EXPORT_SYMBOL(high_memory); unsigned long highest_memmap_pfn; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -- 2.50.1 From 6faea3422e3b4e8de44a55aa3e6e843320da66d2 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Thu, 13 Mar 2025 15:50:01 +0200 Subject: [PATCH 16/16] arch, mm: streamline HIGHMEM freeing All architectures that support HIGHMEM have their code that frees high memory pages to the buddy allocator while __free_memory_core() is limited to freeing only low memory. There is no actual reason for that. The memory map is completely ready by the time memblock_free_all() is called and high pages can be released to the buddy allocator along with low memory. Remove low memory limit from __free_memory_core() and drop per-architecture code that frees high memory pages. Link: https://lkml.kernel.org/r/20250313135003.836600-12-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Dave Hansen [x86] Tested-by: Mark Brown Cc: Alexander Gordeev Cc: Andreas Larsson Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Betkov Cc: Catalin Marinas Cc: David S. 
Miller Cc: Dinh Nguyen Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Guo Ren (csky) Cc: Heiko Carstens Cc: Helge Deller Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiaxun Yang Cc: Johannes Berg Cc: John Paul Adrian Glaubitz Cc: Madhavan Srinivasan Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Richard Weinberger Cc: Russel King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleinxer Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arc/mm/init.c | 6 +----- arch/arm/mm/init.c | 29 ----------------------------- arch/csky/mm/init.c | 14 -------------- arch/microblaze/mm/init.c | 16 ---------------- arch/mips/mm/init.c | 20 -------------------- arch/powerpc/mm/mem.c | 14 -------------- arch/sparc/mm/init_32.c | 25 ------------------------- arch/x86/include/asm/highmem.h | 3 --- arch/x86/include/asm/numa.h | 4 ---- arch/x86/include/asm/numa_32.h | 13 ------------- arch/x86/mm/Makefile | 2 -- arch/x86/mm/highmem_32.c | 34 ---------------------------------- arch/x86/mm/init_32.c | 28 ---------------------------- arch/xtensa/mm/init.c | 29 ----------------------------- include/linux/mm.h | 1 - mm/memblock.c | 3 +-- 16 files changed, 2 insertions(+), 239 deletions(-) delete mode 100644 arch/x86/include/asm/numa_32.h delete mode 100644 arch/x86/mm/highmem_32.c diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 05025122e965..11ce638731c9 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -160,11 +160,7 @@ void __init setup_arch_memory(void) static void __init highmem_init(void) { #ifdef CONFIG_HIGHMEM - unsigned long tmp; - memblock_phys_free(high_mem_start, high_mem_sz); - for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); #endif } @@ -176,8 +172,8 @@ static void __init highmem_init(void) */ void __init mem_init(void) { - memblock_free_all(); highmem_init(); + memblock_free_all(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d4bcc745a044..7bb5ce02b9b5 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -237,33 +237,6 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - /* * mem_init() marks the free areas in the mem_map and tells us how much * memory is free. This is done after various parts of the system have @@ -283,8 +256,6 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ memblock_free_all(); - free_highpages(); - /* * Check boundaries twice: Some fundamental inconsistencies can * be detected at build time already. 
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index a22801aa503a..3914c2b873da 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -44,21 +44,7 @@ EXPORT_SYMBOL(empty_zero_page); void __init mem_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; -#endif - memblock_free_all(); - -#ifdef CONFIG_HIGHMEM - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - /* FIXME not sure about */ - if (!memblock_is_reserved(tmp << PAGE_SHIFT)) - free_highmem_page(page); - } -#endif } void free_initmem(void) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 7e2e342e84c5..3e664e0efc33 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -52,19 +52,6 @@ static void __init highmem_init(void) map_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); } - -static void __meminit highmem_setup(void) -{ - unsigned long pfn; - - for (pfn = max_low_pfn; pfn < max_pfn; ++pfn) { - struct page *page = pfn_to_page(pfn); - - /* FIXME not sure about */ - if (!memblock_is_reserved(pfn << PAGE_SHIFT)) - free_highmem_page(page); - } -} #endif /* CONFIG_HIGHMEM */ /* @@ -122,9 +109,6 @@ void __init mem_init(void) { /* this will put all memory onto the freelists */ memblock_free_all(); -#ifdef CONFIG_HIGHMEM - highmem_setup(); -#endif mem_init_done = 1; } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index ed9dde6a00f7..075177e817ac 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -425,25 +425,6 @@ void __init paging_init(void) static struct kcore_list kcore_kseg0; #endif -static inline void __init mem_init_free_highmem(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - if (cpu_has_dc_aliases) - return; - - for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - if (!memblock_is_memory(PFN_PHYS(tmp))) - SetPageReserved(page); - else - free_highmem_page(page); - } -#endif -} - void __init mem_init(void) { /* @@ -454,7 +435,6 @@ void __init mem_init(void) maar_init(); setup_zero_pages(); /* Setup zeroed pages. 
*/ - mem_init_free_highmem(); memblock_free_all(); #ifdef CONFIG_64BIT diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c7708c8fad29..1bc94bca9944 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -297,20 +297,6 @@ void __init mem_init(void) memblock_free_all(); -#ifdef CONFIG_HIGHMEM - { - unsigned long pfn, highmem_mapnr; - - highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; - for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { - phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; - struct page *page = pfn_to_page(pfn); - if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr)) - free_highmem_page(page); - } - } -#endif /* CONFIG_HIGHMEM */ - #if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) /* * If smp is enabled, next_tlbcam_idx is initialized in the cpu up diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 81a468a9c223..043e9b6fadd0 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -232,18 +232,6 @@ static void __init taint_real_pages(void) } } -static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long tmp; - -#ifdef CONFIG_DEBUG_HIGHMEM - printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); -#endif - - for (tmp = start_pfn; tmp < end_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -} - void __init mem_init(void) { int i; @@ -276,19 +264,6 @@ void __init mem_init(void) taint_real_pages(); memblock_free_all(); - - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; - unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - - if (end_pfn <= highstart_pfn) - continue; - - if (start_pfn < highstart_pfn) - start_pfn = highstart_pfn; - - map_high_region(start_pfn, end_pfn); - } } void sparc_flush_page_to_ram(struct page *page) diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 731ee7cc40a5..585bdadba47d 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -69,9 +69,6 @@ extern unsigned long highstart_pfn, highend_pfn; arch_flush_lazy_mmu_mode(); \ } while (0) -extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 5469d7a7c40f..53ba39ce010c 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -41,10 +41,6 @@ static inline int numa_cpu_node(int cpu) } #endif /* CONFIG_NUMA */ -#ifdef CONFIG_X86_32 -# include -#endif - #ifdef CONFIG_NUMA extern void numa_set_node(int cpu, int node); extern void numa_clear_node(int cpu); diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h deleted file mode 100644 index 9c8e9e85be77..000000000000 --- a/arch/x86/include/asm/numa_32.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_NUMA_32_H -#define _ASM_X86_NUMA_32_H - -#ifdef CONFIG_HIGHMEM -extern void set_highmem_pages_init(void); -#else -static inline void set_highmem_pages_init(void) -{ -} -#endif - -#endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index e0c99a8760ca..32035d5be5a0 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -42,8 +42,6 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o -obj-$(CONFIG_HIGHMEM) 
+= highmem_32.o - KASAN_SANITIZE_kasan_init_$(BITS).o := n obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c deleted file mode 100644 index d9efa35711ee..000000000000 --- a/arch/x86/mm/highmem_32.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include /* for totalram_pages */ -#include -#include - -void __init set_highmem_pages_init(void) -{ - struct zone *zone; - int nid; - - /* - * Explicitly reset zone->managed_pages because set_highmem_pages_init() - * is invoked before memblock_free_all() - */ - reset_all_zones_managed_pages(); - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } -} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 801b659ead0c..9ee8ec2bc5d1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -394,23 +394,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = virt_to_kpte(vaddr); } - -void __init add_highpages_with_active_regions(int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - phys_addr_t start, end; - u64 i; - - for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) { - unsigned long pfn = clamp_t(unsigned long, PFN_UP(start), - start_pfn, end_pfn); - unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end), - start_pfn, end_pfn); - for ( ; pfn < e_pfn; pfn++) - if (pfn_valid(pfn)) - free_highmem_page(pfn_to_page(pfn)); - } -} #else static inline void permanent_kmaps_init(pgd_t *pgd_base) { @@ -715,17 +698,6 @@ void __init mem_init(void) #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif - /* - * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before memblock_free_all(). Memblock use free low memory for - * temporary data (see find_range_array()) and for this purpose can use - * pages that was already passed to the buddy allocator, hence marked as - * not accessible in the page tables when compiled with - * CONFIG_DEBUG_PAGEALLOC. Otherwise order of initialization is not - * important here. - */ - set_highmem_pages_init(); - /* this will put all low memory onto the freelists */ memblock_free_all(); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 9b662477b3d4..47ecbe28263e 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -129,41 +129,12 @@ void __init zones_init(void) print_vm_layout(); } -static void __init free_highpages(void) -{ -#ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn; - phys_addr_t range_start, range_end; - u64 i; - - /* set highmem page free */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, - &range_start, &range_end, NULL) { - unsigned long start = PFN_UP(range_start); - unsigned long end = PFN_DOWN(range_end); - - /* Ignore complete lowmem entries */ - if (end <= max_low) - continue; - - /* Truncate partial highmem entries */ - if (start < max_low) - start = max_low; - - for (; start < end; start++) - free_highmem_page(pfn_to_page(start)); - } -#endif -} - /* * Initialize memory pages. 
*/ void __init mem_init(void) { - free_highpages(); - memblock_free_all(); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 8c4cb8c28507..6c519a5098d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3275,7 +3275,6 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ void free_reserved_page(struct page *page); -#define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { diff --git a/mm/memblock.c b/mm/memblock.c index 95af35fd1389..64ae678cd1d1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2164,8 +2164,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start, phys_addr_t end) { unsigned long start_pfn = PFN_UP(start); - unsigned long end_pfn = min_t(unsigned long, - PFN_DOWN(end), max_low_pfn); + unsigned long end_pfn = PFN_DOWN(end); if (start_pfn >= end_pfn) return 0; -- 2.50.1
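
With the two patches above applied, most architectures' late init collapses to a single call: free_area_init() now computes high_memory and max_mapnr, and memblock_free_all() releases high memory along with low memory. The following is a minimal sketch, not taken from the series itself, of the shape a typical architecture's mem_init() ends up with (compare the alpha, loongarch and xtensa hunks above, which reduce to exactly this), assuming the architecture has no other arch-specific late-init work:

/*
 * Hypothetical per-architecture mem_init() after this series:
 * high_memory and max_mapnr are set generically in free_area_init(),
 * and __free_memory_core() no longer stops at max_low_pfn, so a
 * single memblock_free_all() hands both low and high memory to the
 * buddy allocator.
 */
#include <linux/memblock.h>
#include <linux/init.h>

void __init mem_init(void)
{
	/* Release all remaining free memory, including HIGHMEM, to the buddy allocator. */
	memblock_free_all();
}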