From 14cb5d83068ecf15d2da6f7d0e9ea9edbcbc0457 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Fri, 7 Mar 2025 00:28:46 +0000 Subject: [PATCH 01/16] x86/amd_nb: Use rdmsr_safe() in amd_get_mmconfig_range() Xen doesn't offer MSR_FAM10H_MMIO_CONF_BASE to all guests. This results in the following warning: unchecked MSR access error: RDMSR from 0xc0010058 at rIP: 0xffffffff8101d19f (xen_do_read_msr+0x7f/0xa0) Call Trace: xen_read_msr+0x1e/0x30 amd_get_mmconfig_range+0x2b/0x80 quirk_amd_mmconfig_area+0x28/0x100 pnp_fixup_device+0x39/0x50 __pnp_add_device+0xf/0x150 pnp_add_device+0x3d/0x100 pnpacpi_add_device_handler+0x1f9/0x280 acpi_ns_get_device_callback+0x104/0x1c0 acpi_ns_walk_namespace+0x1d0/0x260 acpi_get_devices+0x8a/0xb0 pnpacpi_init+0x50/0x80 do_one_initcall+0x46/0x2e0 kernel_init_freeable+0x1da/0x2f0 kernel_init+0x16/0x1b0 ret_from_fork+0x30/0x50 ret_from_fork_asm+0x1b/0x30 based on quirks for a "PNP0c01" device. Treating MMCFG as disabled is the right course of action, so no change is needed there. This was most likely exposed by fixing the Xen MSR accessors to not be silently-safe. Fixes: 3fac3734c43a ("xen/pv: support selecting safe/unsafe msr accesses") Signed-off-by: Andrew Cooper Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250307002846.3026685-1-andrew.cooper3@citrix.com --- arch/x86/kernel/amd_nb.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 11fac09e3a8c..67e773744edb 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -143,7 +143,6 @@ bool __init early_is_amd_nb(u32 device) struct resource *amd_get_mmconfig_range(struct resource *res) { - u32 address; u64 base, msr; unsigned int segn_busn_bits; @@ -151,13 +150,11 @@ struct resource *amd_get_mmconfig_range(struct resource *res) boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) return NULL; - /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + /* Assume CPUs from Fam10h have mmconfig, although not all VMs do */ + if (boot_cpu_data.x86 < 0x10 || + rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr)) return NULL; - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - /* mmconfig is not enabled */ if (!(msr & FAM10H_MMIO_CONF_ENABLE)) return NULL; -- 2.51.0 From ac7c06acaa3738b38e83815ac0f07140ad320f13 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Thu, 6 Mar 2025 19:17:21 +1100 Subject: [PATCH 02/16] virt: sev-guest: Allocate request data dynamically Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") narrowed the command mutex scope to snp_send_guest_request(). However, GET_REPORT, GET_DERIVED_KEY, and GET_EXT_REPORT share the req structure in snp_guest_dev. Without the mutex protection, concurrent requests can overwrite each other's data. Fix it by dynamically allocating the request structure. 
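A minimal sketch of the resulting pattern (condensed from the diff that follows; the function name and the elided steps are illustrative, not the literal driver code): each ioctl handler now allocates its own request buffer and relies on the scope-based __free(kfree) cleanup from <linux/cleanup.h> to release it on every return path, so concurrent callers no longer share one buffer:

  #include <linux/cleanup.h>
  #include <linux/slab.h>
  #include <linux/uaccess.h>

  static int get_report_sketch(struct snp_guest_dev *snp_dev,
                               struct snp_guest_request_ioctl *arg)
  {
          /* Freed automatically when report_req goes out of scope. */
          struct snp_report_req *report_req __free(kfree) = NULL;

          report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT);
          if (!report_req)
                  return -ENOMEM;

          if (copy_from_user(report_req, (void __user *)arg->req_data,
                             sizeof(*report_req)))
                  return -EFAULT;

          /* ... build, encrypt and send the SNP_MSG_REPORT_REQ as before ... */
          return 0;
  }
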
Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Closes: https://github.com/AMDESE/AMDSEV/issues/265 Reported-by: andreas.stuehrk@yaxi.tech Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-2-aik@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 264b6523fe52..23ac177472be 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -38,12 +38,6 @@ struct snp_guest_dev { struct miscdevice misc; struct snp_msg_desc *msg_desc; - - union { - struct snp_report_req report; - struct snp_derived_key_req derived_key; - struct snp_ext_report_req ext_report; - } req; }; /* @@ -71,7 +65,7 @@ struct snp_req_resp { static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_report_req *report_req = &snp_dev->req.report; + struct snp_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -80,6 +74,10 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!arg->req_data || !arg->resp_data) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_user(report_req, (void __user *)arg->req_data, sizeof(*report_req))) return -EFAULT; @@ -116,7 +114,7 @@ e_free: static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) { - struct snp_derived_key_req *derived_key_req = &snp_dev->req.derived_key; + struct snp_derived_key_req *derived_key_req __free(kfree) = NULL; struct snp_derived_key_resp derived_key_resp = {0}; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_guest_req req = {}; @@ -136,6 +134,10 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (sizeof(buf) < resp_len) return -ENOMEM; + derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT); + if (!derived_key_req) + return -ENOMEM; + if (copy_from_user(derived_key_req, (void __user *)arg->req_data, sizeof(*derived_key_req))) return -EFAULT; @@ -168,7 +170,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_req_resp *io) { - struct snp_ext_report_req *report_req = &snp_dev->req.ext_report; + struct snp_ext_report_req *report_req __free(kfree) = NULL; struct snp_msg_desc *mdesc = snp_dev->msg_desc; struct snp_report_resp *report_resp; struct snp_guest_req req = {}; @@ -178,6 +180,10 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; + report_req = kzalloc(sizeof(*report_req), GFP_KERNEL_ACCOUNT); + if (!report_req) + return -ENOMEM; + if (copy_from_sockptr(report_req, io->req_data, sizeof(*report_req))) return -EFAULT; -- 2.51.0 From 3e385c0d6ce88ac9916dcf84267bd5855d830748 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 7 Mar 2025 12:37:00 +1100 Subject: [PATCH 03/16] virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex Compared to the SNP Guest Request, the "Extended" version adds data pages for receiving 
certificates. If not enough pages provided, the HV can report to the VM how much is needed so the VM can reallocate and repeat. Commit ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") moved handling of the allocated/desired pages number out of scope of said mutex and create a possibility for a race (multiple instances trying to trigger Extended request in a VM) as there is just one instance of snp_msg_desc per /dev/sev-guest and no locking other than snp_cmd_mutex. Fix the issue by moving the data blob/size and the GHCB input struct (snp_req_data) into snp_guest_req which is allocated on stack now and accessed by the GHCB caller under that mutex. Stop allocating SEV_FW_BLOB_MAX_SIZE in snp_msg_alloc() as only one of four callers needs it. Free the received blob in get_ext_report() right after it is copied to the userspace. Possible future users of snp_send_guest_request() are likely to have different ideas about the buffer size anyways. Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nikunj A Dadhania Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250307013700.437505-3-aik@amd.com --- arch/x86/coco/sev/core.c | 23 ++++++----------- arch/x86/include/asm/sev.h | 6 ++--- drivers/virt/coco/sev-guest/sev-guest.c | 34 ++++++++++++++++++++----- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 82492efc5d94..96c7bc698e6b 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -2853,19 +2853,8 @@ struct snp_msg_desc *snp_msg_alloc(void) if (!mdesc->response) goto e_free_request; - mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); - if (!mdesc->certs_data) - goto e_free_response; - - /* initial the input address for guest request */ - mdesc->input.req_gpa = __pa(mdesc->request); - mdesc->input.resp_gpa = __pa(mdesc->response); - mdesc->input.data_gpa = __pa(mdesc->certs_data); - return mdesc; -e_free_response: - free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); e_free_request: free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); e_unmap: @@ -2885,7 +2874,6 @@ void snp_msg_free(struct snp_msg_desc *mdesc) kfree(mdesc->ctx); free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); iounmap((__force void __iomem *)mdesc->secrets); memset(mdesc, 0, sizeof(*mdesc)); @@ -3054,7 +3042,7 @@ retry_request: * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(req, &mdesc->input, rio); + rc = snp_issue_guest_request(req, &req->input, rio); switch (rc) { case -ENOSPC: /* @@ -3064,7 +3052,7 @@ retry_request: * order to increment the sequence number and thus avoid * IV reuse. 
*/ - override_npages = mdesc->input.data_npages; + override_npages = req->input.data_npages; req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; /* @@ -3120,7 +3108,7 @@ retry_request: } if (override_npages) - mdesc->input.data_npages = override_npages; + req->input.data_npages = override_npages; return rc; } @@ -3158,6 +3146,11 @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req */ memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); + /* Initialize the input address for guest request */ + req->input.req_gpa = __pa(mdesc->request); + req->input.resp_gpa = __pa(mdesc->response); + req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; + rc = __handle_guest_request(mdesc, req, rio); if (rc) { if (rc == -EIO && diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 1581246491b5..ba7999f66abe 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -203,6 +203,9 @@ struct snp_guest_req { unsigned int vmpck_id; u8 msg_version; u8 msg_type; + + struct snp_req_data input; + void *certs_data; }; /* @@ -263,9 +266,6 @@ struct snp_msg_desc { struct snp_guest_msg secret_request, secret_response; struct snp_secrets_page *secrets; - struct snp_req_data input; - - void *certs_data; struct aesgcm_ctx *ctx; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 23ac177472be..70fbc9a3e703 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -176,6 +176,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques struct snp_guest_req req = {}; int ret, npages = 0, resp_len; sockptr_t certs_address; + struct page *page; if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data)) return -EINVAL; @@ -209,8 +210,20 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques * the host. If host does not supply any certs in it, then copy * zeros to indicate that certificate data was not provided. 
*/ - memset(mdesc->certs_data, 0, report_req->certs_len); npages = report_req->certs_len >> PAGE_SHIFT; + page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, + get_order(report_req->certs_len)); + if (!page) + return -ENOMEM; + + req.certs_data = page_address(page); + ret = set_memory_decrypted((unsigned long)req.certs_data, npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(report_req->certs_len)); + return -EFAULT; + } + cmd: /* * The intermediate response buffer is used while decrypting the @@ -219,10 +232,12 @@ cmd: */ resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize; report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); - if (!report_resp) - return -ENOMEM; + if (!report_resp) { + ret = -ENOMEM; + goto e_free_data; + } - mdesc->input.data_npages = npages; + req.input.data_npages = npages; req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; @@ -237,7 +252,7 @@ cmd: /* If certs length is invalid then copy the returned length */ if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { - report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT; + report_req->certs_len = req.input.data_npages << PAGE_SHIFT; if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req))) ret = -EFAULT; @@ -246,7 +261,7 @@ cmd: if (ret) goto e_free; - if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) { + if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) { ret = -EFAULT; goto e_free; } @@ -256,6 +271,13 @@ cmd: e_free: kfree(report_resp); +e_free_data: + if (npages) { + if (set_memory_encrypted((unsigned long)req.certs_data, npages)) + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + else + __free_pages(page, get_order(report_req->certs_len)); + } return ret; } -- 2.51.0 From 6914f7e2e25fac9d1d2b62c208eaa5f2bf810fe9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 6 Mar 2025 23:00:16 +0100 Subject: [PATCH 04/16] x86/mm: Define PTRS_PER_PMD for assembly code too Andy reported the following build warning from head_32.S: In file included from arch/x86/kernel/head_32.S:29: arch/x86/include/asm/pgtable_32.h:59:5: error: "PTRS_PER_PMD" is not defined, evaluates to 0 [-Werror=undef] 59 | #if PTRS_PER_PMD > 1 The reason is that on 2-level i386 paging the folded in PMD's PTRS_PER_PMD constant is not defined in assembly headers, only in generic MM C headers. Instead of trying to fish out the definition from the generic headers, just define it - it even has a comment for it already... Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/Z8oa8AUVyi2HWfo9@gmail.com --- arch/x86/include/asm/pgtable-2level_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 7f6ccff0ba72..4a12c276b181 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -23,17 +23,17 @@ typedef union { #define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED /* - * traditional i386 two-level paging structure: + * Traditional i386 two-level paging structure: */ #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 - /* - * the i386 is two-level, so we don't really have any - * PMD directory physically. 
+ * The i386 is two-level, so we don't really have any + * PMD directory physically: */ +#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 -- 2.51.0 From da64a2359092ceec4f9dea5b329d0aef20104217 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 8 Mar 2025 13:50:45 +0800 Subject: [PATCH 05/16] LoongArch: Convert unreachable() to BUG() When compiling on LoongArch, there exists the following objtool warning in arch/loongarch/kernel/machine_kexec.o: kexec_reboot() falls through to next function crash_shutdown_secondary() Avoid using unreachable() as it can (and will in the absence of UBSAN) generate fall-through code. Use BUG() so we get a "break BRK_BUG" trap (with unreachable annotation). Cc: stable@vger.kernel.org # 6.12+ Acked-by: Josh Poimboeuf Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 8ae641dc53bb..f9381800e291 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -126,14 +126,14 @@ void kexec_reboot(void) /* All secondary cpus go to kexec_smp_wait */ if (smp_processor_id() > 0) { relocated_kexec_smp_wait(NULL); - unreachable(); + BUG(); } #endif do_kexec = (void *)reboot_code_buffer; do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); - unreachable(); + BUG(); } -- 2.51.0 From a0d3c8bcb9206ac207c7ad3182027c6b0a1319bb Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 06/16] LoongArch: Eliminate superfluous get_numa_distances_cnt() In LoongArch, get_numa_distances_cnt() isn't in use, resulting in a compiler warning. Fix follow errors with clang-18 when W=1e: arch/loongarch/kernel/acpi.c:259:28: error: unused function 'get_numa_distances_cnt' [-Werror,-Wunused-function] 259 | static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) | ^~~~~~~~~~~~~~~~~~~~~~ 1 error generated. Link: https://lore.kernel.org/all/Z7bHPVUH4lAezk0E@kernel.org/ Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 382a09a7152c..1120ac2824f6 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -249,18 +249,6 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. - */ -unsigned int numa_distance_cnt; - -static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) -{ - return slit->locality_count; -} - void __init numa_set_distance(int from, int to, int distance) { if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { -- 2.51.0 From c9117434c8f7523f0b77db4c5766f5011cc94677 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 07/16] LoongArch: Use polling play_dead() when resuming from hibernation When CONFIG_RANDOM_KMALLOC_CACHES or other randomization infrastructrue enabled, the idle_task's stack may different between the booting kernel and target kernel. So when resuming from hibernation, an ACTION_BOOT_CPU IPI wakeup the idle instruction in arch_cpu_idle_dead() and jump to the interrupt handler. 
But since the stack pointer is changed, the interrupt handler cannot restore correct context. So rename the current arch_cpu_idle_dead() to idle_play_dead(), make it as the default version of play_dead(), and the new arch_cpu_idle_dead() call play_dead() directly. For hibernation, implement an arch-specific hibernate_resume_nonboot_cpu_disable() to use the polling version (idle instruction is replace by nop, and irq is disabled) of play_dead(), i.e. poll_play_dead(), to avoid IPI handler corrupting the idle_task's stack when resuming from hibernation. This solution is a little similar to commit 406f992e4a372dafbe3c ("x86 / hibernate: Use hlt_play_dead() when resuming from hibernation"). Cc: stable@vger.kernel.org Tested-by: Erpeng Xu Tested-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 47 ++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index fbf747447f13..4b24589c0b56 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -423,7 +424,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void __noreturn arch_cpu_idle_dead(void) +static void __noreturn idle_play_dead(void) { register uint64_t addr; register void (*init_fn)(void); @@ -447,6 +448,50 @@ void __noreturn arch_cpu_idle_dead(void) BUG(); } +#ifdef CONFIG_HIBERNATION +static void __noreturn poll_play_dead(void) +{ + register uint64_t addr; + register void (*init_fn)(void); + + idle_task_exit(); + __this_cpu_write(cpu_state, CPU_DEAD); + + __smp_mb(); + do { + __asm__ __volatile__("nop\n\t"); + addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); + } while (addr == 0); + + init_fn = (void *)TO_CACHE(addr); + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); + + init_fn(); + BUG(); +} +#endif + +static void (*play_dead)(void) = idle_play_dead; + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); + BUG(); /* play_dead() doesn't return */ +} + +#ifdef CONFIG_HIBERNATION +int hibernate_resume_nonboot_cpu_disable(void) +{ + int ret; + + play_dead = poll_play_dead; + ret = suspend_disable_secondary_cpus(); + play_dead = idle_play_dead; + + return ret; +} +#endif + #endif /* -- 2.51.0 From c8477bb0a8e7f6b2e47952b403c5cb67a6929e55 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 08/16] LoongArch: Set max_pfn with the PFN of the last page The current max_pfn equals to zero. In this case, it causes user cannot get some page information through /proc filesystem such as kpagecount. The following message is displayed by stress-ng test suite with command "stress-ng --verbose --physpage 1 -t 1". # stress-ng --verbose --physpage 1 -t 1 stress-ng: error: [1691] physpage: cannot read page count for address 0x134ac000 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x7ffff207c3a8 in /proc/kpagecount, errno=22 (Invalid argument) stress-ng: error: [1691] physpage: cannot read page count for address 0x134b0000 in /proc/kpagecount, errno=22 (Invalid argument) ... After applying this patch, the kernel can pass the test. 
# stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [1701] physpage: [1701] started (instance 0 on CPU 3) stress-ng: debug: [1701] physpage: [1701] exited (instance 0 on CPU 3) stress-ng: debug: [1700] physpage: [1701] terminated (success) Cc: stable@vger.kernel.org # 6.8+ Fixes: ff6c3d81f2e8 ("NUMA: optimize detection of memory with no node id assigned by firmware") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edcfdfcad7d2..90cb3ca96f08 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -387,6 +387,9 @@ static void __init check_kernel_sections_mem(void) */ static void __init arch_mem_init(char **cmdline_p) { + /* Recalculate max_low_pfn for "mem=xxx" */ + max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + if (usermem) pr_info("User-defined physical RAM map overwrite\n"); -- 2.51.0 From 3109d5ff484b7bc7b955f166974c6776d91f247b Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:32 +0800 Subject: [PATCH 09/16] LoongArch: Set hugetlb mmap base address aligned with pmd size With ltp test case "testcases/bin/hugefork02", there is a dmesg error report message such as: kernel BUG at mm/hugetlb.c:5550! Oops - BUG[#1]: CPU: 0 UID: 0 PID: 1517 Comm: hugefork02 Not tainted 6.14.0-rc2+ #241 Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 pc 90000000004eaf1c ra 9000000000485538 tp 900000010edbc000 sp 900000010edbf940 a0 900000010edbfb00 a1 9000000108d20280 a2 00007fffe9474000 a3 00007ffff3474000 a4 0000000000000000 a5 0000000000000003 a6 00000000003cadd3 a7 0000000000000000 t0 0000000001ffffff t1 0000000001474000 t2 900000010ecd7900 t3 00007fffe9474000 t4 00007fffe9474000 t5 0000000000000040 t6 900000010edbfb00 t7 0000000000000001 t8 0000000000000005 u0 90000000004849d0 s9 900000010edbfa00 s0 9000000108d20280 s1 00007fffe9474000 s2 0000000002000000 s3 9000000108d20280 s4 9000000002b38b10 s5 900000010edbfb00 s6 00007ffff3474000 s7 0000000000000406 s8 900000010edbfa08 ra: 9000000000485538 unmap_vmas+0x130/0x218 ERA: 90000000004eaf1c __unmap_hugepage_range+0x6f4/0x7d0 PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000007 (+FPE +SXE +ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) Process hugefork02 (pid: 1517, threadinfo=00000000a670eaf4, task=000000007a95fc64) Call Trace: [<90000000004eaf1c>] __unmap_hugepage_range+0x6f4/0x7d0 [<9000000000485534>] unmap_vmas+0x12c/0x218 [<9000000000494068>] exit_mmap+0xe0/0x308 [<900000000025fdc4>] mmput+0x74/0x180 [<900000000026a284>] do_exit+0x294/0x898 [<900000000026aa30>] do_group_exit+0x30/0x98 [<900000000027bed4>] get_signal+0x83c/0x868 [<90000000002457b4>] arch_do_signal_or_restart+0x54/0xfa0 [<90000000015795e8>] irqentry_exit_to_user_mode+0xb8/0x138 [<90000000002572d0>] tlb_do_page_fault_1+0x114/0x1b4 The problem is that base address allocated from hugetlbfs is not aligned with pmd size. Here add a checking for hugetlbfs and align base address with pmd size. After this patch the test case "testcases/bin/hugefork02" passes to run. This is similar to the commit 7f24cbc9c4d42db8a3c8484d1 ("mm/mmap: teach generic_get_unmapped_area{_topdown} to handle hugetlb mappings"). 
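As a hedged illustration (an assumption based on the hugefork02 test name, not taken from the LTP sources), the failing pattern is essentially a PMD-sized hugetlb mapping followed by a fork, whose exiting child walks __unmap_hugepage_range() on that VMA; the sketch below assumes huge pages have been reserved via /proc/sys/vm/nr_hugepages:

  #include <stdlib.h>
  #include <string.h>
  #include <sys/mman.h>
  #include <sys/wait.h>
  #include <unistd.h>

  int main(void)
  {
          size_t len = 2UL << 20;              /* one 2 MB (PMD-sized) huge page */
          char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

          if (p == MAP_FAILED)
                  return 1;
          memset(p, 0xa5, len);                /* fault the huge page in */

          if (fork() == 0)                     /* child exit unmaps the hugetlb VMA */
                  _exit(0);

          wait(NULL);
          munmap(p, len);
          return 0;
  }
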
Cc: stable@vger.kernel.org # 6.13+ Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 914e82ff3f65..1df9e99582cc 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,7 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include +#include #include #include #include @@ -63,8 +64,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.length = len; - info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp && is_file_hugepages(filp)) + info.align_mask = huge_page_mask_align(filp); + else + info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0; if (dir == DOWN) { info.flags = VM_UNMAPPED_AREA_TOPDOWN; -- 2.51.0 From 6fb1867d5a44b0a061cf39d2492d23d314bcb8ce Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:51:59 +0800 Subject: [PATCH 10/16] LoongArch: KVM: Add interrupt checking for AVEC There is a newly added macro INT_AVEC with CSR ESTAT register, which is bit 14 used for LoongArch AVEC support. AVEC interrupt status bit 14 is supported with macro CSR_ESTAT_IS, so here replace the hard-coded value 0x1fff with macro CSR_ESTAT_IS so that the AVEC interrupt status is also supported by KVM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 20f941af3e9e..9e1a9b4aa4c6 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -311,7 +311,7 @@ static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int ret = RESUME_GUEST; unsigned long estat = vcpu->arch.host_estat; - u32 intr = estat & 0x1fff; /* Ignore NMI */ + u32 intr = estat & CSR_ESTAT_IS; u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; vcpu->mode = OUTSIDE_GUEST_MODE; -- 2.51.0 From 78d7bc5a02e1468df53896df354fa80727f35b7d Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:01 +0800 Subject: [PATCH 11/16] LoongArch: KVM: Reload guest CSR registers after sleep On host, the HW guest CSR registers are lost after suspend and resume operation. Since last_vcpu of boot CPU still records latest vCPU pointer so that the guest CSR register skips to reload when boot CPU resumes and vCPU is scheduled. Here last_vcpu is cleared so that guest CSR registers will reload from scheduled vCPU context after suspend and resume. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index f6d3242b9234..b6864d6e5ec8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -317,6 +317,13 @@ int kvm_arch_enable_virtualization_cpu(void) kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx", read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc()); + /* + * HW Guest CSR registers are lost after CPU suspend and resume. + * Clear last_vcpu so that Guest CSR registers forced to reload + * from vCPU SW state. 
+ */ + this_cpu_ptr(vmcs)->last_vcpu = NULL; + return 0; } -- 2.51.0 From 6bdbb73dc8d99fbb77f5db79dbb6f108708090b4 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sat, 8 Mar 2025 13:52:04 +0800 Subject: [PATCH 12/16] LoongArch: KVM: Fix GPA size issue about VM Physical address space is 48 bit on Loongson-3A5000 physical machine, however it is 47 bit for VM on Loongson-3A5000 system. Size of physical address space of VM is the same with the size of virtual user space (a half) of physical machine. Variable cpu_vabits represents user address space, kernel address space is not included (user space and kernel space are both a half of total). Here cpu_vabits, rather than cpu_vabits - 1, is to represent the size of guest physical address space. Also there is strict checking about page fault GPA address, inject error if it is larger than maximum GPA address of VM. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 6 ++++++ arch/loongarch/kvm/vm.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index c1e8ec5b941b..ea321403644a 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -669,6 +669,12 @@ static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write) struct kvm_run *run = vcpu->run; unsigned long badv = vcpu->arch.badv; + /* Inject ADE exception if exceed max GPA size */ + if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) { + kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM); + return RESUME_GUEST; + } + ret = kvm_handle_mm_fault(vcpu, badv, write); if (ret) { /* Treat as MMIO */ diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c index b8b3e1972d6e..edccfc8c9cd8 100644 --- a/arch/loongarch/kvm/vm.c +++ b/arch/loongarch/kvm/vm.c @@ -48,7 +48,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (kvm_pvtime_supported()) kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME); - kvm->arch.gpa_size = BIT(cpu_vabits - 1); + /* + * cpu_vabits means user address space only (a half of total). + * GPA size of VM is the same with the size of user address space. + */ + kvm->arch.gpa_size = BIT(cpu_vabits); kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1; kvm->arch.invalid_ptes[0] = 0; kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table; -- 2.51.0 From 058a6bec37c6c3b826158f6d26b75de43816a880 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 7 Mar 2025 23:02:56 +0100 Subject: [PATCH 13/16] x86/microcode/AMD: Add some forgotten models to the SHA check MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add some more forgotten models to the SHA check. 
Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Reported-by: Toralf Förster Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Tested-by: Toralf Förster Link: https://lore.kernel.org/r/20250307220256.11816-1-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 95ac1c6a84fb..c69b1bc45483 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -175,23 +175,29 @@ static bool need_sha_check(u32 cur_rev) { switch (cur_rev >> 8) { case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; case 0x86081: return cur_rev <= 0x8608108; break; case 0x87010: return cur_rev <= 0x8701034; break; case 0x8a000: return cur_rev <= 0x8a0000a; break; + case 0xa0010: return cur_rev <= 0xa00107a; break; case 0xa0011: return cur_rev <= 0xa0011da; break; case 0xa0012: return cur_rev <= 0xa001243; break; + case 0xa0082: return cur_rev <= 0xa00820e; break; case 0xa1011: return cur_rev <= 0xa101153; break; case 0xa1012: return cur_rev <= 0xa10124e; break; case 0xa1081: return cur_rev <= 0xa108109; break; case 0xa2010: return cur_rev <= 0xa20102f; break; case 0xa2012: return cur_rev <= 0xa201212; break; + case 0xa4041: return cur_rev <= 0xa404109; break; + case 0xa5000: return cur_rev <= 0xa500013; break; case 0xa6012: return cur_rev <= 0xa60120a; break; case 0xa7041: return cur_rev <= 0xa704109; break; case 0xa7052: return cur_rev <= 0xa705208; break; case 0xa7080: return cur_rev <= 0xa708009; break; case 0xa70c0: return cur_rev <= 0xa70C009; break; + case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; default: break; } -- 2.51.0 From 80e54e84911a923c40d7bee33a34c1b4be148d7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Mar 2025 13:45:25 -1000 Subject: [PATCH 14/16] Linux 6.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6a8e5be6b004..1d6a9ec8a2ac 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 6266f4a78131c795631440ea9c7b66cdfd399484 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 18 Feb 2025 23:18:55 +0200 Subject: [PATCH 15/16] drm/i915/cdclk: Do cdclk post plane programming later MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We currently call intel_set_cdclk_post_plane_update() far too early. When pipes are active during the reprogramming the current spot only works for the cd2x divider update case, as that is synchronize to the pipe's vblank. Squashing and crawling are not synchronized in any way, so doing the programming while the pipes/planes are potentially still using the old hardware state could lead to underruns. Move the post plane reprgramming to a spot where we know that the pipes/planes have switched over the new hardware state. 
Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250218211913.27867-2-ville.syrjala@linux.intel.com Reviewed-by: Vinod Govindapillai (cherry picked from commit fb64f5568c0e0b5730733d70a012ae26b1a55815) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 41128469f12a..c9dcf2bbd4c7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7830,9 +7830,6 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_program_dpkgc_latency(state); - if (state->modeset) - intel_set_cdclk_post_plane_update(state); - intel_wait_for_vblank_workers(state); /* FIXME: We should call drm_atomic_helper_commit_hw_done() here @@ -7906,6 +7903,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_verify_planes(state); intel_sagv_post_plane_update(state); + if (state->modeset) + intel_set_cdclk_post_plane_update(state); intel_pmdemand_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); -- 2.51.0 From a8045e46c508b70fe4b30cc020fd0a2b0709b2e5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Thu, 6 Mar 2025 13:08:27 -0800 Subject: [PATCH 16/16] drm/i915: Increase I915_PARAM_MMAP_GTT_VERSION version to indicate support for partial mmaps MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Commit 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") was the last patch of several patches fixing multiple partial mmaps. But without a bump in I915_PARAM_MMAP_GTT_VERSION there is no clean way for UMD to know if it can do multiple partial mmaps. Fixes: 255fc1703e42 ("drm/i915/gem: Calculate object page offset for partial memory mapping") Cc: Andi Shyti Cc: Nirmoy Das Cc: Lionel Landwerlin Signed-off-by: José Roberto de Souza Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20250306210827.171147-1-jose.souza@intel.com (cherry picked from commit bfef148f3680e6b9d28e7fca46d9520f80c5e50e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 21274aa9bddd..c3dabb857960 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -164,6 +164,9 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) * 4 - Support multiple fault handlers per object depending on object's * backing storage (a.k.a. MMAP_OFFSET). * + * 5 - Support multiple partial mmaps(mmap part of BO + unmap a offset, multiple + * times with different size and offset). + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -191,7 +194,7 @@ static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 4; + return 5; } static inline struct i915_gtt_view -- 2.51.0
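
Usage note (editorial addition, not part of the series): userspace can read the bumped value through the I915_PARAM_MMAP_GTT_VERSION getparam, where 5 or higher indicates multiple partial mmaps of a BO are supported. A minimal sketch with libdrm, assuming an already-opened i915 render-node fd and libdrm's include paths:

  #include <xf86drm.h>
  #include <i915_drm.h>

  /* Returns the GTT mmap version reported by i915, or -1 on error. */
  static int i915_mmap_gtt_version(int fd)
  {
          int value = 0;
          struct drm_i915_getparam gp = {
                  .param = I915_PARAM_MMAP_GTT_VERSION,
                  .value = &value,
          };

          if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                  return -1;

          return value;   /* >= 5: multiple partial mmaps are supported */
  }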