From: David Woodhouse
Date: Mon, 17 Mar 2025 11:35:12 +0000 (+0000)
Subject: x86/kexec: Pass bitmask for CR4 bits to preserve from kernel C code
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=087845ebb269426862e5034faab73467ca5f8b2c;p=users%2Fdwmw2%2Flinux.git

x86/kexec: Pass bitmask for CR4 bits to preserve from kernel C code

The relocate_kernel() function masks out all but the PAE and LA57 bits
from the CR4 register... and also the MCE bit for TDX guests, where
disabling MCE may not be permitted.

The conditional disabling of CR4_MCE is currently implemented by the use
of ALTERNATIVE() in the assembler code. In order to allow a future patch
to move relocate_kernel() to a data section and avoid objtool having
opinions about it, eliminate the use of ALTERNATIVE() by passing the
bitmask in from C code.

Suggested-by: Josh Poimboeuf
Signed-off-by: David Woodhouse
---

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5081d0b9e2901..bd9fc22a6be2e 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -65,6 +65,7 @@ extern gate_desc kexec_debug_idt[];
 extern unsigned char kexec_debug_exc_vectors[];
 extern uint16_t kexec_debug_8250_port;
 extern unsigned long kexec_debug_8250_mmio32;
+extern uint32_t kexec_preserve_cr4_bits;
 #endif
 
 /*
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7abc7aa0261b2..016862d2b544b 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -353,6 +353,22 @@ int machine_kexec_prepare(struct kimage *image)
 	kexec_va_control_page = (unsigned long)control_page;
 	kexec_pa_table_page = (unsigned long)__pa(image->arch.pgd);
 
+	/*
+	 * The relocate_kernel assembly code sets CR4 to a subset of the bits
+	 * which were set during kernel runtime, including only:
+	 *  - physical address extension (which is always set in kernel)
+	 *  - 5-level paging (if it's enabled)
+	 *  - Machine check exception on TDX guests
+	 *
+	 * Clearing MCE may not be allowed in TDX guests, but it *should* be
+	 * cleared in the general case. Because of the conditional nature of
+	 * that, pass the set of bits in from the kernel for relocate_kernel
+	 * to do a simple 'andl' with them.
+	 */
+	kexec_preserve_cr4_bits = X86_CR4_PAE | X86_CR4_LA57;
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		kexec_preserve_cr4_bits |= X86_CR4_MCE;
+
 	if (image->type == KEXEC_TYPE_DEFAULT)
 		kexec_pa_swap_page = page_to_pfn(image->swap_page) << PAGE_SHIFT;
 
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4f8b7d3180256..576b7bbdd55eb 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -41,6 +41,7 @@ SYM_DATA(kexec_pa_swap_page, .quad 0)
 SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
 SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
 SYM_DATA(kexec_debug_8250_port, .word 0)
+SYM_DATA(kexec_preserve_cr4_bits, .long 0)
 
 	.balign 16
 SYM_DATA_START_LOCAL(kexec_debug_gdt)
@@ -183,17 +184,12 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
 	movq	%rax, %cr0
 
 	/*
-	 * Set cr4 to a known state:
-	 *  - physical address extension enabled
-	 *  - 5-level paging, if it was enabled before
-	 *  - Machine check exception on TDX guest, if it was enabled before.
-	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
+	 * Set CR4 to a known state, using the bitmask which was set in
+	 * machine_kexec_prepare().
 	 *
 	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
-	 * PAE is always set in the original CR4.
 	 */
-	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
-	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
+	andl	kexec_preserve_cr4_bits(%rip), %r13d
 	movq	%r13, %cr4
 
 	/* Flush the TLB (needed?) */
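
For illustration only, and not part of the patch: a minimal userspace C sketch
of the masking logic this change moves into C. The CR4 bit values mirror the
definitions in arch/x86/include/uapi/asm/processor-flags.h; the tdx_guest flag
stands in for cpu_feature_enabled(X86_FEATURE_TDX_GUEST), and the final AND is
the operation the single 'andl kexec_preserve_cr4_bits(%rip), %r13d' performs
in relocate_kernel().

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* CR4 bit positions, as in processor-flags.h */
	#define X86_CR4_PAE	(1u << 5)	/* physical address extension */
	#define X86_CR4_MCE	(1u << 6)	/* machine check enable */
	#define X86_CR4_LA57	(1u << 12)	/* 5-level paging */

	/* Equivalent of the mask machine_kexec_prepare() now computes. */
	static uint32_t preserve_cr4_bits(bool tdx_guest)
	{
		uint32_t bits = X86_CR4_PAE | X86_CR4_LA57;

		/* A TDX guest may not be permitted to clear CR4.MCE, so keep it. */
		if (tdx_guest)
			bits |= X86_CR4_MCE;

		return bits;
	}

	int main(void)
	{
		/* Hypothetical runtime CR4 value with PAE, MCE and LA57 all set. */
		uint32_t cr4 = X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57;

		/* The 'andl' done in identity_mapped, here in C. */
		printf("non-TDX kexec CR4:   %#x\n", cr4 & preserve_cr4_bits(false));
		printf("TDX guest kexec CR4: %#x\n", cr4 & preserve_cr4_bits(true));

		return 0;
	}

Moving the conditional into C leaves the assembly as a plain data-driven
'andl' with no ALTERNATIVE() patching, which is what lets a later patch move
relocate_kernel() out of .text without objtool complaints.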