x86/kvm: Disable all PV features on crash
author    Vitaly Kuznetsov <vkuznets@redhat.com>
          Mon, 31 May 2021 14:03:47 +0000 (16:03 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 10 Jun 2021 11:37:16 +0000 (13:37 +0200)
commit 3d6b84132d2a57b5a74100f6923a8feb679ac2ce upstream.

The crash shutdown handler only disables kvmclock and steal time; other PV
features remain active, so we risk corrupting memory or getting side effects
in the kdump kernel. Move the crash handler to kvm.c and unify it with the
CPU offline path.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/include/asm/kvm_para.h
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
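
For orientation, here is a condensed sketch of the teardown path as it ends up
after this patch, assembled from the arch/x86/kernel/kvm.c hunks below (helper
bodies and surrounding context elided, so this is not a literal excerpt): CPU
offline, suspend and the new crash handler all funnel through
kvm_guest_cpu_offline(), which unregisters every PV feature instead of only
kvmclock and steal time.

/* Condensed from the kvm.c hunks below; not a literal excerpt. */
static void kvm_guest_cpu_offline(bool shutdown)
{
	kvm_disable_steal_time();                     /* MSR_KVM_STEAL_TIME <- 0 */
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);         /* unregister PV EOI */
	kvm_pv_disable_apf();                         /* unregister async page faults */
	if (!shutdown)
		apf_task_wake_all();                  /* skipped on crash/shutdown */
	kvmclock_disable();                           /* unregister kvmclock (system time MSR <- 0) */
}

#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
	kvm_guest_cpu_offline(true);                  /* tear down all PV features */
	native_machine_crash_shutdown(regs);
}
#endif

On the crash path, apf_task_wake_all() is skipped and the handler chains to
native_machine_crash_shutdown(), replacing the kvmclock-only crash handler
that is removed from kvmclock.c below.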

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index a617fd3600235d9b8f3b110eaf21cbbefcf7c7ef..f913f62eb6c3512478ab622054471aa9db6b9454 100644 (file)
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -91,7 +91,6 @@ unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
-extern void kvm_disable_steal_time(void);
 void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
@@ -125,11 +124,6 @@ static inline u32 kvm_read_and_reset_pf_reason(void)
 {
        return 0;
 }
-
-static inline void kvm_disable_steal_time(void)
-{
-       return;
-}
 #endif
 
 #endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d6f04d32dec0a6cef8f93fbc4fcec5b38db7b4a0..6ff2c7cac4c463e991b56e430c5d9cd84f863a1c 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
 #include <asm/tlb.h>
+#include <asm/reboot.h>
 
 static int kvmapf = 1;
 
@@ -352,6 +353,14 @@ static void kvm_pv_disable_apf(void)
               smp_processor_id());
 }
 
+static void kvm_disable_steal_time(void)
+{
+       if (!has_steal_clock)
+               return;
+
+       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
 static void kvm_pv_guest_cpu_reboot(void *unused)
 {
        /*
@@ -394,14 +403,6 @@ static u64 kvm_steal_clock(int cpu)
        return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-       if (!has_steal_clock)
-               return;
-
-       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
        early_set_memory_decrypted((unsigned long) ptr, size);
@@ -429,13 +430,14 @@ static void __init sev_map_percpu_data(void)
        }
 }
 
-static void kvm_guest_cpu_offline(void)
+static void kvm_guest_cpu_offline(bool shutdown)
 {
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
        kvm_pv_disable_apf();
-       apf_task_wake_all();
+       if (!shutdown)
+               apf_task_wake_all();
        kvmclock_disable();
 }
 
@@ -573,7 +575,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
        unsigned long flags;
 
        local_irq_save(flags);
-       kvm_guest_cpu_offline();
+       kvm_guest_cpu_offline(false);
        local_irq_restore(flags);
        return 0;
 }
@@ -582,7 +584,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 
 static int kvm_suspend(void)
 {
-       kvm_guest_cpu_offline();
+       kvm_guest_cpu_offline(false);
 
        return 0;
 }
@@ -597,6 +599,20 @@ static struct syscore_ops kvm_syscore_ops = {
        .resume         = kvm_resume,
 };
 
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+       kvm_guest_cpu_offline(true);
+       native_machine_crash_shutdown(regs);
+}
+#endif
+
 static void __init kvm_apf_trap_init(void)
 {
        update_intr_gate(X86_TRAP_PF, async_page_fault);
@@ -673,6 +689,10 @@ static void __init kvm_guest_init(void)
        kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+       machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
        register_syscore_ops(&kvm_syscore_ops);
 
        /*
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index bd3962953f78a55d6ae486ad4b7fc489a28c11f9..4a0802af2e3e0acd8fdcd61669d54c5937fec012 100644 (file)
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -197,23 +196,6 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-       native_write_msr(msr_kvm_system_time, 0, 0);
-       kvm_disable_steal_time();
-       native_machine_crash_shutdown(regs);
-}
-#endif
-
 void kvmclock_disable(void)
 {
        native_write_msr(msr_kvm_system_time, 0, 0);
@@ -344,9 +326,6 @@ void __init kvmclock_init(void)
 #endif
        x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
        x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-#ifdef CONFIG_KEXEC_CORE
-       machine_ops.crash_shutdown  = kvm_crash_shutdown;
-#endif
        kvm_get_preset_lpj();
 
        /*