From d495f942f40aa412f8d4d65951152648cfa09903 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Apr 2022 12:30:13 +0200 Subject: [PATCH] KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT When KVM_EXIT_SYSTEM_EVENT was introduced, it included a flags member that at the time was unused. Unfortunately this extensibility mechanism has several issues: - x86 is not writing the member, so it would not be possible to use it on x86 except for new events - the member is not aligned to 64 bits, so the definition of the uAPI struct is incorrect for 32- on 64-bit userspace. This is a problem for RISC-V, which supports CONFIG_KVM_COMPAT, but fortunately usage of flags was only introduced in 5.18. Since padding has to be introduced, place a new field in there that tells if the flags field is valid. To allow further extensibility, in fact, change flags to an array of 16 values, and store how many of the values are valid. The availability of the new ndata field is tied to a system capability; all architectures are changed to fill in the field. To avoid breaking compilation of userspace that was using the flags field, provide a userspace-only union to overlap flags with data[0]. The new field is placed at the same offset for both 32- and 64-bit userspace. Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Gonda Cc: Sean Christopherson Signed-off-by: Paolo Bonzini Reported-by: kernel test robot Message-Id: <20220422103013.34832-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 24 +++++++++++++++++------- arch/arm64/kvm/psci.c | 3 ++- arch/riscv/kvm/vcpu_sbi.c | 5 +++-- arch/x86/kvm/x86.c | 2 ++ include/uapi/linux/kvm.h | 10 +++++++++- virt/kvm/kvm_main.c | 1 + 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 85c7abc51af52..4a900cdbc62e9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5986,16 +5986,16 @@ should put the acknowledged interrupt vector into the 'epr' field. #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + __u64 data[16]; } system_event; If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered a system-level event using some architecture specific mechanism (hypercall or some special instruction). In case of ARM64, this is triggered using -HVC instruction based PSCI call from the vcpu. The 'type' field describes -the system-level event type. The 'flags' field describes architecture -specific flags for the system-level event. +HVC instruction based PSCI call from the vcpu. +The 'type' field describes the system-level event type. Valid values for 'type' are: - KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the @@ -6010,10 +6010,20 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. -Valid flags are: +If KVM_CAP_SYSTEM_EVENT_DATA is present, the 'data' field can contain +architecture specific information for the system-level event. Only +the first `ndata` items (possibly zero) of the data array are valid. - - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued - a SYSTEM_RESET2 call according to v1.1 of the PSCI specification. + - for arm64, data[0] is set to KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 if + the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI + specification. + + - for RISC-V, data[0] is set to the value of the second argument of the + ``sbi_system_reset`` call. + +Previous versions of Linux defined a `flags` member in this struct. The +field is now aliased to `data[0]`. Userspace can assume that it is only +written if ndata is greater than 0. :: diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index baac2b405f235..708d80e8e60dd 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -181,7 +181,8 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; - vcpu->run->system_event.flags = flags; + vcpu->run->system_event.ndata = 1; + vcpu->run->system_event.data[0] = flags; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a09ecb97b8908..d45e7da3f0d32 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -83,7 +83,7 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run, - u32 type, u64 flags) + u32 type, u64 reason) { unsigned long i; struct kvm_vcpu *tmp; @@ -94,7 +94,8 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, memset(&run->system_event, 0, sizeof(run->system_event)); run->system_event.type = type; - run->system_event.flags = flags; + run->system_event.ndata = 1; + run->system_event.data[0] = reason; run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 43174a8d94973..07d789b1d3665 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10015,12 +10015,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; + vcpu->run->system_event.ndata = 0; r = 0; goto out; } if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + vcpu->run->system_event.ndata = 0; r = 0; goto out; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 91a6fe4e02c08..6a184d260c7f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,7 +445,13 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dfb7dabdbc63d..ac57fc2c935fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4333,6 +4333,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return 0; #endif case KVM_CAP_BINARY_STATS_FD: + case KVM_CAP_SYSTEM_EVENT_DATA: return 1; default: break; -- 2.51.0