KVM: arm64: Add PSCI v1.3 SYSTEM_OFF2 function for hibernation
author David Woodhouse <dwmw@amazon.co.uk>
Mon, 11 Mar 2024 08:02:16 +0000 (08:02 +0000)
committer David Woodhouse <dwmw@amazon.co.uk>
Tue, 24 Sep 2024 15:59:02 +0000 (16:59 +0100)
The PSCI v1.3 specification (alpha) adds support for a SYSTEM_OFF2 function
which is analogous to ACPI S4 state. This will allow hosting environments
to determine that a guest is hibernated rather than just powered off, and
ensure that they preserve the virtual environment appropriately to allow
the guest to resume safely (or bump the hardware_signature in the FACS to
trigger a clean reboot instead).

The beta version will be changed to say that PSCI_FEATURES returns a bit
mask of the supported hibernate types, which is implemented here.

Although this new feature is inflicted unconditionally on unsuspecting
userspace, it ought to be mostly OK because it still results in the same
KVM_SYSTEM_EVENT_SHUTDOWN event, just with a new flag which hopefully
won't cause userspace to get unhappy.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Documentation/virt/kvm/api.rst
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kvm/psci.c

index b3be87489108e8a3ebac587361881632f53154f5..2918898b70476f1df2e429110d2b678886ea24e7 100644 (file)
@@ -6840,6 +6840,10 @@ the first `ndata` items (possibly zero) of the data array are valid.
    the guest issued a SYSTEM_RESET2 call according to v1.1 of the PSCI
    specification.
 
+ - for arm64, data[0] is set to KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2
+   if the guest issued a SYSTEM_OFF2 call according to v1.3 of the PSCI
+   specification.
+
  - for RISC-V, data[0] is set to the value of the second argument of the
    ``sbi_system_reset`` call.
 
@@ -6873,6 +6877,13 @@ either:
  - Deny the guest request to suspend the VM. See ARM DEN0022D.b 5.19.2
    "Caller responsibilities" for possible return values.
 
+Hibernation using the PSCI SYSTEM_OFF2 call is enabled when PSCI v1.3
+is enabled. If a guest invokes the PSCI SYSTEM_OFF2 function, KVM will
+exit to userspace with the KVM_SYSTEM_EVENT_SHUTDOWN event type and with
+data[0] set to KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2. The only
+supported hibernate type for the SYSTEM_OFF2 function is HIBERNATE_OFF
+(0x0).
+
 ::
 
                /* KVM_EXIT_IOAPIC_EOI */
index 964df31da9751c96c984358c66d6f73c8519b2e7..66736ff04011e0fa9fcfb74154d5613bf4ee89f7 100644 (file)
@@ -484,6 +484,12 @@ enum {
  */
 #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2        (1ULL << 0)
 
+/*
+ * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call.
+ * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN.
+ */
+#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2       (1ULL << 0)
+
 /* run->fail_entry.hardware_entry_failure_reason codes. */
 #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED    (1ULL << 0)
 
index 1f69b667332b2ba9f9560dd6cfec0d8ce580104e..fd0f82464f7d4cdc7368dfeecb508bdcd738613f 100644 (file)
@@ -194,6 +194,12 @@ static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
        kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0);
 }
 
+static void kvm_psci_system_off2(struct kvm_vcpu *vcpu)
+{
+       kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN,
+                                KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2);
+}
+
 static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
 {
        kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0);
@@ -358,6 +364,11 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
                        if (minor >= 1)
                                val = 0;
                        break;
+               case PSCI_1_3_FN_SYSTEM_OFF2:
+               case PSCI_1_3_FN64_SYSTEM_OFF2:
+                       if (minor >= 3)
+                               val = BIT(PSCI_1_3_HIBERNATE_TYPE_OFF);
+                       break;
                }
                break;
        case PSCI_1_0_FN_SYSTEM_SUSPEND:
@@ -392,6 +403,32 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
                        break;
                }
                break;
+       case PSCI_1_3_FN_SYSTEM_OFF2:
+               kvm_psci_narrow_to_32bit(vcpu);
+               fallthrough;
+       case PSCI_1_3_FN64_SYSTEM_OFF2:
+               if (minor < 3)
+                       break;
+
+               arg = smccc_get_arg1(vcpu);
+               if (arg != PSCI_1_3_HIBERNATE_TYPE_OFF) {
+                       val = PSCI_RET_INVALID_PARAMS;
+                       break;
+               }
+               kvm_psci_system_off2(vcpu);
+               /*
+                * We shouldn't be going back to guest VCPU after
+                * receiving SYSTEM_OFF2 request.
+                *
+                * If user space accidentally/deliberately resumes
+                * guest VCPU after SYSTEM_OFF2 request then guest
+                * VCPU should see internal failure from PSCI return
+                * value. To achieve this, we preload r0 (or x0) with
+                * PSCI return value INTERNAL_FAILURE.
+                */
+               val = PSCI_RET_INTERNAL_FAILURE;
+               ret = 0;
+               break;
        default:
                return kvm_psci_0_2_call(vcpu);
        }