www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: arm64: Work around x1e's CNTVOFF_EL2 bogosity
Author:    Marc Zyngier <maz@kernel.org>
           Tue, 17 Dec 2024 14:23:19 +0000 (14:23 +0000)
Committer: Marc Zyngier <maz@kernel.org>
           Thu, 2 Jan 2025 19:19:10 +0000 (19:19 +0000)
It appears that on Qualcomm's x1e CPU, CNTVOFF_EL2 doesn't really
work, especially with HCR_EL2.E2H=1.

A non-zero offset results in a screaming virtual timer interrupt,
to the tune of a few 100k interrupts per second on a 4 vcpu VM.
This is also evidenced by this CPU's inability to correctly run
any of the timer selftests.

The only case this doesn't break is when this register is set to 0,
which breaks VM migration.

When HCR_EL2.E2H=0, the timer seems to behave normally, and does
not result in an interrupt storm.

As a workaround, use the fact that this CPU implements FEAT_ECV,
and trap all accesses to the virtual timer and counter, keeping
CNTVOFF_EL2 set to zero, and emulate accesses to CVAL/TVAL/CTL
and the counter itself, fixing up the timer to account for the
missing offset.

And if you think this is disgusting, you'd probably be right.

Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241217142321.763801-12-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
arch/arm64/include/asm/cputype.h
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/image-vars.h
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/hyp/nvhe/timer-sr.c
arch/arm64/kvm/sys_regs.c
arch/arm64/tools/cpucaps
include/kvm/arm_arch_timer.h

index 488f8e75134959f5263a61230dbde5192e8d4a58..6f3f4142e214f7fd420ac67d327bfa4166fdaf33 100644 (file)
 #define QCOM_CPU_PART_KRYO_3XX_SILVER  0x803
 #define QCOM_CPU_PART_KRYO_4XX_GOLD    0x804
 #define QCOM_CPU_PART_KRYO_4XX_SILVER  0x805
+#define QCOM_CPU_PART_ORYON_X1         0x001
 
 #define NVIDIA_CPU_PART_DENVER         0x003
 #define NVIDIA_CPU_PART_CARMEL         0x004
 #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
 #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
 #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
 #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
index a78f247029aec3be4e7dfdc31f9a0d3c4346f49f..7ce5558628951860d196e58f5bd9e9b5629a1014 100644 (file)
@@ -786,6 +786,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
        },
 #endif
+       {
+               .desc = "Broken CNTVOFF_EL2",
+               .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+               ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+                                       MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+                                       {}
+                               })),
+       },
        {
        }
 };
index 8f5422ed1b758ee7c9e094e15dfd021bb84b03e4..ef3a69cc398e5182e2b81478d8f2bf41b093c214 100644 (file)
@@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
 
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
+
 /* EL2 exception handling */
 KVM_NVHE_ALIAS(__start___kvm_ex_table);
 KVM_NVHE_ALIAS(__stop___kvm_ex_table);
index e5951e6eaf2360867518cc68a61895b48931eeca..d3d243366536c159e49a843d4b98357837d8f9a0 100644 (file)
@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
 static u32 host_ptimer_irq_flags;
 
 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
 
 static const u8 default_ppi[] = {
        [TIMER_PTIMER]  = 30,
@@ -519,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
        case TIMER_VTIMER:
        case TIMER_HVTIMER:
                timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
-               timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
+               cval = read_sysreg_el0(SYS_CNTV_CVAL);
+
+               if (has_broken_cntvoff())
+                       cval -= timer_get_offset(ctx);
+
+               timer_set_cval(ctx, cval);
 
                /* Disable the timer */
                write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -624,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 
        case TIMER_VTIMER:
        case TIMER_HVTIMER:
-               set_cntvoff(timer_get_offset(ctx));
-               write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
+               cval = timer_get_cval(ctx);
+               offset = timer_get_offset(ctx);
+               if (has_broken_cntvoff()) {
+                       set_cntvoff(0);
+                       cval += offset;
+               } else {
+                       set_cntvoff(offset);
+               }
+               write_sysreg_el0(cval, SYS_CNTV_CVAL);
                isb();
                write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
                break;
@@ -820,6 +833,13 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
        if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
                tpt = tpc = true;
 
+       /*
+        * For the poor sods that could not correctly subtract one value
+        * from another, trap the full virtual timer and counter.
+        */
+       if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
+               tvt = tvc = true;
+
        /*
         * Apply the enable bits that the guest hypervisor has requested for
         * its own guest. We can only add traps that wouldn't have been set
@@ -1450,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
        return 0;
 }
 
+/*
+ * Detect whether the broken-CNTVOFF_EL2 erratum workaround must be
+ * engaged on this system, and if so flip the broken_cntvoff_key static
+ * key so the timer save/restore and trap-configuration paths start
+ * compensating for the unusable offset register.
+ */
+static void kvm_timer_handle_errata(void)
+{
+       u64 mmfr0, mmfr1, mmfr4;
+
+       /*
+        * CNTVOFF_EL2 is broken on some implementations. For those, we trap
+        * all virtual timer/counter accesses, requiring FEAT_ECV.
+        *
+        * However, a hypervisor supporting nesting is likely to mitigate the
+        * erratum at L0, and not require other levels to mitigate it (which
+        * would otherwise be a terrible performance sink due to trap
+        * amplification).
+        *
+        * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
+        * and that NV is likely not to (because of limitations of the
+        * architecture), only enable the workaround when FEAT_VHE and
+        * FEAT_E2H0 are both detected. Time will tell if this actually holds.
+        */
+       mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+       mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+       mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
+       /*
+        * NOTE(review): E2H0 == 0 in ID_AA64MMFR4_EL1 is taken here to mean
+        * "FEAT_E2H0 present" (the not-implemented encodings are non-zero) —
+        * confirm against the Arm ARM field definition.
+        */
+       if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1)          &&
+           !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4)       &&
+           SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0)         &&
+           (has_vhe() || has_hvhe())                           &&
+           cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
+               static_branch_enable(&broken_cntvoff_key);
+               kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
+       }
+}
+
 int __init kvm_timer_hyp_init(bool has_gic)
 {
        struct arch_timer_kvm_info *info;
@@ -1518,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
                goto out_free_vtimer_irq;
        }
 
+       kvm_timer_handle_errata();
        return 0;
 
 out_free_ptimer_irq:
index 3aaab20ae5b4797182de112d1e641112c3f799c7..ff176f4ce7debf557b5d2d0d4e05af8853003c69 100644 (file)
@@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
  */
 void __timer_disable_traps(struct kvm_vcpu *vcpu)
 {
-       u64 val, shift = 0;
+       u64 set, clr, shift = 0;
 
        if (has_hvhe())
                shift = 10;
 
        /* Allow physical timer/counter access for the host */
-       val = read_sysreg(cnthctl_el2);
-       val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
-       write_sysreg(val, cnthctl_el2);
+       set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
+       clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
+       sysreg_clear_set(cnthctl_el2, clr, set);
 }
 
 /*
@@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
                set <<= 10;
        }
 
+       /*
+        * Trap the virtual counter/timer if we have a broken cntvoff
+        * implementation.
+        */
+       if (has_broken_cntvoff())
+               set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
        sysreg_clear_set(cnthctl_el2, clr, set);
 }
index 986e63d4f9faa990024ea105e75c897bd0c19503..d161d6c05707af32dfaf3a70c0a180cd329d7cda 100644 (file)
@@ -1721,7 +1721,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
                if (!vcpu_has_ptrauth(vcpu))
                        val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
                                 ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
-               if (!cpus_have_final_cap(ARM64_HAS_WFXT))
+               if (!cpus_have_final_cap(ARM64_HAS_WFXT) ||
+                   has_broken_cntvoff())
                        val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
                break;
        case SYS_ID_AA64MMFR2_EL1:
index eb17f59e543c49b7f5f254f9aa73dd4e5c94de6e..1e65f2fb45bd17fc994dca7612ebcb1178bce60b 100644 (file)
@@ -105,6 +105,7 @@ WORKAROUND_CLEAN_CACHE
 WORKAROUND_DEVICE_LOAD_ACQUIRE
 WORKAROUND_NVIDIA_CARMEL_CNP
 WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_QCOM_ORYON_CNTVOFF
 WORKAROUND_REPEAT_TLBI
 WORKAROUND_SPECULATIVE_AT
 WORKAROUND_SPECULATIVE_SSBS
index c1ba31fab6f52f109c6df60fe275ae5cbea3c3cc..681cf0c8b9df4e495c31ec62b00629ecbebac76c 100644 (file)
@@ -151,6 +151,13 @@ void kvm_timer_cpu_down(void);
 /* CNTKCTL_EL1 valid bits as of DDI0487J.a */
 #define CNTKCTL_VALID_BITS     (BIT(17) | GENMASK_ULL(9, 0))
 
+/* Set at init time when CNTVOFF_EL2 is unusable on this system. */
+DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key);
+
+/*
+ * True when the broken-CNTVOFF_EL2 erratum workaround is active, i.e.
+ * the virtual timer/counter must be trapped and the offset applied in
+ * software rather than programmed into CNTVOFF_EL2.
+ */
+static inline bool has_broken_cntvoff(void)
+{
+       return static_branch_unlikely(&broken_cntvoff_key);
+}
+
 static inline bool has_cntpoff(void)
 {
        return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));