.level  = 1,
 };
 
-void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
-{
-       vcpu_vtimer(vcpu)->active_cleared_last = false;
-}
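+/*
+ * The hardware timer interrupt handler below needs these helpers, which
+ * are defined later in this file.
+ */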
+static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
+                                struct arch_timer_context *timer_ctx);
 
 u64 kvm_phys_timer_read(void)
 {
                cancel_work_sync(work);
 }
 
-static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
 {
-       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
        /*
-        * We disable the timer in the world switch and let it be
-        * handled by kvm_timer_sync_hwstate(). Getting a timer
-        * interrupt at this point is a sure sign of some major
-        * breakage.
+        * When using a userspace irqchip with the architected timers, we must
+        * prevent continuously exiting from the guest, and therefore mask the
+        * physical interrupt by disabling it on the host interrupt controller
+        * when the virtual level is high, such that the guest can make
+        * forward progress.  Once we detect the output level being
+        * de-asserted, we unmask the interrupt again so that we exit from the
+        * guest when the timer fires.
         */
-       pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+       if (vtimer->irq.level)
+               disable_percpu_irq(host_vtimer_irq);
+       else
+               enable_percpu_irq(host_vtimer_irq, 0);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+       struct arch_timer_context *vtimer;
+
+       if (!vcpu) {
+               pr_warn_once("Spurious arch timer IRQ on CPU with no VCPU loaded\n");
+               return IRQ_NONE;
+       }
+       vtimer = vcpu_vtimer(vcpu);
+
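+       /*
+        * If the vtimer output is not already asserted, snapshot CNTV_CTL
+        * and assert the line if the timer can fire.
+        */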
+       if (!vtimer->irq.level) {
+               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+               if (kvm_timer_irq_can_fire(vtimer))
+                       kvm_timer_update_irq(vcpu, true, vtimer);
+       }
+
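+       /* Keep the host-side interrupt mask in sync for userspace irqchips */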
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               kvm_vtimer_update_mask_user(vcpu);
+
        return IRQ_HANDLED;
 }
 
 {
        int ret;
 
-       timer_ctx->active_cleared_last = false;
        timer_ctx->irq.level = new_level;
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
                                   timer_ctx->irq.level);
        soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx));
 }
 
-static void timer_save_state(struct kvm_vcpu *vcpu)
+static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       unsigned long flags;
+
+       local_irq_save(flags);
+
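+       /* Nothing to do if the state was already saved to memory */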
+       if (!vtimer->loaded)
+               goto out;
 
        if (timer->enabled) {
                vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
+
+       vtimer->loaded = false;
+out:
+       local_irq_restore(flags);
 }
 
 /*
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
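+       /*
+        * We are about to block; save the vtimer state to memory so that
+        * the expiry checks below operate on current values.
+        */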
+       vtimer_save_state(vcpu);
+
        /*
         * No need to schedule a background timer if any guest timer has
         * already expired, because kvm_vcpu_block will return before putting
        soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
-static void timer_restore_state(struct kvm_vcpu *vcpu)
+static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       unsigned long flags;
+
+       local_irq_save(flags);
+
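+       /* Nothing to do if the state is already loaded on the hardware */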
+       if (vtimer->loaded)
+               goto out;
 
        if (timer->enabled) {
                write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
                isb();
                write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
        }
+
+       vtimer->loaded = true;
+out:
+       local_irq_restore(flags);
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
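+       /* Undo the state save done by kvm_timer_schedule() */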
+       vtimer_restore_state(vcpu);
+
        soft_timer_cancel(&timer->bg_timer, &timer->expired);
 }
 
        kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        bool phys_active;
        int ret;
 
-       /*
-       * If we enter the guest with the virtual input level to the VGIC
-       * asserted, then we have already told the VGIC what we need to, and
-       * we don't need to exit from the guest until the guest deactivates
-       * the already injected interrupt, so therefore we should set the
-       * hardware active state to prevent unnecessary exits from the guest.
-       *
-       * Also, if we enter the guest with the virtual timer interrupt active,
-       * then it must be active on the physical distributor, because we set
-       * the HW bit and the guest must be able to deactivate the virtual and
-       * physical interrupt at the same time.
-       *
-       * Conversely, if the virtual input level is deasserted and the virtual
-       * interrupt is not active, then always clear the hardware active state
-       * to ensure that hardware interrupts from the timer triggers a guest
-       * exit.
-       */
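+       /*
+        * Set the hardware active state if the vtimer output is asserted or
+        * the guest still has the virtual interrupt active, so that a
+        * hardware timer interrupt does not cause a spurious exit from the
+        * guest; otherwise clear it so that a timer interrupt triggers an
+        * exit.
+        */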
        phys_active = vtimer->irq.level ||
-                       kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
-       /*
-        * We want to avoid hitting the (re)distributor as much as
-        * possible, as this is a potentially expensive MMIO access
-        * (not to mention locks in the irq layer), and a solution for
-        * this is to cache the "active" state in memory.
-        *
-        * Things to consider: we cannot cache an "active set" state,
-        * because the HW can change this behind our back (it becomes
-        * "clear" in the HW). We must then restrict the caching to
-        * the "clear" state.
-        *
-        * The cache is invalidated on:
-        * - vcpu put, indicating that the HW cannot be trusted to be
-        *   in a sane state on the next vcpu load,
-        * - any change in the interrupt state
-        *
-        * Usage conditions:
-        * - cached value is "active clear"
-        * - value to be programmed is "active clear"
-        */
-       if (vtimer->active_cleared_last && !phys_active)
-               return;
+                     kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
        ret = irq_set_irqchip_state(host_vtimer_irq,
                                    IRQCHIP_STATE_ACTIVE,
                                    phys_active);
        WARN_ON(ret);
+}
 
-       vtimer->active_cleared_last = !phys_active;
+static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+{
+       kvm_vtimer_update_mask_user(vcpu);
+}
+
+void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
+{
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+       if (unlikely(!timer->enabled))
+               return;
+
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               kvm_timer_vcpu_load_user(vcpu);
+       else
+               kvm_timer_vcpu_load_vgic(vcpu);
+
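+       /* Apply this VCPU's virtual counter offset before restoring state */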
+       set_cntvoff(vtimer->cntvoff);
+
+       vtimer_restore_state(vcpu);
+
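+       /*
+        * On VHE systems, disallow guest access to the EL1 physical timer
+        * for the duration of this VCPU run; kvm_timer_vcpu_put() restores
+        * host access.
+        */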
+       if (has_vhe())
+               disable_el1_phys_timer_access();
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
               ptimer->irq.level != plevel;
 }
 
-static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu)
-{
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-       /*
-        * To prevent continuously exiting from the guest, we mask the
-        * physical interrupt such that the guest can make forward progress.
-        * Once we detect the output level being deasserted, we unmask the
-        * interrupt again so that we exit from the guest when the timer
-        * fires.
-       */
-       if (vtimer->irq.level)
-               disable_percpu_irq(host_vtimer_irq);
-       else
-               enable_percpu_irq(host_vtimer_irq, 0);
-}
-
 /**
  * kvm_timer_flush_hwstate - prepare timers before running the vcpu
  * @vcpu: The vcpu pointer
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
        if (unlikely(!timer->enabled))
                return;
 
-       kvm_timer_update_state(vcpu);
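+       /*
+        * The physical timer is emulated with a soft timer, so compute its
+        * output level in software and update the line if it changed.
+        */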
+       if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+               kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 
        /* Set the background timer for the physical timer emulation. */
        phys_timer_emulate(vcpu, vcpu_ptimer(vcpu));
+}
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
-               kvm_timer_flush_hwstate_user(vcpu);
-       else
-               kvm_timer_flush_hwstate_vgic(vcpu);
+void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
+{
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
-       set_cntvoff(vtimer->cntvoff);
-       timer_restore_state(vcpu);
+       if (unlikely(!timer->enabled))
+               return;
+
+       if (has_vhe())
+               enable_el1_phys_timer_access();
+
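+       /*
+        * Save the vtimer state to memory; this also disables the hardware
+        * virtual timer so that it cannot fire while the VCPU is out.
+        */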
+       vtimer_save_state(vcpu);
+
+       /*
+        * The kernel may decide to run userspace after calling vcpu_put, so
+        * we reset cntvoff to 0 to ensure a consistent read between user
+        * accesses to the virtual counter and kernel access to the physical
+        * counter.
+        */
+       set_cntvoff(0);
+}
+
+static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
+               kvm_vtimer_update_mask_user(vcpu);
+               return;
+       }
+
+       /*
+        * If the guest disabled the timer without acking the interrupt, then
+        * we must make sure the physical and virtual active states are in
+        * sync by deactivating the physical interrupt, because otherwise we
+        * wouldn't see the next timer interrupt in the host.
+        */
+       if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
+               int ret;
+               ret = irq_set_irqchip_state(host_vtimer_irq,
+                                           IRQCHIP_STATE_ACTIVE,
+                                           false);
+               WARN_ON(ret);
+       }
 }
 
 /**
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
        /*
         * This is to cancel the background timer for the physical timer
         */
        soft_timer_cancel(&timer->phys_timer, NULL);
 
-       timer_save_state(vcpu);
-       set_cntvoff(0);
-
        /*
-        * The guest could have modified the timer registers or the timer
-        * could have expired, update the timer state.
+        * If we entered the guest with the vtimer output asserted we have to
+        * check if the guest has modified the timer so that we should lower
+        * the line at this point.
         */
-       kvm_timer_update_state(vcpu);
+       if (vtimer->irq.level) {
+               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+               if (!kvm_timer_should_fire(vtimer)) {
+                       kvm_timer_update_irq(vcpu, false, vtimer);
+                       unmask_vtimer_irq(vcpu);
+               }
+       }
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)